refactor: update billing calculations to use quota per unit

- Adjusted billing calculations in tests and core logic to incorporate a new QuotaPerUnit field.
- Modified estimated quota calculations to reflect changes in tiered billing logic.
- Updated related tests to ensure accuracy with the new quota calculations.
- Enhanced dynamic pricing components to align with updated billing expressions.
2026-03-16 20:11:55 +08:00
parent f0589cc478
commit f6c0852da9
10 changed files with 160 additions and 132 deletions
@@ -19,6 +19,8 @@ const cacheExpr = `tier("default", p * 2 + c * 10 + cr * 0.2 + cc * 2.5 + cc1h *
 // Expression with request probes
 const probeExpr = `param("service_tier") == "fast" ? tier("fast", p * 4 + c * 20) : tier("normal", p * 2 + c * 10)`

+const testQuotaPerUnit = 500_000.0
+
 func makeSnapshot(expr string, groupRatio float64, estPrompt, estCompletion int) *billingexpr.BillingSnapshot {
 	return &billingexpr.BillingSnapshot{
 		BillingMode:               "tiered_expr",
@@ -27,14 +29,16 @@ func makeSnapshot(expr string, groupRatio float64, estPrompt, estCompletion int)
 		GroupRatio:                groupRatio,
 		EstimatedPromptTokens:     estPrompt,
 		EstimatedCompletionTokens: estCompletion,
+		QuotaPerUnit:              testQuotaPerUnit,
 	}
 }

 func makeRelayInfo(expr string, groupRatio float64, estPrompt, estCompletion int) *relaycommon.RelayInfo {
 	snap := makeSnapshot(expr, groupRatio, estPrompt, estCompletion)
 	cost, trace, _ := billingexpr.RunExpr(expr, billingexpr.TokenParams{P: float64(estPrompt), C: float64(estCompletion)})
-	snap.EstimatedQuotaBeforeGroup = cost
-	snap.EstimatedQuotaAfterGroup = billingexpr.QuotaRound(cost * groupRatio)
+	quotaBeforeGroup := cost / 1_000_000 * testQuotaPerUnit
+	snap.EstimatedQuotaBeforeGroup = quotaBeforeGroup
+	snap.EstimatedQuotaAfterGroup = billingexpr.QuotaRound(quotaBeforeGroup * groupRatio)
 	snap.EstimatedTier = trace.MatchedTier
 	return &relaycommon.RelayInfo{
 		TieredBillingSnapshot: snap,
@@ -56,7 +60,8 @@ func TestTryTieredSettleUsesFrozenRequestInput(t *testing.T) {
 			GroupRatio:                1.0,
 			EstimatedPromptTokens:     100,
 			EstimatedCompletionTokens: 0,
-			EstimatedQuotaAfterGroup:  100,
+			EstimatedQuotaAfterGroup:  50,
+			QuotaPerUnit:              testQuotaPerUnit,
 		},
 		BillingRequestInput: &billingexpr.RequestInput{
 			Body: []byte(`{"service_tier":"fast"}`),
@@ -67,8 +72,9 @@ func TestTryTieredSettleUsesFrozenRequestInput(t *testing.T) {
 	if !ok {
 		t.Fatal("expected tiered settle to apply")
 	}
-	if quota != 200 {
-		t.Fatalf("quota = %d, want 200", quota)
+	// fast: p*2 = 200; quota = 200 / 1M * 500K = 100
+	if quota != 100 {
+		t.Fatalf("quota = %d, want 100", quota)
 	}
 	if result == nil || result.MatchedTier != "fast" {
 		t.Fatalf("matched tier = %v, want fast", result)
@@ -111,9 +117,9 @@ func TestTryTieredSettle_PreConsumeMatchesPostConsume(t *testing.T) {
 	if !ok {
 		t.Fatal("expected tiered settle")
 	}
-	// p*2 + c*10 = 2000 + 5000 = 7000
-	if quota != 7000 {
-		t.Fatalf("quota = %d, want 7000", quota)
+	// p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500
+	if quota != 3500 {
+		t.Fatalf("quota = %d, want 3500", quota)
 	}
 	if quota != info.FinalPreConsumedQuota {
 		t.Fatalf("pre-consume %d != post-consume %d", info.FinalPreConsumedQuota, quota)
@@ -122,7 +128,7 @@ func TestTryTieredSettle_PreConsumeMatchesPostConsume(t *testing.T) {

 func TestTryTieredSettle_PostConsumeOverPreConsume(t *testing.T) {
 	info := makeRelayInfo(flatExpr, 1.0, 1000, 500)
-	preConsumed := info.FinalPreConsumedQuota // 7000
+	preConsumed := info.FinalPreConsumedQuota // 3500

 	// Actual usage is higher than estimated
 	params := billingexpr.TokenParams{P: 2000, C: 1000}
@@ -130,9 +136,9 @@ func TestTryTieredSettle_PostConsumeOverPreConsume(t *testing.T) {
 	if !ok {
 		t.Fatal("expected tiered settle")
 	}
-	// p*2 + c*10 = 4000 + 10000 = 14000
-	if quota != 14000 {
-		t.Fatalf("quota = %d, want 14000", quota)
+	// p*2 + c*10 = 14000; quota = 14000 / 1M * 500K = 7000
+	if quota != 7000 {
+		t.Fatalf("quota = %d, want 7000", quota)
 	}
 	if quota <= preConsumed {
 		t.Fatalf("expected supplement: actual %d should > pre-consumed %d", quota, preConsumed)
@@ -141,7 +147,7 @@ func TestTryTieredSettle_PostConsumeOverPreConsume(t *testing.T) {

 func TestTryTieredSettle_PostConsumeUnderPreConsume(t *testing.T) {
 	info := makeRelayInfo(flatExpr, 1.0, 1000, 500)
-	preConsumed := info.FinalPreConsumedQuota // 7000
+	preConsumed := info.FinalPreConsumedQuota // 3500

 	// Actual usage is lower than estimated
 	params := billingexpr.TokenParams{P: 100, C: 50}
@@ -149,9 +155,9 @@ func TestTryTieredSettle_PostConsumeUnderPreConsume(t *testing.T) {
 	if !ok {
 		t.Fatal("expected tiered settle")
 	}
-	// p*2 + c*10 = 200 + 500 = 700
-	if quota != 700 {
-		t.Fatalf("quota = %d, want 700", quota)
+	// p*2 + c*10 = 700; quota = 700 / 1M * 500K = 350
+	if quota != 350 {
+		t.Fatalf("quota = %d, want 350", quota)
 	}
 	if quota >= preConsumed {
 		t.Fatalf("expected refund: actual %d should < pre-consumed %d", quota, preConsumed)
@@ -170,9 +176,9 @@ func TestTryTieredSettle_ExactBoundary(t *testing.T) {
 	if !ok {
 		t.Fatal("expected tiered settle")
 	}
-	// standard: p*1.5 + c*7.5 = 300000 + 7500 = 307500
-	if quota != 307500 {
-		t.Fatalf("quota = %d, want 307500", quota)
+	// standard: p*1.5 + c*7.5 = 307500; quota = 307500 / 1M * 500K = 153750
+	if quota != 153750 {
+		t.Fatalf("quota = %d, want 153750", quota)
 	}
 	if result.MatchedTier != "standard" {
 		t.Fatalf("tier = %s, want standard", result.MatchedTier)
@@ -187,9 +193,9 @@ func TestTryTieredSettle_BoundaryPlusOne(t *testing.T) {
 	if !ok {
 		t.Fatal("expected tiered settle")
 	}
-	// long_context: p*3 + c*11.25 = 600003 + 11250 = 611253
-	if quota != 611253 {
-		t.Fatalf("quota = %d, want 611253", quota)
+	// long_context: p*3 + c*11.25 = 611253; quota = round(611253 / 1M * 500K) = 305627
+	if quota != 305627 {
+		t.Fatalf("quota = %d, want 305627", quota)
 	}
 	if result.MatchedTier != "long_context" {
 		t.Fatalf("tier = %s, want long_context", result.MatchedTier)
@@ -221,9 +227,9 @@ func TestTryTieredSettle_HugeTokens(t *testing.T) {
 	if !ok {
 		t.Fatal("expected tiered settle")
 	}
-	// p*2 + c*10 = 20000000 + 50000000 = 70000000
-	if quota != 70000000 {
-		t.Fatalf("quota = %d, want 70000000", quota)
+	// p*2 + c*10 = 70000000; quota = 70000000 / 1M * 500K = 35000000
+	if quota != 35000000 {
+		t.Fatalf("quota = %d, want 35000000", quota)
 	}
 }

@@ -235,23 +241,23 @@ func TestTryTieredSettle_CacheTokensAffectSettlement(t *testing.T) {
 	if !ok1 {
 		t.Fatal("expected tiered settle")
 	}
-	// p*2 + c*10 + cr*0.2 + cc*2.5 + cc1h*4 = 2000 + 5000 + 0 + 0 + 0 = 7000
+	// p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500

 	// With cache tokens
 	ok2, quota2, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500, CR: 10000, CC: 5000, CC1h: 2000})
 	if !ok2 {
 		t.Fatal("expected tiered settle")
 	}
-	// 2000 + 5000 + 10000*0.2 + 5000*2.5 + 2000*4 = 2000 + 5000 + 2000 + 12500 + 8000 = 29500
+	// 2000 + 5000 + 2000 + 12500 + 8000 = 29500; quota = 29500 / 1M * 500K = 14750

 	if quota2 <= quota1 {
 		t.Fatalf("cache tokens should increase quota: without=%d, with=%d", quota1, quota2)
 	}
-	if quota1 != 7000 {
-		t.Fatalf("no-cache quota = %d, want 7000", quota1)
+	if quota1 != 3500 {
+		t.Fatalf("no-cache quota = %d, want 3500", quota1)
 	}
-	if quota2 != 29500 {
-		t.Fatalf("cache quota = %d, want 29500", quota2)
+	if quota2 != 14750 {
+		t.Fatalf("cache quota = %d, want 14750", quota2)
 	}
 }

@@ -269,9 +275,9 @@ func TestTryTieredSettle_RequestProbeInfluencesBilling(t *testing.T) {
 	if !ok {
 		t.Fatal("expected tiered settle")
 	}
-	// fast: p*4 + c*20 = 4000 + 10000 = 14000
-	if quota != 14000 {
-		t.Fatalf("quota = %d, want 14000", quota)
+	// fast: p*4 + c*20 = 14000; quota = 14000 / 1M * 500K = 7000
+	if quota != 7000 {
+		t.Fatalf("quota = %d, want 7000", quota)
 	}
 	if result.MatchedTier != "fast" {
 		t.Fatalf("tier = %s, want fast", result.MatchedTier)
@@ -286,9 +292,9 @@ func TestTryTieredSettle_NoRequestInput_FallsBackToDefault(t *testing.T) {
 	if !ok {
 		t.Fatal("expected tiered settle")
 	}
-	// normal: p*2 + c*10 = 2000 + 5000 = 7000
-	if quota != 7000 {
-		t.Fatalf("quota = %d, want 7000", quota)
+	// normal: p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500
+	if quota != 3500 {
+		t.Fatalf("quota = %d, want 3500", quota)
 	}
 	if result.MatchedTier != "normal" {
 		t.Fatalf("tier = %s, want normal", result.MatchedTier)
@@ -306,9 +312,9 @@ func TestTryTieredSettle_GroupRatioScaling(t *testing.T) {
 	if !ok {
 		t.Fatal("expected tiered settle")
 	}
-	// cost = 7000, after group = round(7000 * 1.5) = 10500
-	if quota != 10500 {
-		t.Fatalf("quota = %d, want 10500", quota)
+	// exprCost = 7000, quotaBeforeGroup = 3500, afterGroup = round(3500 * 1.5) = 5250
+	if quota != 5250 {
+		t.Fatalf("quota = %d, want 5250", quota)
 	}
 }