refactor: update billing calculations to use quota per unit
- Adjusted billing calculations in tests and core logic to incorporate a new QuotaPerUnit field.
- Modified estimated quota calculations to reflect changes in tiered billing logic.
- Updated related tests to ensure accuracy with the new quota calculations.
- Enhanced dynamic pricing components to align with updated billing expressions.
This commit is contained in:
@@ -19,6 +19,8 @@ const cacheExpr = `tier("default", p * 2 + c * 10 + cr * 0.2 + cc * 2.5 + cc1h *
|
||||
// Expression that probes the request: picks the "fast" tier when param("service_tier") == "fast", otherwise the "normal" tier
|
||||
const probeExpr = `param("service_tier") == "fast" ? tier("fast", p * 4 + c * 20) : tier("normal", p * 2 + c * 10)`
|
||||
|
||||
// testQuotaPerUnit is the QuotaPerUnit value set on the billing snapshots built in these tests.
// Expected quotas are derived from the raw expression cost as cost / 1_000_000 * testQuotaPerUnit,
// so with this value every expected quota is half of the expression result (before group ratio).
const testQuotaPerUnit = 500_000.0
|
||||
|
||||
func makeSnapshot(expr string, groupRatio float64, estPrompt, estCompletion int) *billingexpr.BillingSnapshot {
|
||||
return &billingexpr.BillingSnapshot{
|
||||
BillingMode: "tiered_expr",
|
||||
@@ -27,14 +29,16 @@ func makeSnapshot(expr string, groupRatio float64, estPrompt, estCompletion int)
|
||||
GroupRatio: groupRatio,
|
||||
EstimatedPromptTokens: estPrompt,
|
||||
EstimatedCompletionTokens: estCompletion,
|
||||
QuotaPerUnit: testQuotaPerUnit,
|
||||
}
|
||||
}
|
||||
|
||||
func makeRelayInfo(expr string, groupRatio float64, estPrompt, estCompletion int) *relaycommon.RelayInfo {
|
||||
snap := makeSnapshot(expr, groupRatio, estPrompt, estCompletion)
|
||||
cost, trace, _ := billingexpr.RunExpr(expr, billingexpr.TokenParams{P: float64(estPrompt), C: float64(estCompletion)})
|
||||
snap.EstimatedQuotaBeforeGroup = cost
|
||||
snap.EstimatedQuotaAfterGroup = billingexpr.QuotaRound(cost * groupRatio)
|
||||
quotaBeforeGroup := cost / 1_000_000 * testQuotaPerUnit
|
||||
snap.EstimatedQuotaBeforeGroup = quotaBeforeGroup
|
||||
snap.EstimatedQuotaAfterGroup = billingexpr.QuotaRound(quotaBeforeGroup * groupRatio)
|
||||
snap.EstimatedTier = trace.MatchedTier
|
||||
return &relaycommon.RelayInfo{
|
||||
TieredBillingSnapshot: snap,
|
||||
@@ -56,7 +60,8 @@ func TestTryTieredSettleUsesFrozenRequestInput(t *testing.T) {
|
||||
GroupRatio: 1.0,
|
||||
EstimatedPromptTokens: 100,
|
||||
EstimatedCompletionTokens: 0,
|
||||
EstimatedQuotaAfterGroup: 100,
|
||||
EstimatedQuotaAfterGroup: 50,
|
||||
QuotaPerUnit: testQuotaPerUnit,
|
||||
},
|
||||
BillingRequestInput: &billingexpr.RequestInput{
|
||||
Body: []byte(`{"service_tier":"fast"}`),
|
||||
@@ -67,8 +72,9 @@ func TestTryTieredSettleUsesFrozenRequestInput(t *testing.T) {
|
||||
if !ok {
|
||||
t.Fatal("expected tiered settle to apply")
|
||||
}
|
||||
if quota != 200 {
|
||||
t.Fatalf("quota = %d, want 200", quota)
|
||||
// fast: p*2 = 200; quota = 200 / 1M * 500K = 100
|
||||
if quota != 100 {
|
||||
t.Fatalf("quota = %d, want 100", quota)
|
||||
}
|
||||
if result == nil || result.MatchedTier != "fast" {
|
||||
t.Fatalf("matched tier = %v, want fast", result)
|
||||
@@ -111,9 +117,9 @@ func TestTryTieredSettle_PreConsumeMatchesPostConsume(t *testing.T) {
|
||||
if !ok {
|
||||
t.Fatal("expected tiered settle")
|
||||
}
|
||||
// p*2 + c*10 = 2000 + 5000 = 7000
|
||||
if quota != 7000 {
|
||||
t.Fatalf("quota = %d, want 7000", quota)
|
||||
// p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500
|
||||
if quota != 3500 {
|
||||
t.Fatalf("quota = %d, want 3500", quota)
|
||||
}
|
||||
if quota != info.FinalPreConsumedQuota {
|
||||
t.Fatalf("pre-consume %d != post-consume %d", info.FinalPreConsumedQuota, quota)
|
||||
@@ -122,7 +128,7 @@ func TestTryTieredSettle_PreConsumeMatchesPostConsume(t *testing.T) {
|
||||
|
||||
func TestTryTieredSettle_PostConsumeOverPreConsume(t *testing.T) {
|
||||
info := makeRelayInfo(flatExpr, 1.0, 1000, 500)
|
||||
preConsumed := info.FinalPreConsumedQuota // 7000
|
||||
preConsumed := info.FinalPreConsumedQuota // 3500
|
||||
|
||||
// Actual usage is higher than estimated
|
||||
params := billingexpr.TokenParams{P: 2000, C: 1000}
|
||||
@@ -130,9 +136,9 @@ func TestTryTieredSettle_PostConsumeOverPreConsume(t *testing.T) {
|
||||
if !ok {
|
||||
t.Fatal("expected tiered settle")
|
||||
}
|
||||
// p*2 + c*10 = 4000 + 10000 = 14000
|
||||
if quota != 14000 {
|
||||
t.Fatalf("quota = %d, want 14000", quota)
|
||||
// p*2 + c*10 = 14000; quota = 14000 / 1M * 500K = 7000
|
||||
if quota != 7000 {
|
||||
t.Fatalf("quota = %d, want 7000", quota)
|
||||
}
|
||||
if quota <= preConsumed {
|
||||
t.Fatalf("expected supplement: actual %d should > pre-consumed %d", quota, preConsumed)
|
||||
@@ -141,7 +147,7 @@ func TestTryTieredSettle_PostConsumeOverPreConsume(t *testing.T) {
|
||||
|
||||
func TestTryTieredSettle_PostConsumeUnderPreConsume(t *testing.T) {
|
||||
info := makeRelayInfo(flatExpr, 1.0, 1000, 500)
|
||||
preConsumed := info.FinalPreConsumedQuota // 7000
|
||||
preConsumed := info.FinalPreConsumedQuota // 3500
|
||||
|
||||
// Actual usage is lower than estimated
|
||||
params := billingexpr.TokenParams{P: 100, C: 50}
|
||||
@@ -149,9 +155,9 @@ func TestTryTieredSettle_PostConsumeUnderPreConsume(t *testing.T) {
|
||||
if !ok {
|
||||
t.Fatal("expected tiered settle")
|
||||
}
|
||||
// p*2 + c*10 = 200 + 500 = 700
|
||||
if quota != 700 {
|
||||
t.Fatalf("quota = %d, want 700", quota)
|
||||
// p*2 + c*10 = 700; quota = 700 / 1M * 500K = 350
|
||||
if quota != 350 {
|
||||
t.Fatalf("quota = %d, want 350", quota)
|
||||
}
|
||||
if quota >= preConsumed {
|
||||
t.Fatalf("expected refund: actual %d should < pre-consumed %d", quota, preConsumed)
|
||||
@@ -170,9 +176,9 @@ func TestTryTieredSettle_ExactBoundary(t *testing.T) {
|
||||
if !ok {
|
||||
t.Fatal("expected tiered settle")
|
||||
}
|
||||
// standard: p*1.5 + c*7.5 = 300000 + 7500 = 307500
|
||||
if quota != 307500 {
|
||||
t.Fatalf("quota = %d, want 307500", quota)
|
||||
// standard: p*1.5 + c*7.5 = 307500; quota = 307500 / 1M * 500K = 153750
|
||||
if quota != 153750 {
|
||||
t.Fatalf("quota = %d, want 153750", quota)
|
||||
}
|
||||
if result.MatchedTier != "standard" {
|
||||
t.Fatalf("tier = %s, want standard", result.MatchedTier)
|
||||
@@ -187,9 +193,9 @@ func TestTryTieredSettle_BoundaryPlusOne(t *testing.T) {
|
||||
if !ok {
|
||||
t.Fatal("expected tiered settle")
|
||||
}
|
||||
// long_context: p*3 + c*11.25 = 600003 + 11250 = 611253
|
||||
if quota != 611253 {
|
||||
t.Fatalf("quota = %d, want 611253", quota)
|
||||
// long_context: p*3 + c*11.25 = 611253; quota = round(611253 / 1M * 500K) = 305627
|
||||
if quota != 305627 {
|
||||
t.Fatalf("quota = %d, want 305627", quota)
|
||||
}
|
||||
if result.MatchedTier != "long_context" {
|
||||
t.Fatalf("tier = %s, want long_context", result.MatchedTier)
|
||||
@@ -221,9 +227,9 @@ func TestTryTieredSettle_HugeTokens(t *testing.T) {
|
||||
if !ok {
|
||||
t.Fatal("expected tiered settle")
|
||||
}
|
||||
// p*2 + c*10 = 20000000 + 50000000 = 70000000
|
||||
if quota != 70000000 {
|
||||
t.Fatalf("quota = %d, want 70000000", quota)
|
||||
// p*2 + c*10 = 70000000; quota = 70000000 / 1M * 500K = 35000000
|
||||
if quota != 35000000 {
|
||||
t.Fatalf("quota = %d, want 35000000", quota)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -235,23 +241,23 @@ func TestTryTieredSettle_CacheTokensAffectSettlement(t *testing.T) {
|
||||
if !ok1 {
|
||||
t.Fatal("expected tiered settle")
|
||||
}
|
||||
// p*2 + c*10 + cr*0.2 + cc*2.5 + cc1h*4 = 2000 + 5000 + 0 + 0 + 0 = 7000
|
||||
// p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500
|
||||
|
||||
// With cache tokens
|
||||
ok2, quota2, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500, CR: 10000, CC: 5000, CC1h: 2000})
|
||||
if !ok2 {
|
||||
t.Fatal("expected tiered settle")
|
||||
}
|
||||
// 2000 + 5000 + 10000*0.2 + 5000*2.5 + 2000*4 = 2000 + 5000 + 2000 + 12500 + 8000 = 29500
|
||||
// 2000 + 5000 + 2000 + 12500 + 8000 = 29500; quota = 29500 / 1M * 500K = 14750
|
||||
|
||||
if quota2 <= quota1 {
|
||||
t.Fatalf("cache tokens should increase quota: without=%d, with=%d", quota1, quota2)
|
||||
}
|
||||
if quota1 != 7000 {
|
||||
t.Fatalf("no-cache quota = %d, want 7000", quota1)
|
||||
if quota1 != 3500 {
|
||||
t.Fatalf("no-cache quota = %d, want 3500", quota1)
|
||||
}
|
||||
if quota2 != 29500 {
|
||||
t.Fatalf("cache quota = %d, want 29500", quota2)
|
||||
if quota2 != 14750 {
|
||||
t.Fatalf("cache quota = %d, want 14750", quota2)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -269,9 +275,9 @@ func TestTryTieredSettle_RequestProbeInfluencesBilling(t *testing.T) {
|
||||
if !ok {
|
||||
t.Fatal("expected tiered settle")
|
||||
}
|
||||
// fast: p*4 + c*20 = 4000 + 10000 = 14000
|
||||
if quota != 14000 {
|
||||
t.Fatalf("quota = %d, want 14000", quota)
|
||||
// fast: p*4 + c*20 = 14000; quota = 14000 / 1M * 500K = 7000
|
||||
if quota != 7000 {
|
||||
t.Fatalf("quota = %d, want 7000", quota)
|
||||
}
|
||||
if result.MatchedTier != "fast" {
|
||||
t.Fatalf("tier = %s, want fast", result.MatchedTier)
|
||||
@@ -286,9 +292,9 @@ func TestTryTieredSettle_NoRequestInput_FallsBackToDefault(t *testing.T) {
|
||||
if !ok {
|
||||
t.Fatal("expected tiered settle")
|
||||
}
|
||||
// normal: p*2 + c*10 = 2000 + 5000 = 7000
|
||||
if quota != 7000 {
|
||||
t.Fatalf("quota = %d, want 7000", quota)
|
||||
// normal: p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500
|
||||
if quota != 3500 {
|
||||
t.Fatalf("quota = %d, want 3500", quota)
|
||||
}
|
||||
if result.MatchedTier != "normal" {
|
||||
t.Fatalf("tier = %s, want normal", result.MatchedTier)
|
||||
@@ -306,9 +312,9 @@ func TestTryTieredSettle_GroupRatioScaling(t *testing.T) {
|
||||
if !ok {
|
||||
t.Fatal("expected tiered settle")
|
||||
}
|
||||
// cost = 7000, after group = round(7000 * 1.5) = 10500
|
||||
if quota != 10500 {
|
||||
t.Fatalf("quota = %d, want 10500", quota)
|
||||
// exprCost = 7000, quotaBeforeGroup = 3500, afterGroup = round(3500 * 1.5) = 5250
|
||||
if quota != 5250 {
|
||||
t.Fatalf("quota = %d, want 5250", quota)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user