feat: enhance tiered billing logic and improve variable handling in pricing calculations

2026-03-16 22:00:36 +08:00
parent f6c0852da9
commit 5b03b39db2
10 changed files with 431 additions and 240 deletions
@@ -256,14 +256,12 @@ func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
 		ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat())
 	}

+	var tieredUsedVars map[string]bool
+	if snap := relayInfo.TieredBillingSnapshot; snap != nil {
+		tieredUsedVars = billingexpr.UsedVars(snap.ExprString)
+	}
 	var tieredResult *billingexpr.TieredResult
-	tieredOk, tieredQuota, tieredRes := TryTieredSettle(relayInfo, billingexpr.TokenParams{
-		P:    float64(usage.PromptTokens),
-		C:    float64(usage.CompletionTokens),
-		CR:   float64(usage.PromptTokensDetails.CachedTokens),
-		CC:   float64(usage.PromptTokensDetails.CachedCreationTokens - usage.ClaudeCacheCreation1hTokens),
-		CC1h: float64(usage.ClaudeCacheCreation1hTokens),
-	})
+	tieredOk, tieredQuota, tieredRes := TryTieredSettle(relayInfo, BuildTieredTokenParams(usage, true, tieredUsedVars))
 	if tieredOk {
 		tieredResult = tieredRes
 	}
@@ -394,14 +392,12 @@ func CalcOpenRouterCacheCreateTokens(usage dto.Usage, priceData types.PriceData)

 func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, extraContent string) {

+	var tieredUsedVars map[string]bool
+	if snap := relayInfo.TieredBillingSnapshot; snap != nil {
+		tieredUsedVars = billingexpr.UsedVars(snap.ExprString)
+	}
 	var tieredResult *billingexpr.TieredResult
-	tieredOk, tieredQuota, tieredRes := TryTieredSettle(relayInfo, billingexpr.TokenParams{
-		P:  float64(usage.PromptTokens),
-		C:  float64(usage.CompletionTokens),
-		CR: float64(usage.PromptTokensDetails.CachedTokens),
-		AI: float64(usage.PromptTokensDetails.AudioTokens),
-		AO: float64(usage.CompletionTokenDetails.AudioTokens),
-	})
+	tieredOk, tieredQuota, tieredRes := TryTieredSettle(relayInfo, BuildTieredTokenParams(usage, false, tieredUsedVars))
 	if tieredOk {
 		tieredResult = tieredRes
 	}
@@ -659,4 +655,3 @@ func checkAndSendSubscriptionQuotaNotify(relayInfo *relaycommon.RelayInfo) {
 		}
 	})
 }
-
@@ -1,6 +1,7 @@
 package service

 import (
+	"github.com/QuantumNous/new-api/dto"
 	"github.com/QuantumNous/new-api/pkg/billingexpr"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 )
@@ -8,6 +9,62 @@ import (
 // TieredResultWrapper wraps billingexpr.TieredResult for use at the service layer.
 type TieredResultWrapper = billingexpr.TieredResult

+// BuildTieredTokenParams constructs billingexpr.TokenParams from a dto.Usage,
+// normalizing P and C so they mean "tokens not separately priced by the
+// expression". A sub-category (cache read, cache creation, image, audio) is
+// subtracted from its parent total only when the expression references that
+// sub-category through its own variable.
+//
+// GPT-format APIs report prompt_tokens / completion_tokens as totals that
+// include all sub-categories (cache, image, audio). Claude-format APIs
+// report them as text-only, so when isClaudeUsageSemantic is true P and C
+// are passed through without any subtraction.
+//
+// usedVars is the set of variable names referenced by the expression. A nil
+// map is accepted and results in no subtraction. usage must be non-nil.
+//
+// Cache-creation tokens are split into the 1h part (CC1h) and the remainder
+// (CC). Each part is removed from P only when its own variable is referenced,
+// so an expression that prices just one of them still bills the other part
+// through P instead of dropping it. P, C and CC are clamped at zero.
+func BuildTieredTokenParams(usage *dto.Usage, isClaudeUsageSemantic bool, usedVars map[string]bool) billingexpr.TokenParams {
+	p := float64(usage.PromptTokens)
+	c := float64(usage.CompletionTokens)
+	cr := float64(usage.PromptTokensDetails.CachedTokens)
+	ccTotal := float64(usage.PromptTokensDetails.CachedCreationTokens)
+	cc1h := float64(usage.ClaudeCacheCreation1hTokens)
+	img := float64(usage.PromptTokensDetails.ImageTokens)
+	ai := float64(usage.PromptTokensDetails.AudioTokens)
+	ao := float64(usage.CompletionTokenDetails.AudioTokens)
+
+	// CC is what is left of the cache-creation total once the 1h part is
+	// taken out. Clamp it so inconsistent upstream counts (1h part larger
+	// than the total) cannot produce a negative token count.
+	cc := ccTotal - cc1h
+	if cc < 0 {
+		cc = 0
+	}
+
+	if !isClaudeUsageSemantic {
+		if usedVars["cr"] || usedVars["cache_read_tokens"] {
+			p -= cr
+		}
+		// The two cache-creation parts are handled independently: removing
+		// the whole total when only one part is priced would leave the other
+		// part unbilled.
+		if usedVars["cc"] || usedVars["cache_create_tokens"] {
+			p -= cc
+		}
+		if usedVars["cc1h"] || usedVars["cache_create_1h_tokens"] {
+			p -= cc1h
+		}
+		if usedVars["img"] || usedVars["image_tokens"] {
+			p -= img
+		}
+		if usedVars["ai"] || usedVars["audio_input_tokens"] {
+			p -= ai
+		}
+		if usedVars["ao"] || usedVars["audio_output_tokens"] {
+			c -= ao
+		}
+	}
+
+	// Sub-category counts that exceed the reported totals must not turn the
+	// base counts negative.
+	if p < 0 {
+		p = 0
+	}
+	if c < 0 {
+		c = 0
+	}
+
+	return billingexpr.TokenParams{
+		P:    p,
+		C:    c,
+		CR:   cr,
+		CC:   cc,
+		CC1h: cc1h,
+		Img:  img,
+		AI:   ai,
+		AO:   ao,
+	}
+}
+
 // TryTieredSettle checks if the request uses tiered_expr billing and, if so,
 // computes the actual quota using the frozen BillingSnapshot. Returns:
 //   - ok=true, quota, result  when tiered billing applies
@@ -1,8 +1,10 @@
 package service

 import (
+	"math"
 	"testing"

+	"github.com/QuantumNous/new-api/dto"
 	"github.com/QuantumNous/new-api/pkg/billingexpr"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 )
@@ -405,3 +407,183 @@ func TestTryTieredSettle_ErrorFallbackToEstimatedQuotaAfterGroup(t *testing.T) {
 		t.Fatal("result should be nil on error fallback")
 	}
 }
+
+// ---------------------------------------------------------------------------
+// BuildTieredTokenParams: token normalization and ratio parity tests
+// ---------------------------------------------------------------------------
+
+// tieredQuota evaluates exprStr against usage the same way the tests expect
+// tiered billing to: it derives the token params via BuildTieredTokenParams,
+// runs the expression, and scales the result by testQuotaPerUnit per million
+// and by groupRatio.
+func tieredQuota(exprStr string, usage *dto.Usage, isClaudeSemantic bool, groupRatio float64) float64 {
+	tokenParams := BuildTieredTokenParams(usage, isClaudeSemantic, billingexpr.UsedVars(exprStr))
+	// Only the first return value is needed; the other two are discarded.
+	exprCost, _, _ := billingexpr.RunExpr(exprStr, tokenParams)
+	scaled := exprCost / 1_000_000 * testQuotaPerUnit
+	return scaled * groupRatio
+}
+
+// ratioQuota computes the reference quota used by the parity tests from plain
+// ratios. Unless isClaudeSemantic is set, cached, cache-creation and image
+// tokens are first removed from the prompt total; cached and image tokens are
+// then added back weighted by cacheRatio and imageRatio (cache-creation
+// tokens are not added back).
+func ratioQuota(usage *dto.Usage, isClaudeSemantic bool, modelRatio, completionRatio, cacheRatio, imageRatio, groupRatio float64) float64 {
+	details := &usage.PromptTokensDetails
+	cached := float64(details.CachedTokens)
+	created := float64(details.CachedCreationTokens)
+	images := float64(details.ImageTokens)
+
+	text := float64(usage.PromptTokens)
+	if !isClaudeSemantic {
+		text -= cached
+		text -= created
+		text -= images
+	}
+
+	prompt := text + cached*cacheRatio + images*imageRatio
+	completion := float64(usage.CompletionTokens) * completionRatio
+	return (prompt + completion) * modelRatio * groupRatio
+}
+
+func TestBuildTieredTokenParams_GPT_WithCache(t *testing.T) {
+	usage := &dto.Usage{
+		PromptTokens:     1000,
+		CompletionTokens: 500,
+		PromptTokensDetails: dto.InputTokenDetails{
+			CachedTokens: 200,
+			TextTokens:   800,
+		},
+	}
+	expr := `tier("base", p * 2.5 + c * 15 + cr * 0.25)`
+	got := tieredQuota(expr, usage, false, 1.0)
+	// P=800, C=500, CR=200 → (800*2.5 + 500*15 + 200*0.25) * 0.5 = 4775
+	want := 4775.0
+	if math.Abs(got-want) > 0.01 {
+		t.Fatalf("quota = %f, want %f", got, want)
+	}
+}
+
+func TestBuildTieredTokenParams_GPT_NoCacheVar(t *testing.T) {
+	usage := &dto.Usage{
+		PromptTokens:     1000,
+		CompletionTokens: 500,
+		PromptTokensDetails: dto.InputTokenDetails{
+			CachedTokens: 200,
+			TextTokens:   800,
+		},
+	}
+	expr := `tier("base", p * 2.5 + c * 15)`
+	got := tieredQuota(expr, usage, false, 1.0)
+	// No cr → P=1000 (cache stays in P), C=500 → (1000*2.5 + 500*15) * 0.5 = 5000
+	want := 5000.0
+	if math.Abs(got-want) > 0.01 {
+		t.Fatalf("quota = %f, want %f", got, want)
+	}
+}
+
+func TestBuildTieredTokenParams_GPT_WithImage(t *testing.T) {
+	usage := &dto.Usage{
+		PromptTokens:     1000,
+		CompletionTokens: 500,
+		PromptTokensDetails: dto.InputTokenDetails{
+			ImageTokens: 200,
+			TextTokens:  800,
+		},
+	}
+	expr := `tier("base", p * 2 + c * 8 + img * 2.5)`
+	got := tieredQuota(expr, usage, false, 1.0)
+	// P=800, C=500, Img=200 → (800*2 + 500*8 + 200*2.5) * 0.5 = 3050
+	want := 3050.0
+	if math.Abs(got-want) > 0.01 {
+		t.Fatalf("quota = %f, want %f", got, want)
+	}
+}
+
+func TestBuildTieredTokenParams_Claude_WithCache(t *testing.T) {
+	usage := &dto.Usage{
+		PromptTokens:     800,
+		CompletionTokens: 500,
+		PromptTokensDetails: dto.InputTokenDetails{
+			CachedTokens: 200,
+			TextTokens:   800,
+		},
+	}
+	expr := `tier("base", p * 3 + c * 15 + cr * 0.3)`
+	got := tieredQuota(expr, usage, true, 1.0)
+	// Claude: P=800 (no subtraction), C=500, CR=200 → (800*3 + 500*15 + 200*0.3) * 0.5 = 4980
+	want := 4980.0
+	if math.Abs(got-want) > 0.01 {
+		t.Fatalf("quota = %f, want %f", got, want)
+	}
+}
+
+func TestBuildTieredTokenParams_GPT_AudioOutput(t *testing.T) {
+	usage := &dto.Usage{
+		PromptTokens:     1000,
+		CompletionTokens: 600,
+		CompletionTokenDetails: dto.OutputTokenDetails{
+			AudioTokens: 100,
+			TextTokens:  500,
+		},
+	}
+	expr := `tier("base", p * 2 + c * 10 + ao * 50)`
+	got := tieredQuota(expr, usage, false, 1.0)
+	// C=600-100=500, AO=100 → (1000*2 + 500*10 + 100*50) * 0.5 = 6000
+	want := 6000.0
+	if math.Abs(got-want) > 0.01 {
+		t.Fatalf("quota = %f, want %f", got, want)
+	}
+}
+
+// TestBuildTieredTokenParams_GPT_AudioOutputNoVar checks that output audio
+// tokens stay in C when the expression does not reference ao.
+func TestBuildTieredTokenParams_GPT_AudioOutputNoVar(t *testing.T) {
+	usage := new(dto.Usage)
+	usage.PromptTokens = 1000
+	usage.CompletionTokens = 600
+	usage.CompletionTokenDetails.AudioTokens = 100
+	usage.CompletionTokenDetails.TextTokens = 500
+
+	// No ao → C=600 (audio stays in C) → (1000*2 + 600*10) * 0.5 = 4000
+	expected := 4000.0
+	actual := tieredQuota(`tier("base", p * 2 + c * 10)`, usage, false, 1.0)
+	if gap := math.Abs(actual - expected); gap > 0.01 {
+		t.Fatalf("quota = %f, want %f", actual, expected)
+	}
+}
+
+func TestBuildTieredTokenParams_ParityWithRatio(t *testing.T) {
+	// GPT-5.4 prices: input=$2.5, output=$15, cacheRead=$0.25
+	// Ratio equivalents: modelRatio=1.25, completionRatio=6, cacheRatio=0.1
+	usage := &dto.Usage{
+		PromptTokens:     10000,
+		CompletionTokens: 2000,
+		PromptTokensDetails: dto.InputTokenDetails{
+			CachedTokens: 3000,
+			TextTokens:   7000,
+		},
+	}
+	expr := `tier("base", p * 2.5 + c * 15 + cr * 0.25)`
+
+	for _, gr := range []float64{1.0, 1.5, 2.0, 0.5} {
+		tq := tieredQuota(expr, usage, false, gr)
+		rq := ratioQuota(usage, false, 1.25, 6, 0.1, 0, gr)
+
+		if math.Abs(tq-rq) > 0.01 {
+			t.Fatalf("groupRatio=%v: tiered=%f ratio=%f (mismatch)", gr, tq, rq)
+		}
+	}
+}
+
+// TestBuildTieredTokenParams_ParityWithRatio_Image checks that the tiered
+// expression and the ratio formula agree for an image-priced model.
+func TestBuildTieredTokenParams_ParityWithRatio_Image(t *testing.T) {
+	// gpt-image-1-mini prices: input=$2, output=$8, image=$2.5
+	// Ratio equivalents: modelRatio=1, completionRatio=4, imageRatio=1.25
+	promptDetails := dto.InputTokenDetails{ImageTokens: 1000, TextTokens: 4000}
+	usage := &dto.Usage{
+		PromptTokens:        5000,
+		CompletionTokens:    4000,
+		PromptTokensDetails: promptDetails,
+	}
+
+	tiered := tieredQuota(`tier("base", p * 2 + c * 8 + img * 2.5)`, usage, false, 1.0)
+	ratio := ratioQuota(usage, false, 1.0, 4, 0, 1.25, 1.0)
+
+	if delta := math.Abs(tiered - ratio); delta > 0.01 {
+		t.Fatalf("tiered=%f ratio=%f (mismatch)", tiered, ratio)
+	}
+}