feat: enhance tiered billing logic and improve variable handling in pricing calculations

This commit is contained in:
CaIon
2026-03-16 22:00:36 +08:00
parent f6c0852da9
commit 5b03b39db2
10 changed files with 431 additions and 240 deletions
+10 -15
View File
@@ -256,14 +256,12 @@ func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat())
}
var tieredUsedVars map[string]bool
if snap := relayInfo.TieredBillingSnapshot; snap != nil {
tieredUsedVars = billingexpr.UsedVars(snap.ExprString)
}
var tieredResult *billingexpr.TieredResult
tieredOk, tieredQuota, tieredRes := TryTieredSettle(relayInfo, billingexpr.TokenParams{
P: float64(usage.PromptTokens),
C: float64(usage.CompletionTokens),
CR: float64(usage.PromptTokensDetails.CachedTokens),
CC: float64(usage.PromptTokensDetails.CachedCreationTokens - usage.ClaudeCacheCreation1hTokens),
CC1h: float64(usage.ClaudeCacheCreation1hTokens),
})
tieredOk, tieredQuota, tieredRes := TryTieredSettle(relayInfo, BuildTieredTokenParams(usage, true, tieredUsedVars))
if tieredOk {
tieredResult = tieredRes
}
@@ -394,14 +392,12 @@ func CalcOpenRouterCacheCreateTokens(usage dto.Usage, priceData types.PriceData)
func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, extraContent string) {
var tieredUsedVars map[string]bool
if snap := relayInfo.TieredBillingSnapshot; snap != nil {
tieredUsedVars = billingexpr.UsedVars(snap.ExprString)
}
var tieredResult *billingexpr.TieredResult
tieredOk, tieredQuota, tieredRes := TryTieredSettle(relayInfo, billingexpr.TokenParams{
P: float64(usage.PromptTokens),
C: float64(usage.CompletionTokens),
CR: float64(usage.PromptTokensDetails.CachedTokens),
AI: float64(usage.PromptTokensDetails.AudioTokens),
AO: float64(usage.CompletionTokenDetails.AudioTokens),
})
tieredOk, tieredQuota, tieredRes := TryTieredSettle(relayInfo, BuildTieredTokenParams(usage, false, tieredUsedVars))
if tieredOk {
tieredResult = tieredRes
}
@@ -659,4 +655,3 @@ func checkAndSendSubscriptionQuotaNotify(relayInfo *relaycommon.RelayInfo) {
}
})
}
+57
View File
@@ -1,6 +1,7 @@
package service
import (
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/pkg/billingexpr"
relaycommon "github.com/QuantumNous/new-api/relay/common"
)
@@ -8,6 +9,62 @@ import (
// TieredResultWrapper wraps billingexpr.TieredResult for use at the service layer.
type TieredResultWrapper = billingexpr.TieredResult
// BuildTieredTokenParams constructs billingexpr.TokenParams from a dto.Usage,
// normalizing P and C so they mean "tokens not separately priced by the
// expression". Sub-categories (cache, image, audio) are only subtracted
// when the expression references them via their own variable.
//
// GPT-format APIs report prompt_tokens / completion_tokens as totals that
// include all sub-categories (cache, image, audio). Claude-format APIs
// report them as text-only. This function normalizes to text-only when
// sub-categories are separately priced.
func BuildTieredTokenParams(usage *dto.Usage, isClaudeUsageSemantic bool, usedVars map[string]bool) billingexpr.TokenParams {
p := float64(usage.PromptTokens)
c := float64(usage.CompletionTokens)
cr := float64(usage.PromptTokensDetails.CachedTokens)
ccTotal := float64(usage.PromptTokensDetails.CachedCreationTokens)
cc1h := float64(usage.ClaudeCacheCreation1hTokens)
img := float64(usage.PromptTokensDetails.ImageTokens)
ai := float64(usage.PromptTokensDetails.AudioTokens)
ao := float64(usage.CompletionTokenDetails.AudioTokens)
if !isClaudeUsageSemantic {
if usedVars["cr"] || usedVars["cache_read_tokens"] {
p -= cr
}
if usedVars["cc"] || usedVars["cc1h"] || usedVars["cache_create_tokens"] || usedVars["cache_create_1h_tokens"] {
p -= ccTotal
}
if usedVars["img"] || usedVars["image_tokens"] {
p -= img
}
if usedVars["ai"] || usedVars["audio_input_tokens"] {
p -= ai
}
if usedVars["ao"] || usedVars["audio_output_tokens"] {
c -= ao
}
}
if p < 0 {
p = 0
}
if c < 0 {
c = 0
}
return billingexpr.TokenParams{
P: p,
C: c,
CR: cr,
CC: ccTotal - cc1h,
CC1h: cc1h,
Img: img,
AI: ai,
AO: ao,
}
}
// TryTieredSettle checks if the request uses tiered_expr billing and, if so,
// computes the actual quota using the frozen BillingSnapshot. Returns:
// - ok=true, quota, result when tiered billing applies
+182
View File
@@ -1,8 +1,10 @@
package service
import (
"math"
"testing"
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/pkg/billingexpr"
relaycommon "github.com/QuantumNous/new-api/relay/common"
)
@@ -405,3 +407,183 @@ func TestTryTieredSettle_ErrorFallbackToEstimatedQuotaAfterGroup(t *testing.T) {
t.Fatal("result should be nil on error fallback")
}
}
// ---------------------------------------------------------------------------
// BuildTieredTokenParams: token normalization and ratio parity tests
// ---------------------------------------------------------------------------
func tieredQuota(exprStr string, usage *dto.Usage, isClaudeSemantic bool, groupRatio float64) float64 {
usedVars := billingexpr.UsedVars(exprStr)
params := BuildTieredTokenParams(usage, isClaudeSemantic, usedVars)
cost, _, _ := billingexpr.RunExpr(exprStr, params)
return cost / 1_000_000 * testQuotaPerUnit * groupRatio
}
func ratioQuota(usage *dto.Usage, isClaudeSemantic bool, modelRatio, completionRatio, cacheRatio, imageRatio, groupRatio float64) float64 {
baseTokens := float64(usage.PromptTokens)
cacheTokens := float64(usage.PromptTokensDetails.CachedTokens)
ccTokens := float64(usage.PromptTokensDetails.CachedCreationTokens)
imgTokens := float64(usage.PromptTokensDetails.ImageTokens)
if !isClaudeSemantic {
baseTokens -= cacheTokens
baseTokens -= ccTokens
baseTokens -= imgTokens
}
promptQuota := baseTokens + cacheTokens*cacheRatio + imgTokens*imageRatio
completionQuota := float64(usage.CompletionTokens) * completionRatio
return (promptQuota + completionQuota) * modelRatio * groupRatio
}
func TestBuildTieredTokenParams_GPT_WithCache(t *testing.T) {
usage := &dto.Usage{
PromptTokens: 1000,
CompletionTokens: 500,
PromptTokensDetails: dto.InputTokenDetails{
CachedTokens: 200,
TextTokens: 800,
},
}
expr := `tier("base", p * 2.5 + c * 15 + cr * 0.25)`
got := tieredQuota(expr, usage, false, 1.0)
// P=800, C=500, CR=200 → (800*2.5 + 500*15 + 200*0.25) * 0.5 = 4775
want := 4775.0
if math.Abs(got-want) > 0.01 {
t.Fatalf("quota = %f, want %f", got, want)
}
}
func TestBuildTieredTokenParams_GPT_NoCacheVar(t *testing.T) {
usage := &dto.Usage{
PromptTokens: 1000,
CompletionTokens: 500,
PromptTokensDetails: dto.InputTokenDetails{
CachedTokens: 200,
TextTokens: 800,
},
}
expr := `tier("base", p * 2.5 + c * 15)`
got := tieredQuota(expr, usage, false, 1.0)
// No cr → P=1000 (cache stays in P), C=500 → (1000*2.5 + 500*15) * 0.5 = 5000
want := 5000.0
if math.Abs(got-want) > 0.01 {
t.Fatalf("quota = %f, want %f", got, want)
}
}
func TestBuildTieredTokenParams_GPT_WithImage(t *testing.T) {
usage := &dto.Usage{
PromptTokens: 1000,
CompletionTokens: 500,
PromptTokensDetails: dto.InputTokenDetails{
ImageTokens: 200,
TextTokens: 800,
},
}
expr := `tier("base", p * 2 + c * 8 + img * 2.5)`
got := tieredQuota(expr, usage, false, 1.0)
// P=800, C=500, Img=200 → (800*2 + 500*8 + 200*2.5) * 0.5 = 3050
want := 3050.0
if math.Abs(got-want) > 0.01 {
t.Fatalf("quota = %f, want %f", got, want)
}
}
func TestBuildTieredTokenParams_Claude_WithCache(t *testing.T) {
usage := &dto.Usage{
PromptTokens: 800,
CompletionTokens: 500,
PromptTokensDetails: dto.InputTokenDetails{
CachedTokens: 200,
TextTokens: 800,
},
}
expr := `tier("base", p * 3 + c * 15 + cr * 0.3)`
got := tieredQuota(expr, usage, true, 1.0)
// Claude: P=800 (no subtraction), C=500, CR=200 → (800*3 + 500*15 + 200*0.3) * 0.5 = 4980
want := 4980.0
if math.Abs(got-want) > 0.01 {
t.Fatalf("quota = %f, want %f", got, want)
}
}
func TestBuildTieredTokenParams_GPT_AudioOutput(t *testing.T) {
usage := &dto.Usage{
PromptTokens: 1000,
CompletionTokens: 600,
CompletionTokenDetails: dto.OutputTokenDetails{
AudioTokens: 100,
TextTokens: 500,
},
}
expr := `tier("base", p * 2 + c * 10 + ao * 50)`
got := tieredQuota(expr, usage, false, 1.0)
// C=600-100=500, AO=100 → (1000*2 + 500*10 + 100*50) * 0.5 = 6000
want := 6000.0
if math.Abs(got-want) > 0.01 {
t.Fatalf("quota = %f, want %f", got, want)
}
}
func TestBuildTieredTokenParams_GPT_AudioOutputNoVar(t *testing.T) {
usage := &dto.Usage{
PromptTokens: 1000,
CompletionTokens: 600,
CompletionTokenDetails: dto.OutputTokenDetails{
AudioTokens: 100,
TextTokens: 500,
},
}
expr := `tier("base", p * 2 + c * 10)`
got := tieredQuota(expr, usage, false, 1.0)
// No ao → C=600 (audio stays in C) → (1000*2 + 600*10) * 0.5 = 4000
want := 4000.0
if math.Abs(got-want) > 0.01 {
t.Fatalf("quota = %f, want %f", got, want)
}
}
func TestBuildTieredTokenParams_ParityWithRatio(t *testing.T) {
// GPT-5.4 prices: input=$2.5, output=$15, cacheRead=$0.25
// Ratio equivalents: modelRatio=1.25, completionRatio=6, cacheRatio=0.1
usage := &dto.Usage{
PromptTokens: 10000,
CompletionTokens: 2000,
PromptTokensDetails: dto.InputTokenDetails{
CachedTokens: 3000,
TextTokens: 7000,
},
}
expr := `tier("base", p * 2.5 + c * 15 + cr * 0.25)`
for _, gr := range []float64{1.0, 1.5, 2.0, 0.5} {
tq := tieredQuota(expr, usage, false, gr)
rq := ratioQuota(usage, false, 1.25, 6, 0.1, 0, gr)
if math.Abs(tq-rq) > 0.01 {
t.Fatalf("groupRatio=%v: tiered=%f ratio=%f (mismatch)", gr, tq, rq)
}
}
}
func TestBuildTieredTokenParams_ParityWithRatio_Image(t *testing.T) {
// gpt-image-1-mini prices: input=$2, output=$8, image=$2.5
// Ratio equivalents: modelRatio=1, completionRatio=4, imageRatio=1.25
usage := &dto.Usage{
PromptTokens: 5000,
CompletionTokens: 4000,
PromptTokensDetails: dto.InputTokenDetails{
ImageTokens: 1000,
TextTokens: 4000,
},
}
expr := `tier("base", p * 2 + c * 8 + img * 2.5)`
tq := tieredQuota(expr, usage, false, 1.0)
rq := ratioQuota(usage, false, 1.0, 4, 0, 1.25, 1.0)
if math.Abs(tq-rq) > 0.01 {
t.Fatalf("tiered=%f ratio=%f (mismatch)", tq, rq)
}
}