feat: add billing expression system documentation and enhance tiered billing logic

- Introduced a new rule for the Billing Expression System, emphasizing the importance of reading `pkg/billingexpr/expr.md` for dynamic billing.
- Updated the billing expression logic to support new variables and improved handling of image and audio tokens.
- Enhanced the tiered billing functionality with versioning support for expressions and refined quota calculations.
- Added tests to validate the new billing expression features and ensure correctness in pricing calculations.
2026-03-17 15:29:43 +08:00
parent 5b03b39db2
commit c5405b2a12
27 changed files with 894 additions and 578 deletions
@@ -26,22 +26,26 @@ func BuildTieredTokenParams(usage *dto.Usage, isClaudeUsageSemantic bool, usedVa
 	cc1h := float64(usage.ClaudeCacheCreation1hTokens)
 	img := float64(usage.PromptTokensDetails.ImageTokens)
 	ai := float64(usage.PromptTokensDetails.AudioTokens)
+	imgO := float64(usage.CompletionTokenDetails.ImageTokens)
 	ao := float64(usage.CompletionTokenDetails.AudioTokens)

 	if !isClaudeUsageSemantic {
-		if usedVars["cr"] || usedVars["cache_read_tokens"] {
+		if usedVars["cr"] {
 			p -= cr
 		}
-		if usedVars["cc"] || usedVars["cc1h"] || usedVars["cache_create_tokens"] || usedVars["cache_create_1h_tokens"] {
+		if usedVars["cc"] || usedVars["cc1h"] {
 			p -= ccTotal
 		}
-		if usedVars["img"] || usedVars["image_tokens"] {
+		if usedVars["img"] {
 			p -= img
 		}
-		if usedVars["ai"] || usedVars["audio_input_tokens"] {
+		if usedVars["ai"] {
 			p -= ai
 		}
-		if usedVars["ao"] || usedVars["audio_output_tokens"] {
+		if usedVars["img_o"] {
+			c -= imgO
+		}
+		if usedVars["ao"] {
 			c -= ao
 		}
 	}
@@ -60,6 +64,7 @@ func BuildTieredTokenParams(usage *dto.Usage, isClaudeUsageSemantic bool, usedVa
 		CC:   ccTotal - cc1h,
 		CC1h: cc1h,
 		Img:  img,
+		ImgO: imgO,
 		AI:   ai,
 		AO:   ao,
 	}
@@ -2,11 +2,14 @@ package service

 import (
 	"math"
+	"math/rand"
+	"sync"
 	"testing"

 	"github.com/QuantumNous/new-api/dto"
 	"github.com/QuantumNous/new-api/pkg/billingexpr"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	"github.com/shopspring/decimal"
 )

 // Claude Sonnet-style tiered expression: standard vs long-context
@@ -420,20 +423,33 @@ func tieredQuota(exprStr string, usage *dto.Usage, isClaudeSemantic bool, groupR
 }

 func ratioQuota(usage *dto.Usage, isClaudeSemantic bool, modelRatio, completionRatio, cacheRatio, imageRatio, groupRatio float64) float64 {
-	baseTokens := float64(usage.PromptTokens)
-	cacheTokens := float64(usage.PromptTokensDetails.CachedTokens)
-	ccTokens := float64(usage.PromptTokensDetails.CachedCreationTokens)
-	imgTokens := float64(usage.PromptTokensDetails.ImageTokens)
+	dPromptTokens := decimal.NewFromInt(int64(usage.PromptTokens))
+	dCacheTokens := decimal.NewFromInt(int64(usage.PromptTokensDetails.CachedTokens))
+	dCcTokens := decimal.NewFromInt(int64(usage.PromptTokensDetails.CachedCreationTokens))
+	dImgTokens := decimal.NewFromInt(int64(usage.PromptTokensDetails.ImageTokens))
+	dCompletionTokens := decimal.NewFromInt(int64(usage.CompletionTokens))
+	dModelRatio := decimal.NewFromFloat(modelRatio)
+	dCompletionRatio := decimal.NewFromFloat(completionRatio)
+	dCacheRatio := decimal.NewFromFloat(cacheRatio)
+	dImageRatio := decimal.NewFromFloat(imageRatio)
+	dGroupRatio := decimal.NewFromFloat(groupRatio)

+	baseTokens := dPromptTokens
 	if !isClaudeSemantic {
-		baseTokens -= cacheTokens
-		baseTokens -= ccTokens
-		baseTokens -= imgTokens
+		baseTokens = baseTokens.Sub(dCacheTokens)
+		baseTokens = baseTokens.Sub(dCcTokens)
+		baseTokens = baseTokens.Sub(dImgTokens)
 	}

-	promptQuota := baseTokens + cacheTokens*cacheRatio + imgTokens*imageRatio
-	completionQuota := float64(usage.CompletionTokens) * completionRatio
-	return (promptQuota + completionQuota) * modelRatio * groupRatio
+	cachedTokensWithRatio := dCacheTokens.Mul(dCacheRatio)
+	imageTokensWithRatio := dImgTokens.Mul(dImageRatio)
+	promptQuota := baseTokens.Add(cachedTokensWithRatio).Add(imageTokensWithRatio)
+	completionQuota := dCompletionTokens.Mul(dCompletionRatio)
+	ratio := dModelRatio.Mul(dGroupRatio)
+
+	result := promptQuota.Add(completionQuota).Mul(ratio)
+	f, _ := result.Float64()
+	return f
 }

 func TestBuildTieredTokenParams_GPT_WithCache(t *testing.T) {
@@ -587,3 +603,137 @@ func TestBuildTieredTokenParams_ParityWithRatio_Image(t *testing.T) {
 		t.Fatalf("tiered=%f ratio=%f (mismatch)", tq, rq)
 	}
 }
+
+// ---------------------------------------------------------------------------
+// Stress test: 1000 concurrent goroutines run a complex tiered expr on random
+// token counts, checking for errors and negative cost/quota; the benchmarks below measure tiered and ratio performance.
+// ---------------------------------------------------------------------------
+
+const complexTieredExpr = `p <= 200000 ? tier("standard", p * 3 + c * 15 + cr * 0.3 + cc * 3.75 + cc1h * 6 + img * 3 + img_o * 30 + ai * 10 + ao * 40) : tier("long_context", p * 6 + c * 22.5 + cr * 0.6 + cc * 7.5 + cc1h * 12 + img * 6 + img_o * 60 + ai * 20 + ao * 80)`
+
+func randomUsage(rng *rand.Rand) *dto.Usage {
+	cacheRead := int(rng.Float64() * 50000)
+	cacheCreate := int(rng.Float64() * 10000)
+	imgIn := int(rng.Float64() * 5000)
+	audioIn := int(rng.Float64() * 3000)
+	prompt := int(rng.Float64()*300000) + cacheRead + cacheCreate + imgIn + audioIn
+
+	imgOut := int(rng.Float64() * 2000)
+	audioOut := int(rng.Float64() * 1000)
+	completion := int(rng.Float64()*50000) + imgOut + audioOut
+
+	return &dto.Usage{
+		PromptTokens:     prompt,
+		CompletionTokens: completion,
+		PromptTokensDetails: dto.InputTokenDetails{
+			CachedTokens:         cacheRead,
+			CachedCreationTokens: cacheCreate,
+			ImageTokens:          imgIn,
+			AudioTokens:          audioIn,
+			TextTokens:           prompt - cacheRead - cacheCreate - imgIn - audioIn,
+		},
+		CompletionTokenDetails: dto.OutputTokenDetails{
+			ImageTokens: imgOut,
+			AudioTokens: audioOut,
+			TextTokens:  completion - imgOut - audioOut,
+		},
+	}
+}
+
+func TestStress_TieredBilling_1000Concurrent(t *testing.T) {
+	usedVars := billingexpr.UsedVars(complexTieredExpr)
+
+	var wg sync.WaitGroup
+	errCh := make(chan string, 1000)
+
+	for i := 0; i < 1000; i++ {
+		wg.Add(1)
+		go func(seed int64) {
+			defer wg.Done()
+			rng := rand.New(rand.NewSource(seed))
+
+			for j := 0; j < 100; j++ {
+				usage := randomUsage(rng)
+				groupRatio := 0.5 + rng.Float64()*2.0
+
+				params := BuildTieredTokenParams(usage, false, usedVars)
+				cost, trace, err := billingexpr.RunExpr(complexTieredExpr, params)
+				if err != nil {
+					errCh <- err.Error()
+					return
+				}
+				if cost < 0 {
+					errCh <- "negative cost"
+					return
+				}
+
+				quota := billingexpr.QuotaRound(cost / 1_000_000 * testQuotaPerUnit * groupRatio)
+				if quota < 0 {
+					errCh <- "negative quota"
+					return
+				}
+
+				_ = trace.MatchedTier
+			}
+		}(int64(i))
+	}
+
+	wg.Wait()
+	close(errCh)
+	for e := range errCh {
+		t.Fatal(e)
+	}
+}
+
+func BenchmarkTieredBilling_ComplexExpr(b *testing.B) {
+	rng := rand.New(rand.NewSource(42))
+	usedVars := billingexpr.UsedVars(complexTieredExpr)
+	usages := make([]*dto.Usage, 1000)
+	for i := range usages {
+		usages[i] = randomUsage(rng)
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		usage := usages[i%len(usages)]
+		params := BuildTieredTokenParams(usage, false, usedVars)
+		billingexpr.RunExpr(complexTieredExpr, params)
+	}
+}
+
+func BenchmarkRatioBilling_Equivalent(b *testing.B) {
+	rng := rand.New(rand.NewSource(42))
+	usages := make([]*dto.Usage, 1000)
+	for i := range usages {
+		usages[i] = randomUsage(rng)
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		usage := usages[i%len(usages)]
+		ratioQuota(usage, false, 1.5, 5.0, 0.1, 1.0, 1.5)
+	}
+}
+
+func BenchmarkTieredBilling_Parallel(b *testing.B) {
+	usedVars := billingexpr.UsedVars(complexTieredExpr)
+
+	b.RunParallel(func(pb *testing.PB) {
+		rng := rand.New(rand.NewSource(rand.Int63()))
+		for pb.Next() {
+			usage := randomUsage(rng)
+			params := BuildTieredTokenParams(usage, false, usedVars)
+			billingexpr.RunExpr(complexTieredExpr, params)
+		}
+	})
+}
+
+func BenchmarkRatioBilling_Parallel(b *testing.B) {
+	b.RunParallel(func(pb *testing.PB) {
+		rng := rand.New(rand.NewSource(rand.Int63()))
+		for pb.Next() {
+			usage := randomUsage(rng)
+			ratioQuota(usage, false, 1.5, 5.0, 0.1, 1.0, 1.5)
+		}
+	})
+}
@@ -0,0 +1,102 @@
+package service
+
+import (
+	"math"
+	"strings"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/setting/operation_setting"
+)
+
+// ToolCallUsage captures all tool call counts from a single request.
+type ToolCallUsage struct {
+	WebSearchCalls         int
+	WebSearchModelName     string
+	ClaudeWebSearchCalls   int
+	FileSearchCalls        int
+	ImageGenerationCall    bool
+	ImageGenerationQuality string
+	ImageGenerationSize    string
+}
+
+// ToolCallItem represents a single billed tool usage line.
+type ToolCallItem struct {
+	Name       string  `json:"name"`
+	CallCount  int     `json:"call_count"`
+	PricePer1K float64 `json:"price_per_1k"`
+	TotalPrice float64 `json:"total_price"`
+	Quota      int     `json:"quota"`
+}
+
+// ToolCallResult holds the aggregated tool call billing for a request.
+type ToolCallResult struct {
+	TotalQuota int            `json:"total_quota"`
+	Items      []ToolCallItem `json:"items,omitempty"`
+}
+
+func getWebSearchPriceKey(modelName string) string {
+	isNormalPrice :=
+		strings.HasPrefix(modelName, "o3") ||
+			strings.HasPrefix(modelName, "o4") ||
+			strings.HasPrefix(modelName, "gpt-5")
+	if isNormalPrice {
+		return "web_search"
+	}
+	return "web_search_high"
+}
+
+// ComputeToolCallQuota calculates the total quota for all tool calls in a
+// request. All tool prices are $/1K calls (configurable via ToolCallPrices
+// option). groupRatio is applied. Per-call billing (UsePrice) callers should
+// NOT add this result — per-call price already includes everything.
+func ComputeToolCallQuota(usage ToolCallUsage, groupRatio float64) ToolCallResult {
+	var items []ToolCallItem
+	totalQuota := 0
+
+	addItem := func(name string, count int, pricePer1K float64) {
+		if count <= 0 || pricePer1K <= 0 {
+			return
+		}
+		totalPrice := pricePer1K * float64(count) / 1000
+		quota := int(math.Round(totalPrice * common.QuotaPerUnit * groupRatio))
+		items = append(items, ToolCallItem{
+			Name:       name,
+			CallCount:  count,
+			PricePer1K: pricePer1K,
+			TotalPrice: totalPrice,
+			Quota:      quota,
+		})
+		totalQuota += quota
+	}
+
+	if usage.WebSearchCalls > 0 {
+		priceKey := getWebSearchPriceKey(usage.WebSearchModelName)
+		addItem("web_search", usage.WebSearchCalls, operation_setting.GetToolPrice(priceKey))
+	}
+
+	if usage.ClaudeWebSearchCalls > 0 {
+		addItem("claude_web_search", usage.ClaudeWebSearchCalls, operation_setting.GetToolPrice("claude_web_search"))
+	}
+
+	if usage.FileSearchCalls > 0 {
+		addItem("file_search", usage.FileSearchCalls, operation_setting.GetToolPrice("file_search"))
+	}
+
+	if usage.ImageGenerationCall {
+		price := operation_setting.GetGPTImage1PriceOnceCall(usage.ImageGenerationQuality, usage.ImageGenerationSize)
+		quota := int(math.Round(price * common.QuotaPerUnit * groupRatio))
+		items = append(items, ToolCallItem{
+			Name:       "image_generation",
+			CallCount:  1,
+			PricePer1K: price * 1000,
+			TotalPrice: price,
+			Quota:      quota,
+		})
+		totalQuota += quota
+	}
+
+	return ToolCallResult{
+		TotalQuota: totalQuota,
+		Items:      items,
+	}
+}