feat(default): add model performance badges

Add a batched performance summary API for model square cards and show compact latency, throughput, and status metrics without increasing card size. Also fix OTP verification form submission.
2026-05-06 22:20:43 +08:00
parent d98f0e8ac3
commit e8cfb546fa
16 changed files with 316 additions and 34 deletions
@@ -3,6 +3,7 @@ package perfmetrics
 import (
 	"context"
 	"fmt"
+	"math"
 	"sort"
 	"sync"
 	"time"
@@ -121,6 +122,77 @@ func Query(params QueryParams) (QueryResult, error) {
 	return buildQueryResult(params.Model, merged), nil
 }

+func QuerySummaryAll(hours int) (SummaryAllResult, error) {
+	if hours <= 0 {
+		hours = 24
+	}
+	if hours > 24*30 {
+		hours = 24 * 30
+	}
+	endTs := time.Now().Unix()
+	startTs := endTs - int64(hours)*3600
+
+	rows, err := model.GetPerfMetricsSummaryAll(startTs, endTs)
+	if err != nil {
+		return SummaryAllResult{}, err
+	}
+
+	totals := map[string]counters{}
+	for _, row := range rows {
+		totals[row.ModelName] = counters{
+			requestCount:   row.RequestCount,
+			successCount:   row.SuccessCount,
+			totalLatencyMs: row.TotalLatencyMs,
+			outputTokens:   row.OutputTokens,
+			generationMs:   row.GenerationMs,
+		}
+	}
+
+	hotBuckets.Range(func(key, value any) bool {
+		k := key.(bucketKey)
+		if k.bucketTs < startTs || k.bucketTs > endTs {
+			return true
+		}
+		snap := value.(*atomicBucket).snapshot()
+		if snap.requestCount == 0 {
+			return true
+		}
+		cur := totals[k.model]
+		cur.requestCount += snap.requestCount
+		cur.successCount += snap.successCount
+		cur.totalLatencyMs += snap.totalLatencyMs
+		cur.outputTokens += snap.outputTokens
+		cur.generationMs += snap.generationMs
+		totals[k.model] = cur
+		return true
+	})
+
+	models := make([]ModelSummary, 0, len(totals))
+	for name, total := range totals {
+		if total.requestCount == 0 {
+			continue
+		}
+		avgLatency := total.totalLatencyMs / total.requestCount
+		successRate := float64(total.successCount) / float64(total.requestCount) * 100
+		avgTps := 0.0
+		if total.generationMs > 0 {
+			avgTps = float64(total.outputTokens) / (float64(total.generationMs) / 1000.0)
+		}
+		models = append(models, ModelSummary{
+			ModelName:    name,
+			AvgLatencyMs: avgLatency,
+			SuccessRate:  math.Round(successRate*100) / 100,
+			AvgTps:       math.Round(avgTps*100) / 100,
+			RequestCount: total.requestCount,
+		})
+	}
+	sort.Slice(models, func(i, j int) bool {
+		return models[i].ModelName < models[j].ModelName
+	})
+
+	return SummaryAllResult{Models: models}, nil
+}
+
 func bucketStart(ts int64) int64 {
 	bucketSeconds := perf_metrics_setting.GetBucketSeconds()
 	if bucketSeconds <= 0 {
@@ -47,6 +47,18 @@ type QueryResult struct {
 	Groups       []GroupResult `json:"groups"`
 }

+type ModelSummary struct { // per-model aggregate produced by QuerySummaryAll
+	ModelName    string  `json:"model_name"`     // model name the counters were totalled under
+	AvgLatencyMs int64   `json:"avg_latency_ms"` // total latency in ms divided by request count (integer division)
+	SuccessRate  float64 `json:"success_rate"`   // successes / requests times 100, rounded to two decimals
+	AvgTps       float64 `json:"avg_tps"`        // output tokens per second of generation time, rounded to two decimals; 0 when generation time is not positive
+	RequestCount int64   `json:"request_count"`  // total requests the averages above are based on
+}
+
+type SummaryAllResult struct { // result value returned by QuerySummaryAll
+	Models []ModelSummary `json:"models"` // one entry per model with at least one request; QuerySummaryAll sorts them by model name
+}
+
 type bucketKey struct {
 	model    string
 	group    string