feat(default): add model performance badges

Add a batched performance summary API for model square cards and show compact latency, throughput, and status metrics without increasing card size. Also fix OTP verification form submission.
This commit is contained in:
CaIon
2026-05-06 22:20:43 +08:00
parent d98f0e8ac3
commit e8cfb546fa
16 changed files with 316 additions and 34 deletions
+72
View File
@@ -3,6 +3,7 @@ package perfmetrics
import (
"context"
"fmt"
"math"
"sort"
"sync"
"time"
@@ -121,6 +122,77 @@ func Query(params QueryParams) (QueryResult, error) {
return buildQueryResult(params.Model, merged), nil
}
// QuerySummaryAll returns one aggregated performance summary per model for the
// trailing window of the given number of hours.
//
// A non-positive hours value falls back to 24, and anything above 30 days is
// clamped to 24*30. Totals are built from the rows returned by
// model.GetPerfMetricsSummaryAll and then topped up with every entry in
// hotBuckets whose bucket timestamp lies within [startTs, endTs]. Models with
// no requests in the window are omitted and the result is sorted by model
// name. The only error returned is the one from
// model.GetPerfMetricsSummaryAll, passed through unchanged.
func QuerySummaryAll(hours int) (SummaryAllResult, error) {
	const (
		defaultHours = 24
		maxHours     = 24 * 30
	)
	if hours <= 0 {
		hours = defaultHours
	}
	if hours > maxHours {
		hours = maxHours
	}

	endTs := time.Now().Unix()
	startTs := endTs - int64(hours)*3600

	rows, err := model.GetPerfMetricsSummaryAll(startTs, endTs)
	if err != nil {
		return SummaryAllResult{}, err
	}

	totals := make(map[string]counters, len(rows))
	for _, row := range rows {
		// Accumulate rather than assign: if several rows share a model name
		// they are summed instead of the last row silently replacing the
		// earlier ones. With one row per model this is identical to a plain
		// assignment.
		cur := totals[row.ModelName]
		cur.requestCount += row.RequestCount
		cur.successCount += row.SuccessCount
		cur.totalLatencyMs += row.TotalLatencyMs
		cur.outputTokens += row.OutputTokens
		cur.generationMs += row.GenerationMs
		totals[row.ModelName] = cur
	}

	// NOTE(review): this assumes entries still present in hotBuckets are not
	// already included in the persisted rows; otherwise they would be counted
	// twice — confirm against the flush path.
	hotBuckets.Range(func(key, value any) bool {
		k := key.(bucketKey)
		if k.bucketTs < startTs || k.bucketTs > endTs {
			return true
		}
		snap := value.(*atomicBucket).snapshot()
		if snap.requestCount == 0 {
			return true
		}
		cur := totals[k.model]
		cur.requestCount += snap.requestCount
		cur.successCount += snap.successCount
		cur.totalLatencyMs += snap.totalLatencyMs
		cur.outputTokens += snap.outputTokens
		cur.generationMs += snap.generationMs
		totals[k.model] = cur
		return true
	})

	models := make([]ModelSummary, 0, len(totals))
	for name, total := range totals {
		// Skipping empty totals also guards the divisions below.
		if total.requestCount == 0 {
			continue
		}

		// Integer division: the average latency is truncated to whole
		// milliseconds.
		avgLatency := total.totalLatencyMs / total.requestCount
		successRate := float64(total.successCount) / float64(total.requestCount) * 100

		// Throughput is output tokens per second of generation time; it stays
		// at zero when no generation time was recorded.
		avgTps := 0.0
		if total.generationMs > 0 {
			avgTps = float64(total.outputTokens) / (float64(total.generationMs) / 1000.0)
		}

		models = append(models, ModelSummary{
			ModelName:    name,
			AvgLatencyMs: avgLatency,
			SuccessRate:  math.Round(successRate*100) / 100, // two decimal places
			AvgTps:       math.Round(avgTps*100) / 100,      // two decimal places
			RequestCount: total.requestCount,
		})
	}

	// Map iteration order is random; sort so the response is deterministic.
	sort.Slice(models, func(i, j int) bool {
		return models[i].ModelName < models[j].ModelName
	})

	return SummaryAllResult{Models: models}, nil
}
func bucketStart(ts int64) int64 {
bucketSeconds := perf_metrics_setting.GetBucketSeconds()
if bucketSeconds <= 0 {
+12
View File
@@ -47,6 +47,18 @@ type QueryResult struct {
Groups []GroupResult `json:"groups"`
}
// ModelSummary is the aggregated performance summary of a single model as
// produced by QuerySummaryAll.
type ModelSummary struct {
// ModelName is the name of the model the figures belong to.
ModelName string `json:"model_name"`
// AvgLatencyMs is total latency divided by request count, truncated to
// whole milliseconds.
AvgLatencyMs int64 `json:"avg_latency_ms"`
// SuccessRate is the share of successful requests as a percentage,
// rounded to two decimal places.
SuccessRate float64 `json:"success_rate"`
// AvgTps is output tokens per second of generation time, rounded to two
// decimal places; it is 0 when no generation time was recorded.
AvgTps float64 `json:"avg_tps"`
// RequestCount is the total number of requests counted for the model.
RequestCount int64 `json:"request_count"`
}
// SummaryAllResult is the result of QuerySummaryAll: one summary per model.
type SummaryAllResult struct {
// Models holds the per-model summaries; QuerySummaryAll fills it sorted by
// model name.
Models []ModelSummary `json:"models"`
}
type bucketKey struct {
model string
group string