feat(default): add model performance badges
Add a batched performance summary API for model square cards and show compact latency, throughput, and status metrics without increasing card size. Also fix OTP verification form submission.
This commit is contained in:
@@ -3,6 +3,7 @@ package perfmetrics
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -121,6 +122,77 @@ func Query(params QueryParams) (QueryResult, error) {
|
||||
return buildQueryResult(params.Model, merged), nil
|
||||
}
|
||||
|
||||
func QuerySummaryAll(hours int) (SummaryAllResult, error) {
|
||||
if hours <= 0 {
|
||||
hours = 24
|
||||
}
|
||||
if hours > 24*30 {
|
||||
hours = 24 * 30
|
||||
}
|
||||
endTs := time.Now().Unix()
|
||||
startTs := endTs - int64(hours)*3600
|
||||
|
||||
rows, err := model.GetPerfMetricsSummaryAll(startTs, endTs)
|
||||
if err != nil {
|
||||
return SummaryAllResult{}, err
|
||||
}
|
||||
|
||||
totals := map[string]counters{}
|
||||
for _, row := range rows {
|
||||
totals[row.ModelName] = counters{
|
||||
requestCount: row.RequestCount,
|
||||
successCount: row.SuccessCount,
|
||||
totalLatencyMs: row.TotalLatencyMs,
|
||||
outputTokens: row.OutputTokens,
|
||||
generationMs: row.GenerationMs,
|
||||
}
|
||||
}
|
||||
|
||||
hotBuckets.Range(func(key, value any) bool {
|
||||
k := key.(bucketKey)
|
||||
if k.bucketTs < startTs || k.bucketTs > endTs {
|
||||
return true
|
||||
}
|
||||
snap := value.(*atomicBucket).snapshot()
|
||||
if snap.requestCount == 0 {
|
||||
return true
|
||||
}
|
||||
cur := totals[k.model]
|
||||
cur.requestCount += snap.requestCount
|
||||
cur.successCount += snap.successCount
|
||||
cur.totalLatencyMs += snap.totalLatencyMs
|
||||
cur.outputTokens += snap.outputTokens
|
||||
cur.generationMs += snap.generationMs
|
||||
totals[k.model] = cur
|
||||
return true
|
||||
})
|
||||
|
||||
models := make([]ModelSummary, 0, len(totals))
|
||||
for name, total := range totals {
|
||||
if total.requestCount == 0 {
|
||||
continue
|
||||
}
|
||||
avgLatency := total.totalLatencyMs / total.requestCount
|
||||
successRate := float64(total.successCount) / float64(total.requestCount) * 100
|
||||
avgTps := 0.0
|
||||
if total.generationMs > 0 {
|
||||
avgTps = float64(total.outputTokens) / (float64(total.generationMs) / 1000.0)
|
||||
}
|
||||
models = append(models, ModelSummary{
|
||||
ModelName: name,
|
||||
AvgLatencyMs: avgLatency,
|
||||
SuccessRate: math.Round(successRate*100) / 100,
|
||||
AvgTps: math.Round(avgTps*100) / 100,
|
||||
RequestCount: total.requestCount,
|
||||
})
|
||||
}
|
||||
sort.Slice(models, func(i, j int) bool {
|
||||
return models[i].ModelName < models[j].ModelName
|
||||
})
|
||||
|
||||
return SummaryAllResult{Models: models}, nil
|
||||
}
|
||||
|
||||
func bucketStart(ts int64) int64 {
|
||||
bucketSeconds := perf_metrics_setting.GetBucketSeconds()
|
||||
if bucketSeconds <= 0 {
|
||||
|
||||
@@ -47,6 +47,18 @@ type QueryResult struct {
|
||||
Groups []GroupResult `json:"groups"`
|
||||
}
|
||||
|
||||
// ModelSummary is the aggregated performance snapshot of a single model as
// produced by QuerySummaryAll.
type ModelSummary struct {
	// ModelName is the name of the model the figures belong to.
	ModelName string `json:"model_name"`

	// AvgLatencyMs is the total latency divided by the request count, in
	// whole milliseconds.
	AvgLatencyMs int64 `json:"avg_latency_ms"`

	// SuccessRate is the share of successful requests as a percentage,
	// rounded to two decimals.
	SuccessRate float64 `json:"success_rate"`

	// AvgTps is the number of output tokens per second of generation time,
	// rounded to two decimals; it is 0 when no generation time was recorded.
	AvgTps float64 `json:"avg_tps"`

	// RequestCount is the number of requests included in the summary.
	RequestCount int64 `json:"request_count"`
}
|
||||
|
||||
type SummaryAllResult struct {
|
||||
Models []ModelSummary `json:"models"`
|
||||
}
|
||||
|
||||
type bucketKey struct {
|
||||
model string
|
||||
group string
|
||||
|
||||
Reference in New Issue
Block a user