feat: Add model performance metrics to dashboard

Add a shared `performance-metrics` feature module for perf metric APIs, DTOs, and formatting, then surface global 24h model performance on the dashboard with cards and a top-model table.

Reuse the shared metrics module from pricing model details, remove duplicated perf API/formatting code from pricing, and add localized labels for the new dashboard performance UI.
This commit is contained in:
t0ng7u
2026-05-08 01:06:44 +08:00
parent a7475a1e67
commit c19d5aa663
19 changed files with 471 additions and 113 deletions
@@ -0,0 +1,298 @@
import { useMemo } from 'react'
import { useQuery } from '@tanstack/react-query'
import { Activity, Gauge, HeartPulse, Timer } from 'lucide-react'
import { useTranslation } from 'react-i18next'
import { formatNumber } from '@/lib/format'
import { cn } from '@/lib/utils'
import { Skeleton } from '@/components/ui/skeleton'
import {
Table,
TableBody,
TableCell,
TableHead,
TableHeader,
TableRow,
} from '@/components/ui/table'
import { getPerfMetricsSummary } from '@/features/performance-metrics/api'
import {
formatLatency,
formatThroughput,
formatUptimePct,
} from '@/features/performance-metrics/lib/format'
import type { PerfModelSummary } from '@/features/performance-metrics/types'
/** Look-back window (hours) sent to the summary endpoint and included in the query key. */
const PERFORMANCE_WINDOW_HOURS = 24
/** Maximum number of rows shown in the per-model table. */
const TOP_MODEL_LIMIT = 8
/** Numeric fields of a summary row that can be averaged with request-count weighting. */
type WeightedMetric = 'avg_latency_ms' | 'avg_tps' | 'success_rate'
/** Aggregate figures rendered by the four overview cards. */
type PerformanceSummary = {
// Sum of `request_count` across all rows.
totalRequests: number
// Request-weighted mean of `avg_latency_ms`, rounded to a whole number.
avgLatencyMs: number
// Request-weighted mean of `avg_tps`.
avgTps: number
// Request-weighted mean of `success_rate`.
successRate: number
}
/**
 * Computes the mean of one metric across `rows`, weighting every row by its
 * `request_count`.
 *
 * Rows whose request count is not a positive number, or whose metric value is
 * rejected by `isValid`, contribute nothing to either the sum or the weight.
 *
 * @param rows - Per-model summary rows.
 * @param metric - Which numeric field of the row to average.
 * @param isValid - Predicate deciding whether a metric value may be counted.
 * @returns The weighted mean, or `0` when no row qualified.
 */
function weightedAverage(
  rows: PerfModelSummary[],
  metric: WeightedMetric,
  isValid: (value: number) => boolean
): number {
  const totals = rows.reduce(
    (acc, entry) => {
      const sample = Number(entry[metric])
      const count = Number(entry.request_count) || 0
      // `isValid` is only consulted for rows that actually carry traffic.
      if (count > 0 && isValid(sample)) {
        acc.weightedSum += sample * count
        acc.totalWeight += count
      }
      return acc
    },
    { weightedSum: 0, totalWeight: 0 }
  )
  return totals.totalWeight > 0 ? totals.weightedSum / totals.totalWeight : 0
}
/**
 * Collapses per-model rows into the headline figures for the overview cards.
 *
 * Latency and throughput only count finite, strictly positive samples;
 * success rate counts any finite sample. All three are weighted by request
 * count, and latency is rounded to a whole number.
 *
 * @param rows - Per-model summary rows.
 * @returns Total requests plus the three request-weighted averages.
 */
function buildPerformanceSummary(rows: PerfModelSummary[]): PerformanceSummary {
  const isPositiveFinite = (value: number): boolean =>
    Number.isFinite(value) && value > 0

  let totalRequests = 0
  for (const row of rows) {
    totalRequests += Number(row.request_count) || 0
  }

  const avgLatencyMs = Math.round(
    weightedAverage(rows, 'avg_latency_ms', isPositiveFinite)
  )
  const avgTps = weightedAverage(rows, 'avg_tps', isPositiveFinite)
  const successRate = weightedAverage(rows, 'success_rate', Number.isFinite)

  return { totalRequests, avgLatencyMs, avgTps, successRate }
}
/**
 * Picks the text colour classes for a success-rate value:
 * emerald at 99.9 and above, amber at 99 and above, rose otherwise.
 */
function successRateClassName(successRate: number): string {
  return successRate >= 99.9
    ? 'text-emerald-600 dark:text-emerald-400'
    : successRate >= 99
      ? 'text-amber-600 dark:text-amber-400'
      : 'text-rose-600 dark:text-rose-400'
}
/**
 * Picks the background colour class for the status dot next to a success
 * rate, using the same 99.9 / 99 thresholds as the text colour.
 */
function successDotClassName(successRate: number): string {
  // Start from the worst tier and upgrade as thresholds are met.
  let tone = 'bg-rose-500'
  if (successRate >= 99) tone = 'bg-amber-500'
  if (successRate >= 99.9) tone = 'bg-emerald-500'
  return tone
}
/**
 * One cell of the overview card grid: an icon and label on top, then either
 * skeleton placeholders (while `loading`) or the value with a hint line.
 * The hint carries `hidden md:block`, so it is only shown from the `md`
 * breakpoint upward.
 */
function PerformanceMetricItem(props: {
  icon: React.ComponentType<{ className?: string }>
  label: string
  value: string
  hint: string
  loading?: boolean
  valueClassName?: string
}) {
  const { icon: Icon, label, value, hint, loading, valueClassName } = props

  const body = loading ? (
    <div className='mt-2 space-y-1.5'>
      <Skeleton className='h-7 w-20' />
      <Skeleton className='h-3.5 w-28' />
    </div>
  ) : (
    <>
      <div
        className={cn(
          'text-foreground mt-1.5 font-mono text-lg font-bold tracking-tight tabular-nums sm:mt-2 sm:text-2xl',
          valueClassName
        )}
      >
        {value}
      </div>
      <div className='text-muted-foreground/60 mt-1 hidden text-xs md:block'>
        {hint}
      </div>
    </>
  )

  return (
    <div className='px-3 py-2.5 sm:px-5 sm:py-4'>
      <div className='flex items-center gap-2'>
        <Icon
          className='text-muted-foreground/60 size-3.5 shrink-0'
          aria-hidden='true'
        />
        <div className='text-muted-foreground truncate text-xs font-medium tracking-wider uppercase'>
          {label}
        </div>
      </div>
      {body}
    </div>
  )
}
/**
 * Title bar above the per-model table: an activity icon with the translated
 * section title, and the caller-supplied description beside it.
 */
function PerformanceTableHeader(props: { description: string }) {
  const { description } = props
  const { t } = useTranslation()
  const title = t('Model performance metrics')

  return (
    <div className='flex flex-col gap-1.5 border-b px-3 py-2 sm:px-5 sm:py-3 lg:flex-row lg:items-center lg:justify-between'>
      <div className='flex items-center gap-2'>
        <Activity className='text-muted-foreground/60 size-4' />
        <div className='text-sm font-semibold'>{title}</div>
      </div>
      <span className='text-muted-foreground text-xs'>{description}</span>
    </div>
  )
}
/**
 * Dashboard section summarising model performance over the last
 * `PERFORMANCE_WINDOW_HOURS` hours.
 *
 * Fetches the per-model summary once (60 s stale time, no retries), keeps
 * only models with at least one request, sorts them by request count
 * descending, and renders:
 *  - four cards (total requests, weighted latency, weighted throughput,
 *    weighted success rate), and
 *  - a table of the `TOP_MODEL_LIMIT` busiest models.
 *
 * While the query is loading both parts show skeletons. When no rows are
 * available (empty result or failed request) the table is replaced by an
 * empty-state message and the success-rate card shows a neutral "—" instead
 * of a misleading red "0.00%".
 */
export function PerformanceOverview() {
  const { t } = useTranslation()
  const metricsQuery = useQuery({
    queryKey: ['perf-metrics-summary', PERFORMANCE_WINDOW_HOURS],
    queryFn: () => getPerfMetricsSummary(PERFORMANCE_WINDOW_HOURS),
    staleTime: 60 * 1000,
    retry: false,
  })
  // The inner `data` object is optional-chained as well: a payload without it
  // (e.g. a failure envelope carrying only `success`/`message`) must not
  // throw during render.
  const models = useMemo(
    () =>
      [...(metricsQuery.data?.data?.models ?? [])]
        .filter((model) => Number(model.request_count) > 0)
        .sort((a, b) => b.request_count - a.request_count),
    [metricsQuery.data]
  )
  const summary = useMemo(() => buildPerformanceSummary(models), [models])
  const topModels = useMemo(() => models.slice(0, TOP_MODEL_LIMIT), [models])
  const loading = metricsQuery.isLoading
  const hasData = models.length > 0
  const description = t('Performance metrics for the last 24 hours')
  // With zero rows the weighted success rate defaults to 0; show the same
  // "—" placeholder the latency/throughput formatters use, without the
  // failure colour, because nothing was actually measured.
  const successRateValue = hasData ? formatUptimePct(summary.successRate) : '—'
  const successRateTone = hasData
    ? successRateClassName(summary.successRate)
    : undefined

  return (
    <section className='space-y-3 sm:space-y-4'>
      <div className='overflow-hidden rounded-lg border'>
        <div className='divide-border/60 grid grid-cols-2 divide-x sm:grid-cols-4'>
          <PerformanceMetricItem
            icon={Activity}
            label={t('Requests (24h)')}
            value={formatNumber(summary.totalRequests)}
            hint={t('Monitored relay requests')}
            loading={loading}
          />
          <PerformanceMetricItem
            icon={Timer}
            label={t('Average latency')}
            value={formatLatency(summary.avgLatencyMs)}
            hint={t('Weighted by request count')}
            loading={loading}
          />
          <PerformanceMetricItem
            icon={Gauge}
            label={t('Throughput')}
            value={formatThroughput(summary.avgTps)}
            hint='TPS'
            loading={loading}
          />
          <PerformanceMetricItem
            icon={HeartPulse}
            label={t('Success rate')}
            value={successRateValue}
            hint={t('Weighted by request count')}
            loading={loading}
            valueClassName={successRateTone}
          />
        </div>
      </div>
      <div className='overflow-hidden rounded-lg border'>
        <PerformanceTableHeader description={description} />
        {!loading && !hasData ? (
          <div className='text-muted-foreground p-6 text-center text-sm'>
            {t('No performance data available')}
          </div>
        ) : (
          <div className='overflow-x-auto'>
            <Table className='text-sm'>
              <TableHeader>
                <TableRow className='hover:bg-transparent'>
                  <TableHead>{t('Model')}</TableHead>
                  <TableHead className='text-right'>
                    {t('Requests (24h)')}
                  </TableHead>
                  <TableHead className='text-right'>
                    {t('Average latency')}
                  </TableHead>
                  <TableHead className='text-right'>
                    {t('Throughput')}
                  </TableHead>
                  <TableHead className='text-right'>
                    {t('Success rate')}
                  </TableHead>
                </TableRow>
              </TableHeader>
              <TableBody>
                {loading
                  ? Array.from({ length: 4 }).map((_, index) => (
                      <TableRow key={index}>
                        <TableCell>
                          <Skeleton className='h-4 w-40' />
                        </TableCell>
                        <TableCell className='text-right'>
                          <Skeleton className='ml-auto h-4 w-16' />
                        </TableCell>
                        <TableCell className='text-right'>
                          <Skeleton className='ml-auto h-4 w-16' />
                        </TableCell>
                        <TableCell className='text-right'>
                          <Skeleton className='ml-auto h-4 w-16' />
                        </TableCell>
                        <TableCell className='text-right'>
                          <Skeleton className='ml-auto h-4 w-20' />
                        </TableCell>
                      </TableRow>
                    ))
                  : topModels.map((model) => (
                      <TableRow key={model.model_name}>
                        <TableCell className='max-w-[220px] truncate font-mono'>
                          {model.model_name}
                        </TableCell>
                        <TableCell className='text-right font-mono tabular-nums'>
                          {formatNumber(model.request_count)}
                        </TableCell>
                        <TableCell className='text-right font-mono tabular-nums'>
                          {formatLatency(model.avg_latency_ms)}
                        </TableCell>
                        <TableCell className='text-right font-mono tabular-nums'>
                          {formatThroughput(model.avg_tps)}
                        </TableCell>
                        <TableCell
                          className={cn(
                            'text-right font-mono font-semibold tabular-nums',
                            successRateClassName(model.success_rate)
                          )}
                        >
                          <span className='inline-flex items-center justify-end gap-1.5'>
                            <span
                              className={cn(
                                'size-2 rounded-full',
                                successDotClassName(model.success_rate)
                              )}
                              aria-hidden='true'
                            />
                            {formatUptimePct(model.success_rate)}
                          </span>
                        </TableCell>
                      </TableRow>
                    ))}
              </TableBody>
            </Table>
          </div>
        )}
      </div>
    </section>
  )
}
+37 -1
View File
@@ -47,6 +47,12 @@ const LazyConsumptionDistributionChart = lazy(() =>
}))
)
// Loads the performance overview through a dynamic import and re-exposes its
// named `PerformanceOverview` export as `default`, the shape `lazy` is given.
const LazyPerformanceOverview = lazy(() =>
import('./components/models/performance-overview').then((m) => ({
default: m.PerformanceOverview,
}))
)
const LazyUserCharts = lazy(() =>
import('./components/users/user-charts').then((m) => ({
default: m.UserCharts,
@@ -83,6 +89,31 @@ function ModelChartsFallback() {
)
}
/**
 * Suspense placeholder for the lazily loaded performance overview: four
 * skeleton cards in the same grid as the real section, followed by a
 * skeleton header row and a single block standing in for the table.
 */
function PerformanceOverviewFallback() {
  const cardPlaceholders = Array.from({ length: 4 }, (_, slot) => (
    <div key={slot} className='px-3 py-2.5 sm:px-5 sm:py-4'>
      <Skeleton className='h-4 w-24' />
      <Skeleton className='mt-2 h-7 w-20' />
      <Skeleton className='mt-1.5 h-3.5 w-28' />
    </div>
  ))

  return (
    <div className='space-y-3 sm:space-y-4'>
      <div className='overflow-hidden rounded-lg border'>
        <div className='divide-border/60 grid grid-cols-2 divide-x sm:grid-cols-4'>
          {cardPlaceholders}
        </div>
      </div>
      <div className='overflow-hidden rounded-lg border'>
        <div className='flex items-center justify-between border-b px-4 py-3 sm:px-5'>
          <Skeleton className='h-5 w-40' />
          <Skeleton className='h-4 w-48' />
        </div>
        <Skeleton className='h-44 w-full' />
      </div>
    </div>
  )
}
const SECTION_META: Record<
DashboardSectionId,
{ titleKey: string; descriptionKey: string }
@@ -219,6 +250,11 @@ export function Dashboard() {
</Suspense>
</FadeIn>
<FadeIn delay={0.1}>
<Suspense fallback={<PerformanceOverviewFallback />}>
<LazyPerformanceOverview />
</Suspense>
</FadeIn>
<FadeIn delay={0.15}>
<Suspense fallback={<ModelChartsFallback />}>
<LazyConsumptionDistributionChart
data={modelData}
@@ -232,7 +268,7 @@ export function Dashboard() {
/>
</Suspense>
</FadeIn>
<FadeIn delay={0.15}>
<FadeIn delay={0.2}>
<Suspense fallback={<ModelChartsFallback />}>
<LazyModelCharts
data={modelData}
+24
View File
@@ -0,0 +1,24 @@
import { api } from '@/lib/api'
import type { PerformanceMetricsData, PerfSummaryAllData } from './types'
/**
 * Requests the per-model performance summary from
 * `/api/perf-metrics/summary`.
 *
 * @param hours - Look-back window forwarded through the client's `params`
 *   option; defaults to 24.
 * @returns The response body as delivered by the `api` client.
 */
export async function getPerfMetricsSummary(
  hours = 24
): Promise<PerfSummaryAllData> {
  const { data } = await api.get<PerfSummaryAllData>(
    '/api/perf-metrics/summary',
    { params: { hours } }
  )
  return data
}
/**
 * Requests performance metrics for a single model from `/api/perf-metrics`.
 *
 * @param modelName - Sent as the `model` parameter.
 * @param hours - Look-back window sent as the `hours` parameter; defaults
 *   to 24.
 * @returns The response body as delivered by the `api` client.
 */
export async function getPerfMetrics(
  modelName: string,
  hours = 24
): Promise<PerformanceMetricsData> {
  const query = { model: modelName, hours }
  const response = await api.get<PerformanceMetricsData>('/api/perf-metrics', {
    params: query,
  })
  return response.data
}
@@ -0,0 +1,16 @@
/**
 * Formats a tokens-per-second figure for display.
 *
 * - Non-finite, missing, zero or negative input renders as "—". Without the
 *   finite check `NaN` became "NaN t/s", `Infinity` became "InfinityK t/s",
 *   and a field absent at runtime threw on `.toFixed`.
 * - Values of 1000 and above are shown in thousands with one decimal
 *   ("1.5K t/s").
 * - Values below 10 keep two decimals, everything else one.
 *
 * @param tps - Throughput in tokens per second.
 * @returns The display string.
 */
export function formatThroughput(tps: number): string {
  if (!Number.isFinite(tps) || tps <= 0) return '—'
  if (tps >= 1_000) return `${(tps / 1_000).toFixed(1)}K t/s`
  return `${tps.toFixed(tps < 10 ? 2 : 1)} t/s`
}
/**
 * Formats a latency given in milliseconds.
 *
 * Non-finite, zero or negative input renders as "—"; values from 1000 ms
 * upward are shown in seconds with two decimals, anything smaller as whole
 * milliseconds.
 *
 * @param ms - Latency in milliseconds.
 * @returns The display string.
 */
export function formatLatency(ms: number): string {
  const usable = Number.isFinite(ms) && ms > 0
  if (!usable) return '—'
  return ms < 1_000 ? `${Math.round(ms)}ms` : `${(ms / 1_000).toFixed(2)}s`
}
/**
 * Formats a percentage with two decimals and a trailing "%".
 * Non-finite input renders as "—".
 *
 * @param pct - Percentage value.
 * @returns The display string.
 */
export function formatUptimePct(pct: number): string {
  return Number.isFinite(pct) ? `${pct.toFixed(2)}%` : '—'
}
+42
View File
@@ -0,0 +1,42 @@
/** Element type of `PerformanceGroup.series`: one sample of the four metrics. */
export type PerformanceSeriesPoint = {
// Sample timestamp. NOTE(review): unit (seconds vs. milliseconds) is not visible here — confirm against the API.
ts: number
// NOTE(review): presumably average time-to-first-token in milliseconds — confirm.
avg_ttft_ms: number
// Average latency; milliseconds per the field name.
avg_latency_ms: number
// Success rate. NOTE(review): presumably a 0–100 percentage — confirm.
success_rate: number
// Average throughput. NOTE(review): presumably tokens per second — confirm.
avg_tps: number
}
/** Metrics for one group: aggregate values plus the series they accompany. */
export type PerformanceGroup = {
// Group identifier.
group: string
// NOTE(review): presumably average time-to-first-token in milliseconds — confirm.
avg_ttft_ms: number
// Average latency; milliseconds per the field name.
avg_latency_ms: number
// Success rate. NOTE(review): presumably a 0–100 percentage — confirm.
success_rate: number
// Average throughput. NOTE(review): presumably tokens per second — confirm.
avg_tps: number
// Time-series samples for this group.
series: PerformanceSeriesPoint[]
}
/** Response envelope that `getPerfMetrics` resolves to. */
export type PerformanceMetricsData = {
success: boolean
// Optional server-provided message.
message?: string
data: {
model_name: string
// NOTE(review): meaning of the schema identifier is not visible here — confirm against the API.
series_schema?: string
// One entry per group.
groups: PerformanceGroup[]
}
}
/** One model's aggregate row in the summary response. */
export type PerfModelSummary = {
model_name: string
// Average latency; milliseconds per the field name.
avg_latency_ms: number
// Success rate. NOTE(review): presumably a 0–100 percentage — confirm.
success_rate: number
// Average throughput. NOTE(review): presumably tokens per second — confirm.
avg_tps: number
// Number of requests behind the averages above.
request_count: number
}
/** Response envelope that `getPerfMetricsSummary` resolves to. */
export type PerfSummaryAllData = {
success: boolean
// Optional server-provided message.
message?: string
data: {
// One aggregate row per model.
models: PerfModelSummary[]
}
}
-62
View File
@@ -10,65 +10,3 @@ export async function getPricing(): Promise<PricingData> {
const res = await api.get('/api/pricing')
return res.data
}
/** Element type of `PerformanceGroup.series`: one sample of the four metrics. */
export type PerformanceSeriesPoint = {
// Sample timestamp. NOTE(review): unit (seconds vs. milliseconds) is not visible here — confirm against the API.
ts: number
// NOTE(review): presumably average time-to-first-token in milliseconds — confirm.
avg_ttft_ms: number
// Average latency; milliseconds per the field name.
avg_latency_ms: number
// Success rate. NOTE(review): presumably a 0–100 percentage — confirm.
success_rate: number
// Average throughput. NOTE(review): presumably tokens per second — confirm.
avg_tps: number
}
/** Metrics for one group: aggregate values plus the series they accompany. */
export type PerformanceGroup = {
// Group identifier.
group: string
// NOTE(review): presumably average time-to-first-token in milliseconds — confirm.
avg_ttft_ms: number
// Average latency; milliseconds per the field name.
avg_latency_ms: number
// Success rate. NOTE(review): presumably a 0–100 percentage — confirm.
success_rate: number
// Average throughput. NOTE(review): presumably tokens per second — confirm.
avg_tps: number
// Time-series samples for this group.
series: PerformanceSeriesPoint[]
}
/** Response envelope that `getPerfMetrics` resolves to. */
export type PerformanceMetricsData = {
success: boolean
// Optional server-provided message.
message?: string
data: {
model_name: string
// NOTE(review): meaning of the schema identifier is not visible here — confirm against the API.
series_schema?: string
// One entry per group.
groups: PerformanceGroup[]
}
}
/** One model's aggregate row in the summary response. */
export type PerfModelSummary = {
model_name: string
// Average latency; milliseconds per the field name.
avg_latency_ms: number
// Success rate. NOTE(review): presumably a 0–100 percentage — confirm.
success_rate: number
// Average throughput. NOTE(review): presumably tokens per second — confirm.
avg_tps: number
// Number of requests behind the averages above.
request_count: number
}
/** Response envelope that `getPerfMetricsSummary` resolves to. */
export type PerfSummaryAllData = {
success: boolean
// Optional server-provided message.
message?: string
data: {
// One aggregate row per model.
models: PerfModelSummary[]
}
}
/**
 * Requests the per-model performance summary, with the look-back window
 * interpolated into the query string.
 *
 * @param hours - Look-back window; defaults to 24.
 * @returns The response body as delivered by the `api` client.
 */
export async function getPerfMetricsSummary(
  hours = 24
): Promise<PerfSummaryAllData> {
  const endpoint = `/api/perf-metrics/summary?hours=${hours}`
  const { data } = await api.get(endpoint)
  return data
}
/**
 * Requests performance metrics for a single model. The `model` and `hours`
 * values are URL-encoded via `URLSearchParams` and appended to the path.
 *
 * @param modelName - Sent as the `model` query parameter.
 * @param hours - Look-back window sent as `hours`; defaults to 24.
 * @returns The response body as delivered by the `api` client.
 */
export async function getPerfMetrics(
  modelName: string,
  hours = 24
): Promise<PerformanceMetricsData> {
  const query = new URLSearchParams()
  query.set('model', modelName)
  query.set('hours', String(hours))
  const { data } = await api.get(`/api/perf-metrics?${query.toString()}`)
  return data
}
@@ -1,9 +1,9 @@
import { useEffect, useMemo, useState } from 'react'
import { useMemo, useState } from 'react'
import { useQuery } from '@tanstack/react-query'
import { ChevronLeft, ChevronRight } from 'lucide-react'
import { useTranslation } from 'react-i18next'
import { Button } from '@/components/ui/button'
import { getPerfMetricsSummary } from '../api'
import { getPerfMetricsSummary } from '@/features/performance-metrics/api'
import { DEFAULT_PRICING_PAGE_SIZE, DEFAULT_TOKEN_UNIT } from '../constants'
import type { PricingModel, TokenUnit } from '../types'
import { ModelCard } from './model-card'
@@ -24,22 +24,19 @@ export function ModelCardGrid(props: ModelCardGridProps) {
const pageSize = DEFAULT_PRICING_PAGE_SIZE
const tokenUnit = props.tokenUnit ?? DEFAULT_TOKEN_UNIT
const totalPages = Math.max(1, Math.ceil(props.models.length / pageSize))
const currentPage = Math.min(page, totalPages)
const perfQuery = useQuery({
queryKey: ['perf-metrics-summary'],
queryKey: ['perf-metrics-summary', 24],
queryFn: () => getPerfMetricsSummary(24),
staleTime: 60 * 1000,
retry: false,
})
useEffect(() => {
setPage(1)
}, [props.models])
const pagedModels = useMemo(() => {
const start = (page - 1) * pageSize
const start = (currentPage - 1) * pageSize
return props.models.slice(start, start + pageSize)
}, [page, pageSize, props.models])
}, [currentPage, pageSize, props.models])
const perfMap = useMemo(() => {
const map = new Map<string, ModelPerfBadgeData>()
@@ -76,7 +73,7 @@ export function ModelCardGrid(props: ModelCardGridProps) {
<div className='text-muted-foreground flex flex-col items-center justify-between gap-3 border-t px-4 py-3 text-sm sm:flex-row'>
<p className='text-muted-foreground'>
{t('Page {{current}} of {{total}}', {
current: page,
current: currentPage,
total: totalPages,
})}
</p>
@@ -86,7 +83,7 @@ export function ModelCardGrid(props: ModelCardGridProps) {
variant='outline'
size='sm'
onClick={() => setPage((current) => Math.max(1, current - 1))}
disabled={page <= 1}
disabled={currentPage <= 1}
className='gap-1.5'
>
<ChevronLeft className='size-4' />
@@ -99,7 +96,7 @@ export function ModelCardGrid(props: ModelCardGridProps) {
onClick={() =>
setPage((current) => Math.min(totalPages, current + 1))
}
disabled={page >= totalPages}
disabled={currentPage >= totalPages}
className='gap-1.5'
>
{t('Next')}
+1 -2
View File
@@ -14,8 +14,7 @@ import { parseTags } from '../lib/filters'
import { isTokenBasedModel } from '../lib/model-helpers'
import { formatPrice, formatRequestPrice } from '../lib/price'
import type { PricingModel, TokenUnit } from '../types'
import { ModelPerfBadge } from './model-perf-badge'
import type { ModelPerfBadgeData } from './model-perf-badge'
import { ModelPerfBadge, type ModelPerfBadgeData } from './model-perf-badge'
export interface ModelCardProps {
model: PricingModel
@@ -12,13 +12,14 @@ import {
TableRow,
} from '@/components/ui/table'
import { GroupBadge } from '@/components/group-badge'
import { getPerfMetrics, type PerformanceGroup } from '../api'
import { getPerfMetrics } from '@/features/performance-metrics/api'
import {
formatLatency,
formatThroughput,
formatUptimePct,
type UptimeDayPoint,
} from '../lib/mock-stats'
} from '@/features/performance-metrics/lib/format'
import type { PerformanceGroup } from '@/features/performance-metrics/types'
import { type UptimeDayPoint } from '../lib/mock-stats'
import type { PricingModel } from '../types'
import { LatencyTrendChart, UptimeTrendChart } from './model-details-charts'
import { UptimeSparkline } from './model-details-uptime-sparkline'
@@ -142,7 +143,10 @@ export function ModelDetailsPerformance(props: { model: PricingModel }) {
queryFn: () => getPerfMetrics(props.model.model_name, 24),
staleTime: 60 * 1000,
})
const groups = metricsQuery.data?.data.groups ?? []
const groups = useMemo(
() => metricsQuery.data?.data.groups ?? [],
[metricsQuery.data]
)
const performances = useMemo<PerformanceRow[]>(
() =>
groups.map((group) => ({
@@ -7,11 +7,8 @@ import {
TooltipContent,
TooltipTrigger,
} from '@/components/ui/tooltip'
import {
aggregateUptime,
formatUptimePct,
type UptimeDayPoint,
} from '../lib/mock-stats'
import { formatUptimePct } from '@/features/performance-metrics/lib/format'
import { aggregateUptime, type UptimeDayPoint } from '../lib/mock-stats'
// ---------------------------------------------------------------------------
// Uptime sparkline
@@ -26,7 +26,12 @@ import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs'
import { CopyButton } from '@/components/copy-button'
import { GroupBadge } from '@/components/group-badge'
import { PublicLayout } from '@/components/layout'
import { getPerfMetrics } from '../api'
import { getPerfMetrics } from '@/features/performance-metrics/api'
import {
formatLatency,
formatThroughput,
formatUptimePct,
} from '@/features/performance-metrics/lib/format'
import { DEFAULT_TOKEN_UNIT, QUOTA_TYPE_VALUES } from '../constants'
import { usePricingData } from '../hooks/use-pricing-data'
import {
@@ -36,11 +41,6 @@ import {
isDynamicPricingModel,
} from '../lib/dynamic-price'
import { parseTags } from '../lib/filters'
import {
formatLatency,
formatThroughput,
formatUptimePct,
} from '../lib/mock-stats'
import { getAvailableGroups, isTokenBasedModel } from '../lib/model-helpers'
import { inferModelMetadata } from '../lib/model-metadata'
import { formatFixedPrice, formatGroupPrice } from '../lib/price'
@@ -1,7 +1,10 @@
import { memo } from 'react'
import { useTranslation } from 'react-i18next'
import { cn } from '@/lib/utils'
import { formatLatency, formatThroughput } from '../lib/mock-stats'
import {
formatLatency,
formatThroughput,
} from '@/features/performance-metrics/lib/format'
export type ModelPerfBadgeData = {
avg_latency_ms: number
-20
View File
@@ -464,26 +464,6 @@ export function aggregateUptime(points: UptimeDayPoint[]): {
}
}
/**
 * Format throughput (tokens per second) for display.
 *
 * - Non-finite, missing, zero or negative input renders as "—". Without the
 *   finite check `NaN` became "NaN t/s", `Infinity` became "InfinityK t/s",
 *   and a field absent at runtime threw on `.toFixed`.
 * - Values of 1000 and above are shown in thousands with one decimal.
 * - Values below 10 keep two decimals, everything else one.
 */
export function formatThroughput(tps: number): string {
  if (!Number.isFinite(tps) || tps <= 0) return '—'
  if (tps >= 1_000) return `${(tps / 1_000).toFixed(1)}K t/s`
  return `${tps.toFixed(tps < 10 ? 2 : 1)} t/s`
}
/**
 * Format a millisecond latency, switching to seconds (two decimals) from
 * 1000 ms upward. Non-finite, zero or negative input renders as "—".
 */
export function formatLatency(ms: number): string {
  const usable = Number.isFinite(ms) && ms > 0
  if (!usable) return '—'
  return ms < 1_000 ? `${Math.round(ms)}ms` : `${(ms / 1_000).toFixed(2)}s`
}
/**
 * Format an uptime percentage with two decimals and a trailing "%".
 * Non-finite input renders as "—".
 */
export function formatUptimePct(pct: number): string {
  return Number.isFinite(pct) ? `${pct.toFixed(2)}%` : '—'
}
/** Compact integer formatter for token counts in apps tab. */
export function formatTokenVolume(n: number): string {
if (!Number.isFinite(n) || n <= 0) return '0'