feat: collect model performance metrics (#4635)

This commit is contained in:
Calcium-Ion
2026-05-06 13:55:23 +08:00
committed by GitHub
parent 8b2b03d276
commit 9acf5fecae
26 changed files with 1078 additions and 120 deletions
+6 -2
View File
@@ -14,6 +14,7 @@ import (
"github.com/QuantumNous/new-api/logger"
"github.com/QuantumNous/new-api/model"
"github.com/QuantumNous/new-api/pkg/billingexpr"
perfmetrics "github.com/QuantumNous/new-api/pkg/perf_metrics"
relaycommon "github.com/QuantumNous/new-api/relay/common"
"github.com/QuantumNous/new-api/setting/ratio_setting"
"github.com/QuantumNous/new-api/setting/system_setting"
@@ -219,7 +220,7 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod
// in this case, must be some error happened
// we cannot just return, because we may have to return the pre-consumed quota
quota = 0
logContent += fmt.Sprintf("(可能是上游超时)")
logContent += "(可能是上游超时)"
logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
"tokenId %d, model %s pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, modelName, relayInfo.FinalPreConsumedQuota))
} else {
@@ -340,7 +341,7 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, u
// in this case, must be some error happened
// we cannot just return, because we may have to return the pre-consumed quota
quota = 0
logContent += fmt.Sprintf("(可能是上游超时)")
logContent += "(可能是上游超时)"
logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
"tokenId %d, model %s pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, relayInfo.OriginModelName, relayInfo.FinalPreConsumedQuota))
} else {
@@ -375,6 +376,9 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, u
Group: relayInfo.UsingGroup,
Other: other,
})
gopool.Go(func() {
perfmetrics.RecordRelaySample(relayInfo, true)
})
}
func PreConsumeTokenQuota(relayInfo *relaycommon.RelayInfo, quota int) error {
+5
View File
@@ -11,10 +11,12 @@ import (
"github.com/QuantumNous/new-api/logger"
"github.com/QuantumNous/new-api/model"
"github.com/QuantumNous/new-api/pkg/billingexpr"
perfmetrics "github.com/QuantumNous/new-api/pkg/perf_metrics"
relaycommon "github.com/QuantumNous/new-api/relay/common"
"github.com/QuantumNous/new-api/setting/operation_setting"
"github.com/QuantumNous/new-api/types"
"github.com/bytedance/gopkg/util/gopool"
"github.com/gin-gonic/gin"
"github.com/shopspring/decimal"
)
@@ -471,4 +473,7 @@ func PostTextConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, us
Group: relayInfo.UsingGroup,
Other: other,
})
gopool.Go(func() {
perfmetrics.RecordRelaySample(relayInfo, true)
})
}