diff --git a/relay/channel/aws/constants.go b/relay/channel/aws/constants.go index ff1f377e..d24e02eb 100644 --- a/relay/channel/aws/constants.go +++ b/relay/channel/aws/constants.go @@ -19,6 +19,7 @@ var awsModelIDMap = map[string]string{ "claude-opus-4-5-20251101": "anthropic.claude-opus-4-5-20251101-v1:0", "claude-opus-4-6": "anthropic.claude-opus-4-6-v1", "claude-opus-4-7": "anthropic.claude-opus-4-7", + "claude-opus-4-8": "anthropic.claude-opus-4-8", // Nova models "nova-micro-v1:0": "amazon.nova-micro-v1:0", "nova-lite-v1:0": "amazon.nova-lite-v1:0", @@ -97,6 +98,11 @@ var awsModelCanCrossRegionMap = map[string]map[string]bool{ "ap": true, "eu": true, }, + "anthropic.claude-opus-4-8": { + "us": true, + "ap": true, + "eu": true, + }, "anthropic.claude-haiku-4-5-20251001-v1:0": { "us": true, "ap": true, diff --git a/relay/channel/claude/constants.go b/relay/channel/claude/constants.go index 3c516aef..0e7ba865 100644 --- a/relay/channel/claude/constants.go +++ b/relay/channel/claude/constants.go @@ -33,6 +33,13 @@ var ModelList = []string{ "claude-opus-4-7-medium", "claude-opus-4-7-low", "claude-opus-4-7-thinking", + "claude-opus-4-8", + "claude-opus-4-8-max", + "claude-opus-4-8-xhigh", + "claude-opus-4-8-high", + "claude-opus-4-8-medium", + "claude-opus-4-8-low", + "claude-opus-4-8-thinking", } var ChannelName = "claude" diff --git a/relay/channel/claude/relay-claude.go b/relay/channel/claude/relay-claude.go index 6356cd35..18d7455e 100644 --- a/relay/channel/claude/relay-claude.go +++ b/relay/channel/claude/relay-claude.go @@ -154,14 +154,17 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe } if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(textRequest.Model); ok && effortLevel != "" && - (strings.HasPrefix(textRequest.Model, "claude-opus-4-6") || strings.HasPrefix(textRequest.Model, "claude-opus-4-7")) { + (strings.HasPrefix(textRequest.Model, "claude-opus-4-6") || + strings.HasPrefix(textRequest.Model, "claude-opus-4-7") || + strings.HasPrefix(textRequest.Model, "claude-opus-4-8")) { claudeRequest.Model = baseModel claudeRequest.Thinking = &dto.Thinking{ Type: "adaptive", } claudeRequest.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel)) - if strings.HasPrefix(baseModel, "claude-opus-4-7") { - // Opus 4.7 rejects non-default temperature/top_p/top_k with 400 + if strings.HasPrefix(baseModel, "claude-opus-4-7") || + strings.HasPrefix(baseModel, "claude-opus-4-8") { + // Opus 4.7/4.8 reject non-default temperature/top_p/top_k with 400 // and defaults display to "omitted"; restore the 4.6 visible summary. claudeRequest.Thinking.Display = "summarized" claudeRequest.Temperature = nil @@ -175,8 +178,9 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe strings.HasSuffix(textRequest.Model, "-thinking") { trimmedModel := strings.TrimSuffix(textRequest.Model, "-thinking") - if strings.HasPrefix(trimmedModel, "claude-opus-4-7") { - // Opus 4.7 rejects thinking.type="enabled"; use adaptive at high effort. + if strings.HasPrefix(trimmedModel, "claude-opus-4-7") || + strings.HasPrefix(trimmedModel, "claude-opus-4-8") { + // Opus 4.7/4.8 reject thinking.type="enabled"; use adaptive at high effort. claudeRequest.Thinking = &dto.Thinking{Type: "adaptive", Display: "summarized"} claudeRequest.OutputConfig = json.RawMessage(`{"effort":"high"}`) claudeRequest.Temperature = nil diff --git a/relay/channel/claude/relay_claude_test.go b/relay/channel/claude/relay_claude_test.go index fdc7b38e..495c500b 100644 --- a/relay/channel/claude/relay_claude_test.go +++ b/relay/channel/claude/relay_claude_test.go @@ -9,6 +9,10 @@ import ( "github.com/stretchr/testify/require" ) +func commonPointer[T any](value T) *T { + return &value +} + func TestFormatClaudeResponseInfo_MessageStart(t *testing.T) { claudeInfo := &ClaudeResponseInfo{ Usage: &dto.Usage{}, @@ -310,6 +314,58 @@ func TestRequestOpenAI2ClaudeMessage_IgnoresUnsupportedFileContent(t *testing.T) require.Equal(t, "see attachment", *content[0].Text) } +func TestRequestOpenAI2ClaudeMessage_ClaudeOpus48HighUsesAdaptiveThinking(t *testing.T) { + request := dto.GeneralOpenAIRequest{ + Model: "claude-opus-4-8-high", + Temperature: commonPointer(0.7), + TopP: commonPointer(0.9), + TopK: commonPointer(40), + Messages: []dto.Message{ + { + Role: "user", + Content: "hello", + }, + }, + } + + claudeRequest, err := RequestOpenAI2ClaudeMessage(nil, request) + require.NoError(t, err) + require.Equal(t, "claude-opus-4-8", claudeRequest.Model) + require.NotNil(t, claudeRequest.Thinking) + require.Equal(t, "adaptive", claudeRequest.Thinking.Type) + require.Equal(t, "summarized", claudeRequest.Thinking.Display) + require.JSONEq(t, `{"effort":"high"}`, string(claudeRequest.OutputConfig)) + require.Nil(t, claudeRequest.Temperature) + require.Nil(t, claudeRequest.TopP) + require.Nil(t, claudeRequest.TopK) +} + +func TestRequestOpenAI2ClaudeMessage_ClaudeOpus48ThinkingUsesAdaptiveHighEffort(t *testing.T) { + request := dto.GeneralOpenAIRequest{ + Model: "claude-opus-4-8-thinking", + Temperature: commonPointer(0.7), + TopP: commonPointer(0.9), + TopK: commonPointer(40), + Messages: []dto.Message{ + { + Role: "user", + Content: "hello", + }, + }, + } + + claudeRequest, err := RequestOpenAI2ClaudeMessage(nil, request) + require.NoError(t, err) + require.Equal(t, "claude-opus-4-8", claudeRequest.Model) + require.NotNil(t, claudeRequest.Thinking) + require.Equal(t, "adaptive", claudeRequest.Thinking.Type) + require.Equal(t, "summarized", claudeRequest.Thinking.Display) + require.JSONEq(t, `{"effort":"high"}`, string(claudeRequest.OutputConfig)) + require.Nil(t, claudeRequest.Temperature) + require.Nil(t, claudeRequest.TopP) + require.Nil(t, claudeRequest.TopK) +} + func TestRequestOpenAI2ClaudeMessage_SupportsPDFFileContent(t *testing.T) { request := dto.GeneralOpenAIRequest{ Model: "claude-3-5-sonnet", diff --git a/relay/channel/vertex/adaptor.go b/relay/channel/vertex/adaptor.go index 0d91032d..7f087c21 100644 --- a/relay/channel/vertex/adaptor.go +++ b/relay/channel/vertex/adaptor.go @@ -45,6 +45,7 @@ var claudeModelMap = map[string]string{ "claude-opus-4-5-20251101": "claude-opus-4-5@20251101", "claude-opus-4-6": "claude-opus-4-6", "claude-opus-4-7": "claude-opus-4-7", + "claude-opus-4-8": "claude-opus-4-8", } const anthropicVersion = "vertex-2023-10-16" diff --git a/relay/claude_handler.go b/relay/claude_handler.go index 7ec934f9..d72218de 100644 --- a/relay/claude_handler.go +++ b/relay/claude_handler.go @@ -53,14 +53,17 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ } if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(request.Model); ok && effortLevel != "" && - (strings.HasPrefix(request.Model, "claude-opus-4-6") || strings.HasPrefix(request.Model, "claude-opus-4-7")) { + (strings.HasPrefix(request.Model, "claude-opus-4-6") || + strings.HasPrefix(request.Model, "claude-opus-4-7") || + strings.HasPrefix(request.Model, "claude-opus-4-8")) { request.Model = baseModel request.Thinking = &dto.Thinking{ Type: "adaptive", } request.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel)) - if strings.HasPrefix(request.Model, "claude-opus-4-7") { - // Opus 4.7 rejects non-default temperature/top_p/top_k with 400 + if strings.HasPrefix(request.Model, "claude-opus-4-7") || + strings.HasPrefix(request.Model, "claude-opus-4-8") { + // Opus 4.7/4.8 reject non-default temperature/top_p/top_k with 400 // and defaults display to "omitted"; restore the 4.6 visible summary. request.Thinking.Display = "summarized" request.Temperature = nil @@ -74,8 +77,9 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ strings.HasSuffix(request.Model, "-thinking") { if request.Thinking == nil { baseModel := strings.TrimSuffix(request.Model, "-thinking") - if strings.HasPrefix(baseModel, "claude-opus-4-7") { - // Opus 4.7 rejects thinking.type="enabled"; use adaptive at high effort. + if strings.HasPrefix(baseModel, "claude-opus-4-7") || + strings.HasPrefix(baseModel, "claude-opus-4-8") { + // Opus 4.7/4.8 reject thinking.type="enabled"; use adaptive at high effort. request.Thinking = &dto.Thinking{Type: "adaptive", Display: "summarized"} request.OutputConfig = json.RawMessage(`{"effort":"high"}`) request.Temperature = nil diff --git a/setting/ratio_setting/cache_ratio.go b/setting/ratio_setting/cache_ratio.go index fe6e3b32..89d0bfc2 100644 --- a/setting/ratio_setting/cache_ratio.go +++ b/setting/ratio_setting/cache_ratio.go @@ -71,6 +71,13 @@ var defaultCacheRatio = map[string]float64{ "claude-opus-4-7-high": 0.1, "claude-opus-4-7-medium": 0.1, "claude-opus-4-7-low": 0.1, + "claude-opus-4-8": 0.1, + "claude-opus-4-8-thinking": 0.1, + "claude-opus-4-8-max": 0.1, + "claude-opus-4-8-xhigh": 0.1, + "claude-opus-4-8-high": 0.1, + "claude-opus-4-8-medium": 0.1, + "claude-opus-4-8-low": 0.1, } var defaultCreateCacheRatio = map[string]float64{ @@ -106,6 +113,13 @@ var defaultCreateCacheRatio = map[string]float64{ "claude-opus-4-7-high": 1.25, "claude-opus-4-7-medium": 1.25, "claude-opus-4-7-low": 1.25, + "claude-opus-4-8": 1.25, + "claude-opus-4-8-thinking": 1.25, + "claude-opus-4-8-max": 1.25, + "claude-opus-4-8-xhigh": 1.25, + "claude-opus-4-8-high": 1.25, + "claude-opus-4-8-medium": 1.25, + "claude-opus-4-8-low": 1.25, } //var defaultCreateCacheRatio = map[string]float64{} diff --git a/setting/ratio_setting/model_ratio.go b/setting/ratio_setting/model_ratio.go index 80702ee4..23fd360e 100644 --- a/setting/ratio_setting/model_ratio.go +++ b/setting/ratio_setting/model_ratio.go @@ -152,6 +152,12 @@ var defaultModelRatio = map[string]float64{ "claude-opus-4-7-high": 2.5, "claude-opus-4-7-medium": 2.5, "claude-opus-4-7-low": 2.5, + "claude-opus-4-8": 2.5, + "claude-opus-4-8-max": 2.5, + "claude-opus-4-8-xhigh": 2.5, + "claude-opus-4-8-high": 2.5, + "claude-opus-4-8-medium": 2.5, + "claude-opus-4-8-low": 2.5, "claude-3-opus-20240229": 7.5, // $15 / 1M tokens "claude-opus-4-20250514": 7.5, "claude-opus-4-1-20250805": 7.5,