Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion src/services/api/openai/__tests__/thinking.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,22 @@ describe('isOpenAIThinkingEnabled', () => {
expect(isOpenAIThinkingEnabled('deepseek-coder')).toBe(true)
})

test('returns true when model name is "mimo-v2-flash"', () => {
expect(isOpenAIThinkingEnabled('mimo-v2-flash')).toBe(true)
})

test('returns true when model name is "mimo-v2-pro"', () => {
expect(isOpenAIThinkingEnabled('mimo-v2-pro')).toBe(true)
})

test('returns true when model name is "mimo-v2.5-pro"', () => {
expect(isOpenAIThinkingEnabled('mimo-v2.5-pro')).toBe(true)
})

test('returns true when model name contains "mimo"', () => {
expect(isOpenAIThinkingEnabled('MiMo-V2-Omni')).toBe(true)
})

test('returns false when model name is "gpt-4o"', () => {
expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(false)
})
Expand Down Expand Up @@ -197,7 +213,10 @@ describe('buildOpenAIRequestBody — thinking params', () => {
test('includes vLLM/self-hosted thinking format when enabled', () => {
const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: true })
expect(body.enable_thinking).toBe(true)
expect(body.chat_template_kwargs).toEqual({ thinking: true })
expect(body.chat_template_kwargs).toEqual({
thinking: true,
enable_thinking: true,
})
})

test('includes both formats simultaneously when enabled', () => {
Expand Down
27 changes: 14 additions & 13 deletions src/services/api/openai/requestBody.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ import type { ChatCompletionCreateParamsStreaming } from 'openai/resources/chat/
import { isEnvTruthy, isEnvDefinedFalsy } from '../../../utils/envUtils.js'

/**
* Detect whether DeepSeek-style thinking mode should be enabled.
* Detect whether thinking mode should be enabled for this model.
*
* Enabled when:
* 1. OPENAI_ENABLE_THINKING=1 is set (explicit enable), OR
* 2. Model name contains "deepseek-reasoner" OR "DeepSeek-V3.2" (auto-detect, case-insensitive)
* 2. Model name contains "deepseek" or "mimo" (auto-detect, case-insensitive)
*
* Disabled when:
* - OPENAI_ENABLE_THINKING=0/false/no/off is explicitly set (overrides model detection)
Expand All @@ -23,9 +23,9 @@ export function isOpenAIThinkingEnabled(model: string): boolean {
if (isEnvDefinedFalsy(process.env.OPENAI_ENABLE_THINKING)) return false
// Explicit enable
if (isEnvTruthy(process.env.OPENAI_ENABLE_THINKING)) return true
// Auto-detect from model name (all DeepSeek models support thinking mode)
// Auto-detect from model name (DeepSeek and MiMo models support thinking mode)
const modelLower = model.toLowerCase()
return modelLower.includes('deepseek')
return modelLower.includes('deepseek') || modelLower.includes('mimo')
}

/**
Expand Down Expand Up @@ -58,12 +58,12 @@ export function resolveOpenAIMaxTokens(
* Build the request body for OpenAI chat.completions.create().
* Extracted for testability — the thinking mode params are injected here.
*
* DeepSeek thinking mode: inject thinking params via request body.
* Two formats are added simultaneously to support different deployments:
* - Official DeepSeek API: `thinking: { type: 'enabled' }`
* - Self-hosted DeepSeek-V3.2: `enable_thinking: true` + `chat_template_kwargs: { thinking: true }`
* Three thinking-mode formats are sent simultaneously; each endpoint uses the
* format it recognizes and ignores the others:
* - Official DeepSeek API: `thinking: { type: 'enabled' }`
* - Self-hosted DeepSeek: `enable_thinking: true` + `chat_template_kwargs: { thinking: true }`
* - MiMo (Xiaomi): `chat_template_kwargs: { enable_thinking: true }`
* OpenAI SDK passes unknown keys through to the HTTP body.
* Each endpoint will use the format it recognizes and ignore the others.
*/
export function buildOpenAIRequestBody(params: {
model: string
Expand All @@ -76,7 +76,7 @@ export function buildOpenAIRequestBody(params: {
}): ChatCompletionCreateParamsStreaming & {
thinking?: { type: string }
enable_thinking?: boolean
chat_template_kwargs?: { thinking: boolean }
chat_template_kwargs?: { thinking: boolean; enable_thinking: boolean }
} {
const {
model,
Expand All @@ -97,14 +97,15 @@ export function buildOpenAIRequestBody(params: {
}),
stream: true,
stream_options: { include_usage: true },
// DeepSeek thinking mode: enable chain-of-thought output.
// When active, temperature/top_p/presence_penalty/frequency_penalty are ignored by DeepSeek.
// Enable chain-of-thought output for DeepSeek and MiMo models.
// When active, temperature/top_p/presence_penalty/frequency_penalty are ignored.
...(enableThinking && {
// Official DeepSeek API format
thinking: { type: 'enabled' },
// Self-hosted DeepSeek-V3.2 format
enable_thinking: true,
chat_template_kwargs: { thinking: true },
// chat_template_kwargs carries both keys: `thinking` (self-hosted DeepSeek) and `enable_thinking` (MiMo)
chat_template_kwargs: { thinking: true, enable_thinking: true },
}),
// Only send temperature when thinking mode is off (DeepSeek ignores it anyway,
// but other providers may respect it)
Expand Down