From 833181e02554f682dfd0146c683733d387f40297 Mon Sep 17 00:00:00 2001 From: cepvor Date: Thu, 14 May 2026 16:47:12 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=20MiMo=20=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B=20thinking=20mode=20=E8=87=AA=E5=8A=A8=E6=A3=80?= =?UTF-8?q?=E6=B5=8B=E4=B8=8E=E5=85=BC=E5=AE=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit isOpenAIThinkingEnabled() 现在自动检测模型名包含 "mimo" 的模型 (与 DeepSeek 并列),因为 MiMo 同样使用 reasoning_content 字段 且支持 thinking mode。 buildOpenAIRequestBody() 在 chat_template_kwargs 中同时发送 thinking: true 和 enable_thinking: true,兼容 DeepSeek 自托管和 MiMo 的 thinking 启用格式。 已有 reasoning_content 回传逻辑(openaiConvertMessages.ts)和流 解析逻辑(openaiStreamAdapter.ts)无需修改,MiMo 与 DeepSeek 共用 相同的 reasoning_content 字段协议。 Co-Authored-By: deepseek-v4-pro[1m] --- .../api/openai/__tests__/thinking.test.ts | 21 ++++++++++++++- src/services/api/openai/requestBody.ts | 27 ++++++++++--------- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/src/services/api/openai/__tests__/thinking.test.ts b/src/services/api/openai/__tests__/thinking.test.ts index a1f477a184..c147a8dc1a 100644 --- a/src/services/api/openai/__tests__/thinking.test.ts +++ b/src/services/api/openai/__tests__/thinking.test.ts @@ -147,6 +147,22 @@ describe('isOpenAIThinkingEnabled', () => { expect(isOpenAIThinkingEnabled('deepseek-coder')).toBe(true) }) + test('returns true when model name is "mimo-v2-flash"', () => { + expect(isOpenAIThinkingEnabled('mimo-v2-flash')).toBe(true) + }) + + test('returns true when model name is "mimo-v2-pro"', () => { + expect(isOpenAIThinkingEnabled('mimo-v2-pro')).toBe(true) + }) + + test('returns true when model name is "mimo-v2.5-pro"', () => { + expect(isOpenAIThinkingEnabled('mimo-v2.5-pro')).toBe(true) + }) + + test('returns true when model name contains "mimo"', () => { + expect(isOpenAIThinkingEnabled('MiMo-V2-Omni')).toBe(true) + }) + test('returns false when model name is "gpt-4o"', () => { expect(isOpenAIThinkingEnabled('gpt-4o')).toBe(false) }) @@ -197,7 +213,10 @@ describe('buildOpenAIRequestBody — thinking params', () => { test('includes vLLM/self-hosted thinking format when enabled', () => { const body = buildOpenAIRequestBody({ ...baseParams, enableThinking: true }) expect(body.enable_thinking).toBe(true) - expect(body.chat_template_kwargs).toEqual({ thinking: true }) + expect(body.chat_template_kwargs).toEqual({ + thinking: true, + enable_thinking: true, + }) }) test('includes both formats simultaneously when enabled', () => { diff --git a/src/services/api/openai/requestBody.ts b/src/services/api/openai/requestBody.ts index 9de76752de..d47ccdfc80 100644 --- a/src/services/api/openai/requestBody.ts +++ b/src/services/api/openai/requestBody.ts @@ -7,11 +7,11 @@ import type { ChatCompletionCreateParamsStreaming } from 'openai/resources/chat/ import { isEnvTruthy, isEnvDefinedFalsy } from '../../../utils/envUtils.js' /** - * Detect whether DeepSeek-style thinking mode should be enabled. + * Detect whether thinking mode should be enabled for this model. * * Enabled when: * 1. OPENAI_ENABLE_THINKING=1 is set (explicit enable), OR - * 2. Model name contains "deepseek-reasoner" OR "DeepSeek-V3.2" (auto-detect, case-insensitive) + * 2. Model name contains "deepseek" or "mimo" (auto-detect, case-insensitive) * * Disabled when: * - OPENAI_ENABLE_THINKING=0/false/no/off is explicitly set (overrides model detection) @@ -23,9 +23,9 @@ export function isOpenAIThinkingEnabled(model: string): boolean { if (isEnvDefinedFalsy(process.env.OPENAI_ENABLE_THINKING)) return false // Explicit enable if (isEnvTruthy(process.env.OPENAI_ENABLE_THINKING)) return true - // Auto-detect from model name (all DeepSeek models support thinking mode) + // Auto-detect from model name (DeepSeek and MiMo models support thinking mode) const modelLower = model.toLowerCase() - return modelLower.includes('deepseek') + return modelLower.includes('deepseek') || modelLower.includes('mimo') } /** @@ -58,12 +58,12 @@ export function resolveOpenAIMaxTokens( * Build the request body for OpenAI chat.completions.create(). * Extracted for testability — the thinking mode params are injected here. * - * DeepSeek thinking mode: inject thinking params via request body. - * Two formats are added simultaneously to support different deployments: - * - Official DeepSeek API: `thinking: { type: 'enabled' }` - * - Self-hosted DeepSeek-V3.2: `enable_thinking: true` + `chat_template_kwargs: { thinking: true }` + * Three thinking-mode formats are sent simultaneously; each endpoint uses the + * format it recognizes and ignores the others: + * - Official DeepSeek API: `thinking: { type: 'enabled' }` + * - Self-hosted DeepSeek: `enable_thinking: true` + `chat_template_kwargs: { thinking: true }` + * - MiMo (Xiaomi): `chat_template_kwargs: { enable_thinking: true }` * OpenAI SDK passes unknown keys through to the HTTP body. - * Each endpoint will use the format it recognizes and ignore the others. */ export function buildOpenAIRequestBody(params: { model: string @@ -76,7 +76,7 @@ export function buildOpenAIRequestBody(params: { }): ChatCompletionCreateParamsStreaming & { thinking?: { type: string } enable_thinking?: boolean - chat_template_kwargs?: { thinking: boolean } + chat_template_kwargs?: { thinking: boolean; enable_thinking: boolean } } { const { model, @@ -97,14 +97,15 @@ export function buildOpenAIRequestBody(params: { }), stream: true, stream_options: { include_usage: true }, - // DeepSeek thinking mode: enable chain-of-thought output. - // When active, temperature/top_p/presence_penalty/frequency_penalty are ignored by DeepSeek. + // Enable chain-of-thought output for DeepSeek and MiMo models. + // When active, temperature/top_p/presence_penalty/frequency_penalty are ignored. ...(enableThinking && { // Official DeepSeek API format thinking: { type: 'enabled' }, // Self-hosted DeepSeek-V3.2 format enable_thinking: true, - chat_template_kwargs: { thinking: true }, + // Both DeepSeek self-hosted and MiMo formats in chat_template_kwargs + chat_template_kwargs: { thinking: true, enable_thinking: true }, }), // Only send temperature when thinking mode is off (DeepSeek ignores it anyway, // but other providers may respect it)