diff --git a/demohouse/pocket_pal/README.md b/demohouse/pocket_pal/README.md index bbce9ca0..64222784 100644 --- a/demohouse/pocket_pal/README.md +++ b/demohouse/pocket_pal/README.md @@ -34,8 +34,7 @@ #### 相关模型 - Doubao-流式语音识别:将用户的语音提问转写为文本,以便于大模型对用户问题的理解与回复。 -- Doubao-1.5-vision-pro-32k:负责对实时捕捉的屏幕截图进行视觉内容理解,并结合当前画面回答用户的问题。 -- DeepSeek-R1:负责对AI帮写等场景,进行深度思考并回答。 +- Doubao-Seed-1.6:负责对实时捕捉的屏幕截图进行视觉内容理解,并结合当前画面回答用户的问题;在AI帮写等场景下,进行深度思考并回答。 - Doubao-语音合成:负责将模型生成的文本回答转化为自然流畅的语音输出。 #### 流程架构 @@ -50,8 +49,7 @@ |相关服务 |描述 |计费说明 | |-|-|-| |[Doubao-流式语音识别](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=seedasr-streaming) |将用户的语音提问转写为文本,以便于视觉大模型对用户问题的理解与回复。 |[多种计费方式](https://www.volcengine.com/docs/82379/1099320) | -|[Doubao-1.5-vision-pro-32k](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=doubao-1-5-vision-pro-32k) |负责对实时捕捉的屏幕截图进行视觉内容理解,并结合当前画面回答用户的问题。 |[多种计费方式](https://www.volcengine.com/docs/82379/1099320) | -|[DeepSeek-R1](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=deepseek-r1) |负责对AI帮写等场景,进行深度思考并回答。 |[多种计费方式](https://www.volcengine.com/docs/82379/1099320) | +|[Doubao-Seed-1.6](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=doubao-seed-1-6) |负责对实时捕捉的屏幕截图进行视觉内容理解,并结合当前画面回答用户的问题;在AI帮写等场景下,进行深度思考并回答。 |[多种计费方式](https://www.volcengine.com/docs/82379/1099320) | |[Doubao-语音合成](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=ve-tts) |负责将模型生成的文本回答转化为自然流畅的语音输出。 |[多种计费方式](https://www.volcengine.com/docs/82379/1099320) | ### 技术实现 diff --git a/demohouse/pocket_pal/assets/img_qrcode.png b/demohouse/pocket_pal/assets/img_qrcode.png index 1c3a4c45..987d3301 100644 Binary files a/demohouse/pocket_pal/assets/img_qrcode.png and b/demohouse/pocket_pal/assets/img_qrcode.png differ diff --git a/demohouse/pocket_pal/src/api/llm.ts b/demohouse/pocket_pal/src/api/llm.ts index 8bc47305..a7fc3dbe 100644 --- a/demohouse/pocket_pal/src/api/llm.ts +++ b/demohouse/pocket_pal/src/api/llm.ts @@ 
-48,8 +48,8 @@ interface ChatCompletionChunk { export class LLMApi { static TAG = 'LLMApi'; private static BASE_URL = 'https://ark.cn-beijing.volces.com/api/v3'; - static VLM_MODEL = 'bot-20250205194702-tv4dt'; - static DEEP_SEEK_MODEL = 'bot-20250212171216-4w645'; + static MODEL_VLM_DOUBAO_1_5 = 'bot-20250205194702-tv4dt'; // doubao 1.5 + static MODEL_DOUBAO_1_6 = 'bot-20250624151927-m6bkk'; // doubao seed 1.6 VLM thinking static VLM_SYSTEM_PROMPT = ` # 角色 你是一个全能智能体,拥有丰富的百科知识,你性格很温暖,喜欢帮助别人,非常热心。 @@ -150,7 +150,7 @@ export class LLMApi { let buffer = ''; handle.on((event: StreamEvent) => { - // console.log(`${LLMApi.TAG} streamResponse ${JSON.stringify(event)}`); + console.log(`${LLMApi.TAG} streamResponse ${JSON.stringify(event)}`); if (event.event === 'data') { try { const dataStr = String(event.data); @@ -173,7 +173,7 @@ export class LLMApi { const content = choice.delta?.content ?? ''; const reasoningContent = choice.delta?.reasoning_content; - if (content || reasoningContent) { + if ((content || reasoningContent) && reasoningContent !== '\n') { buffer += content; onData(content, reasoningContent); } @@ -196,8 +196,9 @@ export class LLMApi { static async chat( params: LLMRequestParams, apiKey?: string, - model: string = LLMApi.VLM_MODEL + model: string = LLMApi.MODEL_VLM_DOUBAO_1_5 ): Promise<(onData: (text: string, reasoningContent?: string) => void, onComplete?: () => void) => void> { + console.log(`LLMApi chat model=${model}`) const handle = await appletRequest({ url: `${this.BASE_URL}/bots/chat/completions`, method: 'POST', @@ -209,7 +210,7 @@ export class LLMApi { body: { model: model, messages: params.messages ?? 
[], - stream: true + stream: true, }, addCommonParams: false, streamType: 'sse' @@ -223,8 +224,8 @@ export class LLMApi { } } -const constructUserMessage = (question: string, image?: string, modelType: 'VLM' | 'DS' = 'VLM') => { - if (image && modelType === 'VLM') { +const constructUserMessage = (question: string, image?: string) => { + if (image) { return { role: 'user', content: [ @@ -254,8 +255,8 @@ export const createLLMRequest = async ( onComplete?: () => void, image?: string, historyMessages: Array<{ type: string; content: string; image?: string }> = [], - apiKey?: string, - modelType: 'VLM' | 'DS' = 'VLM' + apiKey?: string[], + modelType: 'VLM' | 'Thinking' = 'VLM' ) => { console.log(`createLLMRequest question=${question} modelType=${modelType}`) @@ -276,7 +277,7 @@ export const createLLMRequest = async ( // 转换最近5条历史消息 ...historyMessages.slice(-5).map((msg) => msg.type === 'user' - ? constructUserMessage(msg.content, msg.image, modelType) + ? constructUserMessage(msg.content, msg.image) : { role: 'assistant', content: msg.content @@ -301,9 +302,11 @@ export const createLLMRequest = async ( }) ); - const model = modelType === 'VLM' ? LLMApi.VLM_MODEL : LLMApi.DEEP_SEEK_MODEL; + const hasDoubao16Key = apiKey !== undefined && apiKey?.length >= 3; + const key = hasDoubao16Key ? apiKey?.[2] : apiKey?.[0]; + const model = hasDoubao16Key ? 
LLMApi.MODEL_DOUBAO_1_6 : LLMApi.MODEL_VLM_DOUBAO_1_5; - const handleStream = await LLMApi.chat(params, apiKey, model); + const handleStream = await LLMApi.chat(params, key, model); // 创建一个 Promise 来处理流式响应 return new Promise((resolve, reject) => { try { diff --git a/demohouse/pocket_pal/src/components/ChatList/index.tsx b/demohouse/pocket_pal/src/components/ChatList/index.tsx index c2901b27..b3d9db6c 100644 --- a/demohouse/pocket_pal/src/components/ChatList/index.tsx +++ b/demohouse/pocket_pal/src/components/ChatList/index.tsx @@ -85,11 +85,11 @@ const ChatInterface: React.FC = ({ initialMessages, apiKey } currentLLMRequestRef.current = abortController; // 判断是否为 AI 帮写场景 - const useDeepSeek = isAIAssistMode || (isFromInitial && messages[0]?.content === "AI帮写"); - if (useDeepSeek) { + const useThinking = isAIAssistMode || (isFromInitial && messages[0]?.content === "AI帮写"); + if (useThinking) { setIsAIAssistMode(true); } - console.log(`handleUserMessage useDeepSeek=${useDeepSeek} apiKey=${apiKey} ${typeof apiKey}`) + console.log(`handleUserMessage useThinking=${useThinking}`) // 获取最近的历史消息(不包括当前的用户消息和机器人回复) const recentMessages = isFromInitial ? 
[] : messages.slice(-5); @@ -100,7 +100,7 @@ const ChatInterface: React.FC = ({ initialMessages, apiKey } content: '', status: 'searching', isPlaying: false, - ...(useDeepSeek && { reasoningContent: '' }) + ...(useThinking && { reasoningContent: '' }) }; setMessages(prev => [...prev, botMessage]); @@ -132,103 +132,77 @@ const ChatInterface: React.FC = ({ initialMessages, apiKey } initTTS(); try { - if (useDeepSeek) { - let deepSeekRequest = ''; - if (isFromInitial) { - deepSeekRequest = '请根据图片内容判断场景类型,并进行AI帮写,图片内容描述如下:\n'; - await new Promise((resolve, reject) => { - createLLMRequest( - content, - (chunk, reasoning) => { - if (abortController.signal.aborted) return; - deepSeekRequest += chunk; - }, - () => { - if (abortController.signal.aborted) return; - resolve(); - }, - image, - recentMessages, - apiKey?.[0], - 'VLM' - ).catch(reject); - }); - } else { - deepSeekRequest = content; - } - - console.log(`deepSeekRequest ${deepSeekRequest}`); - if (!abortController.signal.aborted) { - await createLLMRequest( - deepSeekRequest, - async (chunk, reasoning) => { - if (abortController.signal.aborted) return; - // console.log(`DeepSeek onData c=${chunk} r=${reasoning}`); - if (chunk) { - // console.log(`DeepSeek onData pendingChunks.push=${chunk} ttsStreamingId=${ttsStreamingId}`); - pendingChunks.push(chunk); - } - - setMessages(prevMessages => { - const lastMessage = prevMessages[prevMessages.length - 1]; - if (lastMessage.type === 'bot') { - return [ - ...prevMessages.slice(0, -1), - { - ...lastMessage, - content: lastMessage.content + chunk, - reasoningContent: (lastMessage.reasoningContent ?? '') + (reasoning ?? 
''), - isPlaying: lastMessage.isPlaying, - ttsStreamingId: lastMessage.ttsStreamingId - } - ]; - } - return prevMessages; - }); - - if (chunk) { - await processPendingChunks(); - } - }, - async () => { - console.log(`DeepSeek onComplete ttsStreamingId=${ttsStreamingId}`); - if (abortController.signal.aborted) return; - while (pendingChunks.length > 0) { - await processPendingChunks(); + if (useThinking) { + let deepSeekRequest = isFromInitial ? '请根据图片内容判断场景类型,并进行AI帮写' : content; + await createLLMRequest( + deepSeekRequest, + async (chunk, reasoning) => { + if (abortController.signal.aborted) return; + // console.log(`DeepSeek onData c=${chunk} r=${reasoning}`); + if (chunk) { + // console.log(`DeepSeek onData pendingChunks.push=${chunk} ttsStreamingId=${ttsStreamingId}`); + pendingChunks.push(chunk); + } + + setMessages(prevMessages => { + const lastMessage = prevMessages[prevMessages.length - 1]; + if (lastMessage.type === 'bot') { + return [ + ...prevMessages.slice(0, -1), + { + ...lastMessage, + content: lastMessage.content + chunk, + reasoningContent: (lastMessage.reasoningContent ?? '') + (reasoning ?? 
''), + isPlaying: lastMessage.isPlaying, + ttsStreamingId: lastMessage.ttsStreamingId + } + ]; } + return prevMessages; + }); - if (ttsStreamingId) { - await appendStreamingTTS({ - streamingId: ttsStreamingId, - newText: '', - isFinish: true - }); - } + if (chunk) { + await processPendingChunks(); + } + }, + async () => { + console.log(`DeepSeek onComplete ttsStreamingId=${ttsStreamingId}`); + if (abortController.signal.aborted) return; + while (pendingChunks.length > 0) { + await processPendingChunks(); + } - setMessages(prevMessages => { - const lastMessage = prevMessages[prevMessages.length - 1]; - if (lastMessage.type === 'bot') { - return [ - ...prevMessages.slice(0, -1), - { - ...lastMessage, - status: 'completed', - isPlaying: lastMessage.isPlaying, - ttsStreamingId: lastMessage.ttsStreamingId - } - ]; - } - return prevMessages; + if (ttsStreamingId) { + await appendStreamingTTS({ + streamingId: ttsStreamingId, + newText: '', + isFinish: true }); - setIsResponding(false); - currentLLMRequestRef.current = null; - }, - undefined, - recentMessages, - apiKey?.[1], - 'DS' - ); - } + } + + setMessages(prevMessages => { + const lastMessage = prevMessages[prevMessages.length - 1]; + if (lastMessage.type === 'bot') { + return [ + ...prevMessages.slice(0, -1), + { + ...lastMessage, + status: 'completed', + isPlaying: lastMessage.isPlaying, + ttsStreamingId: lastMessage.ttsStreamingId + } + ]; + } + return prevMessages; + }); + setIsResponding(false); + currentLLMRequestRef.current = null; + }, + image, + recentMessages, + apiKey, + 'Thinking' + ); } else { // 非 AI 帮写场景,保持原有逻辑 await createLLMRequest( @@ -291,7 +265,7 @@ const ChatInterface: React.FC = ({ initialMessages, apiKey } }, image, recentMessages, - apiKey?.[0], + apiKey, 'VLM' ); }