6 changes: 2 additions & 4 deletions demohouse/pocket_pal/README.md
@@ -34,8 +34,7 @@
#### Related models

- Doubao-流式语音识别 (streaming ASR): transcribes the user's spoken question into text so that the large model can understand and answer it.
- Doubao-1.5-vision-pro-32k: performs visual understanding of screenshots captured in real time and answers the user's question based on the current screen.
- DeepSeek-R1: performs deep reasoning and answers in scenarios such as AI-assisted writing (AI帮写).
- Doubao-Seed-1.6: performs visual understanding of screenshots captured in real time and answers the user's question based on the current screen; in scenarios such as AI-assisted writing (AI帮写) it also reasons deeply before answering.
- Doubao-语音合成 (TTS): converts the model's text answer into natural, fluent speech output.

#### Workflow architecture
@@ -50,8 +49,7 @@
|Related service |Description |Billing |
|-|-|-|
|[Doubao-流式语音识别](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=seedasr-streaming) |Transcribes the user's spoken question into text so that the vision model can understand and answer it. |[Multiple billing options](https://www.volcengine.com/docs/82379/1099320) |
|[Doubao-1.5-vision-pro-32k](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=doubao-1-5-vision-pro-32k) |Performs visual understanding of screenshots captured in real time and answers the user's question based on the current screen. |[Multiple billing options](https://www.volcengine.com/docs/82379/1099320) |
|[DeepSeek-R1](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=deepseek-r1) |Performs deep reasoning and answers in scenarios such as AI-assisted writing. |[Multiple billing options](https://www.volcengine.com/docs/82379/1099320) |
|[Doubao-Seed-1.6](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=doubao-seed-1-6) |Performs visual understanding of screenshots captured in real time and answers the user's question based on the current screen; also handles deep reasoning for AI-assisted writing. |[Multiple billing options](https://www.volcengine.com/docs/82379/1099320) |
|[Doubao-语音合成](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=ve-tts) |Converts the model's text answer into natural, fluent speech output. |[Multiple billing options](https://www.volcengine.com/docs/82379/1099320) |

### Technical implementation
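The services in the table above are all reached through the Ark `bots/chat/completions` endpoint that `src/api/llm.ts` (changed below) streams from. As a rough illustration only — the demo itself uses the applet's `appletRequest` helper with `streamType: 'sse'`, Bearer-token auth is assumed, and the bot ID, API key, and question here are placeholders — a streaming call might look like this:

```typescript
// Sketch of a streaming call to an Ark bot endpoint (placeholders throughout).
const BASE_URL = 'https://ark.cn-beijing.volces.com/api/v3';

async function streamBotReply(question: string, apiKey: string, botId: string): Promise<string> {
  const res = await fetch(`${BASE_URL}/bots/chat/completions`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${apiKey}`,
    },
    body: JSON.stringify({
      model: botId,
      messages: [{ role: 'user', content: question }],
      stream: true,
    }),
  });

  const reader = res.body!.getReader();
  const decoder = new TextDecoder();
  let answer = '';
  // Simplified SSE parsing: assumes each read delivers whole `data: ...` lines.
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    for (const line of decoder.decode(value).split('\n')) {
      const payload = line.replace(/^data:\s*/, '').trim();
      if (!payload || payload === '[DONE]') continue;
      const delta = JSON.parse(payload).choices?.[0]?.delta;
      answer += delta?.content ?? '';
    }
  }
  return answer;
}
```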
Binary file modified demohouse/pocket_pal/assets/img_qrcode.png
29 changes: 16 additions & 13 deletions demohouse/pocket_pal/src/api/llm.ts
@@ -48,8 +48,8 @@ interface ChatCompletionChunk {
export class LLMApi {
static TAG = 'LLMApi';
private static BASE_URL = 'https://ark.cn-beijing.volces.com/api/v3';
static VLM_MODEL = 'bot-20250205194702-tv4dt';
static DEEP_SEEK_MODEL = 'bot-20250212171216-4w645';
static MODEL_VLM_DOUBAO_1_5 = 'bot-20250205194702-tv4dt'; // doubao 1.5
static MODEL_DOUBAO_1_6 = 'bot-20250624151927-m6bkk'; // doubao seed 1.6 VLM thinking
static VLM_SYSTEM_PROMPT = `
# 角色
你是一个全能智能体,拥有丰富的百科知识,你性格很温暖,喜欢帮助别人,非常热心。
@@ -150,7 +150,7 @@ export class LLMApi {
let buffer = '';

handle.on((event: StreamEvent) => {
// console.log(`${LLMApi.TAG} streamResponse ${JSON.stringify(event)}`);
console.log(`${LLMApi.TAG} streamResponse ${JSON.stringify(event)}`);
if (event.event === 'data') {
try {
const dataStr = String(event.data);
@@ -173,7 +173,7 @@

const content = choice.delta?.content ?? '';
const reasoningContent = choice.delta?.reasoning_content;
if (content || reasoningContent) {
if ((content || reasoningContent) && reasoningContent !== '\n') {
buffer += content;
onData(content, reasoningContent);
}
@@ -196,8 +196,9 @@
static async chat(
params: LLMRequestParams,
apiKey?: string,
model: string = LLMApi.VLM_MODEL
model: string = LLMApi.MODEL_VLM_DOUBAO_1_5
): Promise<(onData: (text: string, reasoningContent?: string) => void, onComplete?: () => void) => void> {
console.log(`LLMApi chat model=${model}`)
const handle = await appletRequest({
url: `${this.BASE_URL}/bots/chat/completions`,
method: 'POST',
@@ -209,7 +210,7 @@
body: {
model: model,
messages: params.messages ?? [],
stream: true
stream: true,
},
addCommonParams: false,
streamType: 'sse'
@@ -223,8 +224,8 @@
}
}

const constructUserMessage = (question: string, image?: string, modelType: 'VLM' | 'DS' = 'VLM') => {
if (image && modelType === 'VLM') {
const constructUserMessage = (question: string, image?: string) => {
if (image) {
return {
role: 'user',
content: [
@@ -254,8 +255,8 @@ export const createLLMRequest = async (
onComplete?: () => void,
image?: string,
historyMessages: Array<{ type: string; content: string; image?: string }> = [],
apiKey?: string,
modelType: 'VLM' | 'DS' = 'VLM'
apiKey?: string[],
modelType: 'VLM' | 'Thinking' = 'VLM'
) => {
console.log(`createLLMRequest question=${question} modelType=${modelType}`)

@@ -276,7 +277,7 @@
// Convert the 5 most recent history messages
...historyMessages.slice(-5).map((msg) =>
msg.type === 'user'
? constructUserMessage(msg.content, msg.image, modelType)
? constructUserMessage(msg.content, msg.image)
: {
role: 'assistant',
content: msg.content
Expand All @@ -301,9 +302,11 @@ export const createLLMRequest = async (
})
);

const model = modelType === 'VLM' ? LLMApi.VLM_MODEL : LLMApi.DEEP_SEEK_MODEL;
const hasDoubao16Key = apiKey !== undefined && apiKey?.length >= 3;
const key = hasDoubao16Key ? apiKey?.[2] : apiKey?.[0];
const model = hasDoubao16Key ? LLMApi.MODEL_DOUBAO_1_6 : LLMApi.MODEL_VLM_DOUBAO_1_5;

const handleStream = await LLMApi.chat(params, apiKey, model);
const handleStream = await LLMApi.chat(params, key, model);
// Create a Promise to handle the streaming response
return new Promise((resolve, reject) => {
try {
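With this change `createLLMRequest` takes an array of API keys instead of a single key and derives both the key and the bot model from it: index 0 is the Doubao-1.5 VLM key, index 2 is the Doubao-Seed-1.6 key, and index 1 was the old DeepSeek slot that this path no longer reads. A condensed sketch of that selection — the standalone helper and the key values are illustrative, not part of the PR:

```typescript
// Sketch of the key/model selection above (the real logic lives inline in createLLMRequest).
const MODEL_VLM_DOUBAO_1_5 = 'bot-20250205194702-tv4dt';
const MODEL_DOUBAO_1_6 = 'bot-20250624151927-m6bkk';

function resolveKeyAndModel(apiKey?: string[]): { key?: string; model: string } {
  // All three keys present -> Doubao-Seed-1.6 bot with the third key;
  // otherwise fall back to the Doubao-1.5 VLM bot with the first key.
  const hasDoubao16Key = apiKey !== undefined && apiKey.length >= 3;
  return {
    key: hasDoubao16Key ? apiKey?.[2] : apiKey?.[0],
    model: hasDoubao16Key ? MODEL_DOUBAO_1_6 : MODEL_VLM_DOUBAO_1_5,
  };
}

// Keys below are dummies:
resolveKeyAndModel(['vlm-key', 'old-ds-key', 'seed16-key']); // -> Doubao-Seed-1.6 bot
resolveKeyAndModel(['vlm-key']);                             // -> Doubao-1.5 VLM bot
```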
170 changes: 72 additions & 98 deletions demohouse/pocket_pal/src/components/ChatList/index.tsx
@@ -85,11 +85,11 @@ const ChatInterface: React.FC<ChatInterfaceProps> = ({ initialMessages, apiKey }
currentLLMRequestRef.current = abortController;

// Determine whether this is the AI-assisted-writing (AI帮写) scenario
const useDeepSeek = isAIAssistMode || (isFromInitial && messages[0]?.content === "AI帮写");
if (useDeepSeek) {
const useThinking = isAIAssistMode || (isFromInitial && messages[0]?.content === "AI帮写");
if (useThinking) {
setIsAIAssistMode(true);
}
console.log(`handleUserMessage useDeepSeek=${useDeepSeek} apiKey=${apiKey} ${typeof apiKey}`)
console.log(`handleUserMessage useThinking=${useThinking}`)

// Get the most recent history messages (excluding the current user message and bot reply)
const recentMessages = isFromInitial ? [] : messages.slice(-5);
@@ -100,7 +100,7 @@
content: '',
status: 'searching',
isPlaying: false,
...(useDeepSeek && { reasoningContent: '' })
...(useThinking && { reasoningContent: '' })
};

setMessages(prev => [...prev, botMessage]);
@@ -132,103 +132,77 @@

initTTS();
try {
if (useDeepSeek) {
let deepSeekRequest = '';
if (isFromInitial) {
deepSeekRequest = '请根据图片内容判断场景类型,并进行AI帮写,图片内容描述如下:\n';
await new Promise<void>((resolve, reject) => {
createLLMRequest(
content,
(chunk, reasoning) => {
if (abortController.signal.aborted) return;
deepSeekRequest += chunk;
},
() => {
if (abortController.signal.aborted) return;
resolve();
},
image,
recentMessages,
apiKey?.[0],
'VLM'
).catch(reject);
});
} else {
deepSeekRequest = content;
}

console.log(`deepSeekRequest ${deepSeekRequest}`);
if (!abortController.signal.aborted) {
await createLLMRequest(
deepSeekRequest,
async (chunk, reasoning) => {
if (abortController.signal.aborted) return;
// console.log(`DeepSeek onData c=${chunk} r=${reasoning}`);
if (chunk) {
// console.log(`DeepSeek onData pendingChunks.push=${chunk} ttsStreamingId=${ttsStreamingId}`);
pendingChunks.push(chunk);
}

setMessages(prevMessages => {
const lastMessage = prevMessages[prevMessages.length - 1];
if (lastMessage.type === 'bot') {
return [
...prevMessages.slice(0, -1),
{
...lastMessage,
content: lastMessage.content + chunk,
reasoningContent: (lastMessage.reasoningContent ?? '') + (reasoning ?? ''),
isPlaying: lastMessage.isPlaying,
ttsStreamingId: lastMessage.ttsStreamingId
}
];
}
return prevMessages;
});

if (chunk) {
await processPendingChunks();
}
},
async () => {
console.log(`DeepSeek onComplete ttsStreamingId=${ttsStreamingId}`);
if (abortController.signal.aborted) return;
while (pendingChunks.length > 0) {
await processPendingChunks();
if (useThinking) {
let deepSeekRequest = isFromInitial ? '请根据图片内容判断场景类型,并进行AI帮写' : content;
await createLLMRequest(
deepSeekRequest,
async (chunk, reasoning) => {
if (abortController.signal.aborted) return;
// console.log(`DeepSeek onData c=${chunk} r=${reasoning}`);
if (chunk) {
// console.log(`DeepSeek onData pendingChunks.push=${chunk} ttsStreamingId=${ttsStreamingId}`);
pendingChunks.push(chunk);
}

setMessages(prevMessages => {
const lastMessage = prevMessages[prevMessages.length - 1];
if (lastMessage.type === 'bot') {
return [
...prevMessages.slice(0, -1),
{
...lastMessage,
content: lastMessage.content + chunk,
reasoningContent: (lastMessage.reasoningContent ?? '') + (reasoning ?? ''),
isPlaying: lastMessage.isPlaying,
ttsStreamingId: lastMessage.ttsStreamingId
}
];
}
return prevMessages;
});

if (ttsStreamingId) {
await appendStreamingTTS({
streamingId: ttsStreamingId,
newText: '',
isFinish: true
});
}
if (chunk) {
await processPendingChunks();
}
},
async () => {
console.log(`DeepSeek onComplete ttsStreamingId=${ttsStreamingId}`);
if (abortController.signal.aborted) return;
while (pendingChunks.length > 0) {
await processPendingChunks();
}

setMessages(prevMessages => {
const lastMessage = prevMessages[prevMessages.length - 1];
if (lastMessage.type === 'bot') {
return [
...prevMessages.slice(0, -1),
{
...lastMessage,
status: 'completed',
isPlaying: lastMessage.isPlaying,
ttsStreamingId: lastMessage.ttsStreamingId
}
];
}
return prevMessages;
if (ttsStreamingId) {
await appendStreamingTTS({
streamingId: ttsStreamingId,
newText: '',
isFinish: true
});
setIsResponding(false);
currentLLMRequestRef.current = null;
},
undefined,
recentMessages,
apiKey?.[1],
'DS'
);
}
}

setMessages(prevMessages => {
const lastMessage = prevMessages[prevMessages.length - 1];
if (lastMessage.type === 'bot') {
return [
...prevMessages.slice(0, -1),
{
...lastMessage,
status: 'completed',
isPlaying: lastMessage.isPlaying,
ttsStreamingId: lastMessage.ttsStreamingId
}
];
}
return prevMessages;
});
setIsResponding(false);
currentLLMRequestRef.current = null;
},
image,
recentMessages,
apiKey,
'Thinking'
);
} else {
// Not the AI-assisted-writing scenario; keep the original logic
await createLLMRequest(
@@ -291,7 +265,7 @@ const ChatInterface: React.FC<ChatInterfaceProps> = ({ initialMessages, apiKey }
},
image,
recentMessages,
apiKey?.[0],
apiKey,
'VLM'
);
}
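Both branches of this component update chat state with the same pattern: append the streamed content delta (and any reasoning delta) to the last bot message while leaving earlier messages untouched. A condensed sketch of that updater, using a simplified message shape rather than the component's full one (which also carries `status`, `isPlaying`, and `ttsStreamingId`):

```typescript
// Simplified message shape for illustration only.
interface Message {
  type: 'user' | 'bot';
  content: string;
  reasoningContent?: string;
}

// Append a streamed delta (and optional reasoning delta) to the last bot message.
function appendToLastBotMessage(prev: Message[], chunk: string, reasoning?: string): Message[] {
  const last = prev[prev.length - 1];
  if (!last || last.type !== 'bot') return prev;
  return [
    ...prev.slice(0, -1),
    {
      ...last,
      content: last.content + chunk,
      reasoningContent: (last.reasoningContent ?? '') + (reasoning ?? ''),
    },
  ];
}

// Usage inside the stream callback (sketch):
// setMessages(prev => appendToLastBotMessage(prev, chunk, reasoning));
```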