6 changes: 2 additions & 4 deletions demohouse/pocket_pal/README.md
@@ -34,8 +34,7 @@
#### Related models

- Doubao-流式语音识别 (streaming ASR): transcribes the user's spoken question into text so that the large model can understand and answer it.
- Doubao-1.5-vision-pro-32k: performs visual understanding of screenshots captured in real time and answers the user's question based on the current screen.
- DeepSeek-R1: performs deep reasoning and answers in scenarios such as AI-assisted writing (AI帮写).
- Doubao-Seed-1.6: performs visual understanding of screenshots captured in real time and answers the user's question based on the current screen; in scenarios such as AI-assisted writing (AI帮写) it also reasons deeply before answering.
- Doubao-语音合成 (TTS): converts the model's text answer into natural, fluent speech output.

#### Workflow architecture
@@ -50,8 +49,7 @@
|Related service |Description |Billing |
|-|-|-|
|[Doubao-流式语音识别](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=seedasr-streaming) |Transcribes the user's spoken question into text so that the vision model can understand and answer it. |[Multiple billing options](https://www.volcengine.com/docs/82379/1099320) |
|[Doubao-1.5-vision-pro-32k](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=doubao-1-5-vision-pro-32k) |Performs visual understanding of screenshots captured in real time and answers the user's question based on the current screen. |[Multiple billing options](https://www.volcengine.com/docs/82379/1099320) |
|[DeepSeek-R1](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=deepseek-r1) |Performs deep reasoning and answers in scenarios such as AI-assisted writing. |[Multiple billing options](https://www.volcengine.com/docs/82379/1099320) |
|[Doubao-Seed-1.6](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=doubao-seed-1-6) |Performs visual understanding of screenshots captured in real time and answers the user's question based on the current screen; also handles deep reasoning for AI-assisted writing. |[Multiple billing options](https://www.volcengine.com/docs/82379/1099320) |
|[Doubao-语音合成](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=ve-tts) |Converts the model's text answer into natural, fluent speech output. |[Multiple billing options](https://www.volcengine.com/docs/82379/1099320) |

### Technical implementation
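The services in the table above are all reached through the Ark `bots/chat/completions` endpoint that `src/api/llm.ts` (changed below) streams from. As a rough illustration only — the demo itself uses the applet's `appletRequest` helper with `streamType: 'sse'`, Bearer-token auth is assumed, and the bot ID, API key, and question here are placeholders — a streaming call might look like this:

```typescript
// Sketch of a streaming call to an Ark bot endpoint (placeholders throughout).
const BASE_URL = 'https://ark.cn-beijing.volces.com/api/v3';

async function streamBotReply(question: string, apiKey: string, botId: string): Promise<string> {
  const res = await fetch(`${BASE_URL}/bots/chat/completions`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${apiKey}`,
    },
    body: JSON.stringify({
      model: botId,
      messages: [{ role: 'user', content: question }],
      stream: true,
    }),
  });

  const reader = res.body!.getReader();
  const decoder = new TextDecoder();
  let answer = '';
  // Simplified SSE parsing: assumes each read delivers whole `data: ...` lines.
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    for (const line of decoder.decode(value).split('\n')) {
      const payload = line.replace(/^data:\s*/, '').trim();
      if (!payload || payload === '[DONE]') continue;
      const delta = JSON.parse(payload).choices?.[0]?.delta;
      answer += delta?.content ?? '';
    }
  }
  return answer;
}
```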
Binary file modified demohouse/pocket_pal/assets/img_qrcode.png
29 changes: 16 additions & 13 deletions demohouse/pocket_pal/src/api/llm.ts
@@ -48,8 +48,8 @@ interface ChatCompletionChunk {
export class LLMApi {
static TAG = 'LLMApi';
private static BASE_URL = 'https://ark.cn-beijing.volces.com/api/v3';
static VLM_MODEL = 'bot-20250205194702-tv4dt';
static DEEP_SEEK_MODEL = 'bot-20250212171216-4w645';
static MODEL_VLM_DOUBAO_1_5 = 'bot-20250205194702-tv4dt'; // doubao 1.5
static MODEL_DOUBAO_1_6 = 'bot-20250624151927-m6bkk'; // doubao seed 1.6 VLM thinking
static VLM_SYSTEM_PROMPT = `
# 角色
你是一个全能智能体,拥有丰富的百科知识,你性格很温暖,喜欢帮助别人,非常热心。
@@ -150,7 +150,7 @@ export class LLMApi {
let buffer = '';

handle.on((event: StreamEvent) => {
// console.log(`${LLMApi.TAG} streamResponse ${JSON.stringify(event)}`);
console.log(`${LLMApi.TAG} streamResponse ${JSON.stringify(event)}`);
if (event.event === 'data') {
try {
const dataStr = String(event.data);
@@ -173,7 +173,7 @@

const content = choice.delta?.content ?? '';
const reasoningContent = choice.delta?.reasoning_content;
if (content || reasoningContent) {
if ((content || reasoningContent) && reasoningContent !== '\n') {
buffer += content;
onData(content, reasoningContent);
}
@@ -196,8 +196,9 @@
static async chat(
params: LLMRequestParams,
apiKey?: string,
model: string = LLMApi.VLM_MODEL
model: string = LLMApi.MODEL_VLM_DOUBAO_1_5
): Promise<(onData: (text: string, reasoningContent?: string) => void, onComplete?: () => void) => void> {
console.log(`LLMApi chat model=${model}`)
const handle = await appletRequest({
url: `${this.BASE_URL}/bots/chat/completions`,
method: 'POST',
@@ -209,7 +210,7 @@
body: {
model: model,
messages: params.messages ?? [],
stream: true
stream: true,
},
addCommonParams: false,
streamType: 'sse'
@@ -223,8 +224,8 @@
}
}

const constructUserMessage = (question: string, image?: string, modelType: 'VLM' | 'DS' = 'VLM') => {
if (image && modelType === 'VLM') {
const constructUserMessage = (question: string, image?: string) => {
if (image) {
return {
role: 'user',
content: [
@@ -254,8 +255,8 @@ export const createLLMRequest = async (
onComplete?: () => void,
image?: string,
historyMessages: Array<{ type: string; content: string; image?: string }> = [],
apiKey?: string,
modelType: 'VLM' | 'DS' = 'VLM'
apiKey?: string[],
modelType: 'VLM' | 'Thinking' = 'VLM'
) => {
console.log(`createLLMRequest question=${question} modelType=${modelType}`)

@@ -276,7 +277,7 @@
// Convert the 5 most recent history messages
...historyMessages.slice(-5).map((msg) =>
msg.type === 'user'
? constructUserMessage(msg.content, msg.image, modelType)
? constructUserMessage(msg.content, msg.image)
: {
role: 'assistant',
content: msg.content
Expand All @@ -301,9 +302,11 @@ export const createLLMRequest = async (
})
);

const model = modelType === 'VLM' ? LLMApi.VLM_MODEL : LLMApi.DEEP_SEEK_MODEL;
const hasDoubao16Key = apiKey !== undefined && apiKey?.length >= 3;
const key = hasDoubao16Key ? apiKey?.[2] : apiKey?.[0];
const model = hasDoubao16Key ? LLMApi.MODEL_DOUBAO_1_6 : LLMApi.MODEL_VLM_DOUBAO_1_5;

const handleStream = await LLMApi.chat(params, apiKey, model);
const handleStream = await LLMApi.chat(params, key, model);
// Create a Promise to handle the streaming response
return new Promise((resolve, reject) => {
try {
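With this change `createLLMRequest` takes an array of API keys instead of a single key and derives both the key and the bot model from it: index 0 is the Doubao-1.5 VLM key, index 2 is the Doubao-Seed-1.6 key, and index 1 was the old DeepSeek slot that this path no longer reads. A condensed sketch of that selection — the standalone helper and the key values are illustrative, not part of the PR:

```typescript
// Sketch of the key/model selection above (the real logic lives inline in createLLMRequest).
const MODEL_VLM_DOUBAO_1_5 = 'bot-20250205194702-tv4dt';
const MODEL_DOUBAO_1_6 = 'bot-20250624151927-m6bkk';

function resolveKeyAndModel(apiKey?: string[]): { key?: string; model: string } {
  // All three keys present -> Doubao-Seed-1.6 bot with the third key;
  // otherwise fall back to the Doubao-1.5 VLM bot with the first key.
  const hasDoubao16Key = apiKey !== undefined && apiKey.length >= 3;
  return {
    key: hasDoubao16Key ? apiKey?.[2] : apiKey?.[0],
    model: hasDoubao16Key ? MODEL_DOUBAO_1_6 : MODEL_VLM_DOUBAO_1_5,
  };
}

// Keys below are dummies:
resolveKeyAndModel(['vlm-key', 'old-ds-key', 'seed16-key']); // -> Doubao-Seed-1.6 bot
resolveKeyAndModel(['vlm-key']);                             // -> Doubao-1.5 VLM bot
```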
170 changes: 72 additions & 98 deletions demohouse/pocket_pal/src/components/ChatList/index.tsx
@@ -85,11 +85,11 @@ const ChatInterface: React.FC<ChatInterfaceProps> = ({ initialMessages, apiKey }
currentLLMRequestRef.current = abortController;

// Determine whether this is the AI-assisted-writing (AI帮写) scenario
const useDeepSeek = isAIAssistMode || (isFromInitial && messages[0]?.content === "AI帮写");
if (useDeepSeek) {
const useThinking = isAIAssistMode || (isFromInitial && messages[0]?.content === "AI帮写");
if (useThinking) {
setIsAIAssistMode(true);
}
console.log(`handleUserMessage useDeepSeek=${useDeepSeek} apiKey=${apiKey} ${typeof apiKey}`)
console.log(`handleUserMessage useThinking=${useThinking}`)

// Get the most recent history messages (excluding the current user message and bot reply)
const recentMessages = isFromInitial ? [] : messages.slice(-5);
@@ -100,7 +100,7 @@
content: '',
status: 'searching',
isPlaying: false,
...(useDeepSeek && { reasoningContent: '' })
...(useThinking && { reasoningContent: '' })
};

setMessages(prev => [...prev, botMessage]);
@@ -132,103 +132,77 @@

initTTS();
try {
if (useDeepSeek) {
let deepSeekRequest = '';
if (isFromInitial) {
deepSeekRequest = '请根据图片内容判断场景类型,并进行AI帮写,图片内容描述如下:\n';
await new Promise<void>((resolve, reject) => {
createLLMRequest(
content,
(chunk, reasoning) => {
if (abortController.signal.aborted) return;
deepSeekRequest += chunk;
},
() => {
if (abortController.signal.aborted) return;
resolve();
},
image,
recentMessages,
apiKey?.[0],
'VLM'
).catch(reject);
});
} else {
deepSeekRequest = content;
}

console.log(`deepSeekRequest ${deepSeekRequest}`);
if (!abortController.signal.aborted) {
await createLLMRequest(
deepSeekRequest,
async (chunk, reasoning) => {
if (abortController.signal.aborted) return;
// console.log(`DeepSeek onData c=${chunk} r=${reasoning}`);
if (chunk) {
// console.log(`DeepSeek onData pendingChunks.push=${chunk} ttsStreamingId=${ttsStreamingId}`);
pendingChunks.push(chunk);
}

setMessages(prevMessages => {
const lastMessage = prevMessages[prevMessages.length - 1];
if (lastMessage.type === 'bot') {
return [
...prevMessages.slice(0, -1),
{
...lastMessage,
content: lastMessage.content + chunk,
reasoningContent: (lastMessage.reasoningContent ?? '') + (reasoning ?? ''),
isPlaying: lastMessage.isPlaying,
ttsStreamingId: lastMessage.ttsStreamingId
}
];
}
return prevMessages;
});

if (chunk) {
await processPendingChunks();
}
},
async () => {
console.log(`DeepSeek onComplete ttsStreamingId=${ttsStreamingId}`);
if (abortController.signal.aborted) return;
while (pendingChunks.length > 0) {
await processPendingChunks();
if (useThinking) {
let deepSeekRequest = isFromInitial ? '请根据图片内容判断场景类型,并进行AI帮写' : content;
await createLLMRequest(
deepSeekRequest,
async (chunk, reasoning) => {
if (abortController.signal.aborted) return;
// console.log(`DeepSeek onData c=${chunk} r=${reasoning}`);
if (chunk) {
// console.log(`DeepSeek onData pendingChunks.push=${chunk} ttsStreamingId=${ttsStreamingId}`);
pendingChunks.push(chunk);
}

setMessages(prevMessages => {
const lastMessage = prevMessages[prevMessages.length - 1];
if (lastMessage.type === 'bot') {
return [
...prevMessages.slice(0, -1),
{
...lastMessage,
content: lastMessage.content + chunk,
reasoningContent: (lastMessage.reasoningContent ?? '') + (reasoning ?? ''),
isPlaying: lastMessage.isPlaying,
ttsStreamingId: lastMessage.ttsStreamingId
}
];
}
return prevMessages;
});

if (ttsStreamingId) {
await appendStreamingTTS({
streamingId: ttsStreamingId,
newText: '',
isFinish: true
});
}
if (chunk) {
await processPendingChunks();
}
},
async () => {
console.log(`DeepSeek onComplete ttsStreamingId=${ttsStreamingId}`);
if (abortController.signal.aborted) return;
while (pendingChunks.length > 0) {
await processPendingChunks();
}

setMessages(prevMessages => {
const lastMessage = prevMessages[prevMessages.length - 1];
if (lastMessage.type === 'bot') {
return [
...prevMessages.slice(0, -1),
{
...lastMessage,
status: 'completed',
isPlaying: lastMessage.isPlaying,
ttsStreamingId: lastMessage.ttsStreamingId
}
];
}
return prevMessages;
if (ttsStreamingId) {
await appendStreamingTTS({
streamingId: ttsStreamingId,
newText: '',
isFinish: true
});
setIsResponding(false);
currentLLMRequestRef.current = null;
},
undefined,
recentMessages,
apiKey?.[1],
'DS'
);
}
}

setMessages(prevMessages => {
const lastMessage = prevMessages[prevMessages.length - 1];
if (lastMessage.type === 'bot') {
return [
...prevMessages.slice(0, -1),
{
...lastMessage,
status: 'completed',
isPlaying: lastMessage.isPlaying,
ttsStreamingId: lastMessage.ttsStreamingId
}
];
}
return prevMessages;
});
setIsResponding(false);
currentLLMRequestRef.current = null;
},
image,
recentMessages,
apiKey,
'Thinking'
);
} else {
// Not the AI-assisted-writing scenario; keep the original logic
await createLLMRequest(
@@ -291,7 +265,7 @@ const ChatInterface: React.FC<ChatInterfaceProps> = ({ initialMessages, apiKey }
},
image,
recentMessages,
apiKey?.[0],
apiKey,
'VLM'
);
}
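Both branches of this component update chat state with the same pattern: append the streamed content delta (and any reasoning delta) to the last bot message while leaving earlier messages untouched. A condensed sketch of that updater, using a simplified message shape rather than the component's full one (which also carries `status`, `isPlaying`, and `ttsStreamingId`):

```typescript
// Simplified message shape for illustration only.
interface Message {
  type: 'user' | 'bot';
  content: string;
  reasoningContent?: string;
}

// Append a streamed delta (and optional reasoning delta) to the last bot message.
function appendToLastBotMessage(prev: Message[], chunk: string, reasoning?: string): Message[] {
  const last = prev[prev.length - 1];
  if (!last || last.type !== 'bot') return prev;
  return [
    ...prev.slice(0, -1),
    {
      ...last,
      content: last.content + chunk,
      reasoningContent: (last.reasoningContent ?? '') + (reasoning ?? ''),
    },
  ];
}

// Usage inside the stream callback (sketch):
// setMessages(prev => appendToLastBotMessage(prev, chunk, reasoning));
```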