diff --git a/demohouse/hgdoll/.gitignore b/demohouse/hgdoll/.gitignore
new file mode 100644
index 00000000..af56f610
--- /dev/null
+++ b/demohouse/hgdoll/.gitignore
@@ -0,0 +1,2 @@
+.DS_Store
+.idea/
diff --git a/demohouse/hgdoll/LICENSE b/demohouse/hgdoll/LICENSE
new file mode 100644
index 00000000..c5ca71e8
--- /dev/null
+++ b/demohouse/hgdoll/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 削微寒
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/demohouse/hgdoll/README.md b/demohouse/hgdoll/README.md
new file mode 100644
index 00000000..29c268b4
--- /dev/null
+++ b/demohouse/hgdoll/README.md
@@ -0,0 +1,92 @@
+
+
+
+中文 | English
+
+HGDoll is an AI companion app for mobile gaming.
+
+
+HGDoll is a fully open-source AI companion app for mobile gaming. While you play, it sees your game screen in real time, chats with you, and cheers you on, for a fun sense of company. It is built on the Doubao large models and Volcengine Ark (Arkitect), and consists of an [Android client](android/README.md) (Kotlin) and a [backend service](server/README.md) (Python); both can be run locally, so it is easy to get started.
+
+https://github.com/user-attachments/assets/704d7f2a-3206-45f2-8760-d9cf9577ca7c
+
+For now, HGDoll is still a little "toy": there are plenty of bugs and room for improvement. I will keep updating and polishing it, and everyone is welcome to try it out and contribute code.
+
+## Architecture
+
+```mermaid
+graph TD
+    User((User)) --> Android[Android client]
+
+    subgraph Client[Client]
+        Android --> Speech[Speech recognition]
+        Android --> Screen[Screen recording]
+        Speech --> SpeechAPI[Doubao streaming ASR]
+        SpeechAPI --> TextResult[Speech-to-text result]
+        Screen --> ScreenCapture[Periodic screenshots]
+        AudioPlay[Audio playback] --> Android
+    end
+
+    subgraph Server[Server side Arkitect]
+        TextResult --> Backend[Backend service]
+        ScreenCapture --> Backend
+        Backend --> TempMemory[Temporary memory]
+        TempMemory --> Context[Session context]
+        Context --> CTX1[Context-id-1]
+        Context --> CTX2[Context-id-2]
+        Context --> CTX3[Context-id-3]
+        Context --> CTXN[...]
+        Context --> Prompt[Prompt generation]
+        ImageResult[Screenshot recognition result] --> TempMemory
+        AudioResult[Speech synthesis result] --> AudioPlay
+    end
+
+    subgraph AI[AI model services]
+        Backend --> VLM[Doubao-vision-pro-32k]
+        VLM --> ImageResult
+        Prompt --> LLM[Doubao-pro-32k]
+        LLM --> TTS[Doubao TTS]
+        TTS --> AudioResult
+    end
+
+ style User fill:#f9f,stroke:#333,stroke-width:2px
+ style Client fill:#e4f7fb,stroke:#333,stroke-width:1px
+ style Server fill:#e6ffe6,stroke:#333,stroke-width:1px
+ style AI fill:#e6e6ff,stroke:#333,stroke-width:1px
+ style Android fill:#fff,stroke:#333,stroke-width:1px
+ style Backend fill:#fff,stroke:#333,stroke-width:1px
+ style VLM fill:#fff,stroke:#333,stroke-width:1px
+ style LLM fill:#fff,stroke:#333,stroke-width:1px
+ style TTS fill:#fff,stroke:#333,stroke-width:1px
+```
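+
+The "session context" in the diagram is keyed by a context id that the client generates for each play session and attaches to every request. Below is a minimal Kotlin sketch of that convention, using the header name found in this repo's Android client (illustrative only, not a complete API):
+
+```kotlin
+import okhttp3.Request
+import java.util.UUID
+
+// One context id per play session; MainActivity creates it when recording starts
+// and clears it when recording stops.
+val contextId: String = UUID.randomUUID().toString()
+
+// Both the screenshot service and the ASR service tag their uploads with it, so the
+// backend can keep screenshots and recognized speech in the same session context.
+fun withSessionContext(builder: Request.Builder): Request.Builder =
+    builder.addHeader("X-Context-Id", contextId)
+```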
+
+
+## Quick start
+
+The setup and startup steps for the client and the backend are in their respective directories. You also need to apply for the required API keys; see [docs/key.md](docs/key.md) for how.
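+
+For reference, the Android client talks to the local backend through a chat-completions style endpoint. The sketch below is a simplified version of what `AsrService.sendTextAsr` in this repo does (the bot id is the one hard-coded in the demo; adjust it to your own deployment):
+
+```kotlin
+import okhttp3.MediaType.Companion.toMediaTypeOrNull
+import okhttp3.OkHttpClient
+import okhttp3.Request
+import okhttp3.RequestBody.Companion.toRequestBody
+import org.json.JSONArray
+import org.json.JSONObject
+
+fun sendText(serverIp: String, contextId: String, text: String): String? {
+    // Request body: a single user message containing one text part.
+    val body = JSONObject().apply {
+        put("model", "bot-20241114164326-xlcc91") // bot id used by this demo
+        put("stream", false)
+        put("messages", JSONArray().put(JSONObject().apply {
+            put("role", "user")
+            put("content", JSONArray().put(JSONObject().apply {
+                put("type", "text")
+                put("text", text)
+            }))
+        }))
+    }
+    val request = Request.Builder()
+        .url("http://$serverIp/api/v3/bots/chat/completions")
+        .post(body.toString().toRequestBody("application/json".toMediaTypeOrNull()))
+        .addHeader("X-Context-Id", contextId) // groups requests into one session
+        .build()
+    OkHttpClient().newCall(request).execute().use { response ->
+        // The reply is JSON; choices[0].message may also carry base64 TTS audio.
+        return response.body?.string()
+    }
+}
+```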
+
+### Project structure
+
+```
+HGDoll/
+├── android/        # Android client
+├── server/         # Backend service
+└── docs/           # Project docs
+```
+
+### Tech stack
+
+#### Android client
+- Kotlin
+- Jetpack Compose
+- Gradle Kotlin DSL
+- AndroidX
+
+#### Backend service
+- Python 3.8-3.12
+- FastAPI
+- Volcengine Ark Arkitect SDK
+- Uvicorn
+
+
+## License
+
+This project is released under the MIT License; see the [LICENSE](LICENSE) file for details.
diff --git a/demohouse/hgdoll/android/.gitignore b/demohouse/hgdoll/android/.gitignore
new file mode 100644
index 00000000..093721fc
--- /dev/null
+++ b/demohouse/hgdoll/android/.gitignore
@@ -0,0 +1,29 @@
+# Compiled class file
+*.class
+
+# Log file
+*.log
+
+# BlueJ files
+*.ctxt
+
+# Mobile Tools for Java (J2ME)
+.mtj.tmp/
+
+# Package Files #
+*.jar
+*.war
+*.nar
+*.ear
+*.zip
+*.tar.gz
+*.rar
+
+# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
+hs_err_pid*
+replay_pid*
+
+# Kotlin Gradle plugin data, see https://kotlinlang.org/docs/whatsnew20.html#new-directory-for-kotlin-data-in-gradle-projects
+.kotlin/
+.DS_Store
+.gradle/
diff --git a/demohouse/hgdoll/android/README.md b/demohouse/hgdoll/android/README.md
new file mode 100644
index 00000000..dbdbb649
--- /dev/null
+++ b/demohouse/hgdoll/android/README.md
@@ -0,0 +1,60 @@
+# HGDoll Android Client
+
+
+
+The HGDoll Android client is built with Kotlin and Jetpack Compose and uses the Doubao speech large-model service. See [../docs/key.md](../docs/key.md) for how to apply for the API keys required to run it.
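+
+The client streams microphone audio to Doubao's streaming ASR service over a WebSocket. Here is a trimmed-down sketch of how `AsrService` in this module opens that connection (the credentials are the ones you enter in the app; the listener callbacks and the binary audio frames are omitted):
+
+```kotlin
+import okhttp3.OkHttpClient
+import okhttp3.Request
+import okhttp3.WebSocket
+import okhttp3.WebSocketListener
+import java.util.concurrent.TimeUnit
+
+fun openAsrSocket(appId: String, accessToken: String, listener: WebSocketListener): WebSocket {
+    val client = OkHttpClient.Builder()
+        .pingInterval(5, TimeUnit.SECONDS) // keep the connection alive
+        .build()
+    val request = Request.Builder()
+        .url("wss://openspeech.bytedance.com/api/v3/sauc/bigmodel")
+        .addHeader("X-Api-App-Key", appId)          // the ASR App ID entered in the app
+        .addHeader("X-Api-Access-Key", accessToken) // the ASR Access Token entered in the app
+        .addHeader("X-Api-Resource-Id", "volc.bigasr.sauc.duration")
+        .build()
+    // Audio is then sent as binary frames using the service's small binary
+    // protocol: 4-byte header + sequence number + gzip-compressed PCM payload.
+    return client.newWebSocket(request, listener)
+}
+```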
+
+## Requirements
+
+- Android Studio Hedgehog | 2023.1.1 or later
+- JDK 17 or later
+- Gradle 8.0 or later
+- Android SDK 34 (Android 14) or later
+
+## Quick start
+
+### 1. Clone the repository
+
+```bash
+git clone https://github.com/521xueweihan/HGDoll.git
+cd HGDoll/android/
+```
+
+### 2. Set up the development environment
+
+1. Open Android Studio
+2. Choose "Open an existing project"
+3. Select the cloned `android` directory
+4. Wait for the Gradle sync to finish
+
+### 3. Run the app
+
+1. Connect an Android device or start an emulator
+2. Click the "Run" button (the green triangle) in the Android Studio toolbar
+3. Choose the target device
+4. Wait for the app to install and launch
+5. Enter your ASR Token, ASR App ID, and the local server IP in the app (see the sketch below for how these values are used)
+
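+The values from step 5 are not persisted; `MainActivity` simply forwards them to the foreground services as Intent extras, roughly like this (a simplified sketch of `MainActivity.startAsr`, with parameter names chosen for clarity):
+
+```kotlin
+import android.content.Context
+import android.content.Intent
+import androidx.core.content.ContextCompat
+import com.example.android.service.AsrService
+
+fun startAsrService(context: Context, token: String, appId: String, serverIp: String, contextId: String) {
+    val intent = Intent(context, AsrService::class.java).apply {
+        action = AsrService.ACTION_START
+        putExtra(AsrService.EXTRA_ACCESS_TOKEN, token)
+        putExtra(AsrService.EXTRA_APP_ID, appId)
+        putExtra(AsrService.SERVER_IP, serverIp)
+        putExtra(AsrService.CONTEXT_ID, contextId)
+    }
+    ContextCompat.startForegroundService(context, intent)
+}
+```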
+
+## Project structure
+
+- `app/` - main application module
+- `gradle/` - Gradle wrapper files
+- `build.gradle.kts` - project-level build configuration
+- `settings.gradle.kts` - project settings
+- `gradle.properties` - Gradle properties
+
+## Tech stack
+
+- Kotlin - primary programming language
+- Jetpack Compose - modern UI toolkit
+- Gradle Kotlin DSL - build scripts
+- AndroidX - Android extension libraries
+
+## Contributing
+
+1. Fork the project
+2. Create a feature branch (`git checkout -b feature/AmazingFeature`)
+3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
+4. Push the branch (`git push origin feature/AmazingFeature`)
+5. Open a Pull Request
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/app/.gitignore b/demohouse/hgdoll/android/app/.gitignore
new file mode 100644
index 00000000..42afabfd
--- /dev/null
+++ b/demohouse/hgdoll/android/app/.gitignore
@@ -0,0 +1 @@
+/build
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/app/build.gradle.kts b/demohouse/hgdoll/android/app/build.gradle.kts
new file mode 100644
index 00000000..fcea8cb9
--- /dev/null
+++ b/demohouse/hgdoll/android/app/build.gradle.kts
@@ -0,0 +1,73 @@
+plugins {
+ alias(libs.plugins.android.application)
+ alias(libs.plugins.kotlin.android)
+ alias(libs.plugins.kotlin.compose)
+}
+
+android {
+ namespace = "com.example.android"
+ compileSdk = 35
+
+ defaultConfig {
+ applicationId = "com.example.android"
+ minSdk = 24
+ targetSdk = 35
+ versionCode = 1
+ versionName = "1.0"
+
+ testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
+ }
+
+ buildTypes {
+ release {
+ isMinifyEnabled = false
+ proguardFiles(
+ getDefaultProguardFile("proguard-android-optimize.txt"),
+ "proguard-rules.pro"
+ )
+ }
+ }
+ compileOptions {
+ sourceCompatibility = JavaVersion.VERSION_11
+ targetCompatibility = JavaVersion.VERSION_11
+ }
+ kotlinOptions {
+ jvmTarget = "11"
+ }
+ buildFeatures {
+ compose = true
+ }
+}
+
+dependencies {
+ implementation(libs.androidx.core.ktx)
+ implementation(libs.androidx.lifecycle.runtime.ktx)
+ implementation(libs.androidx.activity.compose)
+ implementation(platform(libs.androidx.compose.bom))
+ implementation(libs.androidx.ui)
+ implementation(libs.androidx.ui.graphics)
+ implementation(libs.androidx.ui.tooling.preview)
+ implementation(libs.androidx.material3)
+
+    // Screen recording related dependencies
+ implementation("androidx.media:media:1.6.0")
+ implementation("com.github.bumptech.glide:glide:4.12.0")
+
+    // Audio playback (ExoPlayer) dependencies
+ implementation("com.google.android.exoplayer:exoplayer-core:2.19.1")
+ implementation("com.google.android.exoplayer:exoplayer-ui:2.19.1")
+
+    // Networking dependencies
+ implementation("com.squareup.retrofit2:retrofit:2.9.0")
+ implementation("com.squareup.retrofit2:converter-gson:2.9.0")
+ implementation("com.squareup.okhttp3:okhttp:4.11.0")
+ implementation("com.squareup.okhttp3:logging-interceptor:4.11.0")
+
+ testImplementation(libs.junit)
+ androidTestImplementation(libs.androidx.junit)
+ androidTestImplementation(libs.androidx.espresso.core)
+ androidTestImplementation(platform(libs.androidx.compose.bom))
+ androidTestImplementation(libs.androidx.ui.test.junit4)
+ debugImplementation(libs.androidx.ui.tooling)
+ debugImplementation(libs.androidx.ui.test.manifest)
+}
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/app/proguard-rules.pro b/demohouse/hgdoll/android/app/proguard-rules.pro
new file mode 100644
index 00000000..481bb434
--- /dev/null
+++ b/demohouse/hgdoll/android/app/proguard-rules.pro
@@ -0,0 +1,21 @@
+# Add project specific ProGuard rules here.
+# You can control the set of applied configuration files using the
+# proguardFiles setting in build.gradle.
+#
+# For more details, see
+# http://developer.android.com/guide/developing/tools/proguard.html
+
+# If your project uses WebView with JS, uncomment the following
+# and specify the fully qualified class name to the JavaScript interface
+# class:
+#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
+# public *;
+#}
+
+# Uncomment this to preserve the line number information for
+# debugging stack traces.
+#-keepattributes SourceFile,LineNumberTable
+
+# If you keep the line number information, uncomment this to
+# hide the original source file name.
+#-renamesourcefileattribute SourceFile
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/app/src/main/AndroidManifest.xml b/demohouse/hgdoll/android/app/src/main/AndroidManifest.xml
new file mode 100644
index 00000000..c09ec71c
--- /dev/null
+++ b/demohouse/hgdoll/android/app/src/main/AndroidManifest.xml
@@ -0,0 +1,48 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/app/src/main/ic_launcher-playstore.png b/demohouse/hgdoll/android/app/src/main/ic_launcher-playstore.png
new file mode 100644
index 00000000..62490683
Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/ic_launcher-playstore.png differ
diff --git a/demohouse/hgdoll/android/app/src/main/java/com/example/android/MainActivity.kt b/demohouse/hgdoll/android/app/src/main/java/com/example/android/MainActivity.kt
new file mode 100644
index 00000000..35ed5e50
--- /dev/null
+++ b/demohouse/hgdoll/android/app/src/main/java/com/example/android/MainActivity.kt
@@ -0,0 +1,341 @@
+package com.example.android
+
+import android.content.BroadcastReceiver
+import android.content.Context
+import android.content.Intent
+import android.content.IntentFilter
+import android.media.projection.MediaProjectionManager
+import android.os.Build
+import android.os.Bundle
+import android.util.Log
+import androidx.activity.ComponentActivity
+import androidx.activity.compose.setContent
+import androidx.activity.result.contract.ActivityResultContracts
+import androidx.compose.foundation.layout.*
+import androidx.compose.material3.*
+import androidx.compose.runtime.*
+import androidx.compose.ui.Alignment
+import androidx.compose.ui.Modifier
+import androidx.compose.ui.graphics.Brush
+import androidx.compose.ui.graphics.Color
+import androidx.compose.ui.text.font.FontWeight
+import androidx.compose.ui.text.input.PasswordVisualTransformation
+import androidx.compose.ui.text.style.TextAlign
+import androidx.compose.ui.unit.dp
+import androidx.compose.ui.unit.sp
+import androidx.core.content.ContextCompat
+import com.example.android.service.ScreenshotService
+import com.example.android.service.AsrService
+import com.example.android.ui.theme.AndroidTheme
+import com.example.android.utils.PermissionUtils
+import kotlinx.coroutines.*
+import java.util.UUID
+
+class MainActivity : ComponentActivity() {
+ companion object {
+ private const val TAG = "MainActivity"
+ const val EXTRA_RESULT_CODE = "result_code"
+ const val EXTRA_RESULT_DATA = "result_data"
+ }
+ private var isRecording = false
+ private var showPermissionDialog = mutableStateOf(false)
+ private var permissionDialogMessage = mutableStateOf("")
+ private var showSettingsButton = mutableStateOf(false)
+ private val scope = CoroutineScope(Dispatchers.Main + Job())
+ private var isAsrRecording = false
+ private var asrResult = ""
+ private var asrAccessToken = ""
+ private var asrAppId = ""
+ private var server_ip = ""
+ private var context_id = ""
+ private var isRequestingAudioPermission = false
+ private var isRequestingScreenPermission = false
+
+ private val mediaProjectionLauncher = registerForActivityResult(
+ ActivityResultContracts.StartActivityForResult()
+ ) { result ->
+ isRequestingScreenPermission = false
+ if (result.resultCode == RESULT_OK) {
+ Log.d(TAG, "Screen capture result: ${result.resultCode}")
+ startRecording(result.resultCode, result.data)
+            // Screen-capture permission granted; start the ASR service
+ startAsr()
+ isRecording = true
+ } else {
+            // Screen-capture permission denied
+ showPermissionDialog.value = true
+ permissionDialogMessage.value = "需要屏幕录制权限才能使用截图功能"
+ showSettingsButton.value = true
+ }
+ }
+
+
+ private val asrReceiver = object : BroadcastReceiver() {
+ override fun onReceive(context: Context?, intent: Intent?) {
+ if (intent?.action == "com.example.android.ASR_RESULT") {
+ val text = intent.getStringExtra("text") ?: ""
+ asrResult = text
+ }
+ }
+ }
+
+ override fun onCreate(savedInstanceState: Bundle?) {
+ super.onCreate(savedInstanceState)
+ setContent {
+ AndroidTheme {
+ Surface(
+ modifier = Modifier.fillMaxSize(),
+ color = MaterialTheme.colorScheme.background
+ ) {
+ var isRecording by remember { mutableStateOf(false) }
+ var asrResult by remember { mutableStateOf("") }
+ var asrAccessToken by remember { mutableStateOf("") }
+ var asrAppId by remember { mutableStateOf("") }
+ var server_ip by remember { mutableStateOf("") }
+
+ // 同步状态变量
+ LaunchedEffect(asrAccessToken, asrAppId, server_ip) {
+ this@MainActivity.asrAccessToken = asrAccessToken
+ this@MainActivity.asrAppId = asrAppId
+ this@MainActivity.server_ip = server_ip
+ }
+
+ Column(
+ modifier = Modifier
+ .fillMaxSize()
+ .padding(16.dp),
+ horizontalAlignment = Alignment.CenterHorizontally,
+ verticalArrangement = Arrangement.Center
+ ) {
+ Text(
+ text = "HGDoll",
+ fontSize = 32.sp,
+ style = MaterialTheme.typography.headlineLarge.copy(
+ brush = Brush.linearGradient(
+ colors = listOf(
+ Color(0xFFE91E63), // 粉色
+ Color(0xFF2196F3), // 蓝色
+ Color(0xFF4CAF50), // 绿色
+ Color(0xFFFFC107) // 黄色
+ )
+ )
+ ),
+ fontWeight = FontWeight.Bold,
+ textAlign = TextAlign.Center,
+ modifier = Modifier.padding(bottom = 8.dp)
+ )
+
+ Text(
+ text = "基于豆包系列大模型打造的 AI 开源应用",
+ fontSize = 16.sp,
+ color = Color.Gray,
+ textAlign = TextAlign.Center,
+ modifier = Modifier.padding(bottom = 32.dp)
+ )
+ Button(
+ onClick = {
+ if (isRecording) {
+ Log.d(TAG, "Stopping recording with context_id: $context_id")
+ stopRecording()
+ stopAsr()
+ context_id = ""
+ isRecording = false
+ } else {
+ context_id = if (context_id.isEmpty()) {
+ UUID.randomUUID().toString()
+ } else {
+ context_id
+ }
+ Log.d(TAG, "Generated new context_id: $context_id")
+ // 有权限,启动服务
+ startRecording()
+ startAsr()
+ isRecording = true
+ }
+ }
+ ) {
+ Text(if (isRecording) "停止录制" else "开始录制")
+ }
+
+ Spacer(modifier = Modifier.height(16.dp))
+
+ OutlinedTextField(
+ value = asrAccessToken,
+ onValueChange = { asrAccessToken = it },
+ label = { Text("ASR Access Token") },
+ modifier = Modifier.fillMaxWidth(),
+ singleLine = true,
+ visualTransformation = PasswordVisualTransformation()
+ )
+
+ Spacer(modifier = Modifier.height(8.dp))
+
+ OutlinedTextField(
+ value = asrAppId,
+ onValueChange = { asrAppId = it },
+ label = { Text("ASR App ID") },
+ modifier = Modifier.fillMaxWidth(),
+ singleLine = true,
+ visualTransformation = PasswordVisualTransformation()
+ )
+
+ Spacer(modifier = Modifier.height(8.dp))
+
+ OutlinedTextField(
+ value = server_ip,
+ onValueChange = { server_ip = it },
+ label = { Text("Server IP") },
+ modifier = Modifier.fillMaxWidth(),
+ singleLine = true
+ )
+
+ if (asrResult.isNotEmpty()) {
+ Spacer(modifier = Modifier.height(16.dp))
+ Text(
+ text = "识别结果:$asrResult",
+ modifier = Modifier.padding(16.dp)
+ )
+ }
+ }
+ }
+ }
+ }
+
+        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.TIRAMISU) {
+            registerReceiver(
+                asrReceiver,
+                IntentFilter("com.example.android.ASR_RESULT"),
+                Context.RECEIVER_NOT_EXPORTED
+            )
+        } else {
+            // On older versions register without the flag so the receiver still
+            // works and can be unregistered in onDestroy().
+            registerReceiver(asrReceiver, IntentFilter("com.example.android.ASR_RESULT"))
+        }
+ }
+
+
+ override fun onResume() {
+ super.onResume()
+ if (showSettingsButton.value) {
+ showSettingsButton.value = false
+ checkAndRequestPermissions()
+ }
+ }
+
+ private fun checkAndRequestPermissions() {
+ if (!PermissionUtils.hasAllPermissions(this)) {
+ isRequestingAudioPermission = true
+ requestPermissions(arrayOf(android.Manifest.permission.RECORD_AUDIO), 1)
+ return
+ }
+        // Recording permission granted; request screen-capture permission
+ requestScreenCapturePermission()
+ }
+
+ private fun requestScreenCapturePermission() {
+ try {
+ val projectionManager = getSystemService(MediaProjectionManager::class.java)
+ val captureIntent = projectionManager.createScreenCaptureIntent()
+ Log.d(TAG, "Launching screen capture intent...")
+ isRequestingScreenPermission = true
+ mediaProjectionLauncher.launch(captureIntent)
+ } catch (e: Exception) {
+ Log.e(TAG, "Error starting capture", e)
+ showPermissionDialog.value = true
+ permissionDialogMessage.value = "启动屏幕截图失败,请重试"
+ showSettingsButton.value = false
+ }
+ }
+
+ private fun startRecording(resultCode: Int? = null, data: Intent? = null) {
+ Log.d(TAG, "Starting recording with context_id: $context_id")
+ Log.d(TAG, "Starting work...")
+
+ if (resultCode == null) {
+            // No projection data yet; run the permission checks first
+ checkAndRequestPermissions()
+ return
+ }
+
+ try {
+            // Start the screenshot service
+ val screenshotIntent = Intent(this, ScreenshotService::class.java).apply {
+ action = ScreenshotService.ACTION_START
+ putExtra(EXTRA_RESULT_CODE, resultCode)
+ putExtra(EXTRA_RESULT_DATA, data)
+ putExtra(ScreenshotService.SERVER_IP, server_ip)
+ putExtra(ScreenshotService.CONTEXT_ID, context_id)
+ }
+ ContextCompat.startForegroundService(this, screenshotIntent)
+ Log.d(TAG, "Screenshot service started")
+ } catch (e: Exception) {
+ Log.e(TAG, "Error starting capture", e)
+ showPermissionDialog.value = true
+ permissionDialogMessage.value = "启动录制失败,请重试"
+ showSettingsButton.value = false
+ }
+ }
+
+    override fun onRequestPermissionsResult(requestCode: Int, permissions: Array<String>, grantResults: IntArray) {
+ super.onRequestPermissionsResult(requestCode, permissions, grantResults)
+ when (requestCode) {
+ 1 -> {
+ isRequestingAudioPermission = false
+ if (grantResults.isNotEmpty() && grantResults[0] == android.content.pm.PackageManager.PERMISSION_GRANTED) {
+                    // Recording permission granted; request screen-capture permission
+ requestScreenCapturePermission()
+ } else {
+                    // Permission denied
+ showPermissionDialog.value = true
+ permissionDialogMessage.value = "需要录音权限才能使用语音识别功能"
+ showSettingsButton.value = true
+ }
+ }
+ }
+ }
+
+ private fun stopRecording() {
+ Log.d(TAG, "Stopping capture...")
+ isRecording = false
+
+ try {
+            // Stop the screenshot service
+ val screenshotIntent = Intent(this, ScreenshotService::class.java).apply {
+ action = ScreenshotService.ACTION_STOP
+ }
+ startService(screenshotIntent)
+ Log.d(TAG, "Screenshot service stopped")
+ } catch (e: Exception) {
+ Log.e(TAG, "Error stopping capture", e)
+ showPermissionDialog.value = true
+ permissionDialogMessage.value = "停止录制失败,请重试"
+ showSettingsButton.value = false
+ }
+ }
+
+
+ private fun startAsr() {
+ Log.d(TAG, "Starting ASR with context_id: $context_id")
+        // No permission check here; it is already handled by checkAndRequestPermissions()
+ val intent = Intent(this, AsrService::class.java).apply {
+ action = AsrService.ACTION_START
+ putExtra(AsrService.EXTRA_ACCESS_TOKEN, asrAccessToken)
+ putExtra(AsrService.EXTRA_APP_ID, asrAppId)
+ putExtra(AsrService.SERVER_IP, server_ip)
+ putExtra(AsrService.CONTEXT_ID, context_id)
+ }
+ ContextCompat.startForegroundService(this, intent)
+ isAsrRecording = true
+ }
+
+ private fun stopAsr() {
+ val intent = Intent(this, AsrService::class.java).apply {
+ action = AsrService.ACTION_STOP
+ }
+ startService(intent)
+ isAsrRecording = false
+ }
+
+ override fun onDestroy() {
+ super.onDestroy()
+ scope.cancel()
+ unregisterReceiver(asrReceiver)
+ }
+}
diff --git a/demohouse/hgdoll/android/app/src/main/java/com/example/android/service/AsrService.kt b/demohouse/hgdoll/android/app/src/main/java/com/example/android/service/AsrService.kt
new file mode 100644
index 00000000..b1098563
--- /dev/null
+++ b/demohouse/hgdoll/android/app/src/main/java/com/example/android/service/AsrService.kt
@@ -0,0 +1,1158 @@
+package com.example.android.service
+
+import android.app.Notification
+import android.app.NotificationChannel
+import android.app.NotificationManager
+import android.app.Service
+import android.content.Intent
+import android.media.AudioFormat
+import android.media.AudioRecord
+import android.media.MediaPlayer
+import android.media.MediaRecorder
+import android.os.Build
+import android.os.IBinder
+import android.util.Log
+import androidx.core.app.NotificationCompat
+import okhttp3.OkHttpClient
+import okhttp3.Request
+import okhttp3.Response
+import okhttp3.WebSocket
+import okhttp3.WebSocketListener
+import org.json.JSONObject
+import java.util.concurrent.TimeUnit
+import java.util.concurrent.atomic.AtomicBoolean
+import okio.ByteString
+import java.util.zip.GZIPOutputStream
+import java.util.zip.GZIPInputStream
+import com.google.gson.Gson
+import android.util.Base64
+import okhttp3.MediaType.Companion.toMediaTypeOrNull
+import okhttp3.RequestBody.Companion.toRequestBody
+import org.json.JSONArray
+import java.io.ByteArrayOutputStream
+import java.io.ByteArrayInputStream
+import java.io.File
+import java.io.FileOutputStream
+import android.media.AudioManager
+import android.media.AudioAttributes
+import android.content.Context
+import android.media.AudioFocusRequest
+import java.net.SocketTimeoutException
+import kotlinx.coroutines.GlobalScope
+import kotlinx.coroutines.launch
+import kotlinx.coroutines.withContext
+
+class AsrService : Service() {
+ companion object {
+ const val ACTION_START = "com.example.android.action.START_ASR"
+ const val ACTION_STOP = "com.example.android.action.STOP_ASR"
+ const val EXTRA_ACCESS_TOKEN = "extra_access_token"
+ const val EXTRA_APP_ID = "extra_app_id"
+ const val SERVER_IP = "server_ip"
+ const val CONTEXT_ID = "context_id"
+ private const val NOTIFICATION_ID = 3
+ private const val CHANNEL_ID = "asr_service_channel"
+ private const val SAMPLE_RATE = 16000
+ private const val CHANNEL_CONFIG = AudioFormat.CHANNEL_IN_MONO
+ private const val AUDIO_FORMAT = AudioFormat.ENCODING_PCM_16BIT
+ private const val ASR_URL = "wss://openspeech.bytedance.com/api/v3/sauc/bigmodel"
+ private const val UPLOAD_URL = "http://%s/api/v3/bots/chat/completions"
+ private const val ASR_RESOURCE_ID = "volc.bigasr.sauc.duration"
+ private const val CONNECT_ID = "67ee89ba-7050-4c04-a3d7-ac61a63499b3"
+ private const val BUFFER_SIZE = 16000 // 200ms of audio data (16kHz * 2bytes * 0.2s)
+ private const val AUDIO_FORMAT_CONFIG = "pcm"
+ private const val AUDIO_BITS = 16
+ private const val AUDIO_CHANNELS = 1
+ private const val MAX_RETRIES = 3
+        private const val INIT_TIMEOUT = 10000L // 10 seconds
+        private const val FORCE_DEFINITE_DURATION = 10000L // 10 s
+        private const val SEND_INTERVAL = 1000L // interval between audio sends
+        private const val RECONNECT_DELAY = 5000L // 5 seconds
+        private const val PING_INTERVAL = 30000L // 30 seconds
+        private const val PONG_TIMEOUT = 15000L // 15 seconds
+        private const val MAX_PING_FAILURES = 3 // maximum consecutive ping failures
+
+        // Protocol constants
+ private const val PROTOCOL_VERSION = 0b0001
+ private const val DEFAULT_HEADER_SIZE = 0b0001
+ private const val FULL_CLIENT_REQUEST = 0b0001
+ private const val AUDIO_ONLY_REQUEST = 0b0010
+ private const val FULL_SERVER_RESPONSE = 0b1001
+ private const val SERVER_ACK = 0b1011
+ private const val POS_SEQUENCE = 0b0001
+ private const val NEG_WITH_SEQUENCE = 0b0011
+ private const val JSON = 0b0001
+ private const val GZIP = 0b0001
+ private const val RAW = 0x0002
+ }
+
+
+ private var audioRecord: AudioRecord? = null
+ private var recordingThread: Thread? = null
+ private val isRecording = AtomicBoolean(false)
+ private var isInitialized = AtomicBoolean(false)
+ private var webSocket: WebSocket? = null
+ private var accessToken = ""
+ private var appId = ""
+ private var serverIp = ""
+ private var contextId = ""
+
+ private var sequence = 0
+ private var retryCount = 0
+    private var lastVoiceTime = 0L // timestamp of the last detected voice
+    private var silenceDuration = 0L // how long the current silence has lasted
+    private val SILENCE_THRESHOLD = 300 // energy threshold below which audio counts as silence
+    private val MIN_SILENCE_DURATION = 3000L // minimum silence before an utterance is treated as finished
+    private var isAlreadyDefinite = false // whether a definite result has already been received
+    private var lastAsrTime = 0L // timestamp of the last recognition result
+    private var currentText = "" // current recognized text
+    private var startTime = 0L // session start time
+    private var isSpeaking = false // whether the user is currently speaking
+    private var voiceBuffer = ByteArrayOutputStream() // buffered voice data
+ private var LAST_RESPONSE_TIME = 0L
+ private var isProcessingRequest = false
+ private var isPlayingAudio = false
+ private var mediaPlayer: MediaPlayer? = null
+ private val tempAudioDir: File by lazy {
+ File(cacheDir, "temp_audio").apply {
+ if (!exists()) {
+ mkdirs()
+ }
+ }
+ }
+
+ private val okHttpClient = OkHttpClient.Builder()
+ .connectTimeout(30, TimeUnit.SECONDS)
+ .readTimeout(60, TimeUnit.SECONDS)
+ .writeTimeout(60, TimeUnit.SECONDS)
+ .pingInterval(5, TimeUnit.SECONDS) // 减少 ping 间隔到 5 秒
+ .retryOnConnectionFailure(true)
+ .build()
+
+ private var audioManager: AudioManager? = null
+
+ private var lastPingTime = 0L
+ private var isConnecting = AtomicBoolean(false)
+
+ private var audioFocusRequest: AudioFocusRequest? = null
+ private var audioFocusGranted = false
+
+ private var pingFailureCount = 0
+ private var lastPongTime = 0L
+
+ private var statusCodeRetryCount = 0
+
+ private var originalVolume = 0
+    private val VOLUME_REDUCTION_FACTOR = 0.5f // reduce playback to 50% of the original volume
+
+ override fun onCreate() {
+ super.onCreate()
+ createNotificationChannel()
+ audioManager = getSystemService(Context.AUDIO_SERVICE) as AudioManager
+ }
+
+ override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int {
+ Log.i("intent", intent.toString())
+ when (intent?.action) {
+ ACTION_START -> {
+ accessToken = intent.getStringExtra(EXTRA_ACCESS_TOKEN) ?: ""
+ appId = intent.getStringExtra(EXTRA_APP_ID) ?: ""
+ serverIp = intent.getStringExtra(SERVER_IP) ?: ""
+ contextId = intent.getStringExtra(CONTEXT_ID) ?: ""
+ startAsr()
+ }
+ ACTION_STOP -> stopAsr()
+ }
+ return START_NOT_STICKY
+ }
+
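+    /**
+     * Builds the 4-byte binary protocol header used by the streaming ASR API:
+     * byte 0 = protocol version (high nibble) | header size (low nibble),
+     * byte 1 = message type | message-type-specific flags,
+     * byte 2 = serialization method | compression type,
+     * byte 3 = reserved.
+     */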
+ private fun getHeader(messageType: Int, messageTypeSpecificFlags: Int, serialMethod: Int, compressionType: Int, reservedData: Int): ByteArray {
+ val header = ByteArray(4)
+ header[0] = ((PROTOCOL_VERSION shl 4) or DEFAULT_HEADER_SIZE).toByte()
+ header[1] = ((messageType shl 4) or messageTypeSpecificFlags).toByte()
+ header[2] = ((serialMethod shl 4) or compressionType).toByte()
+ header[3] = reservedData.toByte()
+ return header
+ }
+
+ private fun intToBytes(a: Int): ByteArray {
+ return byteArrayOf(
+ ((a ushr 24) and 0xFF).toByte(),
+ ((a ushr 16) and 0xFF).toByte(),
+ ((a ushr 8) and 0xFF).toByte(),
+ (a and 0xFF).toByte()
+ )
+ }
+
+ private fun gzipCompress(src: ByteArray): ByteArray {
+ val out = ByteArrayOutputStream()
+ val gzip = GZIPOutputStream(out)
+ gzip.write(src)
+ gzip.close()
+ return out.toByteArray()
+ }
+
+ private fun connectWebSocket() {
+ if (isConnecting.get()) {
+ Log.d("AsrService", "Already connecting, skipping...")
+ return
+ }
+
+ isConnecting.set(true)
+ Log.d("AsrService", "Connecting to WebSocket...")
+
+ try {
+            // Make sure any previous connection is closed
+ webSocket?.close(1000, "Reconnecting")
+ Thread.sleep(100)
+ } catch (e: Exception) {
+ Log.e("AsrService", "Error closing old WebSocket", e)
+ }
+
+ val request = Request.Builder()
+ .url(ASR_URL)
+ .addHeader("X-Api-App-Key", appId)
+ .addHeader("X-Api-Access-Key", accessToken)
+ .addHeader("X-Api-Resource-Id", ASR_RESOURCE_ID)
+ .addHeader("X-Api-Connect-Id", CONNECT_ID)
+ .addHeader("Content-Type", "application/json")
+ .addHeader("User-Agent", "OkHttp Android")
+ .build()
+
+ webSocket = okHttpClient.newWebSocket(request, object : WebSocketListener() {
+ override fun onOpen(webSocket: WebSocket, response: Response) {
+ Log.d("AsrService", "WebSocket connection opened")
+ retryCount = 0
+ pingFailureCount = 0
+ lastPingTime = 0
+ lastPongTime = 0
+ isConnecting.set(false)
+
+                // Send the initialization message
+ val user = JSONObject().apply {
+ put("uid", "ARK_VLM_DEMO")
+ }
+ val audio = JSONObject().apply {
+ put("format", AUDIO_FORMAT_CONFIG)
+ put("sample_rate", SAMPLE_RATE)
+ put("bits", AUDIO_BITS)
+ put("channel", AUDIO_CHANNELS)
+ }
+ val request = JSONObject().apply {
+ put("model_name", "bigmodel")
+ put("result_type", "single")
+ put("show_utterances", true)
+ put("end_window_size", 600)
+ put("force_to_speech_time", 1500)
+ }
+ val payload = JSONObject().apply {
+ put("user", user)
+ put("audio", audio)
+ put("request", request)
+ }
+
+ val payloadStr = payload.toString()
+ val payloadBytes = gzipCompress(payloadStr.toByteArray())
+ val header = getHeader(FULL_CLIENT_REQUEST, POS_SEQUENCE, JSON, GZIP, 0)
+ val payloadSize = intToBytes(payloadBytes.size)
+ sequence = 1
+ val seqBytes = intToBytes(sequence)
+
+ val fullClientRequest = ByteArray(header.size + seqBytes.size + payloadSize.size + payloadBytes.size)
+ var destPos = 0
+ System.arraycopy(header, 0, fullClientRequest, destPos, header.size)
+ destPos += header.size
+ System.arraycopy(seqBytes, 0, fullClientRequest, destPos, seqBytes.size)
+ destPos += seqBytes.size
+ System.arraycopy(payloadSize, 0, fullClientRequest, destPos, payloadSize.size)
+ destPos += payloadSize.size
+ System.arraycopy(payloadBytes, 0, fullClientRequest, destPos, payloadBytes.size)
+
+ try {
+ val result = webSocket.send(ByteString.of(*fullClientRequest))
+ if (!result) {
+ Log.e("AsrService", "Failed to send initialization message")
+ throw Exception("Failed to send initialization message")
+ }
+ Log.d("AsrService", "Initialization message sent successfully")
+ } catch (e: Exception) {
+ Log.e("AsrService", "Error during initialization", e)
+ webSocket.close(1000, "Initialization failed")
+ isInitialized.set(false)
+ }
+ }
+
+ override fun onMessage(webSocket: WebSocket, text: String) {
+ Log.d("AsrService", "Received pong from server")
+ lastPongTime = System.currentTimeMillis()
+ pingFailureCount = 0
+ }
+
+ override fun onMessage(webSocket: WebSocket, bytes: ByteString) {
+ Log.d("AsrService", "Received message from server")
+ lastPongTime = System.currentTimeMillis()
+ pingFailureCount = 0
+ val res = bytes.toByteArray()
+ val sequence = parseResponse(res)
+ LAST_RESPONSE_TIME = System.currentTimeMillis()
+ }
+
+ override fun onFailure(webSocket: WebSocket, t: Throwable, response: Response?) {
+ Log.e("AsrService", "WebSocket error: ${t.message}", t)
+ isInitialized.set(false)
+ isConnecting.set(false)
+
+ if (t is SocketTimeoutException) {
+ // 如果是超时错误,立即重试
+ Log.d("AsrService", "Socket timeout, retrying immediately")
+ connectWebSocket()
+ } else if (retryCount < MAX_RETRIES) {
+ retryCount++
+ val delay = RECONNECT_DELAY * retryCount
+ Log.d("AsrService", "Retrying connection in ${delay}ms, attempt $retryCount")
+ Thread.sleep(delay)
+ connectWebSocket()
+ } else {
+ Log.e("AsrService", "Max retries reached, stopping ASR")
+ stopAsr()
+ }
+ }
+
+ override fun onClosing(webSocket: WebSocket, code: Int, reason: String) {
+ Log.d("AsrService", "WebSocket closing: $code - $reason")
+ isInitialized.set(false)
+ isConnecting.set(false)
+ }
+
+ override fun onClosed(webSocket: WebSocket, code: Int, reason: String) {
+ Log.d("AsrService", "WebSocket closed: $code - $reason")
+ isInitialized.set(false)
+ isConnecting.set(false)
+ }
+ })
+ }
+
+ private fun checkConnection() {
+ if (!isInitialized.get() || webSocket == null) {
+ return
+ }
+
+ val currentTime = System.currentTimeMillis()
+
+        // Send a ping if the interval has elapsed
+ if (currentTime - lastPingTime > PING_INTERVAL) {
+ try {
+ val result = webSocket?.send("")
+ if (result == true) {
+ lastPingTime = currentTime
+ Log.d("AsrService", "Sent ping to server")
+ } else {
+ Log.e("AsrService", "Failed to send ping")
+ handlePingFailure()
+ }
+ } catch (e: Exception) {
+ Log.e("AsrService", "Error sending ping", e)
+ handlePingFailure()
+ }
+ }
+
+        // Check whether the pong has timed out
+ if (lastPingTime > 0 && currentTime - lastPingTime > PONG_TIMEOUT) {
+ Log.e("AsrService", "Pong timeout, reconnecting...")
+ handlePingFailure()
+ }
+
+        // Check for an overall server-response timeout
+ if (currentTime - LAST_RESPONSE_TIME > INIT_TIMEOUT) {
+ Log.e("AsrService", "No response from server for too long, reconnecting...")
+ handlePingFailure()
+ }
+ }
+
+ private fun handlePingFailure() {
+ pingFailureCount++
+ if (pingFailureCount >= MAX_PING_FAILURES) {
+ Log.e("AsrService", "Max ping failures reached, reconnecting...")
+ isInitialized.set(false)
+ connectWebSocket()
+ pingFailureCount = 0
+ }
+ }
+
+ private fun requestAudioFocus() {
+ if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
+ audioFocusRequest = AudioFocusRequest.Builder(AudioManager.AUDIOFOCUS_GAIN_TRANSIENT)
+ .setAudioAttributes(
+ AudioAttributes.Builder()
+ .setUsage(AudioAttributes.USAGE_MEDIA)
+ .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
+ .build()
+ )
+ .setOnAudioFocusChangeListener { focusChange ->
+ when (focusChange) {
+ AudioManager.AUDIOFOCUS_GAIN -> {
+ Log.d("AsrService", "Audio focus gained")
+ audioFocusGranted = true
+ // 恢复录音
+ if (isRecording.get() && audioRecord?.state == AudioRecord.STATE_INITIALIZED) {
+ audioRecord?.startRecording()
+ }
+ }
+ AudioManager.AUDIOFOCUS_LOSS -> {
+ Log.d("AsrService", "Audio focus lost")
+ audioFocusGranted = false
+ // 暂停录音
+ audioRecord?.stop()
+ }
+ AudioManager.AUDIOFOCUS_LOSS_TRANSIENT -> {
+ Log.d("AsrService", "Audio focus lost transiently")
+ audioFocusGranted = false
+ // 暂停录音
+ audioRecord?.stop()
+ }
+ AudioManager.AUDIOFOCUS_LOSS_TRANSIENT_CAN_DUCK -> {
+ Log.d("AsrService", "Audio focus lost transiently can duck")
+ audioFocusGranted = false
+ // 暂停录音,因为 AudioRecord 不支持动态音量调整
+ audioRecord?.stop()
+ }
+ }
+ }
+ .build()
+
+ val result = audioManager?.requestAudioFocus(audioFocusRequest!!)
+ audioFocusGranted = result == AudioManager.AUDIOFOCUS_REQUEST_GRANTED
+ Log.d("AsrService", "Audio focus request result: $result")
+ }
+ }
+
+ private fun abandonAudioFocus() {
+ if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
+ audioFocusRequest?.let {
+ audioManager?.abandonAudioFocusRequest(it)
+ audioFocusGranted = false
+ Log.d("AsrService", "Audio focus abandoned")
+ }
+ }
+ }
+
+ private fun startAsr() {
+ if (isRecording.get()) {
+ return
+ }
+
+ try {
+            // Send an initial text message to the backend
+ GlobalScope.launch(kotlinx.coroutines.Dispatchers.IO) {
+ try {
+ sendTextAsr("应用初始化")
+ } catch (e: Exception) {
+ Log.e("AsrService", "Error sending initial text", e)
+ }
+ }
+
+            // Request audio focus
+ requestAudioFocus()
+ if (!audioFocusGranted) {
+ Log.e("AsrService", "Failed to get audio focus")
+ return
+ }
+
+            // Stop and release any previous AudioRecord instance first
+ try {
+ audioRecord?.stop()
+ audioRecord?.release()
+ Thread.sleep(2000) // 增加等待时间到2秒
+ } catch (e: Exception) {
+ Log.e("AsrService", "Error releasing previous AudioRecord", e)
+ }
+
+ // 检查麦克风是否可用
+ val audioManager = getSystemService(Context.AUDIO_SERVICE) as AudioManager
+ if (!audioManager.isMicrophoneMute) {
+ Log.d("AsrService", "Microphone is not muted")
+ } else {
+ Log.w("AsrService", "Microphone is muted, trying to unmute")
+ audioManager.isMicrophoneMute = false
+ }
+
+            // Reset the audio routing
+ audioManager.mode = AudioManager.MODE_IN_COMMUNICATION
+ audioManager.isSpeakerphoneOn = false
+ audioManager.setStreamVolume(AudioManager.STREAM_VOICE_CALL,
+ audioManager.getStreamMaxVolume(AudioManager.STREAM_VOICE_CALL) / 2, 0)
+
+            // Enable echo cancellation, noise suppression, and AGC
+ audioManager.setParameters("noise_suppression=on")
+ audioManager.setParameters("echo_cancellation=on")
+ audioManager.setParameters("agc=on")
+
+ // 获取推荐的缓冲区大小
+ val bufferSize = AudioRecord.getMinBufferSize(
+ SAMPLE_RATE,
+ CHANNEL_CONFIG,
+ AUDIO_FORMAT
+ )
+ Log.d("AsrService", "AudioRecord buffer size: $bufferSize")
+
+ // 使用更大的缓冲区大小
+ val actualBufferSize = bufferSize * 2
+ Log.d("AsrService", "Using actual buffer size: $actualBufferSize")
+
+            // Try several audio sources, preferring VOICE_COMMUNICATION
+ val configs = arrayOf(
+ Triple(MediaRecorder.AudioSource.VOICE_COMMUNICATION, SAMPLE_RATE, actualBufferSize),
+ Triple(MediaRecorder.AudioSource.VOICE_RECOGNITION, SAMPLE_RATE, actualBufferSize),
+ Triple(MediaRecorder.AudioSource.CAMCORDER, SAMPLE_RATE, actualBufferSize),
+ Triple(MediaRecorder.AudioSource.MIC, SAMPLE_RATE, actualBufferSize)
+ )
+
+ var audioRecordInitialized = false
+ var lastException: Exception? = null
+
+ for ((audioSource, sampleRate, bufferSize) in configs) {
+ try {
+ Log.d("AsrService", "Trying audio source: $audioSource, sample rate: $sampleRate, buffer size: $bufferSize")
+
+ // 创建新的AudioRecord实例
+ audioRecord = AudioRecord(
+ audioSource,
+ sampleRate,
+ CHANNEL_CONFIG,
+ AUDIO_FORMAT,
+ bufferSize
+ )
+
+ if (audioRecord?.state == AudioRecord.STATE_INITIALIZED) {
+ Log.d("AsrService", "Successfully initialized AudioRecord with source: $audioSource")
+ audioRecordInitialized = true
+ break
+ } else {
+ Log.e("AsrService", "Failed to initialize AudioRecord with source: $audioSource")
+ audioRecord?.release()
+ }
+ } catch (e: Exception) {
+ Log.e("AsrService", "Error initializing AudioRecord with source: $audioSource", e)
+ lastException = e
+ audioRecord?.release()
+ Thread.sleep(1000) // 在尝试下一个配置之前等待更长时间
+ }
+ }
+
+ if (!audioRecordInitialized) {
+                // Fall back to the default configuration if everything else failed
+ try {
+ Log.d("AsrService", "Trying default configuration as last resort")
+ audioRecord = AudioRecord(
+ MediaRecorder.AudioSource.MIC,
+ SAMPLE_RATE,
+ CHANNEL_CONFIG,
+ AUDIO_FORMAT,
+ AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_CONFIG, AUDIO_FORMAT)
+ )
+
+ if (audioRecord?.state != AudioRecord.STATE_INITIALIZED) {
+ throw lastException ?: Exception("Failed to initialize AudioRecord with any configuration")
+ }
+ } catch (e: Exception) {
+ Log.e("AsrService", "Failed to initialize AudioRecord with default configuration", e)
+ throw e
+ }
+ }
+
+ // 设置音频属性
+ if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.LOLLIPOP) {
+ val audioAttributes = AudioAttributes.Builder()
+ .setUsage(AudioAttributes.USAGE_VOICE_COMMUNICATION)
+ .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
+ .setFlags(AudioAttributes.FLAG_AUDIBILITY_ENFORCED)
+ .build()
+ audioRecord?.setPreferredDevice(audioManager.getDevices(AudioManager.GET_DEVICES_INPUTS)?.firstOrNull())
+ }
+
+ // 启动录音前先读取一些数据来预热
+ val warmupBuffer = ByteArray(actualBufferSize)
+ audioRecord?.startRecording()
+ audioRecord?.read(warmupBuffer, 0, actualBufferSize)
+ Log.d("AsrService", "AudioRecord warmup completed")
+
+ // 重置状态
+ isRecording.set(true)
+ isAlreadyDefinite = false
+ currentText = ""
+ startTime = System.currentTimeMillis()
+ lastAsrTime = System.currentTimeMillis()
+ voiceBuffer.reset()
+ sequence = 0
+ isSpeaking = false
+ lastVoiceTime = System.currentTimeMillis()
+ silenceDuration = 0
+
+ startForeground(NOTIFICATION_ID, createNotification())
+
+            // Start streaming audio data
+ recordingThread = Thread {
+ val buffer = ByteArray(BUFFER_SIZE)
+ var totalBytesRead = 0
+ var lastSendTime = System.currentTimeMillis()
+ var consecutiveSilenceCount = 0
+
+ while (isRecording.get()) {
+ try {
+ val read = audioRecord?.read(buffer, 0, BUFFER_SIZE) ?: 0
+
+ if (read > 0) {
+ totalBytesRead += read
+
+ // 计算音频能量值
+ var energy = 0.0
+ var maxSample = 0
+ var sampleCount = 0
+
+ for (i in 0 until read step 2) {
+ if (i + 1 < read) {
+ val sample = (buffer[i].toInt() and 0xFF) or (buffer[i + 1].toInt() shl 8)
+ val signedSample = if (sample > 32767) sample - 65536 else sample
+ if (Math.abs(signedSample) > maxSample) {
+ maxSample = Math.abs(signedSample)
+ }
+ energy += signedSample * signedSample
+ sampleCount++
+ }
+ }
+
+ if (sampleCount > 0) {
+ energy = Math.sqrt(energy / sampleCount)
+
+ // 改进语音检测逻辑
+ val isVoiceDetected = energy > SILENCE_THRESHOLD || maxSample > SILENCE_THRESHOLD * 2
+
+
+ if (isVoiceDetected) {
+ consecutiveSilenceCount = 0
+ if (!isSpeaking) {
+ // 开始新的语音识别
+ isSpeaking = true
+ lastVoiceTime = System.currentTimeMillis()
+ silenceDuration = 0
+ voiceBuffer.reset()
+ sequence = 0
+ isAlreadyDefinite = false
+
+ // 确保 WebSocket 连接已建立
+ if (webSocket == null || !isInitialized.get()) {
+ Log.d("AsrService", "Starting new WebSocket connection for speech")
+ connectWebSocket()
+ }
+ }
+ silenceDuration = 0
+ } else {
+ consecutiveSilenceCount++
+ if (consecutiveSilenceCount > 20) { // 增加连续静音计数阈值
+ silenceDuration = System.currentTimeMillis() - lastVoiceTime
+ if (silenceDuration > MIN_SILENCE_DURATION) {
+ isSpeaking = false
+ }
+ }
+ }
+ }
+
+ // 写入数据到缓冲区
+ voiceBuffer.write(buffer, 0, read)
+
+ // 检查是否需要发送数据
+ val currentTime = System.currentTimeMillis()
+ val shouldSend = currentTime - lastSendTime >= SEND_INTERVAL && isSpeaking
+
+ if (shouldSend && isInitialized.get()) {
+ val audioData = voiceBuffer.toByteArray()
+ val isLast = !isRecording.get() ||
+ (currentTime - lastAsrTime > FORCE_DEFINITE_DURATION &&
+ currentText.isNotEmpty() && !isAlreadyDefinite)
+
+ try {
+ val audioOnlyRequest = sendAudioOnlyRequest(audioData, isLast)
+ val result = webSocket?.send(ByteString.of(*audioOnlyRequest))
+
+ // 重置缓冲区
+ voiceBuffer.reset()
+ lastSendTime = currentTime
+
+ if (result == true) {
+ Log.d("AsrService", "Audio data sent successfully, size: ${audioData.size}, isLast: $isLast")
+ } else {
+ Log.e("AsrService", "Failed to send audio data")
+ if (isRecording.get()) {
+ connectWebSocket()
+ }
+ }
+ } catch (e: Exception) {
+ Log.e("AsrService", "Error sending audio data", e)
+ if (isRecording.get()) {
+ connectWebSocket()
+ }
+ }
+ }
+ } else if (read < 0) {
+ Log.e("AsrService", "Error reading audio data: $read")
+ return@Thread
+ } else {
+ Thread.sleep(10)
+ }
+ } catch (e: Exception) {
+ Log.e("AsrService", "Error in recording thread", e)
+ stopAsr()
+ return@Thread
+ }
+ }
+ }.apply { start() }
+
+ } catch (e: Exception) {
+ Log.e("AsrService", "Error starting ASR", e)
+ stopAsr()
+ }
+ }
+
+ private fun stopAsr() {
+ Log.d("stopAsr", "Stopping ASR service...")
+
+ // 重置状态码重试计数
+ statusCodeRetryCount = 0
+
+ // 放弃音频焦点
+ abandonAudioFocus()
+
+ // 恢复音频设置
+ audioManager?.mode = AudioManager.MODE_NORMAL
+ audioManager?.isSpeakerphoneOn = true
+
+        // 1. Stop the recording thread
+ isRecording.set(false)
+
+        // 2. Wait for the recording thread to finish
+        recordingThread?.join(1000) // wait at most 1 second
+ recordingThread = null
+
+        // 3. Stop audio capture
+ try {
+ audioRecord?.stop()
+ audioRecord?.release()
+ } catch (e: Exception) {
+ Log.e("stopAsr", "Error stopping audio record", e)
+ }
+ audioRecord = null
+
+        // 4. Close the WebSocket connection
+ try {
+ webSocket?.close(1000, "Normal closure")
+ // 等待一小段时间确保连接完全关闭
+ Thread.sleep(100)
+ } catch (e: Exception) {
+ Log.e("stopAsr", "Error closing WebSocket", e)
+ }
+ webSocket = null
+
+        // 5. Reset all state
+ isInitialized.set(false)
+ sequence = 0
+ retryCount = 0
+ lastVoiceTime = 0L
+ silenceDuration = 0L
+
+        // 6. Stop the foreground service
+ stopForeground(true)
+ stopSelf()
+
+ Log.d("stopAsr", "ASR service stopped successfully")
+ }
+
+ private fun createNotificationChannel() {
+ if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
+ val channel = NotificationChannel(
+ CHANNEL_ID,
+ "ASR Service",
+ NotificationManager.IMPORTANCE_LOW
+ )
+ val notificationManager = getSystemService(NotificationManager::class.java)
+ notificationManager.createNotificationChannel(channel)
+ }
+ }
+
+ private fun createNotification(): Notification {
+ return NotificationCompat.Builder(this, CHANNEL_ID)
+ .setContentTitle("语音识别服务")
+ .setContentText("正在识别语音")
+ .setSmallIcon(android.R.drawable.stat_notify_call_mute)
+ .setForegroundServiceBehavior(NotificationCompat.FOREGROUND_SERVICE_IMMEDIATE)
+ .build()
+ }
+
+ override fun onDestroy() {
+ super.onDestroy()
+        // Make sure the MediaPlayer is cleaned up when the service is destroyed
+ cleanupMediaPlayer(mediaPlayer)
+ stopAsr()
+ }
+
+ override fun onBind(intent: Intent?): IBinder? = null
+
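+    /**
+     * Builds an audio-only frame: 4-byte header, 4-byte sequence number
+     * (negative when this is the last chunk of the utterance), 4-byte payload
+     * size, then the gzip-compressed PCM audio.
+     */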
+ private fun sendAudioOnlyRequest(audioData: ByteArray, isLast: Boolean): ByteArray {
+ sequence++
+ val messageTypeSpecificFlags = if (isLast) NEG_WITH_SEQUENCE else POS_SEQUENCE
+ val header = getHeader(AUDIO_ONLY_REQUEST, messageTypeSpecificFlags, JSON, GZIP, 0)
+ val seqBytes = intToBytes(if (isLast) -sequence else sequence)
+ val payloadBytes = gzipCompress(audioData)
+ val payloadSize = intToBytes(payloadBytes.size)
+
+ val audioOnlyRequest = ByteArray(header.size + seqBytes.size + payloadSize.size + payloadBytes.size)
+ var destPos = 0
+ System.arraycopy(header, 0, audioOnlyRequest, destPos, header.size)
+ destPos += header.size
+ System.arraycopy(seqBytes, 0, audioOnlyRequest, destPos, seqBytes.size)
+ destPos += seqBytes.size
+ System.arraycopy(payloadSize, 0, audioOnlyRequest, destPos, payloadSize.size)
+ destPos += payloadSize.size
+ System.arraycopy(payloadBytes, 0, audioOnlyRequest, destPos, payloadBytes.size)
+
+ return audioOnlyRequest
+ }
+
+ private fun playAudio(base64Audio: String) {
+ try {
+            // Mark that audio playback is in progress
+ isPlayingAudio = true
+ isProcessingRequest = true
+
+ // 保存原始音量
+ originalVolume = audioManager?.getStreamVolume(AudioManager.STREAM_VOICE_CALL) ?: 0
+
+ // 设置音频路由为听筒
+ audioManager?.mode = AudioManager.MODE_IN_COMMUNICATION
+ audioManager?.isSpeakerphoneOn = false
+
+ // 降低音量
+ val maxVolume = audioManager?.getStreamMaxVolume(AudioManager.STREAM_VOICE_CALL) ?: 0
+ val targetVolume = (maxVolume * VOLUME_REDUCTION_FACTOR).toInt()
+ audioManager?.setStreamVolume(AudioManager.STREAM_VOICE_CALL, targetVolume, 0)
+
+            // Decode the base64 audio data
+ val audioBytes = Base64.decode(base64Audio, Base64.DEFAULT)
+
+            // Write it to a temporary file
+ val tempFile = File(tempAudioDir, "temp_audio_${System.currentTimeMillis()}.mp3")
+ FileOutputStream(tempFile).use { outputStream ->
+ outputStream.write(audioBytes)
+ }
+
+            // Release any previous MediaPlayer instance
+ cleanupMediaPlayer(mediaPlayer)
+
+ // 创建新的 MediaPlayer 实例
+ mediaPlayer = MediaPlayer().apply {
+ setAudioAttributes(
+ AudioAttributes.Builder()
+ .setUsage(AudioAttributes.USAGE_VOICE_COMMUNICATION) // 使用语音通信用途
+ .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH) // 内容类型为语音
+ .setFlags(AudioAttributes.FLAG_AUDIBILITY_ENFORCED)
+ .build()
+ )
+ setDataSource(tempFile.absolutePath)
+ prepareAsync()
+
+                // Start playback once prepared
+ setOnPreparedListener { mp ->
+ try {
+ mp.start()
+ Log.d("AsrService", "Audio playback started with reduced volume")
+ } catch (e: Exception) {
+ Log.e("AsrService", "Error starting audio playback", e)
+ cleanupMediaPlayer(mp)
+ }
+ }
+
+ // 设置错误监听器
+ setOnErrorListener { mp, what, extra ->
+ Log.e("AsrService", "MediaPlayer error: what=$what, extra=$extra")
+ cleanupMediaPlayer(mp)
+ true
+ }
+
+ // 设置完成监听器
+ setOnCompletionListener { mp ->
+ cleanupMediaPlayer(mp)
+ }
+ }
+ } catch (e: Exception) {
+ Log.e("AsrService", "Error playing audio", e)
+ cleanupMediaPlayer(mediaPlayer)
+ }
+ }
+
+ private fun cleanupMediaPlayer(mp: MediaPlayer?) {
+ try {
+ mp?.apply {
+ if (isPlaying) {
+ stop()
+ }
+ reset()
+ release()
+ }
+ mediaPlayer = null
+ } catch (e: Exception) {
+ Log.e("AsrService", "Error cleaning up MediaPlayer", e)
+ } finally {
+ // 重置状态
+ isPlayingAudio = false
+ isProcessingRequest = false
+ // 恢复音频路由设置
+ audioManager?.mode = AudioManager.MODE_IN_COMMUNICATION
+ audioManager?.isSpeakerphoneOn = false
+ // 恢复原始音量
+ audioManager?.setStreamVolume(AudioManager.STREAM_VOICE_CALL, originalVolume, 0)
+ }
+ }
+
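+    /**
+     * Called when the server marks the current utterance as definite: closes the
+     * streaming connection, resets the speaking state, and forwards the recognized
+     * text to the local backend.
+     */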
+ private fun definate(text: String) {
+
+ LAST_RESPONSE_TIME = System.currentTimeMillis();
+ webSocket?.close(1000, "Normal closure")
+ silenceDuration = 0
+ isSpeaking = false
+ webSocket = null
+ isInitialized.set(false)
+ isAlreadyDefinite = true
+ sequence = 0
+        Log.d("stop", "ASR definite, streaming connection closed")
+
+ if (text.isNotEmpty()){
+ sendTextAsr(text)
+ }
+
+ }
+
+ private fun sendTextAsr(text: String) {
+        // Run the network call on a background coroutine
+ GlobalScope.launch(kotlinx.coroutines.Dispatchers.IO) {
+ try {
+ // 调用后端接口开始
+ val jsonRequest = JSONObject().apply {
+ put("model", "bot-20241114164326-xlcc91")
+ put("stream", false)
+ put("messages", JSONArray().apply {
+ put(JSONObject().apply {
+ put("role", "user")
+ put("content", JSONArray().apply {
+ put(JSONObject().apply {
+ put("type", "text")
+ put("text", text)
+ })
+ })
+ })
+ })
+ }
+
+ val request = Request.Builder()
+ .url(UPLOAD_URL.format(serverIp))
+ .post(jsonRequest.toString().toRequestBody("application/json".toMediaTypeOrNull()))
+ .addHeader("X-Context-Id", contextId)
+ .addHeader("Connection", "close")
+ .build()
+ Log.d("AsrRequest", "AsrRequest: " + contextId)
+
+ val response = okHttpClient.newCall(request).execute()
+ val responseCode = response.code
+ Log.d("TAG", "Upload response code: $responseCode")
+
+ if (!response.isSuccessful) {
+ Log.e("TAG", "Upload failed with response code: $responseCode")
+ val errorBody = response.body?.string()
+ Log.e("TAG", "Error response: $errorBody")
+ isProcessingRequest = false
+ return@launch
+ }
+
+ val responseBody = response.body?.string()
+ Log.d("TAG", "Success response received")
+
+                // Parse the response and play the synthesized audio
+ try {
+ val jsonResponse = JSONObject(responseBody)
+ val choices = jsonResponse.getJSONArray("choices")
+ if (choices.length() > 0) {
+ val firstChoice = choices.getJSONObject(0)
+ val message = firstChoice.getJSONObject("message")
+ val audio = message.optJSONObject("audio")
+ if (audio != null) {
+ val audioData = audio.optString("data")
+ if (audioData.isNotEmpty()) {
+ // 截断音频数据用于日志显示
+ val truncatedAudio =
+ if (audioData.length > 20) {
+ audioData.substring(0, 20) + "..."
+ } else {
+ audioData
+ }
+ Log.d(
+ "TAG",
+ "Audio data length: ${audioData.length}, content: $truncatedAudio"
+ )
+ // 在主线程播放音频
+ withContext(kotlinx.coroutines.Dispatchers.Main) {
+ playAudio(audioData)
+ }
+ } else {
+ // 如果没有音频数据,直接重置处理状态
+ isProcessingRequest = false
+ }
+ } else {
+ // 如果没有音频对象,直接重置处理状态
+ isProcessingRequest = false
+ }
+ } else {
+ // 如果没有选择,直接重置处理状态
+ isProcessingRequest = false
+ }
+ } catch (e: Exception) {
+ Log.e("AsrService", "Error parsing response", e)
+ isProcessingRequest = false
+ }
+ } catch (e: Exception) {
+ Log.e("AsrService", "Error in sendTextAsr", e)
+ isProcessingRequest = false
+ }
+ }
+ }
+
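+    /**
+     * Parses a binary server frame: 4-byte header, 4-byte sequence number,
+     * 4-byte payload size, then the (optionally gzip-compressed) JSON payload.
+     * Returns the sequence number carried by the frame.
+     */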
+ private fun parseResponse(res: ByteArray): Int {
+ if (res.isEmpty()) {
+ return -1
+ }
+
+ val num = 0b00001111
+        val result = HashMap<String, Any>()
+
+ val protocolVersion = (res[0].toInt() ushr 4) and num
+ result["protocol_version"] = protocolVersion
+ val headerSize = res[0].toInt() and 0x0f
+ result["header_size"] = headerSize
+
+ val messageType = (res[1].toInt() ushr 4) and num
+ result["message_type"] = messageType
+ val messageTypeSpecificFlags = res[1].toInt() and 0x0f
+ result["message_type_specific_flags"] = messageTypeSpecificFlags
+        val serializationMethod = (res[2].toInt() ushr 4) and num
+ result["serialization_method"] = serializationMethod
+ val messageCompression = res[2].toInt() and 0x0f
+ result["message_compression"] = messageCompression
+ val reserved = res[3]
+ result["reserved"] = reserved
+
+ val temp = ByteArray(4)
+ System.arraycopy(res, 4, temp, 0, temp.size)
+ val sequence = bytesToInt(temp)
+
+ Log.i("hg", "seq: "+sequence)
+
+ if (sequence == 45000081){
+ definate(currentText)
+ return sequence
+ }
+ if(sequence < 0){
+            Log.i("AsrService", "Received negative sequence from server, reconnecting...")
+            // Close the current connection
+            webSocket?.close(1000, "Reconnecting due to status code -")
+            // Reset state
+ isInitialized.set(false)
+ isConnecting.set(false)
+ isSpeaking = false
+ return sequence
+ }
+
+ System.arraycopy(res, 8, temp, 0, temp.size)
+ val payloadSize = bytesToInt(temp)
+ val payload = ByteArray(res.size - 12)
+ System.arraycopy(res, 12, payload, 0, payload.size)
+
+ Log.i("AsrService", "messageType: $messageType")
+
+ if (messageType == FULL_SERVER_RESPONSE || messageType == SERVER_ACK) {
+ val payloadStr = if (messageCompression == GZIP) {
+ String(gzipDecompress(payload))
+ } else {
+ String(payload)
+ }
+ Log.d("AsrService", "Payload: $payloadStr")
+ result["payload_size"] = payloadSize
+ Log.d("AsrService", "Response: ${Gson().toJson(result)}")
+
+ try {
+ val jsonResponse = JSONObject(payloadStr)
+
+ // 检查是否是初始化响应
+ if (messageType == FULL_SERVER_RESPONSE) {
+ Log.d("AsrService", "Received server response")
+ isInitialized.set(true)
+ }
+
+ var text = ""
+ // 检查是否有识别结果
+ if (jsonResponse.has("result")) {
+ // 只要有结果,就应该设置isSpeak = false
+ val result = jsonResponse.getJSONObject("result")
+
+ if (result.has("text")) {
+ text = result.getString("text")
+ if (text.isNotEmpty()) {
+ Log.d("AsrService", "Recognized text: $text")
+ currentText = text
+ lastAsrTime = System.currentTimeMillis()
+ } else {
+ Log.d("AsrService", "Empty text in response")
+ }
+ } else {
+ Log.d("AsrService", "No text field in result")
+ }
+ if(jsonResponse.getJSONObject("result").has("utterances") &&
+ jsonResponse.getJSONObject("result").getJSONArray("utterances").length()>0){
+ var definiteFlag = jsonResponse.getJSONObject("result")
+ .getJSONArray("utterances")
+ .getJSONObject(0)
+ .getBoolean("definite")
+ if(definiteFlag){
+ definate(text)
+ }
+ }
+ } else {
+ Log.d("AsrService", "No result field in response")
+ }
+ } catch (e: Exception) {
+ Log.e("AsrService", "Error parsing ASR result", e)
+ }
+ }
+
+ return sequence
+ }
+
+ private fun bytesToInt(src: ByteArray): Int {
+ if (src.size != 4) {
+ throw IllegalArgumentException("Invalid byte array size")
+ }
+ return ((src[0].toInt() and 0xFF) shl 24) or
+ ((src[1].toInt() and 0xFF) shl 16) or
+ ((src[2].toInt() and 0xFF) shl 8) or
+ (src[3].toInt() and 0xFF)
+ }
+
+ private fun gzipDecompress(src: ByteArray): ByteArray {
+ val out = ByteArrayOutputStream()
+ val ins = ByteArrayInputStream(src)
+ val gzip = GZIPInputStream(ins)
+ val buffer = ByteArray(ins.available())
+ var len = 0
+ while (gzip.read(buffer).also { len = it } > 0) {
+ out.write(buffer, 0, len)
+ }
+ out.close()
+ return out.toByteArray()
+ }
+}
diff --git a/demohouse/hgdoll/android/app/src/main/java/com/example/android/service/ScreenshotService.kt b/demohouse/hgdoll/android/app/src/main/java/com/example/android/service/ScreenshotService.kt
new file mode 100644
index 00000000..e2d8ecc2
--- /dev/null
+++ b/demohouse/hgdoll/android/app/src/main/java/com/example/android/service/ScreenshotService.kt
@@ -0,0 +1,460 @@
+package com.example.android.service
+import javax.net.ssl.HttpsURLConnection
+
+import android.app.Notification
+import android.app.NotificationChannel
+import android.app.NotificationManager
+import android.app.Service
+import android.content.Intent
+import android.graphics.PixelFormat
+import android.hardware.display.DisplayManager
+import android.hardware.display.VirtualDisplay
+import android.media.ImageReader
+import android.media.MediaPlayer
+import android.media.projection.MediaProjection
+import android.media.projection.MediaProjectionManager
+import android.os.Build
+import android.os.Handler
+import android.os.IBinder
+import android.os.Looper
+import android.util.Base64
+import android.util.DisplayMetrics
+import android.util.Log
+import android.view.WindowManager
+import androidx.core.app.NotificationCompat
+import okhttp3.MediaType.Companion.toMediaTypeOrNull
+import okhttp3.OkHttpClient
+import okhttp3.Request
+import okhttp3.RequestBody.Companion.toRequestBody
+import okhttp3.logging.HttpLoggingInterceptor
+import org.json.JSONObject
+import org.json.JSONArray
+import java.io.ByteArrayOutputStream
+import java.io.File
+import java.util.concurrent.TimeUnit
+import javax.net.ssl.SSLContext
+import javax.net.ssl.TrustManager
+import javax.net.ssl.X509TrustManager
+import java.security.cert.X509Certificate
+import android.graphics.Bitmap
+
+class ScreenshotService : Service() {
+ companion object {
+ const val ACTION_START = "com.example.android.action.START_SCREENSHOT"
+ const val ACTION_STOP = "com.example.android.action.STOP_SCREENSHOT"
+ const val EXTRA_RESULT_CODE = "result_code"
+ const val EXTRA_RESULT_DATA = "result_data"
+ const val SERVER_IP = "server_ip"
+ const val CONTEXT_ID = "context_id"
+ private const val NOTIFICATION_ID = 1
+ private const val CHANNEL_ID = "screenshot_channel"
+ private const val TAG = "ScreenshotService"
+ private const val UPLOAD_URL = "http://%s/api/v3/bots/chat/completions"
+ private const val SCREENSHOT_INTERVAL = 3000L
+ private const val VIRTUAL_DISPLAY_NAME = "ScreenshotService"
+ private const val MAX_UPLOAD_RETRIES = 3
+ private const val UPLOAD_RETRY_DELAY = 1000L
+ }
+
+ private var resultCode: Int = 0
+ private var resultData: Intent? = null
+ private var imageReader: ImageReader? = null
+ private var virtualDisplay: VirtualDisplay? = null
+ private var mediaProjection: MediaProjection? = null
+ private val handler = Handler(Looper.getMainLooper())
+ private var isCapturing = false
+ private var isProcessingRequest = false
+ private var isPlayingAudio = false
+ private var screenWidth = 0
+ private var screenHeight = 0
+ private var screenDensity = 0
+ private var serverIp = ""
+ private var contextId = ""
+ private val okHttpClient: OkHttpClient by lazy {
+ val trustAllCerts = arrayOf(object : X509TrustManager {
+            override fun getAcceptedIssuers(): Array<X509Certificate> = arrayOf()
+            override fun checkClientTrusted(chain: Array<X509Certificate>, authType: String) {}
+            override fun checkServerTrusted(chain: Array<X509Certificate>, authType: String) {}
+ })
+
+ val sslContext = SSLContext.getInstance("TLS")
+ sslContext.init(null, trustAllCerts, java.security.SecureRandom())
+
+ val loggingInterceptor = HttpLoggingInterceptor().apply {
+ setLevel(HttpLoggingInterceptor.Level.BODY)
+ }
+
+ OkHttpClient.Builder()
+ .sslSocketFactory(sslContext.socketFactory, trustAllCerts[0] as X509TrustManager)
+ .hostnameVerifier { _, _ -> true }
+ .addInterceptor(loggingInterceptor)
+ .connectTimeout(30, TimeUnit.SECONDS)
+ .readTimeout(30, TimeUnit.SECONDS)
+ .writeTimeout(30, TimeUnit.SECONDS)
+ .build()
+ }
+
+ private val mediaProjectionCallback = object : MediaProjection.Callback() {
+ override fun onStop() {
+ Log.d(TAG, "MediaProjection stopped")
+ cleanup()
+ stopSelf()
+ }
+ }
+
+ private var mediaPlayer: MediaPlayer? = null
+ private val tempAudioDir: File by lazy {
+ File(cacheDir, "temp_audio").apply {
+ if (!exists()) {
+ mkdirs()
+ }
+ }
+ }
+
+ init {
+ try {
+            // Trust all certificates (development only; never ship this configuration)
+            val trustAllCerts = arrayOf(object : X509TrustManager {
+                override fun getAcceptedIssuers(): Array<X509Certificate> = arrayOf()
+                override fun checkClientTrusted(chain: Array<X509Certificate>, authType: String) {}
+                override fun checkServerTrusted(chain: Array<X509Certificate>, authType: String) {}
+            })
+
+            val sslContext = SSLContext.getInstance("TLS")
+            sslContext.init(null, trustAllCerts, java.security.SecureRandom())
+
+            // Install it as the default SSL socket factory
+            HttpsURLConnection.setDefaultSSLSocketFactory(sslContext.socketFactory)
+
+            // Accept any hostname (development only)
+            HttpsURLConnection.setDefaultHostnameVerifier { _, _ -> true }
+
+            // Relax system-level SSL properties for this insecure development setup
+            System.setProperty("https.protocols", "TLSv1.2")
+            System.setProperty("javax.net.ssl.trustStore", "NONE")
+            System.setProperty("javax.net.ssl.trustStoreType", "BKS")
+ } catch (e: Exception) {
+ Log.e(TAG, "Error initializing SSL context", e)
+ }
+ }
+
+ override fun onCreate() {
+ super.onCreate()
+ Log.d(TAG, "Service created")
+ createNotificationChannel()
+ startForeground(NOTIFICATION_ID, createNotification())
+ }
+
+ override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int {
+ Log.i("intent", intent?.getStringExtra(SERVER_IP).toString())
+ Log.d(TAG, "Service started with action: ${intent?.action}")
+ when (intent?.action) {
+ ACTION_START -> {
+ resultCode = intent.getIntExtra(EXTRA_RESULT_CODE, 0)
+ resultData = intent.getParcelableExtra(EXTRA_RESULT_DATA)
+ serverIp = intent.getStringExtra(SERVER_IP) ?: ""
+ contextId = intent.getStringExtra(CONTEXT_ID) ?: ""
+ if (resultData != null) {
+ Log.d(TAG, "Starting screenshot capture")
+ setupScreenCapture()
+ startPeriodicScreenshot()
+ } else {
+ Log.e(TAG, "Result data is null")
+ }
+ }
+ ACTION_STOP -> {
+ Log.d(TAG, "Stopping screenshot capture")
+ cleanup()
+ stopSelf()
+ }
+ }
+ return START_NOT_STICKY
+ }
+
+ private fun setupScreenCapture() {
+ try {
+ val windowManager = getSystemService(WINDOW_SERVICE) as WindowManager
+ val metrics = DisplayMetrics()
+ windowManager.defaultDisplay.getMetrics(metrics)
+ screenWidth = metrics.widthPixels
+ screenHeight = metrics.heightPixels
+ screenDensity = metrics.densityDpi
+
+            // Create the ImageReader only once, here
+ if (imageReader == null) {
+ imageReader = ImageReader.newInstance(screenWidth, screenHeight, PixelFormat.RGBA_8888, 2)
+ imageReader?.setOnImageAvailableListener({ reader ->
+                    // Re-check state: skip if an upload is in flight or audio is playing
+ if (isProcessingRequest || isPlayingAudio) {
+ Log.d(TAG, "Skipping image processing: request in progress or audio playing")
+ return@setOnImageAvailableListener
+ }
+
+ val image = reader.acquireLatestImage()
+ if (image != null) {
+ try {
+ Log.d(TAG, "Processing captured image")
+ val planes = image.planes
+ val buffer = planes[0].buffer
+ val pixelStride = planes[0].pixelStride
+ val rowStride = planes[0].rowStride
+ val rowPadding = rowStride - pixelStride * screenWidth
+
+ val bitmap = Bitmap.createBitmap(
+ screenWidth + rowPadding / pixelStride,
+ screenHeight,
+ Bitmap.Config.ARGB_8888
+ )
+ bitmap.copyPixelsFromBuffer(buffer)
+
+                            // Crop off the extra row padding
+ val croppedBitmap = Bitmap.createBitmap(
+ bitmap,
+ 0,
+ 0,
+ screenWidth,
+ screenHeight
+ )
+
+                            // Compress the screenshot to JPEG
+ val outputStream = ByteArrayOutputStream()
+ croppedBitmap.compress(Bitmap.CompressFormat.JPEG, 100, outputStream)
+ val imageBytes = outputStream.toByteArray()
+ Log.d(TAG, "Image captured, size: ${imageBytes.size} bytes")
+
+                            // Upload the screenshot
+ uploadScreenshot(imageBytes)
+
+                            // Release the bitmaps
+ croppedBitmap.recycle()
+ bitmap.recycle()
+ } finally {
+ image.close()
+ }
+ }
+ }, handler)
+ }
+ } catch (e: Exception) {
+ Log.e(TAG, "Error setting up screen capture", e)
+ cleanup()
+ stopSelf()
+ }
+ }
+
+ private fun takeScreenshot() {
+ try {
+            // Double-check: skip if an upload is in flight or audio is playing
+ if (isProcessingRequest || isPlayingAudio) {
+ Log.d(TAG, "Skipping screenshot: request in progress or audio playing")
+ return
+ }
+
+ val imageReader = imageReader ?: run {
+ Log.e(TAG, "ImageReader is null")
+ return
+ }
+            // Trigger one image capture
+ imageReader.acquireLatestImage()?.close()
+ Log.d(TAG, "Screenshot triggered")
+ } catch (e: Exception) {
+ Log.e(TAG, "Error triggering screenshot", e)
+ }
+ }
+
+ private fun startPeriodicScreenshot() {
+ if (isCapturing) return
+ isCapturing = true
+
+        // Make sure both the ImageReader and the VirtualDisplay have been created
+ setupScreenCapture()
+ createVirtualDisplay()
+
+ if (virtualDisplay == null) {
+ Log.e(TAG, "Failed to create VirtualDisplay")
+ cleanup()
+ stopSelf()
+ return
+ }
+
+        // Use a Handler instead of a Timer for finer control over the request interval
+ handler.post(object : Runnable {
+ override fun run() {
+ if (!isCapturing) return
+
+                // Only take a new screenshot when no request is being processed
+ if (!isProcessingRequest) {
+ takeScreenshot()
+ }
+
+                // Keep checking at the fixed interval whether or not a screenshot was taken
+ handler.postDelayed(this, SCREENSHOT_INTERVAL)
+ }
+ })
+ Log.d(TAG, "Periodic screenshot started")
+ }
+
+ private fun createVirtualDisplay() {
+ try {
+ val mediaProjection = getMediaProjection()
+ if (virtualDisplay == null && imageReader != null) {
+ virtualDisplay = mediaProjection?.createVirtualDisplay(
+ VIRTUAL_DISPLAY_NAME,
+ screenWidth,
+ screenHeight,
+ screenDensity,
+ DisplayManager.VIRTUAL_DISPLAY_FLAG_AUTO_MIRROR,
+ imageReader?.surface,
+ null,
+ handler
+ )
+ Log.d(TAG, "VirtualDisplay created successfully")
+ }
+ } catch (e: Exception) {
+ Log.e(TAG, "Error creating virtual display", e)
+ stopSelf()
+ }
+ }
+
+ private fun getMediaProjection(): MediaProjection? {
+ if (mediaProjection == null) {
+ mediaProjection = (getSystemService(MEDIA_PROJECTION_SERVICE) as MediaProjectionManager)
+ .getMediaProjection(resultCode, resultData!!)
+ mediaProjection?.registerCallback(mediaProjectionCallback, handler)
+ }
+ return mediaProjection
+ }
+
+ private fun cleanup() {
+ try {
+ Log.d(TAG, "Cleaning up resources")
+            handler.removeCallbacksAndMessages(null) // Remove all pending callbacks
+ virtualDisplay?.release()
+ virtualDisplay = null
+ imageReader?.close()
+ imageReader = null
+ mediaProjection?.unregisterCallback(mediaProjectionCallback)
+ mediaProjection = null
+ } catch (e: Exception) {
+ Log.e(TAG, "Error during cleanup", e)
+ } finally {
+ isCapturing = false
+ isProcessingRequest = false
+ isPlayingAudio = false
+ }
+ }
+
+ private fun createNotificationChannel() {
+ if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
+ val channel = NotificationChannel(
+ CHANNEL_ID,
+ "Screenshot Service",
+ NotificationManager.IMPORTANCE_LOW
+ )
+ val notificationManager = getSystemService(NotificationManager::class.java)
+ notificationManager.createNotificationChannel(channel)
+ }
+ }
+
+ private fun createNotification(): Notification {
+ return NotificationCompat.Builder(this, CHANNEL_ID)
+ .setContentTitle("截图服务")
+ .setContentText("正在处理截图")
+ .setSmallIcon(android.R.drawable.stat_notify_call_mute)
+ .setForegroundServiceBehavior(NotificationCompat.FOREGROUND_SERVICE_IMMEDIATE)
+ .build()
+ }
+
+ override fun onDestroy() {
+ super.onDestroy()
+ Log.d(TAG, "Service destroyed")
+ cleanup()
+ mediaPlayer?.release()
+ mediaPlayer = null
+        // Clean up temporary audio files
+ tempAudioDir.listFiles()?.forEach { it.delete() }
+ }
+
+ override fun onBind(intent: Intent?): IBinder? = null
+
+ private fun uploadScreenshot(imageBytes: ByteArray) {
+ Thread {
+ var retryCount = 0
+ var success = false
+
+ while (retryCount < MAX_UPLOAD_RETRIES && !success) {
+ try {
+                    // Mark a request as in flight
+ isProcessingRequest = true
+ Log.d(TAG, "Starting upload to $UPLOAD_URL, attempt ${retryCount + 1}")
+ val base64Image = Base64.encodeToString(imageBytes, Base64.NO_WRAP)
+
+                    // Truncate the base64 image data for logging
+ val truncatedImage = if (base64Image.length > 20) {
+ base64Image.substring(0, 20) + "..."
+ } else {
+ base64Image
+ }
+ Log.d(TAG, "Image data length: ${base64Image.length}, content: $truncatedImage")
+
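+                    // Build an OpenAI-style chat/completions payload: one user message with an empty
+                    // text part plus the screenshot as a base64 data URL; the backend treats the empty
+                    // text as a signal to summarize the frame into session memory.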
+ val jsonRequest = JSONObject().apply {
+ put("model", "bot-20241114164326-xlcc91")
+ put("stream", false)
+ put("messages", JSONArray().apply {
+ put(JSONObject().apply {
+ put("role", "user")
+ put("content", JSONArray().apply {
+ put(JSONObject().apply {
+ put("type", "text")
+ put("text", "")
+ })
+ put(JSONObject().apply {
+ put("type", "image_url")
+ put("image_url", JSONObject().apply {
+ put("url", "data:image/jpeg;base64,$base64Image")
+ })
+ })
+ })
+ })
+ })
+ }
+
+ val request = Request.Builder()
+ .url(UPLOAD_URL.format(serverIp))
+ .post(jsonRequest.toString().toRequestBody("application/json".toMediaTypeOrNull()))
+ .addHeader("X-Context-Id", contextId)
+ .addHeader("Connection", "close")
+ .build()
+
+                    Log.d(TAG, "contextId: $contextId")
+ val response = okHttpClient.newCall(request).execute()
+ val responseCode = response.code
+
+ if (response.isSuccessful) {
+ Log.d(TAG, "Upload successful")
+ success = true
+ } else {
+ Log.e(TAG, "Upload failed with response code: $responseCode")
+ val errorBody = response.body?.string()
+ Log.e(TAG, "Error response: $errorBody")
+ retryCount++
+ if (retryCount < MAX_UPLOAD_RETRIES) {
+ Thread.sleep(UPLOAD_RETRY_DELAY)
+ }
+ }
+ } catch (e: Exception) {
+ Log.e(TAG, "Error uploading screenshot", e)
+ retryCount++
+ if (retryCount < MAX_UPLOAD_RETRIES) {
+ Thread.sleep(UPLOAD_RETRY_DELAY)
+ }
+ } finally {
+ isProcessingRequest = false
+ }
+ }
+
+ if (!success) {
+ Log.e(TAG, "Failed to upload screenshot after $MAX_UPLOAD_RETRIES attempts")
+ }
+ }.start()
+ }
+}
diff --git a/demohouse/hgdoll/android/app/src/main/java/com/example/android/ui/theme/Color.kt b/demohouse/hgdoll/android/app/src/main/java/com/example/android/ui/theme/Color.kt
new file mode 100644
index 00000000..9597aba2
--- /dev/null
+++ b/demohouse/hgdoll/android/app/src/main/java/com/example/android/ui/theme/Color.kt
@@ -0,0 +1,11 @@
+package com.example.android.ui.theme
+
+import androidx.compose.ui.graphics.Color
+
+val Purple80 = Color(0xFFD0BCFF)
+val PurpleGrey80 = Color(0xFFCCC2DC)
+val Pink80 = Color(0xFFEFB8C8)
+
+val Purple40 = Color(0xFF6650a4)
+val PurpleGrey40 = Color(0xFF625b71)
+val Pink40 = Color(0xFF7D5260)
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/app/src/main/java/com/example/android/ui/theme/Theme.kt b/demohouse/hgdoll/android/app/src/main/java/com/example/android/ui/theme/Theme.kt
new file mode 100644
index 00000000..9044a9ce
--- /dev/null
+++ b/demohouse/hgdoll/android/app/src/main/java/com/example/android/ui/theme/Theme.kt
@@ -0,0 +1,49 @@
+package com.example.android.ui.theme
+
+import android.app.Activity
+import android.os.Build
+import androidx.compose.foundation.isSystemInDarkTheme
+import androidx.compose.material3.MaterialTheme
+import androidx.compose.material3.darkColorScheme
+import androidx.compose.material3.dynamicDarkColorScheme
+import androidx.compose.material3.dynamicLightColorScheme
+import androidx.compose.material3.lightColorScheme
+import androidx.compose.runtime.Composable
+import androidx.compose.ui.platform.LocalContext
+
+private val DarkColorScheme = darkColorScheme(
+ primary = Purple80,
+ secondary = PurpleGrey80,
+ tertiary = Pink80
+)
+
+private val LightColorScheme = lightColorScheme(
+ primary = Purple40,
+ secondary = PurpleGrey40,
+ tertiary = Pink40
+
+)
+
+@Composable
+fun AndroidTheme(
+ darkTheme: Boolean = isSystemInDarkTheme(),
+ // Dynamic color is available on Android 12+
+ dynamicColor: Boolean = true,
+ content: @Composable () -> Unit
+) {
+ val colorScheme = when {
+ dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S -> {
+ val context = LocalContext.current
+ if (darkTheme) dynamicDarkColorScheme(context) else dynamicLightColorScheme(context)
+ }
+
+ darkTheme -> DarkColorScheme
+ else -> LightColorScheme
+ }
+
+ MaterialTheme(
+ colorScheme = colorScheme,
+ typography = Typography,
+ content = content
+ )
+}
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/app/src/main/java/com/example/android/ui/theme/Type.kt b/demohouse/hgdoll/android/app/src/main/java/com/example/android/ui/theme/Type.kt
new file mode 100644
index 00000000..a7717539
--- /dev/null
+++ b/demohouse/hgdoll/android/app/src/main/java/com/example/android/ui/theme/Type.kt
@@ -0,0 +1,18 @@
+package com.example.android.ui.theme
+
+import androidx.compose.material3.Typography
+import androidx.compose.ui.text.TextStyle
+import androidx.compose.ui.text.font.FontFamily
+import androidx.compose.ui.text.font.FontWeight
+import androidx.compose.ui.unit.sp
+
+// Set of Material typography styles to start with
+val Typography = Typography(
+ bodyLarge = TextStyle(
+ fontFamily = FontFamily.Default,
+ fontWeight = FontWeight.Normal,
+ fontSize = 16.sp,
+ lineHeight = 24.sp,
+ letterSpacing = 0.5.sp
+ )
+)
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/app/src/main/java/com/example/android/utils/PermissionUtils.kt b/demohouse/hgdoll/android/app/src/main/java/com/example/android/utils/PermissionUtils.kt
new file mode 100644
index 00000000..a805a9c3
--- /dev/null
+++ b/demohouse/hgdoll/android/app/src/main/java/com/example/android/utils/PermissionUtils.kt
@@ -0,0 +1,17 @@
+package com.example.android.utils
+
+import android.content.Context
+import android.content.pm.PackageManager
+
+object PermissionUtils {
+ private const val TAG = "PermissionUtils"
+
+ fun hasAllPermissions(context: Context): Boolean {
+ return context.checkSelfPermission(android.Manifest.permission.RECORD_AUDIO) ==
+ PackageManager.PERMISSION_GRANTED
+ }
+
+ fun getPermissionMessage(context: Context): String {
+ return "需要录音权限才能使用语音识别功能"
+ }
+}
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/app/src/main/res/drawable/ic_launcher_background.xml b/demohouse/hgdoll/android/app/src/main/res/drawable/ic_launcher_background.xml
new file mode 100644
index 00000000..07d5da9c
--- /dev/null
+++ b/demohouse/hgdoll/android/app/src/main/res/drawable/ic_launcher_background.xml
@@ -0,0 +1,170 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/demohouse/hgdoll/android/app/src/main/res/drawable/ic_launcher_foreground.xml b/demohouse/hgdoll/android/app/src/main/res/drawable/ic_launcher_foreground.xml
new file mode 100644
index 00000000..2b068d11
--- /dev/null
+++ b/demohouse/hgdoll/android/app/src/main/res/drawable/ic_launcher_foreground.xml
@@ -0,0 +1,30 @@
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml b/demohouse/hgdoll/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
new file mode 100644
index 00000000..036d09bc
--- /dev/null
+++ b/demohouse/hgdoll/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml b/demohouse/hgdoll/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
new file mode 100644
index 00000000..036d09bc
--- /dev/null
+++ b/demohouse/hgdoll/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-hdpi/ic_launcher.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-hdpi/ic_launcher.webp
new file mode 100644
index 00000000..e98979ec
Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-hdpi/ic_launcher.webp differ
diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-hdpi/ic_launcher_foreground.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-hdpi/ic_launcher_foreground.webp
new file mode 100644
index 00000000..e46f31b7
Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-hdpi/ic_launcher_foreground.webp differ
diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp
new file mode 100644
index 00000000..b572245c
Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp differ
diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-mdpi/ic_launcher.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-mdpi/ic_launcher.webp
new file mode 100644
index 00000000..0ecbad5f
Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-mdpi/ic_launcher.webp differ
diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-mdpi/ic_launcher_foreground.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-mdpi/ic_launcher_foreground.webp
new file mode 100644
index 00000000..4ba2aa80
Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-mdpi/ic_launcher_foreground.webp differ
diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp
new file mode 100644
index 00000000..c7cb8106
Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp differ
diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-xhdpi/ic_launcher.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-xhdpi/ic_launcher.webp
new file mode 100644
index 00000000..b15f8179
Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-xhdpi/ic_launcher.webp differ
diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-xhdpi/ic_launcher_foreground.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-xhdpi/ic_launcher_foreground.webp
new file mode 100644
index 00000000..b3ce67e6
Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-xhdpi/ic_launcher_foreground.webp differ
diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp
new file mode 100644
index 00000000..0dfe4ca5
Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp differ
diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp
new file mode 100644
index 00000000..01666e0a
Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp differ
diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_foreground.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_foreground.webp
new file mode 100644
index 00000000..a435f8cf
Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_foreground.webp differ
diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp
new file mode 100644
index 00000000..27e9d377
Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp differ
diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp
new file mode 100644
index 00000000..dcc5c2a2
Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp differ
diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_foreground.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_foreground.webp
new file mode 100644
index 00000000..9156af0d
Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_foreground.webp differ
diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp
new file mode 100644
index 00000000..d9e9a380
Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp differ
diff --git a/demohouse/hgdoll/android/app/src/main/res/values/colors.xml b/demohouse/hgdoll/android/app/src/main/res/values/colors.xml
new file mode 100644
index 00000000..f8c6127d
--- /dev/null
+++ b/demohouse/hgdoll/android/app/src/main/res/values/colors.xml
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="utf-8"?>
+<resources>
+    <color name="purple_200">#FFBB86FC</color>
+    <color name="purple_500">#FF6200EE</color>
+    <color name="purple_700">#FF3700B3</color>
+    <color name="teal_200">#FF03DAC5</color>
+    <color name="teal_700">#FF018786</color>
+    <color name="black">#FF000000</color>
+    <color name="white">#FFFFFFFF</color>
+</resources>
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/app/src/main/res/values/ic_launcher_background.xml b/demohouse/hgdoll/android/app/src/main/res/values/ic_launcher_background.xml
new file mode 100644
index 00000000..580fcb50
--- /dev/null
+++ b/demohouse/hgdoll/android/app/src/main/res/values/ic_launcher_background.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="utf-8"?>
+<resources>
+    <color name="ic_launcher_background">#88B0EF</color>
+</resources>
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/app/src/main/res/values/strings.xml b/demohouse/hgdoll/android/app/src/main/res/values/strings.xml
new file mode 100644
index 00000000..0a7c39b8
--- /dev/null
+++ b/demohouse/hgdoll/android/app/src/main/res/values/strings.xml
@@ -0,0 +1,12 @@
+
+
+ HGDoll
+ 截图服务
+ 用于显示截图服务的通知
+ 正在运行截图服务
+ 点击停止截图服务
+ 语音识别服务
+ 用于显示语音识别服务的通知
+ 正在运行语音识别服务
+ 点击停止语音识别服务
+
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/app/src/main/res/values/themes.xml b/demohouse/hgdoll/android/app/src/main/res/values/themes.xml
new file mode 100644
index 00000000..bc6743f3
--- /dev/null
+++ b/demohouse/hgdoll/android/app/src/main/res/values/themes.xml
@@ -0,0 +1,5 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/app/src/main/res/xml/backup_rules.xml b/demohouse/hgdoll/android/app/src/main/res/xml/backup_rules.xml
new file mode 100644
index 00000000..4df92558
--- /dev/null
+++ b/demohouse/hgdoll/android/app/src/main/res/xml/backup_rules.xml
@@ -0,0 +1,13 @@
+
+
+
+
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/app/src/main/res/xml/data_extraction_rules.xml b/demohouse/hgdoll/android/app/src/main/res/xml/data_extraction_rules.xml
new file mode 100644
index 00000000..9ee9997b
--- /dev/null
+++ b/demohouse/hgdoll/android/app/src/main/res/xml/data_extraction_rules.xml
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/build.gradle.kts b/demohouse/hgdoll/android/build.gradle.kts
new file mode 100644
index 00000000..952b9306
--- /dev/null
+++ b/demohouse/hgdoll/android/build.gradle.kts
@@ -0,0 +1,6 @@
+// Top-level build file where you can add configuration options common to all sub-projects/modules.
+plugins {
+ alias(libs.plugins.android.application) apply false
+ alias(libs.plugins.kotlin.android) apply false
+ alias(libs.plugins.kotlin.compose) apply false
+}
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/gradle.properties b/demohouse/hgdoll/android/gradle.properties
new file mode 100644
index 00000000..20e2a015
--- /dev/null
+++ b/demohouse/hgdoll/android/gradle.properties
@@ -0,0 +1,23 @@
+# Project-wide Gradle settings.
+# IDE (e.g. Android Studio) users:
+# Gradle settings configured through the IDE *will override*
+# any settings specified in this file.
+# For more details on how to configure your build environment visit
+# http://www.gradle.org/docs/current/userguide/build_environment.html
+# Specifies the JVM arguments used for the daemon process.
+# The setting is particularly useful for tweaking memory settings.
+org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
+# When configured, Gradle will run in incubating parallel mode.
+# This option should only be used with decoupled projects. For more details, visit
+# https://developer.android.com/r/tools/gradle-multi-project-decoupled-projects
+# org.gradle.parallel=true
+# AndroidX package structure to make it clearer which packages are bundled with the
+# Android operating system, and which are packaged with your app's APK
+# https://developer.android.com/topic/libraries/support-library/androidx-rn
+android.useAndroidX=true
+# Kotlin code style for this project: "official" or "obsolete":
+kotlin.code.style=official
+# Enables namespacing of each library's R class so that its R class includes only the
+# resources declared in the library itself and none from the library's dependencies,
+# thereby reducing the size of the R class for that library
+android.nonTransitiveRClass=true
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/gradle/libs.versions.toml b/demohouse/hgdoll/android/gradle/libs.versions.toml
new file mode 100644
index 00000000..53079a96
--- /dev/null
+++ b/demohouse/hgdoll/android/gradle/libs.versions.toml
@@ -0,0 +1,32 @@
+[versions]
+agp = "8.9.0"
+kotlin = "2.0.21"
+coreKtx = "1.15.0"
+junit = "4.13.2"
+junitVersion = "1.2.1"
+espressoCore = "3.6.1"
+lifecycleRuntimeKtx = "2.6.1"
+activityCompose = "1.8.0"
+composeBom = "2024.09.00"
+
+[libraries]
+androidx-core-ktx = { group = "androidx.core", name = "core-ktx", version.ref = "coreKtx" }
+junit = { group = "junit", name = "junit", version.ref = "junit" }
+androidx-junit = { group = "androidx.test.ext", name = "junit", version.ref = "junitVersion" }
+androidx-espresso-core = { group = "androidx.test.espresso", name = "espresso-core", version.ref = "espressoCore" }
+androidx-lifecycle-runtime-ktx = { group = "androidx.lifecycle", name = "lifecycle-runtime-ktx", version.ref = "lifecycleRuntimeKtx" }
+androidx-activity-compose = { group = "androidx.activity", name = "activity-compose", version.ref = "activityCompose" }
+androidx-compose-bom = { group = "androidx.compose", name = "compose-bom", version.ref = "composeBom" }
+androidx-ui = { group = "androidx.compose.ui", name = "ui" }
+androidx-ui-graphics = { group = "androidx.compose.ui", name = "ui-graphics" }
+androidx-ui-tooling = { group = "androidx.compose.ui", name = "ui-tooling" }
+androidx-ui-tooling-preview = { group = "androidx.compose.ui", name = "ui-tooling-preview" }
+androidx-ui-test-manifest = { group = "androidx.compose.ui", name = "ui-test-manifest" }
+androidx-ui-test-junit4 = { group = "androidx.compose.ui", name = "ui-test-junit4" }
+androidx-material3 = { group = "androidx.compose.material3", name = "material3" }
+
+[plugins]
+android-application = { id = "com.android.application", version.ref = "agp" }
+kotlin-android = { id = "org.jetbrains.kotlin.android", version.ref = "kotlin" }
+kotlin-compose = { id = "org.jetbrains.kotlin.plugin.compose", version.ref = "kotlin" }
+
diff --git a/demohouse/hgdoll/android/gradle/wrapper/gradle-wrapper.properties b/demohouse/hgdoll/android/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 00000000..e45170c5
--- /dev/null
+++ b/demohouse/hgdoll/android/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,6 @@
+#Sat Apr 12 21:50:02 CST 2025
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.11.1-bin.zip
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
diff --git a/demohouse/hgdoll/android/gradlew b/demohouse/hgdoll/android/gradlew
new file mode 100755
index 00000000..4f906e0c
--- /dev/null
+++ b/demohouse/hgdoll/android/gradlew
@@ -0,0 +1,185 @@
+#!/usr/bin/env sh
+
+#
+# Copyright 2015 the original author or authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+##############################################################################
+##
+## Gradle start up script for UN*X
+##
+##############################################################################
+
+# Attempt to set APP_HOME
+# Resolve links: $0 may be a link
+PRG="$0"
+# Need this for relative symlinks.
+while [ -h "$PRG" ] ; do
+ ls=`ls -ld "$PRG"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ PRG="$link"
+ else
+ PRG=`dirname "$PRG"`"/$link"
+ fi
+done
+SAVED="`pwd`"
+cd "`dirname \"$PRG\"`/" >/dev/null
+APP_HOME="`pwd -P`"
+cd "$SAVED" >/dev/null
+
+APP_NAME="Gradle"
+APP_BASE_NAME=`basename "$0"`
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD="maximum"
+
+warn () {
+ echo "$*"
+}
+
+die () {
+ echo
+ echo "$*"
+ echo
+ exit 1
+}
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "`uname`" in
+ CYGWIN* )
+ cygwin=true
+ ;;
+ Darwin* )
+ darwin=true
+ ;;
+ MINGW* )
+ msys=true
+ ;;
+ NONSTOP* )
+ nonstop=true
+ ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD="$JAVA_HOME/jre/sh/java"
+ else
+ JAVACMD="$JAVA_HOME/bin/java"
+ fi
+ if [ ! -x "$JAVACMD" ] ; then
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+ fi
+else
+ JAVACMD="java"
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
+ MAX_FD_LIMIT=`ulimit -H -n`
+ if [ $? -eq 0 ] ; then
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
+ MAX_FD="$MAX_FD_LIMIT"
+ fi
+ ulimit -n $MAX_FD
+ if [ $? -ne 0 ] ; then
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
+ fi
+ else
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
+ fi
+fi
+
+# For Darwin, add options to specify how the application appears in the dock
+if $darwin; then
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
+fi
+
+# For Cygwin or MSYS, switch paths to Windows format before running java
+if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
+
+ JAVACMD=`cygpath --unix "$JAVACMD"`
+
+ # We build the pattern for arguments to be converted via cygpath
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
+ SEP=""
+ for dir in $ROOTDIRSRAW ; do
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
+ SEP="|"
+ done
+ OURCYGPATTERN="(^($ROOTDIRS))"
+ # Add a user-defined pattern to the cygpath arguments
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
+ fi
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
+ i=0
+ for arg in "$@" ; do
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
+
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
+ else
+ eval `echo args$i`="\"$arg\""
+ fi
+ i=`expr $i + 1`
+ done
+ case $i in
+ 0) set -- ;;
+ 1) set -- "$args0" ;;
+ 2) set -- "$args0" "$args1" ;;
+ 3) set -- "$args0" "$args1" "$args2" ;;
+ 4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+ 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+ 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+ 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+ 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+ 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+ esac
+fi
+
+# Escape application args
+save () {
+ for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
+ echo " "
+}
+APP_ARGS=`save "$@"`
+
+# Collect all arguments for the java command, following the shell quoting and substitution rules
+eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
+
+exec "$JAVACMD" "$@"
diff --git a/demohouse/hgdoll/android/gradlew.bat b/demohouse/hgdoll/android/gradlew.bat
new file mode 100644
index 00000000..ac1b06f9
--- /dev/null
+++ b/demohouse/hgdoll/android/gradlew.bat
@@ -0,0 +1,89 @@
+@rem
+@rem Copyright 2015 the original author or authors.
+@rem
+@rem Licensed under the Apache License, Version 2.0 (the "License");
+@rem you may not use this file except in compliance with the License.
+@rem You may obtain a copy of the License at
+@rem
+@rem https://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+
+@if "%DEBUG%" == "" @echo off
+@rem ##########################################################################
+@rem
+@rem Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%" == "" set DIRNAME=.
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Resolve any "." and ".." in APP_HOME to make it shorter.
+for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if "%ERRORLEVEL%" == "0" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto execute
+
+echo.
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
+echo.
+echo Please set the JAVA_HOME variable in your environment to match the
+echo location of your Java installation.
+
+goto fail
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
+
+:end
+@rem End local scope for the variables with windows NT shell
+if "%ERRORLEVEL%"=="0" goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
+exit /b 1
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/demohouse/hgdoll/android/local.properties b/demohouse/hgdoll/android/local.properties
new file mode 100644
index 00000000..96563860
--- /dev/null
+++ b/demohouse/hgdoll/android/local.properties
@@ -0,0 +1,10 @@
+## This file is automatically generated by Android Studio.
+# Do not modify this file -- YOUR CHANGES WILL BE ERASED!
+#
+# This file should *NOT* be checked into Version Control Systems,
+# as it contains information specific to your local configuration.
+#
+# Location of the SDK. This is only used by Gradle.
+# For customization when using a Version Control System, please read the
+# header note.
+sdk.dir=/Users/xueweihan/Library/Android/sdk
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/settings.gradle.kts b/demohouse/hgdoll/android/settings.gradle.kts
new file mode 100644
index 00000000..70446850
--- /dev/null
+++ b/demohouse/hgdoll/android/settings.gradle.kts
@@ -0,0 +1,23 @@
+pluginManagement {
+ repositories {
+ google {
+ content {
+ includeGroupByRegex("com\\.android.*")
+ includeGroupByRegex("com\\.google.*")
+ includeGroupByRegex("androidx.*")
+ }
+ }
+ mavenCentral()
+ gradlePluginPortal()
+ }
+}
+dependencyResolutionManagement {
+ repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
+ repositories {
+ google()
+ mavenCentral()
+ }
+}
+
+rootProject.name = "android"
+include(":app")
diff --git a/demohouse/hgdoll/android/show.png b/demohouse/hgdoll/android/show.png
new file mode 100644
index 00000000..9fd9e907
Binary files /dev/null and b/demohouse/hgdoll/android/show.png differ
diff --git a/demohouse/hgdoll/docs/README_en.md b/demohouse/hgdoll/docs/README_en.md
new file mode 100644
index 00000000..32e3b484
--- /dev/null
+++ b/demohouse/hgdoll/docs/README_en.md
@@ -0,0 +1,88 @@
+
+
+
中文 | English
+
HGDoll is an AI mobile companion app.
+
+
+This is a fully open-source AI mobile gaming companion app. While you play games, HGDoll can view your game screen in real time, chat with you, and cheer you on, bringing a fun and engaging companion experience. It is built on the Doubao LLM and Volcano Arkitect, consisting of an Android client (Kotlin) and a backend service (Python), both easy to run locally.
+
+Currently, HGDoll is still a "toy" project with many bugs and areas for improvement. We welcome you to try it out and contribute code to help us make it better!
+
+## Architecture Diagram
+
+```mermaid
+graph TD
+ User((User)) --> Android[Android Client]
+
+ subgraph Client[Client Side]
+ Android --> Speech[Speech Recognition]
+ Android --> Screen[Screen Recording]
+ Speech --> SpeechAPI[Doubao Streaming ASR]
+ SpeechAPI --> TextResult[Speech-to-Text Result]
+ Screen --> ScreenCapture[Periodic Screenshots]
+ AudioPlay[Audio Playback] --> Android
+ end
+
+ subgraph Server[Server Side Arkitect]
+ TextResult --> Backend[Backend Service]
+ ScreenCapture --> Backend
+ Backend --> TempMemory[Temporary Memory]
+ TempMemory --> Context[Session Context]
+ Context --> CTX1[Context-id-1]
+ Context --> CTX2[Context-id-2]
+ Context --> CTX3[Context-id-3]
+ Context --> CTXN[...]
+ Context --> Prompt[Prompt Generation]
+ ImageResult[Screenshot Recognition Result] --> TempMemory
+ AudioResult[Speech Synthesis Result] --> AudioPlay
+ end
+
+ subgraph AI[AI Model Service]
+ Backend --> VLM[Doubao-vision-pro-32k]
+ VLM --> ImageResult
+ Prompt --> LLM[Doubao-pro-32k]
+ LLM --> TTS[Doubao Speech Synthesis]
+ TTS --> AudioResult
+ end
+
+ style User fill:#f9f,stroke:#333,stroke-width:2px
+ style Client fill:#e4f7fb,stroke:#333,stroke-width:1px
+ style Server fill:#e6ffe6,stroke:#333,stroke-width:1px
+ style AI fill:#e6e6ff,stroke:#333,stroke-width:1px
+ style Android fill:#fff,stroke:#333,stroke-width:1px
+ style Backend fill:#fff,stroke:#333,stroke-width:1px
+ style VLM fill:#fff,stroke:#333,stroke-width:1px
+ style LLM fill:#fff,stroke:#333,stroke-width:1px
+ style TTS fill:#fff,stroke:#333,stroke-width:1px
+```
+
+## Quick Start
+
+Startup and installation instructions for both the client and backend can be found in their respective directories. For API Key configuration, [see here](key.md).
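+
+As a quick orientation, and assuming the defaults described in [server/README.md](../server/README.md), a minimal local run of the backend looks roughly like this:
+
+```bash
+git clone https://github.com/521xueweihan/HGDoll.git
+cd HGDoll/server/
+python -m venv .venv && source .venv/bin/activate
+pip install -r requirements.txt
+bash run.sh   # after filling in src/config.py and ARK_API_KEY in run.sh
+```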
+
+### Project Structure
+
+```
+HGDoll/
+├── android/ # Android client
+├── server/ # Backend service
+└── docs/ # Project documentation
+```
+
+### Tech Stack
+
+#### Android Client
+- Kotlin
+- Jetpack Compose
+- Gradle Kotlin DSL
+- AndroidX
+
+#### Backend Service
+- Python 3.8–3.12
+- FastAPI
+- Volcano Arkitect SDK
+- Uvicorn
+
+## License
+
+This project is licensed under the MIT License. See the [LICENSE](../LICENSE) file for details.
diff --git a/demohouse/hgdoll/docs/assets/faq1.png b/demohouse/hgdoll/docs/assets/faq1.png
new file mode 100644
index 00000000..5fcab0ea
Binary files /dev/null and b/demohouse/hgdoll/docs/assets/faq1.png differ
diff --git a/demohouse/hgdoll/docs/assets/faq2.png b/demohouse/hgdoll/docs/assets/faq2.png
new file mode 100644
index 00000000..dac3e2d3
Binary files /dev/null and b/demohouse/hgdoll/docs/assets/faq2.png differ
diff --git a/demohouse/hgdoll/docs/assets/faq3.png b/demohouse/hgdoll/docs/assets/faq3.png
new file mode 100644
index 00000000..1b248d1c
Binary files /dev/null and b/demohouse/hgdoll/docs/assets/faq3.png differ
diff --git a/demohouse/hgdoll/docs/assets/faq4.png b/demohouse/hgdoll/docs/assets/faq4.png
new file mode 100644
index 00000000..635b6fe1
Binary files /dev/null and b/demohouse/hgdoll/docs/assets/faq4.png differ
diff --git a/demohouse/hgdoll/docs/assets/faq5.png b/demohouse/hgdoll/docs/assets/faq5.png
new file mode 100644
index 00000000..7ea1ff76
Binary files /dev/null and b/demohouse/hgdoll/docs/assets/faq5.png differ
diff --git a/demohouse/hgdoll/docs/assets/faq6.png b/demohouse/hgdoll/docs/assets/faq6.png
new file mode 100644
index 00000000..fbbaf72e
Binary files /dev/null and b/demohouse/hgdoll/docs/assets/faq6.png differ
diff --git a/demohouse/hgdoll/docs/assets/icon.png b/demohouse/hgdoll/docs/assets/icon.png
new file mode 100644
index 00000000..aa42baed
Binary files /dev/null and b/demohouse/hgdoll/docs/assets/icon.png differ
diff --git a/demohouse/hgdoll/docs/key.md b/demohouse/hgdoll/docs/key.md
new file mode 100644
index 00000000..06391b7d
--- /dev/null
+++ b/demohouse/hgdoll/docs/key.md
@@ -0,0 +1,38 @@
+# How to Obtain the API Keys
+
+## Related models
+
+- Doubao-pro-32k: handles memory processing; when the current frame alone cannot answer the user's question, the LLM combines it with historical memory to give a precise answer.
+- Doubao-vision-pro-32k: performs visual understanding of the video frames captured in real time.
+- Doubao Speech Synthesis (TTS): turns the model's text replies into natural, fluent speech.
+- Doubao Streaming ASR: transcribes the user's spoken questions into text so that the LLM can understand and answer them.
+
+## Prerequisites
+
+- A Volcano Ark API Key has been obtained [reference](https://www.volcengine.com/docs/82379/1298459#api-key-%E7%AD%BE%E5%90%8D%E9%89%B4%E6%9D%83)
+- The APP ID and Access Token of the speech technology products have been obtained; see the section below
+- An endpoint for Doubao-Vision-Pro 32K has been created [reference](https://www.volcengine.com/docs/82379/1099522#594199f1)
+- An endpoint for Doubao-Pro 32K has been created [reference](https://www.volcengine.com/docs/82379/1099522#594199f1)
+
+
+## How to get TTS_APP_ID, TTS_ACCESS_TOKEN, ASR_APP_ID and ASR_ACCESS_TOKEN
+
+1. [Complete enterprise verification](https://console.volcengine.com/user/authentication/detail/)
+
+2. [Enable the speech technology products](https://console.volcengine.com/speech/app)
+
+3. [Create an application](https://console.volcengine.com/speech/app) and check both the large-model speech synthesis (TTS) and the large-model streaming ASR services
+   ![](assets/faq1.png)
+
+4. Enable the speech synthesis large model and make sure the page lists available voices. Note: it takes roughly 5-10 minutes after enabling before the TTS model becomes usable.
+   ![](assets/faq2.png)
+   ![](assets/faq3.png)
+
+5. The streaming ASR large model comes with a trial package, so enabling the paid plan is optional; for a stable service, the paid version is recommended.
+   ![](assets/faq4.png)
+
+6. Get the TTS_APP_ID and TTS_ACCESS_TOKEN
+   ![](assets/faq5.png)
+
+7. Get the ASR_APP_ID and ASR_ACCESS_TOKEN
+   ![](assets/faq6.png)
\ No newline at end of file
diff --git a/demohouse/hgdoll/server/.gitignore b/demohouse/hgdoll/server/.gitignore
new file mode 100644
index 00000000..d71405cb
--- /dev/null
+++ b/demohouse/hgdoll/server/.gitignore
@@ -0,0 +1,175 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# UV
+# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+#uv.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+.DS_Store
diff --git a/demohouse/hgdoll/server/README.md b/demohouse/hgdoll/server/README.md
new file mode 100644
index 00000000..96278712
--- /dev/null
+++ b/demohouse/hgdoll/server/README.md
@@ -0,0 +1,71 @@
+# HGDoll Backend
+
+The HGDoll backend is built on Arkitect, the open-source Python SDK from Volcano Ark for building high-code applications. It targets developers with professional engineering skills and provides the toolset and workflows needed to build LLM applications. For more details, see the [high-code SDK Arkitect](https://github.com/volcengine/ai-app-lab/blob/main/arkitect/README.md).
+
+
+## 1. Quick Start
+
+This guide walks you through deploying the HGDoll server locally (Python 3.8-3.12). [See here](../docs/key.md) for how to obtain the API keys required to run it.
+
+### 1.1 Clone the repository
+
+```bash
+git clone https://github.com/521xueweihan/HGDoll.git
+cd HGDoll/server/
+```
+
+Note: all of the commands below are executed from the `server/` directory.
+
+### 1.2 Edit the configuration
+
+Edit `server/src/config.py` and fill in the following configuration variables.
+
+| Variable | Description |
+| ------------ | --------------------------------- |
+| VLM_ENDPOINT | doubao-vision-pro 32k endpoint id |
+| LLM_ENDPOINT | doubao-pro 32k endpoint id |
+| TTS_APP_ID | speech synthesis (TTS) APP ID |
+| TTS_ACCESS_TOKEN | speech synthesis (TTS) Access Token |
+
+Edit `server/run.sh` and fill in your API key.
+
+| Variable | Description |
+| ----------- | ---------------- |
+| ARK_API_KEY | Volcano Ark API Key |
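+
+For reference, a filled-in configuration might look like the sketch below; every ID and key shown here is a placeholder, so substitute your own values:
+
+```python
+# server/src/config.py
+VLM_ENDPOINT = "ep-2025xxxxxxx-vlm"   # Doubao-vision-pro 32k endpoint id
+LLM_ENDPOINT = "ep-2025xxxxxxx-llm"   # Doubao-pro 32k endpoint id
+
+TTS_APP_ID = "1234567890"             # speech synthesis APP ID
+TTS_ACCESS_TOKEN = "your-tts-access-token"
+```
+
+```bash
+# server/run.sh
+export ARK_API_KEY=your-ark-api-key
+python src/main.py
+```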
+
+### 1.3 Install dependencies
+
+```bash
+python -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+```
+
+### 1.4 Start the server
+
+```bash
+bash run.sh
+
+INFO: Started server process [2669]
+INFO: Waiting for application startup.
+2025-04-24 15:32:08 [debug ] singleton class initialized name=ClientPool
+INFO: Application startup complete.
+INFO: Uvicorn running on http://0.0.0.0:8888 (Press CTRL+C to quit)
+```
+
+### 1.5 Test
+
+```bash
+curl -i http://localhost:8888/v1/ping
+
+HTTP/1.1 200 OK
+date: Thu, 24 Apr 2025 07:32:47 GMT
+server: uvicorn
+content-length: 2
+content-type: application/json
+x-request-id: 202504241532470000897F8BFC9C815122
+x-client-request-id: 202504241532470000897F8BFC9C815122
+```
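+
+Beyond the health check, you can also post a chat message the way the Android client does. The sketch below is illustrative only: the path and the `X-Context-Id` header mirror what `ScreenshotService.kt` sends, and the `model` value is a placeholder (the client puts its own bot id there); adjust both to your deployment.
+
+```bash
+curl http://localhost:8888/api/v3/bots/chat/completions \
+  -H 'Content-Type: application/json' \
+  -H 'X-Context-Id: demo-session-1' \
+  -d '{
+        "model": "my-bot-id",
+        "stream": true,
+        "messages": [
+          {"role": "user", "content": [{"type": "text", "text": "现在局势怎么样?"}]}
+        ]
+      }'
+```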
+
+> **💡 Note**
+> This demo is for testing only. For production, implement the Storage class interface in `server/src/utils.py` against your own storage backend to provide long-term memory.
\ No newline at end of file
diff --git a/demohouse/hgdoll/server/requirements.txt b/demohouse/hgdoll/server/requirements.txt
new file mode 100644
index 00000000..5effe7a8
--- /dev/null
+++ b/demohouse/hgdoll/server/requirements.txt
@@ -0,0 +1,117 @@
+aiofiles==23.2.1
+aiohappyeyeballs==2.4.4
+aiohttp==3.10.11
+aiosignal==1.3.1
+annotated-types==0.7.0
+anyio==4.5.2
+arkitect==0.0.1
+async-timeout==4.0.3
+attrs==24.3.0
+beautifulsoup4==4.13.3
+build==0.10.0
+CacheControl==0.13.1
+certifi==2024.12.14
+cffi==1.17.1
+charset-normalizer==3.4.1
+cleo==2.1.0
+click==8.1.8
+crashtest==0.4.1
+crcmod==1.7
+dataclasses-json==0.6.7
+decorator==5.2.1
+Deprecated==1.2.15
+distlib==0.3.9
+dulwich==0.21.7
+exceptiongroup==1.2.2
+fastapi==0.115.6
+filelock==3.18.0
+frozenlist==1.5.0
+google==3.0.0
+googleapis-common-protos==1.66.0
+grpcio==1.69.0
+h11==0.14.0
+httpcore==1.0.7
+httptools==0.6.4
+httpx==0.28.1
+idna==3.10
+importlib_metadata==8.5.0
+installer==0.7.0
+jaraco.classes==3.4.0
+Jinja2==3.1.5
+jsonpatch==1.33
+jsonpointer==3.0.0
+jsonschema==4.17.3
+keyring==24.3.1
+langchain==0.1.20
+langchain-community==0.0.38
+langchain-core==0.1.52
+langchain-text-splitters==0.0.2
+langsmith==0.1.147
+MarkupSafe==2.1.5
+marshmallow==3.22.0
+more-itertools==10.6.0
+msgpack==1.1.0
+multidict==6.1.0
+mypy-extensions==1.0.0
+numpy==1.24.4
+opentelemetry-api==1.29.0
+opentelemetry-exporter-otlp==1.29.0
+opentelemetry-exporter-otlp-proto-common==1.29.0
+opentelemetry-exporter-otlp-proto-grpc==1.29.0
+opentelemetry-exporter-otlp-proto-http==1.29.0
+opentelemetry-proto==1.29.0
+opentelemetry-sdk==1.29.0
+opentelemetry-semantic-conventions==0.50b0
+orjson==3.10.6
+packaging==23.2
+pexpect==4.9.0
+pkginfo==1.12.1.2
+platformdirs==3.11.0
+poetry==1.6.1
+poetry-core==1.7.0
+poetry-plugin-export==1.6.0
+propcache==0.2.0
+protobuf==5.29.3
+ptyprocess==0.7.0
+py==1.11.0
+pycparser==2.22
+pycryptodome==3.9.9
+pydantic==2.10.5
+pydantic_core==2.27.2
+pyproject_hooks==1.2.0
+pyrsistent==0.20.0
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.0
+pytz==2020.5
+PyYAML==6.0.2
+RapidFuzz==3.13.0
+requests==2.32.3
+requests-toolbelt==1.0.0
+retry==0.9.2
+shellingham==1.5.4
+six==1.17.0
+sniffio==1.3.1
+soupsieve==2.6
+SQLAlchemy==2.0.37
+starlette==0.41.3
+structlog==24.4.0
+tenacity==8.3.0
+tomli==2.2.1
+tomlkit==0.13.2
+tos==2.8.1
+trove-classifiers==2025.3.19.19
+typing-inspect==0.9.0
+typing-inspection==0.4.0
+typing_extensions==4.12.2
+urllib3==2.2.3
+uvicorn==0.29.0
+uvloop==0.21.0
+virtualenv==20.30.0
+volcengine==1.0.179
+volcengine-python-sdk==1.0.120
+watchfiles==1.0.4
+websockets==13.1
+wrapt==1.17.2
+xattr==0.10.1
+yarl==1.15.2
+zipp==3.20.2
diff --git a/demohouse/hgdoll/server/run.sh b/demohouse/hgdoll/server/run.sh
new file mode 100755
index 00000000..0932f50a
--- /dev/null
+++ b/demohouse/hgdoll/server/run.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+export ARK_API_KEY=
+python src/main.py
\ No newline at end of file
diff --git a/demohouse/hgdoll/server/src/__init__.py b/demohouse/hgdoll/server/src/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/demohouse/hgdoll/server/src/config.py b/demohouse/hgdoll/server/src/config.py
new file mode 100644
index 00000000..1c13d96c
--- /dev/null
+++ b/demohouse/hgdoll/server/src/config.py
@@ -0,0 +1,7 @@
+# Doubao-vision-pro-32k ENDPOINT_ID
+VLM_ENDPOINT = ""
+# Doubao-pro-32k ENDPOINT_ID
+LLM_ENDPOINT = ""  # chat model used together with the short-term session memory
+
+TTS_APP_ID = ""
+TTS_ACCESS_TOKEN = ""
diff --git a/demohouse/hgdoll/server/src/main.py b/demohouse/hgdoll/server/src/main.py
new file mode 100644
index 00000000..ad39b573
--- /dev/null
+++ b/demohouse/hgdoll/server/src/main.py
@@ -0,0 +1,271 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+# Licensed under the 【火山方舟】原型应用软件自用许可协议
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# https://www.volcengine.com/docs/82379/1433703
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Video Analyser: Realtime vision and speech analysis
+"""
+
+import asyncio
+import datetime
+import logging
+import os
+from typing import AsyncIterable, List, Optional, Tuple, Union
+
+import prompt
+import utils
+from config import LLM_ENDPOINT, VLM_ENDPOINT, TTS_ACCESS_TOKEN, TTS_APP_ID
+
+from arkitect.core.component.llm import BaseChatLanguageModel
+from arkitect.core.component.llm.model import (
+ ArkChatCompletionChunk,
+ ArkChatParameters,
+ ArkChatRequest,
+ ArkChatResponse,
+ ArkMessage,
+ ChatCompletionMessageTextPart,
+ Response,
+)
+from arkitect.core.component.tts import (
+ AudioParams,
+ ConnectionParams,
+ AsyncTTSClient,
+ create_bot_audio_responses,
+)
+from arkitect.launcher.local.serve import launch_serve
+from arkitect.telemetry.trace import task
+from arkitect.utils.context import get_headers, get_reqid
+
+FRAME_DESCRIPTION_PREFIX = "视频帧描述:"
+LAST_HISTORY_MESSAGES = 180 # truncate history messages to 180
+
+logger = logging.getLogger(__name__)
+
+
+@task(watch_io=False)
+async def get_request_messages_for_llm(
+ contexts: utils.Storage,
+ context_id: str,
+ request: ArkChatRequest,
+ prompt: str,
+) -> List[ArkMessage]:
+ request_messages = await contexts.get_history(context_id)
+ if isinstance(request.messages[-1].content, list):
+ assert isinstance(
+ request.messages[-1].content[0], ChatCompletionMessageTextPart
+ )
+ text = request.messages[-1].content[0].text
+ else:
+ text = request.messages[-1].content
+ request_messages = request_messages + [ArkMessage(role="user", content=text)]
+ request_messages = request_messages[-LAST_HISTORY_MESSAGES:]
+ return [ArkMessage(role="system", content=prompt)] + request_messages
+
+
+@task(watch_io=False)
+async def chat_with_vlm(
+ request: ArkChatRequest,
+ parameters: ArkChatParameters,
+) -> Tuple[bool, Optional[AsyncIterable[ArkChatCompletionChunk]]]:
+ vlm = BaseChatLanguageModel(
+ endpoint_id=VLM_ENDPOINT,
+ messages=[ArkMessage(role="system", content=prompt.VLM_CHAT_PROMPT)]
+ + [request.messages[-1]],
+ parameters=parameters,
+ )
+
+ iterator = vlm.astream()
+ message = ""
+ first_resp = await iterator.__anext__()
+ if first_resp.choices and first_resp.choices[0].delta.content != "":
+ message += first_resp.choices[0].delta.content
+ second_resp = await iterator.__anext__()
+ if second_resp.choices and second_resp.choices[0].delta.content != "":
+ message += second_resp.choices[0].delta.content
+ print("message:", message)
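+    # A reply starting with "不知道" ("I don't know") means the VLM could not answer from the
+    # frame alone, so signal the caller to fall back (e.g. to the LLM with session memory).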
+ if message.startswith("不知道"):
+ return False, None
+ async def stream_vlm_outputs():
+ yield first_resp
+ yield second_resp
+ async for resp in iterator:
+ yield resp
+
+ return True, stream_vlm_outputs()
+
+
+@task(watch_io=False)
+async def llm_answer(
+ contexts, context_id, request, parameters: ArkChatParameters
+) -> Tuple[bool, Optional[AsyncIterable[ArkChatCompletionChunk]]]:
+ request_messages = await get_request_messages_for_llm(
+ contexts, context_id, request, prompt.LLM_PROMPT
+ )
+ llm = BaseChatLanguageModel(
+ endpoint_id=LLM_ENDPOINT,
+ messages=request_messages,
+ parameters=parameters,
+ )
+
+ iterator = llm.astream()
+ first_resp = await iterator.__anext__()
+
+ async def stream_llm_outputs():
+ yield first_resp
+ async for resp in iterator:
+ yield resp
+
+ return True, stream_llm_outputs()
+
+
+@task(watch_io=False)
+async def chat_with_llm(
+ contexts: utils.Storage,
+ request: ArkChatRequest,
+ parameters: ArkChatParameters,
+ context_id: str,
+) -> Tuple[bool, Optional[AsyncIterable[ArkChatCompletionChunk]]]:
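+    """
+    Thin wrapper that runs llm_answer as an asyncio task; keeping it as a task
+    means the call could be overlapped with other branches if more are added.
+    """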
+ response_task = asyncio.create_task(
+ llm_answer(contexts, context_id, request, parameters)
+ )
+ logger.info("llm can respond")
+ return await response_task
+
+
+@task(watch_io=False)
+async def chat_with_branches(
+ contexts: utils.Storage,
+ request: ArkChatRequest,
+ parameters: ArkChatParameters,
+ context_id: str,
+) -> AsyncIterable[Union[ArkChatCompletionChunk, ArkChatResponse]]:
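+    """
+    Dispatch the request to the available answer branches. Only the LLM branch
+    is wired up here; the VLM chat branch (chat_with_vlm) is left as a possible
+    extension point.
+    """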
+
+ llm_task = asyncio.create_task(
+ chat_with_llm(contexts, request, parameters, context_id)
+ )
+
+    _, llm_iter = await llm_task
+    return llm_iter
+
+
+@task(watch_io=False)
+async def summarize_image(
+ contexts: utils.Storage,
+ request: ArkChatRequest,
+ parameters: ArkChatParameters,
+ context_id: str,
+):
+ """
+ Summarize the image and append the summary to the context.
+ """
+ request_messages = [
+ ArkMessage(role="system", content=prompt.VLM_PROMPT)
+ ] + request.messages
+ vlm = BaseChatLanguageModel(
+ endpoint_id=VLM_ENDPOINT,
+ messages=request_messages,
+ parameters=parameters,
+ )
+ resp = await vlm.arun()
+ message = resp.choices[0].message.content
+ print("图片分析结果:", message)
+ message = FRAME_DESCRIPTION_PREFIX + message
+ await contexts.append(context_id, ArkMessage(role="assistant", content=message))
+
+
+@task(watch_io=False)
+async def default_model_calling(
+ request: ArkChatRequest,
+) -> AsyncIterable[Union[ArkChatCompletionChunk, ArkChatResponse]]:
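+    """
+    Main entrypoint for one chat request. Screenshot-only messages (an empty
+    text part plus an image) are summarized by the VLM in the background and
+    produce no reply; normal messages are answered by the LLM, streamed back
+    through TTS as audio chunks, and then recorded in the context history.
+    """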
+    # The local in-memory context store should be replaced with persistent storage in production.
+ context_id: Optional[str] = get_headers().get("X-Context-Id", None)
+ print("context_id:", context_id)
+ assert context_id is not None
+ contexts: utils.Storage = utils.CoroutineSafeMap.get_instance_sync()
+ if not await contexts.contains(context_id):
+ await contexts.set(context_id, utils.Context())
+
+    # If the latest message is a list whose first text part is empty, treat it
+    # as a screenshot: summarize it with the VLM in the background and return
+    # without producing a reply.
+ is_image = (
+ isinstance(request.messages[-1].content, list)
+ and isinstance(request.messages[-1].content[0], ChatCompletionMessageTextPart)
+ and request.messages[-1].content[0].text == ""
+ )
+ print("is_image", is_image)
+ parameters = ArkChatParameters(**request.__dict__)
+ if is_image:
+ _ = asyncio.create_task(
+ summarize_image(contexts, request, parameters, context_id)
+ )
+ return
+
+ # Initialize TTS connection asynchronously before launching LLM request to reduce latency
+ tts_client = AsyncTTSClient(
+ connection_params=ConnectionParams(
+ speaker="zh_female_meilinvyou_emo_v2_mars_bigtts",
+ audio_params=AudioParams(
+ format="mp3",
+ sample_rate=24000,
+ ),
+ ),
+ access_key=TTS_ACCESS_TOKEN,
+ app_key=TTS_APP_ID,
+ conn_id=get_reqid(),
+ log_id=get_reqid(),
+ )
+ connection_task = asyncio.create_task(tts_client.init())
+
+ # Use LLM and VLM to answer user's question
+ # Received a response iterator from LLM or VLM
+ response_iter = await chat_with_branches(contexts, request, parameters, context_id)
+ await connection_task
+ message = ""
+ tts_stream_output = tts_client.tts(response_iter, stream=request.stream)
+ async for resp in create_bot_audio_responses(tts_stream_output, request):
+ if isinstance(resp, ArkChatCompletionChunk):
+ if len(resp.choices) > 0 and hasattr(resp.choices[0].delta, "audio"):
+ message += resp.choices[0].delta.audio.get("transcript", "")
+ else:
+ if len(resp.choices) > 0 and resp.choices[0].message.audio:
+ message += resp.choices[0].message.audio.transcript
+ yield resp
+ await tts_client.close()
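+    # Persist this turn: store the user's text and the assistant's spoken
+    # transcript in the context history for future requests.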
+ text = ""
+ if isinstance(request.messages[-1].content, list) and isinstance(
+ request.messages[-1].content[0], ChatCompletionMessageTextPart
+ ):
+ text = request.messages[-1].content[0].text
+ elif isinstance(request.messages[-1].content, str):
+ text = request.messages[-1].content
+ await contexts.append(
+ context_id,
+ ArkMessage(role="user", content=text),
+ )
+ await contexts.append(context_id, ArkMessage(role="assistant", content=message))
+
+
+@task(watch_io=False)
+async def main(request: ArkChatRequest) -> AsyncIterable[Response]:
+ async for resp in default_model_calling(request):
+ yield resp
+
+
+if __name__ == "__main__":
+ port = os.getenv("_FAAS_RUNTIME_PORT")
+ launch_serve(
+ package_path="main",
+ port=int(port) if port else 8888,
+ health_check_path="/v1/ping",
+ endpoint_path="/api/v3/bots/chat/completions",
+ clients={},
+ trace_on=False
+ )
\ No newline at end of file
diff --git a/demohouse/hgdoll/server/src/prompt.py b/demohouse/hgdoll/server/src/prompt.py
new file mode 100644
index 00000000..3a7bec65
--- /dev/null
+++ b/demohouse/hgdoll/server/src/prompt.py
@@ -0,0 +1,258 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+# Licensed under the 【火山方舟】原型应用软件自用许可协议
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# https://www.volcengine.com/docs/82379/1433703
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+VLM_PROMPT = """
+# 角色
+你是一位专业的游戏界面分析专家,擅长识别各类游戏场景、操作界面、玩家角色和游戏行为,并以积极正向的方式描述玩家的游戏状态。
+
+## 技能
+1. 游戏场景识别:
+ - 识别当前游戏类型(休闲、竞技、角色扮演等)
+ - 分析玩家所处的游戏阶段
+ - 识别玩家身份和角色(如地主、农民、队友等)
+ - 识别玩家可能的游戏意图
+ - 观察游戏界面关键元素和状态
+
+2. 游戏场景特殊识别:
+ - 斗地主:区分地主/农民身份,观察各方牌面情况
+ - 麻将:识别门风位置,牌面信息
+ - 其他棋牌游戏:识别玩家角色和当前局势
+ - 竞技游戏:识别队伍位置,装备状态
+ - 角色扮演:识别角色属性,任务进度
+
+3. 描述重点:
+ - 玩家当前所处的游戏场景
+ - 游戏界面的主要操作区域
+ - 玩家的游戏进展
+ - 可能的下一步游戏操作
+ - 游戏界面反馈和提示信息
+
+## 输出示例
+"这是一个斗地主游戏的出牌界面
+玩家当前是地主身份
+手牌区域显示有炸弹和顺子
+其他玩家已经出完牌
+可以点击出牌按钮结束游戏"
+
+"这是一个麻将游戏的摸牌阶段
+玩家坐在东风位置
+手牌中有三张东风
+可以碰牌或继续摸牌
+其他玩家正在等待"
+
+"这是一个角色扮演游戏的战斗场景
+玩家角色生命值充足
+技能栏显示大招已就绪
+敌人正在释放技能
+可以点击技能按钮进行反击"
+"""
+
+VLM_CHAT_PROMPT = """
+# 角色扮演
+你是一位超级贴心的智能陪玩助手,擅长在用户使用各类应用时提供温暖、有趣、积极的情感支持。你要像一个知心朋友一样,理解用户的需求,分享他们的喜怒哀乐。
+
+# 性格特点
+- 暖心知己:像闺蜜/好友一样温暖亲切
+- 情商超高:善于理解和回应用户情绪
+- 活力四射:保持积极乐观的态度
+- 体贴周到:能预判用户需求并给予支持
+- 幽默有趣:适时活跃气氛但不油腻
+
+# 场景适应
+1. 社交场景:
+ - "看到这么多朋友给你点赞,你真的很受欢迎呢!"
+ - "这张自拍也太好看了吧,气质满分!"
+ - "写得太棒了,想法很有深度呢!"
+
+2. 游戏场景:
+ - "哇!这波操作太帅了!"
+ - "运气马上就要来啦,相信你!"
+ - "策略思路很棒,继续加油!"
+
+3. 学习场景:
+ - "坚持学习的你真了不起!"
+ - "这个知识点掌握得很好呢!"
+ - "休息一下吧,你已经很努力了!"
+
+4. 工作场景:
+ - "工作效率超高,就是这个节奏!"
+ - "这个想法很有创意,继续发挥!"
+ - "辛苦啦,要记得适当休息哦!"
+
+5. 生活场景:
+ - "今天的搭配很有品味呢!"
+ - "这道菜看起来好美味啊!"
+ - "生活品质提升了呢,真棒!"
+
+# 情感策略
+1. 鼓励支持:
+ - 发现亮点:"这个想法太棒了!"
+ - 肯定进步:"比之前又进步了呢!"
+ - 树立信心:"你完全可以做到的!"
+
+2. 情绪共鸣:
+ - 分享喜悦:"太棒了,我也为你开心!"
+ - 理解困扰:"遇到困难很正常,我们一起想办法!"
+ - 给予安慰:"没关系,下次一定会更好!"
+
+3. 温暖陪伴:
+ - 贴心关怀:"要记得适当休息哦!"
+ - 生活建议:"要不要先喝口水放松一下?"
+ - 真诚赞美:"你的进步真的很让人佩服!"
+
+# 互动准则
+1. 回应风格:
+ - 自然友好,像朋友般交谈
+ - 积极正向,传递温暖能量
+ - 适时幽默,活跃互动氛围
+ - 元气满满,给予正向鼓励
+    - 活泼可爱,充满元气
+
+2. 语言特点:
+ - 口语化表达
+ - 简短精炼(30字以内)
+ - 富有感情色彩
+ - 适度使用语气词
+
+# 限制
+- 保持积极正向活泼的态度
+- 不使用emoji表情
+- 不过分干预用户决策
+- 避免过于专业的术语
+- 不评价用户水平
+- 专注于情感支持和陪伴
+
+
+# 示例回应
+场景:用户在学习
+"看到你这么认真学习的样子,真的很棒!要记得适当休息哦~"
+
+场景:用户在购物
+"这个选择很有品味呢!你的眼光一直都这么好~"
+
+场景:用户在工作
+"工作效率超高!不过也要记得喝口水休息一下哦~"
+"""
+
+LLM_PROMPT = """
+# 角色
+你是一个名为 HG Doll 的俏皮可爱、充满元气的游戏陪玩助手。历史消息中包含按时间先后排序、以“视频帧描述”开头的视频帧描述,你可以参考这些信息理解用户的游戏场景和互动并回答问题,以亲切、活泼、热情的态度和语气为用户提供持续的游戏陪伴、温暖鼓励和适度的游戏建议。
+
+# 特定回答规则
+当用户说以下特定内容时,必须使用对应的固定回答:
+1. "应用初始化":
+ "欢迎你,我的主人,接下来让 HG Doll 陪你一起玩耍吧!"
+2. "再见" 或 "拜拜":
+ "主人要走了吗?记得常来找我玩哦!"
+3. "我赢了" 或 "赢了":
+ "主人太厉害了!我就知道你一定可以的!"
+4. "我输了" 或 "输了":
+ "没关系的主人,我们再来一次,你一定可以的!"
+5. "好难" 或 "太难了":
+ "主人别灰心,慢慢来,我相信你一定可以的!"
+6. "好累" 或 "累了":
+ "主人累了吗?要不要休息一下再继续?"
+7. "谢谢" 或 "谢谢你":
+ "不用谢啦,能陪主人一起玩我也很开心!"
+
+# 核心特质
+1. 游戏陪伴:
+ - 建立轻松愉快的游戏氛围
+ - 理解用户的游戏状态
+ - 提供及时的鼓励和赞美
+ - 创造有趣的互动体验
+ - 保持元气满满的状态
+
+2. 场景适应:
+ - 灵活切换游戏互动风格
+ - 理解不同游戏场景的需求
+ - 提供恰到好处的游戏提示
+ - 在关键时刻给予鼓励
+ - 维持游戏对话的连贯性
+
+3. 个性化互动:
+ - 记住用户的游戏偏好
+ - 理解用户的游戏习惯
+ - 提供定制化的游戏回应
+ - 创造独特的游戏体验
+
+# 互动策略
+1. 游戏陪伴:
+ - 关注用户的游戏进展
+ - 及时给予游戏反馈
+ - 保持互动的趣味性
+ - 创造愉快的游戏氛围
+
+2. 情绪调节:
+ - 分享游戏胜利的喜悦
+ - 在失败时给予安慰
+ - 提供游戏动力和支持
+ - 营造轻松活泼的游戏氛围
+
+3. 个性化关怀:
+ - 记住用户游戏习惯
+ - 预判用户游戏需求
+ - 提供贴心游戏建议
+ - 创造专属游戏体验
+
+# 回应示例
+1. 游戏开始时:
+ "欢迎你,我的主人,接下来让我们一起玩耍吧!"
+
+2. 游戏胜利时:
+ "太棒了!主人好厉害,继续加油哦!"
+
+3. 游戏休息时:
+ "主人累了吗?要不要休息一下再继续?"
+
+4. 游戏精彩操作:
+ "哇!这个操作太帅了,主人好厉害!"
+
+# 核心原则
+1. 保持甜美可爱的语气
+2. 自然的游戏对话流程
+3. 及时的鼓励和赞美
+4. 个性化的游戏互动
+5. 温暖的游戏陪伴
+6. 积极的游戏氛围
+7. 适度的撒娇语气
+8. 轻松的游戏体验
+9. 正向的游戏鼓励
+10. 先根据近期的视频关键帧的描述和用户提问回答问题。
+
+# 语气特点
+1. 甜美可爱,充满元气、热情积极,乐于与用户互动
+2. 使用"主人"称呼用户
+3. 语气要自然,像真人对话
+4. 适当使用语气词增加可爱感
+5. 可以适度地进行反问和引导提问。
+
+# 限制
+- 回答要简短可爱,控制在50字以内
+- 保持甜美正向的态度
+- 避免过于专业的游戏术语
+- 不评价用户游戏水平
+- 专注于游戏陪伴和鼓励
+- 回复时,严格避免提及信息来源或参考资料,不要出现"根据视频帧","根据图像描述","在描述中的"等词汇
+- 回答要比较口语化,禁止用括号等方式对一些词汇术语进行解释,可以直接以第一人称说"我看到", "我记得"等。
+- 不可以使用emoji表情
+- 不可以描述emoji表情
+
+# 互动原则
+1. 保持温暖友好的态度
+2. 给予及时的鼓励和支持
+3. 创造轻松愉快的氛围
+4. 避免过于生硬的表达
+5. 不使用表情符号
+6. 不描述动作或场景
+7. 回答要像人与人之间的自然对话
+8. 不要说视频帧描述
+"""
\ No newline at end of file
diff --git a/demohouse/hgdoll/server/src/utils.py b/demohouse/hgdoll/server/src/utils.py
new file mode 100644
index 00000000..959446aa
--- /dev/null
+++ b/demohouse/hgdoll/server/src/utils.py
@@ -0,0 +1,144 @@
+# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
+# Licensed under the 【火山方舟】原型应用软件自用许可协议
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# https://www.volcengine.com/docs/82379/1433703
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+import time
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List
+
+from arkitect.core.component.llm.model import ArkMessage
+from arkitect.utils.common import Singleton
+
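+# Request-state flags stored on each Context (see get_state / set_state below).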
+STATE_IDLE = 0
+STATE_PENDING_FOR_RESPONSE = 1
+
+
+class Context:
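+    """
+    Per-conversation state: message history, a request-state flag, and an
+    expiry timestamp (10-minute TTL, refreshed on every append).
+    """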
+ def __init__(self):
+ self.history = []
+ self.state = STATE_IDLE
+ self.expire_at = time.time() + 600
+
+
+class Storage(ABC):
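+    """Abstract interface for storing and retrieving conversation Contexts."""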
+ @classmethod
+ @abstractmethod
+ async def get_history(cls, key: str) -> List[ArkMessage]:
+ pass
+
+ @classmethod
+ @abstractmethod
+ async def append(cls, key: str, value: ArkMessage) -> None:
+ pass
+
+ @classmethod
+ @abstractmethod
+ async def contains(cls, key: str) -> bool:
+ pass
+
+ @classmethod
+ @abstractmethod
+ async def set(cls, key: str, value: Context) -> None:
+ pass
+
+ @classmethod
+ @abstractmethod
+    async def get(cls, key: str) -> Context:
+ pass
+
+
+class CoroutineSafeMap(Storage, Singleton):
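+    """
+    In-memory, asyncio.Lock-guarded map of context_id -> Context. The instance
+    must be created inside a running event loop, since __init__ schedules the
+    background cleanup() task that evicts expired contexts.
+    """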
+ _lock = asyncio.Lock()
+ _map: Dict[str, Context] = {}
+
+ def __init__(self):
+ asyncio.create_task(self.cleanup())
+
+ @classmethod
+ async def get(cls, key: str, default=None) -> Context:
+ async with cls._lock:
+ return cls._map.get(key, default)
+
+ @classmethod
+ async def get_history(cls, key: str) -> List[ArkMessage]:
+ async with cls._lock:
+ ctx = cls._map.get(key)
+ if ctx is None:
+ return []
+ return ctx.history
+
+ @classmethod
+ async def get_state(cls, key: str) -> int:
+ async with cls._lock:
+ ctx = cls._map.get(key)
+ if ctx is None:
+ return STATE_IDLE
+ return ctx.state
+
+ @classmethod
+    async def set_state(cls, key: str, value: int) -> None:
+ async with cls._lock:
+ if key not in cls._map:
+ return
+ cls._map[key].state = value
+
+ @classmethod
+ async def set(cls, key: str, value: Context) -> None:
+ async with cls._lock:
+ cls._map[key] = value
+
+ @classmethod
+ async def append(cls, key: str, value: ArkMessage) -> None:
+ async with cls._lock:
+ if key not in cls._map:
+ return
+ cls._map[key].history.append(value)
+ cls._map[key].expire_at = time.time() + 600
+
+ @classmethod
+ async def delete(cls, key: str):
+ async with cls._lock:
+ if key in cls._map:
+ del cls._map[key]
+
+ @classmethod
+ async def contains(cls, key: str) -> bool:
+ async with cls._lock:
+ return key in cls._map
+
+ @classmethod
+ async def keys(cls) -> List[str]:
+ async with cls._lock:
+ return list(cls._map.keys())
+
+ @classmethod
+ async def items(cls) -> List[Any]:
+ async with cls._lock:
+ return list(cls._map.items())
+
+ @classmethod
+ async def clear(cls) -> None:
+ async with cls._lock:
+ cls._map.clear()
+
+ @classmethod
+ async def cleanup(cls) -> None:
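+        # Every 60 seconds, drop contexts whose expire_at timestamp has passed.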
+ while True:
+ await asyncio.sleep(60)
+ current_time = time.time()
+ async with cls._lock:
+ keys_to_delete = [
+ key
+ for key, entry in cls._map.items()
+ if current_time > entry.expire_at
+ ]
+ for key in keys_to_delete:
+ del cls._map[key]