diff --git a/demohouse/hgdoll/.gitignore b/demohouse/hgdoll/.gitignore new file mode 100644 index 00000000..af56f610 --- /dev/null +++ b/demohouse/hgdoll/.gitignore @@ -0,0 +1,2 @@ +.DS_Store +.idea/ diff --git a/demohouse/hgdoll/LICENSE b/demohouse/hgdoll/LICENSE new file mode 100644 index 00000000..c5ca71e8 --- /dev/null +++ b/demohouse/hgdoll/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 削微寒 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/demohouse/hgdoll/README.md b/demohouse/hgdoll/README.md new file mode 100644 index 00000000..29c268b4 --- /dev/null +++ b/demohouse/hgdoll/README.md @@ -0,0 +1,92 @@ +

+ +
中文 | English +
HGDoll 是一款 AI 手机陪玩应用。 +

+ +这是一款完全开源的 AI 手机陪玩应用。在你游戏时,HGDoll 可实时看到你的游戏画面,陪你聊天、为你加油鼓劲,带来有趣的陪伴体验。它基于豆包大模型和火山方舟 Arkitect 构建,包含[安卓客户端](android/README.md)(Kotlin)和[后端服务](server/README.md)(Python)两部分,支持本地运行轻松上手。 + +https://github.com/user-attachments/assets/704d7f2a-3206-45f2-8760-d9cf9577ca7c + +目前,HGDoll 还只是一个“小玩具”,仍有许多 Bug 和改进空间,我会持续更新和完善,同时欢迎大家上手体验,一起贡献代码。 + +## 架构图 + +```mermaid +graph TD + User((用户)) --> Android[安卓客户端] + + subgraph Client[客户端] + Android --> Speech[语音识别] + Android --> Screen[屏幕录制] + Speech --> SpeechAPI[Doubao-流式语音识别] + SpeechAPI --> TextResult[语音转文字结果] + Screen --> ScreenCapture[定时截图] + AudioPlay[语音播放] --> Android + end + + subgraph Server[Server 端 Arkitect] + TextResult --> Backend[后端服务] + ScreenCapture --> Backend + Backend --> TempMemory[临时记忆体] + TempMemory --> Context[会话上下文] + Context --> CTX1[Context-id-1] + Context --> CTX2[Context-id-2] + Context --> CTX3[Context-id-3] + Context --> CTXN[...] + Context --> Prompt[Prompt 生成] + ImageResult[截图识别结果] --> TempMemory + AudioResult[语音合成结果] --> AudioPlay + end + + subgraph AI[AI 模型服务] + Backend --> VLM[Doubao-vision-pro-32k] + VLM --> ImageResult + Prompt --> LLM[Doubao-pro-32k] + LLM --> TTS[Doubao-语音合成] + TTS --> AudioResult + end + + style User fill:#f9f,stroke:#333,stroke-width:2px + style Client fill:#e4f7fb,stroke:#333,stroke-width:1px + style Server fill:#e6ffe6,stroke:#333,stroke-width:1px + style AI fill:#e6e6ff,stroke:#333,stroke-width:1px + style Android fill:#fff,stroke:#333,stroke-width:1px + style Backend fill:#fff,stroke:#333,stroke-width:1px + style VLM fill:#fff,stroke:#333,stroke-width:1px + style LLM fill:#fff,stroke:#333,stroke-width:1px + style TTS fill:#fff,stroke:#333,stroke-width:1px +``` + + +## 快速开始 + +客户端、后端的安装和启动步骤都在对应目录的 README 中。运行前需要配置必要的 API Key,申请方法请[点击查看](docs/key.md)。 + +### 项目结构 + +``` +HGDoll/ +├── android/ # 安卓客户端 +├── server/ # 后端服务 +└── docs/ # 项目文档 +``` + +### 技术栈 + +#### 安卓客户端 +- Kotlin +- Jetpack Compose +- Gradle Kotlin DSL +- AndroidX + +#### 后端服务 +- Python 3.8-3.12 +- FastAPI +- 火山方舟 Arkitect 
SDK +- Uvicorn + + +## 许可证 + +本项目采用 MIT 许可证,详见 [LICENSE](LICENSE) 文件。 diff --git a/demohouse/hgdoll/android/.gitignore b/demohouse/hgdoll/android/.gitignore new file mode 100644 index 00000000..093721fc --- /dev/null +++ b/demohouse/hgdoll/android/.gitignore @@ -0,0 +1,29 @@ +# Compiled class file +*.class + +# Log file +*.log + +# BlueJ files +*.ctxt + +# Mobile Tools for Java (J2ME) +.mtj.tmp/ + +# Package Files # +*.jar +*.war +*.nar +*.ear +*.zip +*.tar.gz +*.rar + +# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml +hs_err_pid* +replay_pid* + +# Kotlin Gradle plugin data, see https://kotlinlang.org/docs/whatsnew20.html#new-directory-for-kotlin-data-in-gradle-projects +.kotlin/ +.DS_Store +.gradle/ diff --git a/demohouse/hgdoll/android/README.md b/demohouse/hgdoll/android/README.md new file mode 100644 index 00000000..dbdbb649 --- /dev/null +++ b/demohouse/hgdoll/android/README.md @@ -0,0 +1,60 @@ +# HGDoll 安卓客户端 + +![](show.png) + +HGDoll 是一个基于 Kotlin 和 Jetpack Compose 开发的安卓客户端应用,它采用豆包语音大模型服务,[点击查看](../docs/key.md)如何申请运行所需的 API Key。 + +## 环境要求 + +- Android Studio Hedgehog | 2023.1.1 或更高版本 +- JDK 17 或更高版本 +- Gradle 8.0 或更高版本 +- Android SDK 35 (Android 15) 或更高版本 + +## 快速开始 + +### 1. 克隆代码库 + +```bash +git clone https://github.com/521xueweihan/HGDoll.git +cd HGDoll/android/ +``` + +### 2. 配置开发环境 + +1. 打开 Android Studio +2. 选择 "Open an existing project" +3. 选择克隆下来的 `android` 目录 +4. 等待 Gradle 同步完成 + +### 3. 运行应用 + +1. 连接 Android 设备或启动模拟器 +2. 点击 Android Studio 工具栏中的 "Run" 按钮(绿色三角形) +3. 选择目标设备 +4. 等待应用安装和启动 +5. 在 App 中输入对应的 ASR Token、ASR App ID、本地 Server IP + + +## 项目结构 + +- `app/` - 主应用模块 +- `gradle/` - Gradle 包装器文件 +- `build.gradle.kts` - 项目级构建配置 +- `settings.gradle.kts` - 项目设置文件 +- `gradle.properties` - Gradle 属性配置 + +## 技术栈 + +- Kotlin - 主要编程语言 +- Jetpack Compose - 现代 UI 工具包 +- Gradle Kotlin DSL - 构建脚本 +- AndroidX - Android 扩展库 + +## 贡献指南 + +1. Fork 项目 +2. 创建特性分支 (`git checkout -b feature/AmazingFeature`) +3. 
提交更改 (`git commit -m 'Add some AmazingFeature'`) +4. 推送到分支 (`git push origin feature/AmazingFeature`) +5. 创建 Pull Request \ No newline at end of file diff --git a/demohouse/hgdoll/android/app/.gitignore b/demohouse/hgdoll/android/app/.gitignore new file mode 100644 index 00000000..42afabfd --- /dev/null +++ b/demohouse/hgdoll/android/app/.gitignore @@ -0,0 +1 @@ +/build \ No newline at end of file diff --git a/demohouse/hgdoll/android/app/build.gradle.kts b/demohouse/hgdoll/android/app/build.gradle.kts new file mode 100644 index 00000000..fcea8cb9 --- /dev/null +++ b/demohouse/hgdoll/android/app/build.gradle.kts @@ -0,0 +1,73 @@ +plugins { + alias(libs.plugins.android.application) + alias(libs.plugins.kotlin.android) + alias(libs.plugins.kotlin.compose) +} + +android { + namespace = "com.example.android" + compileSdk = 35 + + defaultConfig { + applicationId = "com.example.android" + minSdk = 24 + targetSdk = 35 + versionCode = 1 + versionName = "1.0" + + testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner" + } + + buildTypes { + release { + isMinifyEnabled = false + proguardFiles( + getDefaultProguardFile("proguard-android-optimize.txt"), + "proguard-rules.pro" + ) + } + } + compileOptions { + sourceCompatibility = JavaVersion.VERSION_11 + targetCompatibility = JavaVersion.VERSION_11 + } + kotlinOptions { + jvmTarget = "11" + } + buildFeatures { + compose = true + } +} + +dependencies { + implementation(libs.androidx.core.ktx) + implementation(libs.androidx.lifecycle.runtime.ktx) + implementation(libs.androidx.activity.compose) + implementation(platform(libs.androidx.compose.bom)) + implementation(libs.androidx.ui) + implementation(libs.androidx.ui.graphics) + implementation(libs.androidx.ui.tooling.preview) + implementation(libs.androidx.material3) + + // 添加屏幕录制相关依赖 + implementation("androidx.media:media:1.6.0") + implementation("com.github.bumptech.glide:glide:4.12.0") + + // 添加音频录制相关依赖 + 
implementation("com.google.android.exoplayer:exoplayer-core:2.19.1") + implementation("com.google.android.exoplayer:exoplayer-ui:2.19.1") + + // 网络请求相关依赖 + implementation("com.squareup.retrofit2:retrofit:2.9.0") + implementation("com.squareup.retrofit2:converter-gson:2.9.0") + implementation("com.squareup.okhttp3:okhttp:4.11.0") + implementation("com.squareup.okhttp3:logging-interceptor:4.11.0") + + testImplementation(libs.junit) + androidTestImplementation(libs.androidx.junit) + androidTestImplementation(libs.androidx.espresso.core) + androidTestImplementation(platform(libs.androidx.compose.bom)) + androidTestImplementation(libs.androidx.ui.test.junit4) + debugImplementation(libs.androidx.ui.tooling) + debugImplementation(libs.androidx.ui.test.manifest) +} \ No newline at end of file diff --git a/demohouse/hgdoll/android/app/proguard-rules.pro b/demohouse/hgdoll/android/app/proguard-rules.pro new file mode 100644 index 00000000..481bb434 --- /dev/null +++ b/demohouse/hgdoll/android/app/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. +# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. 
+#-renamesourcefileattribute SourceFile \ No newline at end of file diff --git a/demohouse/hgdoll/android/app/src/main/AndroidManifest.xml b/demohouse/hgdoll/android/app/src/main/AndroidManifest.xml new file mode 100644 index 00000000..c09ec71c --- /dev/null +++ b/demohouse/hgdoll/android/app/src/main/AndroidManifest.xml @@ -0,0 +1,48 @@ + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/demohouse/hgdoll/android/app/src/main/ic_launcher-playstore.png b/demohouse/hgdoll/android/app/src/main/ic_launcher-playstore.png new file mode 100644 index 00000000..62490683 Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/ic_launcher-playstore.png differ diff --git a/demohouse/hgdoll/android/app/src/main/java/com/example/android/MainActivity.kt b/demohouse/hgdoll/android/app/src/main/java/com/example/android/MainActivity.kt new file mode 100644 index 00000000..35ed5e50 --- /dev/null +++ b/demohouse/hgdoll/android/app/src/main/java/com/example/android/MainActivity.kt @@ -0,0 +1,341 @@ +package com.example.android + +import android.content.BroadcastReceiver +import android.content.Context +import android.content.Intent +import android.content.IntentFilter +import android.media.projection.MediaProjectionManager +import android.os.Build +import android.os.Bundle +import android.util.Log +import androidx.activity.ComponentActivity +import androidx.activity.compose.setContent +import androidx.activity.result.contract.ActivityResultContracts +import androidx.compose.foundation.layout.* +import androidx.compose.material3.* +import androidx.compose.runtime.* +import androidx.compose.ui.Alignment +import androidx.compose.ui.Modifier +import androidx.compose.ui.graphics.Brush +import androidx.compose.ui.graphics.Color +import androidx.compose.ui.text.font.FontWeight +import androidx.compose.ui.text.input.PasswordVisualTransformation +import androidx.compose.ui.text.style.TextAlign +import androidx.compose.ui.unit.dp +import 
androidx.compose.ui.unit.sp +import androidx.core.content.ContextCompat +import com.example.android.service.ScreenshotService +import com.example.android.service.AsrService +import com.example.android.ui.theme.AndroidTheme +import com.example.android.utils.PermissionUtils +import kotlinx.coroutines.* +import java.util.UUID + +class MainActivity : ComponentActivity() { + companion object { + private const val TAG = "MainActivity" + const val EXTRA_RESULT_CODE = "result_code" + const val EXTRA_RESULT_DATA = "result_data" + } + private var isRecording = false + private var showPermissionDialog = mutableStateOf(false) + private var permissionDialogMessage = mutableStateOf("") + private var showSettingsButton = mutableStateOf(false) + private val scope = CoroutineScope(Dispatchers.Main + Job()) + private var isAsrRecording = false + private var asrResult = "" + private var asrAccessToken = "" + private var asrAppId = "" + private var server_ip = "" + private var context_id = "" + private var isRequestingAudioPermission = false + private var isRequestingScreenPermission = false + + private val mediaProjectionLauncher = registerForActivityResult( + ActivityResultContracts.StartActivityForResult() + ) { result -> + isRequestingScreenPermission = false + if (result.resultCode == RESULT_OK) { + Log.d(TAG, "Screen capture result: ${result.resultCode}") + startRecording(result.resultCode, result.data) + // 屏幕录制权限已授予,启动 ASR 服务 + startAsr() + isRecording = true + } else { + // 屏幕录制权限被拒绝 + showPermissionDialog.value = true + permissionDialogMessage.value = "需要屏幕录制权限才能使用截图功能" + showSettingsButton.value = true + } + } + + + private val asrReceiver = object : BroadcastReceiver() { + override fun onReceive(context: Context?, intent: Intent?) { + if (intent?.action == "com.example.android.ASR_RESULT") { + val text = intent.getStringExtra("text") ?: "" + asrResult = text + } + } + } + + override fun onCreate(savedInstanceState: Bundle?) 
{ + super.onCreate(savedInstanceState) + setContent { + AndroidTheme { + Surface( + modifier = Modifier.fillMaxSize(), + color = MaterialTheme.colorScheme.background + ) { + var isRecording by remember { mutableStateOf(false) } + var asrResult by remember { mutableStateOf("") } + var asrAccessToken by remember { mutableStateOf("") } + var asrAppId by remember { mutableStateOf("") } + var server_ip by remember { mutableStateOf("") } + + // 同步状态变量 + LaunchedEffect(asrAccessToken, asrAppId, server_ip) { + this@MainActivity.asrAccessToken = asrAccessToken + this@MainActivity.asrAppId = asrAppId + this@MainActivity.server_ip = server_ip + } + + Column( + modifier = Modifier + .fillMaxSize() + .padding(16.dp), + horizontalAlignment = Alignment.CenterHorizontally, + verticalArrangement = Arrangement.Center + ) { + Text( + text = "HGDoll", + fontSize = 32.sp, + style = MaterialTheme.typography.headlineLarge.copy( + brush = Brush.linearGradient( + colors = listOf( + Color(0xFFE91E63), // 粉色 + Color(0xFF2196F3), // 蓝色 + Color(0xFF4CAF50), // 绿色 + Color(0xFFFFC107) // 黄色 + ) + ) + ), + fontWeight = FontWeight.Bold, + textAlign = TextAlign.Center, + modifier = Modifier.padding(bottom = 8.dp) + ) + + Text( + text = "基于豆包系列大模型打造的 AI 开源应用", + fontSize = 16.sp, + color = Color.Gray, + textAlign = TextAlign.Center, + modifier = Modifier.padding(bottom = 32.dp) + ) + Button( + onClick = { + if (isRecording) { + Log.d(TAG, "Stopping recording with context_id: $context_id") + stopRecording() + stopAsr() + context_id = "" + isRecording = false + } else { + context_id = if (context_id.isEmpty()) { + UUID.randomUUID().toString() + } else { + context_id + } + Log.d(TAG, "Generated new context_id: $context_id") + // 有权限,启动服务 + startRecording() + startAsr() + isRecording = true + } + } + ) { + Text(if (isRecording) "停止录制" else "开始录制") + } + + Spacer(modifier = Modifier.height(16.dp)) + + OutlinedTextField( + value = asrAccessToken, + onValueChange = { asrAccessToken = it }, + label = { 
Text("ASR Access Token") }, + modifier = Modifier.fillMaxWidth(), + singleLine = true, + visualTransformation = PasswordVisualTransformation() + ) + + Spacer(modifier = Modifier.height(8.dp)) + + OutlinedTextField( + value = asrAppId, + onValueChange = { asrAppId = it }, + label = { Text("ASR App ID") }, + modifier = Modifier.fillMaxWidth(), + singleLine = true, + visualTransformation = PasswordVisualTransformation() + ) + + Spacer(modifier = Modifier.height(8.dp)) + + OutlinedTextField( + value = server_ip, + onValueChange = { server_ip = it }, + label = { Text("Server IP") }, + modifier = Modifier.fillMaxWidth(), + singleLine = true + ) + + if (asrResult.isNotEmpty()) { + Spacer(modifier = Modifier.height(16.dp)) + Text( + text = "识别结果:$asrResult", + modifier = Modifier.padding(16.dp) + ) + } + } + } + } + } + + // Register via ContextCompat on every API level: onDestroy always unregisters, which throws if the receiver was never registered (previously the case below API 33) + ContextCompat.registerReceiver( + this, + asrReceiver, + IntentFilter("com.example.android.ASR_RESULT"), + ContextCompat.RECEIVER_NOT_EXPORTED + ) + } + + + override fun onResume() { + super.onResume() + if (showSettingsButton.value) { + showSettingsButton.value = false + checkAndRequestPermissions() + } + } + + private fun checkAndRequestPermissions() { + if (!PermissionUtils.hasAllPermissions(this)) { + isRequestingAudioPermission = true + requestPermissions(arrayOf(android.Manifest.permission.RECORD_AUDIO), 1) + return + } + // Audio permission already granted; now ask for screen-capture permission + requestScreenCapturePermission() + } + + private fun requestScreenCapturePermission() { + try { + val projectionManager = getSystemService(MediaProjectionManager::class.java) + val captureIntent = projectionManager.createScreenCaptureIntent() + Log.d(TAG, "Launching screen capture intent...") + isRequestingScreenPermission = true + mediaProjectionLauncher.launch(captureIntent) + } catch (e: Exception) { + Log.e(TAG, "Error starting capture", e) + showPermissionDialog.value = true + permissionDialogMessage.value = "启动屏幕截图失败,请重试" + showSettingsButton.value = false + } + } + 
+ private fun startRecording(resultCode: Int? = null, data: Intent? = null) { + Log.d(TAG, "Starting recording with context_id: $context_id") + Log.d(TAG, "Starting work...") + + if (resultCode == null) { + // 直接调用权限检查 + checkAndRequestPermissions() + return + } + + try { + // 启动截图服务 + val screenshotIntent = Intent(this, ScreenshotService::class.java).apply { + action = ScreenshotService.ACTION_START + putExtra(EXTRA_RESULT_CODE, resultCode) + putExtra(EXTRA_RESULT_DATA, data) + putExtra(ScreenshotService.SERVER_IP, server_ip) + putExtra(ScreenshotService.CONTEXT_ID, context_id) + } + ContextCompat.startForegroundService(this, screenshotIntent) + Log.d(TAG, "Screenshot service started") + } catch (e: Exception) { + Log.e(TAG, "Error starting capture", e) + showPermissionDialog.value = true + permissionDialogMessage.value = "启动录制失败,请重试" + showSettingsButton.value = false + } + } + + override fun onRequestPermissionsResult(requestCode: Int, permissions: Array, grantResults: IntArray) { + super.onRequestPermissionsResult(requestCode, permissions, grantResults) + when (requestCode) { + 1 -> { + isRequestingAudioPermission = false + if (grantResults.isNotEmpty() && grantResults[0] == android.content.pm.PackageManager.PERMISSION_GRANTED) { + // 录音权限已授予,申请屏幕录制权限 + requestScreenCapturePermission() + } else { + // 权限被拒绝 + showPermissionDialog.value = true + permissionDialogMessage.value = "需要录音权限才能使用语音识别功能" + showSettingsButton.value = true + } + } + } + } + + private fun stopRecording() { + Log.d(TAG, "Stopping capture...") + isRecording = false + + try { + // 停止截图服务 + val screenshotIntent = Intent(this, ScreenshotService::class.java).apply { + action = ScreenshotService.ACTION_STOP + } + startService(screenshotIntent) + Log.d(TAG, "Screenshot service stopped") + } catch (e: Exception) { + Log.e(TAG, "Error stopping capture", e) + showPermissionDialog.value = true + permissionDialogMessage.value = "停止录制失败,请重试" + showSettingsButton.value = false + } + } + + + private fun 
startAsr() { + Log.d(TAG, "Starting ASR with context_id: $context_id") + // 不再检查权限,因为权限检查已经在 checkAndRequestPermissions 中处理 + val intent = Intent(this, AsrService::class.java).apply { + action = AsrService.ACTION_START + putExtra(AsrService.EXTRA_ACCESS_TOKEN, asrAccessToken) + putExtra(AsrService.EXTRA_APP_ID, asrAppId) + putExtra(AsrService.SERVER_IP, server_ip) + putExtra(AsrService.CONTEXT_ID, context_id) + } + ContextCompat.startForegroundService(this, intent) + isAsrRecording = true + } + + private fun stopAsr() { + val intent = Intent(this, AsrService::class.java).apply { + action = AsrService.ACTION_STOP + } + startService(intent) + isAsrRecording = false + } + + override fun onDestroy() { + super.onDestroy() + scope.cancel() + unregisterReceiver(asrReceiver) + } +} diff --git a/demohouse/hgdoll/android/app/src/main/java/com/example/android/service/AsrService.kt b/demohouse/hgdoll/android/app/src/main/java/com/example/android/service/AsrService.kt new file mode 100644 index 00000000..b1098563 --- /dev/null +++ b/demohouse/hgdoll/android/app/src/main/java/com/example/android/service/AsrService.kt @@ -0,0 +1,1158 @@ +package com.example.android.service + +import android.app.Notification +import android.app.NotificationChannel +import android.app.NotificationManager +import android.app.Service +import android.content.Intent +import android.media.AudioFormat +import android.media.AudioRecord +import android.media.MediaPlayer +import android.media.MediaRecorder +import android.os.Build +import android.os.IBinder +import android.util.Log +import androidx.core.app.NotificationCompat +import okhttp3.OkHttpClient +import okhttp3.Request +import okhttp3.Response +import okhttp3.WebSocket +import okhttp3.WebSocketListener +import org.json.JSONObject +import java.util.concurrent.TimeUnit +import java.util.concurrent.atomic.AtomicBoolean +import okio.ByteString +import java.util.zip.GZIPOutputStream +import java.util.zip.GZIPInputStream +import com.google.gson.Gson 
+import android.util.Base64 +import okhttp3.MediaType.Companion.toMediaTypeOrNull +import okhttp3.RequestBody.Companion.toRequestBody +import org.json.JSONArray +import java.io.ByteArrayOutputStream +import java.io.ByteArrayInputStream +import java.io.File +import java.io.FileOutputStream +import android.media.AudioManager +import android.media.AudioAttributes +import android.content.Context +import android.media.AudioFocusRequest +import java.net.SocketTimeoutException +import kotlinx.coroutines.GlobalScope +import kotlinx.coroutines.launch +import kotlinx.coroutines.withContext + +class AsrService : Service() { + companion object { + const val ACTION_START = "com.example.android.action.START_ASR" + const val ACTION_STOP = "com.example.android.action.STOP_ASR" + const val EXTRA_ACCESS_TOKEN = "extra_access_token" + const val EXTRA_APP_ID = "extra_app_id" + const val SERVER_IP = "server_ip" + const val CONTEXT_ID = "context_id" + private const val NOTIFICATION_ID = 3 + private const val CHANNEL_ID = "asr_service_channel" + private const val SAMPLE_RATE = 16000 + private const val CHANNEL_CONFIG = AudioFormat.CHANNEL_IN_MONO + private const val AUDIO_FORMAT = AudioFormat.ENCODING_PCM_16BIT + private const val ASR_URL = "wss://openspeech.bytedance.com/api/v3/sauc/bigmodel" + private const val UPLOAD_URL = "http://%s/api/v3/bots/chat/completions" + private const val ASR_RESOURCE_ID = "volc.bigasr.sauc.duration" + private const val CONNECT_ID = "67ee89ba-7050-4c04-a3d7-ac61a63499b3" + private const val BUFFER_SIZE = 16000 // NOTE(review): 16 kHz * 2 bytes * 0.2 s is 6400, not 16000; 16000 bytes of 16-bit mono is 500 ms - confirm the intended chunk length + private const val AUDIO_FORMAT_CONFIG = "pcm" + private const val AUDIO_BITS = 16 + private const val AUDIO_CHANNELS = 1 + private const val MAX_RETRIES = 3 + private const val INIT_TIMEOUT = 10000L // raised to 10 seconds + private const val FORCE_DEFINITE_DURATION = 10000L // 10s + private const val SEND_INTERVAL = 1000L // increased send interval + private const val RECONNECT_DELAY = 5000L // raised to 5 seconds + private const val 
PING_INTERVAL = 30000L // 30秒 + private const val PONG_TIMEOUT = 15000L // 15秒 + private const val MAX_PING_FAILURES = 3 // 最大ping失败次数 + + // 协议相关常量 + private const val PROTOCOL_VERSION = 0b0001 + private const val DEFAULT_HEADER_SIZE = 0b0001 + private const val FULL_CLIENT_REQUEST = 0b0001 + private const val AUDIO_ONLY_REQUEST = 0b0010 + private const val FULL_SERVER_RESPONSE = 0b1001 + private const val SERVER_ACK = 0b1011 + private const val POS_SEQUENCE = 0b0001 + private const val NEG_WITH_SEQUENCE = 0b0011 + private const val JSON = 0b0001 + private const val GZIP = 0b0001 + private const val RAW = 0x0002 + } + + + private var audioRecord: AudioRecord? = null + private var recordingThread: Thread? = null + private val isRecording = AtomicBoolean(false) + private var isInitialized = AtomicBoolean(false) + private var webSocket: WebSocket? = null + private var accessToken = "" + private var appId = "" + private var serverIp = "" + private var contextId = "" + + private var sequence = 0 + private var retryCount = 0 + private var lastVoiceTime = 0L // 上次检测到语音的时间 + private var silenceDuration = 0L // 静音持续时间 + private val SILENCE_THRESHOLD = 300 // 降低静音阈值 + private val MIN_SILENCE_DURATION = 3000L // 增加最小静音持续时间 + private var isAlreadyDefinite = false // 是否已经收到过 definite result + private var lastAsrTime = 0L // 上次收到识别结果的时间 + private var currentText = "" // 当前的识别文本 + private var startTime = 0L // 开始时间 + private var isSpeaking = false // 是否正在说话 + private var voiceBuffer = ByteArrayOutputStream() // 存储语音数据 + private var LAST_RESPONSE_TIME = 0L + private var isProcessingRequest = false + private var isPlayingAudio = false + private var mediaPlayer: MediaPlayer? 
= null + private val tempAudioDir: File by lazy { + File(cacheDir, "temp_audio").apply { + if (!exists()) { + mkdirs() + } + } + } + + private val okHttpClient = OkHttpClient.Builder() + .connectTimeout(30, TimeUnit.SECONDS) + .readTimeout(60, TimeUnit.SECONDS) + .writeTimeout(60, TimeUnit.SECONDS) + .pingInterval(5, TimeUnit.SECONDS) // 减少 ping 间隔到 5 秒 + .retryOnConnectionFailure(true) + .build() + + private var audioManager: AudioManager? = null + + private var lastPingTime = 0L + private var isConnecting = AtomicBoolean(false) + + private var audioFocusRequest: AudioFocusRequest? = null + private var audioFocusGranted = false + + private var pingFailureCount = 0 + private var lastPongTime = 0L + + private var statusCodeRetryCount = 0 + + private var originalVolume = 0 + private val VOLUME_REDUCTION_FACTOR = 0.5f // 降低到原始音量的50% + + override fun onCreate() { + super.onCreate() + createNotificationChannel() + audioManager = getSystemService(Context.AUDIO_SERVICE) as AudioManager + } + + override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int { + Log.i("intent", intent.toString()) + when (intent?.action) { + ACTION_START -> { + accessToken = intent.getStringExtra(EXTRA_ACCESS_TOKEN) ?: "" + appId = intent.getStringExtra(EXTRA_APP_ID) ?: "" + serverIp = intent.getStringExtra(SERVER_IP) ?: "" + contextId = intent.getStringExtra(CONTEXT_ID) ?: "" + startAsr() + } + ACTION_STOP -> stopAsr() + } + return START_NOT_STICKY + } + + private fun getHeader(messageType: Int, messageTypeSpecificFlags: Int, serialMethod: Int, compressionType: Int, reservedData: Int): ByteArray { + val header = ByteArray(4) + header[0] = ((PROTOCOL_VERSION shl 4) or DEFAULT_HEADER_SIZE).toByte() + header[1] = ((messageType shl 4) or messageTypeSpecificFlags).toByte() + header[2] = ((serialMethod shl 4) or compressionType).toByte() + header[3] = reservedData.toByte() + return header + } + + private fun intToBytes(a: Int): ByteArray { + return byteArrayOf( + ((a ushr 24) and 
0xFF).toByte(), + ((a ushr 16) and 0xFF).toByte(), + ((a ushr 8) and 0xFF).toByte(), + (a and 0xFF).toByte() + ) + } + + private fun gzipCompress(src: ByteArray): ByteArray { + val out = ByteArrayOutputStream() + val gzip = GZIPOutputStream(out) + gzip.write(src) + gzip.close() + return out.toByteArray() + } + + private fun connectWebSocket() { + if (isConnecting.get()) { + Log.d("AsrService", "Already connecting, skipping...") + return + } + + isConnecting.set(true) + Log.d("AsrService", "Connecting to WebSocket...") + + try { + // 确保关闭旧的连接 + webSocket?.close(1000, "Reconnecting") + Thread.sleep(100) + } catch (e: Exception) { + Log.e("AsrService", "Error closing old WebSocket", e) + } + + val request = Request.Builder() + .url(ASR_URL) + .addHeader("X-Api-App-Key", appId) + .addHeader("X-Api-Access-Key", accessToken) + .addHeader("X-Api-Resource-Id", ASR_RESOURCE_ID) + .addHeader("X-Api-Connect-Id", CONNECT_ID) + .addHeader("Content-Type", "application/json") + .addHeader("User-Agent", "OkHttp Android") + .build() + + webSocket = okHttpClient.newWebSocket(request, object : WebSocketListener() { + override fun onOpen(webSocket: WebSocket, response: Response) { + Log.d("AsrService", "WebSocket connection opened") + retryCount = 0 + pingFailureCount = 0 + lastPingTime = 0 + lastPongTime = 0 + isConnecting.set(false) + + // 发送初始化消息 + val user = JSONObject().apply { + put("uid", "ARK_VLM_DEMO") + } + val audio = JSONObject().apply { + put("format", AUDIO_FORMAT_CONFIG) + put("sample_rate", SAMPLE_RATE) + put("bits", AUDIO_BITS) + put("channel", AUDIO_CHANNELS) + } + val request = JSONObject().apply { + put("model_name", "bigmodel") + put("result_type", "single") + put("show_utterances", true) + put("end_window_size", 600) + put("force_to_speech_time", 1500) + } + val payload = JSONObject().apply { + put("user", user) + put("audio", audio) + put("request", request) + } + + val payloadStr = payload.toString() + val payloadBytes = gzipCompress(payloadStr.toByteArray()) + 
val header = getHeader(FULL_CLIENT_REQUEST, POS_SEQUENCE, JSON, GZIP, 0) + val payloadSize = intToBytes(payloadBytes.size) + sequence = 1 + val seqBytes = intToBytes(sequence) + + val fullClientRequest = ByteArray(header.size + seqBytes.size + payloadSize.size + payloadBytes.size) + var destPos = 0 + System.arraycopy(header, 0, fullClientRequest, destPos, header.size) + destPos += header.size + System.arraycopy(seqBytes, 0, fullClientRequest, destPos, seqBytes.size) + destPos += seqBytes.size + System.arraycopy(payloadSize, 0, fullClientRequest, destPos, payloadSize.size) + destPos += payloadSize.size + System.arraycopy(payloadBytes, 0, fullClientRequest, destPos, payloadBytes.size) + + try { + val result = webSocket.send(ByteString.of(*fullClientRequest)) + if (!result) { + Log.e("AsrService", "Failed to send initialization message") + throw Exception("Failed to send initialization message") + } + Log.d("AsrService", "Initialization message sent successfully") + } catch (e: Exception) { + Log.e("AsrService", "Error during initialization", e) + webSocket.close(1000, "Initialization failed") + isInitialized.set(false) + } + } + + override fun onMessage(webSocket: WebSocket, text: String) { + Log.d("AsrService", "Received pong from server") + lastPongTime = System.currentTimeMillis() + pingFailureCount = 0 + } + + override fun onMessage(webSocket: WebSocket, bytes: ByteString) { + Log.d("AsrService", "Received message from server") + lastPongTime = System.currentTimeMillis() + pingFailureCount = 0 + val res = bytes.toByteArray() + val sequence = parseResponse(res) + LAST_RESPONSE_TIME = System.currentTimeMillis() + } + + override fun onFailure(webSocket: WebSocket, t: Throwable, response: Response?) 
{ + Log.e("AsrService", "WebSocket error: ${t.message}", t) + isInitialized.set(false) + isConnecting.set(false) + + if (t is SocketTimeoutException) { + // Timeouts are retried immediately and do not count toward MAX_RETRIES + Log.d("AsrService", "Socket timeout, retrying immediately") + connectWebSocket() + } else if (retryCount < MAX_RETRIES) { + retryCount++ + val delay = RECONNECT_DELAY * retryCount + Log.d("AsrService", "Retrying connection in ${delay}ms, attempt $retryCount") + Thread.sleep(delay) + connectWebSocket() + } else { + Log.e("AsrService", "Max retries reached, stopping ASR") + stopAsr() + } + } + + override fun onClosing(webSocket: WebSocket, code: Int, reason: String) { + Log.d("AsrService", "WebSocket closing: $code - $reason") + isInitialized.set(false) + isConnecting.set(false) + } + + override fun onClosed(webSocket: WebSocket, code: Int, reason: String) { + Log.d("AsrService", "WebSocket closed: $code - $reason") + isInitialized.set(false) + isConnecting.set(false) + } + }) + } + + private fun checkConnection() { + if (!isInitialized.get() || webSocket == null) { + return + } + + val currentTime = System.currentTimeMillis() + + // Send an application-level ping (empty text frame) once PING_INTERVAL has elapsed + if (currentTime - lastPingTime > PING_INTERVAL) { + try { + val result = webSocket?.send("") + if (result == true) { + lastPingTime = currentTime + Log.d("AsrService", "Sent ping to server") + } else { + Log.e("AsrService", "Failed to send ping") + handlePingFailure() + } + } catch (e: Exception) { + Log.e("AsrService", "Error sending ping", e) + handlePingFailure() + } + } + + // Pong timeout: only a failure if nothing has arrived from the server since the last ping + if (lastPingTime > 0 && lastPongTime < lastPingTime && currentTime - lastPingTime > PONG_TIMEOUT) { + Log.e("AsrService", "Pong timeout, reconnecting...") + handlePingFailure() + } + + // No server response at all for longer than INIT_TIMEOUT + if (currentTime - LAST_RESPONSE_TIME > INIT_TIMEOUT) { + Log.e("AsrService", "No response from server for too long, reconnecting...") + handlePingFailure() + } + } + + private fun handlePingFailure() { + pingFailureCount++ + if (pingFailureCount >= MAX_PING_FAILURES) { + Log.e("AsrService", "Max 
/**
 * Requests transient audio focus for speech capture and stores the outcome in
 * [audioFocusGranted].
 *
 * Only implemented for API 26+ (AudioFocusRequest); on older releases this is
 * a no-op and [audioFocusGranted] is left untouched.
 *
 * The registered listener resumes recording when focus is regained and pauses
 * it on every kind of focus loss. Both directions only touch the recorder
 * while it is in STATE_INITIALIZED, because AudioRecord.startRecording() and
 * AudioRecord.stop() throw IllegalStateException otherwise and an exception
 * thrown from a focus callback would take the service down.
 */
private fun requestAudioFocus() {
    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
        // Shared reaction to any focus loss: remember it and pause capture.
        fun pauseRecording(logMessage: String) {
            Log.d("AsrService", logMessage)
            audioFocusGranted = false
            val recorder = audioRecord
            if (recorder != null && recorder.state == AudioRecord.STATE_INITIALIZED) {
                recorder.stop()
            }
        }

        val attributes = AudioAttributes.Builder()
            .setUsage(AudioAttributes.USAGE_MEDIA)
            .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
            .build()

        val request = AudioFocusRequest.Builder(AudioManager.AUDIOFOCUS_GAIN_TRANSIENT)
            .setAudioAttributes(attributes)
            .setOnAudioFocusChangeListener { focusChange ->
                when (focusChange) {
                    AudioManager.AUDIOFOCUS_GAIN -> {
                        Log.d("AsrService", "Audio focus gained")
                        audioFocusGranted = true
                        // Resume recording.
                        if (isRecording.get() && audioRecord?.state == AudioRecord.STATE_INITIALIZED) {
                            audioRecord?.startRecording()
                        }
                    }
                    AudioManager.AUDIOFOCUS_LOSS ->
                        pauseRecording("Audio focus lost")
                    AudioManager.AUDIOFOCUS_LOSS_TRANSIENT ->
                        pauseRecording("Audio focus lost transiently")
                    // AudioRecord has no notion of ducking, so this is treated as a pause too.
                    AudioManager.AUDIOFOCUS_LOSS_TRANSIENT_CAN_DUCK ->
                        pauseRecording("Audio focus lost transiently can duck")
                }
            }
            .build()
        audioFocusRequest = request

        val result = audioManager?.requestAudioFocus(request)
        audioFocusGranted = result == AudioManager.AUDIOFOCUS_REQUEST_GRANTED
        Log.d("AsrService", "Audio focus request result: $result")
    }
}
size: $actualBufferSize") + + // 尝试不同的音频源和配置,优先使用VOICE_COMMUNICATION + val configs = arrayOf( + Triple(MediaRecorder.AudioSource.VOICE_COMMUNICATION, SAMPLE_RATE, actualBufferSize), + Triple(MediaRecorder.AudioSource.VOICE_RECOGNITION, SAMPLE_RATE, actualBufferSize), + Triple(MediaRecorder.AudioSource.CAMCORDER, SAMPLE_RATE, actualBufferSize), + Triple(MediaRecorder.AudioSource.MIC, SAMPLE_RATE, actualBufferSize) + ) + + var audioRecordInitialized = false + var lastException: Exception? = null + + for ((audioSource, sampleRate, bufferSize) in configs) { + try { + Log.d("AsrService", "Trying audio source: $audioSource, sample rate: $sampleRate, buffer size: $bufferSize") + + // 创建新的AudioRecord实例 + audioRecord = AudioRecord( + audioSource, + sampleRate, + CHANNEL_CONFIG, + AUDIO_FORMAT, + bufferSize + ) + + if (audioRecord?.state == AudioRecord.STATE_INITIALIZED) { + Log.d("AsrService", "Successfully initialized AudioRecord with source: $audioSource") + audioRecordInitialized = true + break + } else { + Log.e("AsrService", "Failed to initialize AudioRecord with source: $audioSource") + audioRecord?.release() + } + } catch (e: Exception) { + Log.e("AsrService", "Error initializing AudioRecord with source: $audioSource", e) + lastException = e + audioRecord?.release() + Thread.sleep(1000) // 在尝试下一个配置之前等待更长时间 + } + } + + if (!audioRecordInitialized) { + // 如果所有配置都失败,尝试使用默认配置 + try { + Log.d("AsrService", "Trying default configuration as last resort") + audioRecord = AudioRecord( + MediaRecorder.AudioSource.MIC, + SAMPLE_RATE, + CHANNEL_CONFIG, + AUDIO_FORMAT, + AudioRecord.getMinBufferSize(SAMPLE_RATE, CHANNEL_CONFIG, AUDIO_FORMAT) + ) + + if (audioRecord?.state != AudioRecord.STATE_INITIALIZED) { + throw lastException ?: Exception("Failed to initialize AudioRecord with any configuration") + } + } catch (e: Exception) { + Log.e("AsrService", "Failed to initialize AudioRecord with default configuration", e) + throw e + } + } + + // 设置音频属性 + if (Build.VERSION.SDK_INT >= 
Build.VERSION_CODES.LOLLIPOP) { + val audioAttributes = AudioAttributes.Builder() + .setUsage(AudioAttributes.USAGE_VOICE_COMMUNICATION) + .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH) + .setFlags(AudioAttributes.FLAG_AUDIBILITY_ENFORCED) + .build() + audioRecord?.setPreferredDevice(audioManager.getDevices(AudioManager.GET_DEVICES_INPUTS)?.firstOrNull()) + } + + // 启动录音前先读取一些数据来预热 + val warmupBuffer = ByteArray(actualBufferSize) + audioRecord?.startRecording() + audioRecord?.read(warmupBuffer, 0, actualBufferSize) + Log.d("AsrService", "AudioRecord warmup completed") + + // 重置状态 + isRecording.set(true) + isAlreadyDefinite = false + currentText = "" + startTime = System.currentTimeMillis() + lastAsrTime = System.currentTimeMillis() + voiceBuffer.reset() + sequence = 0 + isSpeaking = false + lastVoiceTime = System.currentTimeMillis() + silenceDuration = 0 + + startForeground(NOTIFICATION_ID, createNotification()) + + // 开始发送音频数据 + recordingThread = Thread { + val buffer = ByteArray(BUFFER_SIZE) + var totalBytesRead = 0 + var lastSendTime = System.currentTimeMillis() + var consecutiveSilenceCount = 0 + + while (isRecording.get()) { + try { + val read = audioRecord?.read(buffer, 0, BUFFER_SIZE) ?: 0 + + if (read > 0) { + totalBytesRead += read + + // 计算音频能量值 + var energy = 0.0 + var maxSample = 0 + var sampleCount = 0 + + for (i in 0 until read step 2) { + if (i + 1 < read) { + val sample = (buffer[i].toInt() and 0xFF) or (buffer[i + 1].toInt() shl 8) + val signedSample = if (sample > 32767) sample - 65536 else sample + if (Math.abs(signedSample) > maxSample) { + maxSample = Math.abs(signedSample) + } + energy += signedSample * signedSample + sampleCount++ + } + } + + if (sampleCount > 0) { + energy = Math.sqrt(energy / sampleCount) + + // 改进语音检测逻辑 + val isVoiceDetected = energy > SILENCE_THRESHOLD || maxSample > SILENCE_THRESHOLD * 2 + + + if (isVoiceDetected) { + consecutiveSilenceCount = 0 + if (!isSpeaking) { + // 开始新的语音识别 + isSpeaking = true + lastVoiceTime 
/**
 * Stops speech recognition and tears the service down.
 *
 * Steps, in order: reset the status-code retry counter, abandon audio focus,
 * restore the audio mode, signal the recording thread to stop and wait up to
 * one second for it, stop and release the AudioRecord, close the WebSocket,
 * reset session state, then leave the foreground and stop the service.
 *
 * The method is also invoked from inside the recording thread (its error
 * path), so the join is skipped when the caller *is* that thread; joining
 * oneself would only block for the full timeout. Each resource is released in
 * its own guarded step so that one failure cannot skip the remaining cleanup.
 */
private fun stopAsr() {
    Log.d("stopAsr", "Stopping ASR service...")

    // Reset the status-code retry counter.
    statusCodeRetryCount = 0

    // Give audio focus back.
    abandonAudioFocus()

    // Restore audio settings.
    audioManager?.mode = AudioManager.MODE_NORMAL
    audioManager?.isSpeakerphoneOn = true

    // 1. Tell the recording thread to stop.
    isRecording.set(false)

    // 2. Wait for the recording thread to finish (at most one second),
    //    unless we are running on it.
    val worker = recordingThread
    if (worker != null && worker !== Thread.currentThread()) {
        try {
            worker.join(1000)
        } catch (e: InterruptedException) {
            // Keep the interrupt flag for the caller, but finish the cleanup.
            Thread.currentThread().interrupt()
            Log.e("stopAsr", "Interrupted while waiting for recording thread", e)
        }
    }
    recordingThread = null

    // 3. Stop audio capture; release() must run even if stop() throws.
    val recorder = audioRecord
    if (recorder != null) {
        try {
            recorder.stop()
        } catch (e: Exception) {
            Log.e("stopAsr", "Error stopping audio record", e)
        } finally {
            try {
                recorder.release()
            } catch (e: Exception) {
                Log.e("stopAsr", "Error stopping audio record", e)
            }
        }
    }
    audioRecord = null

    // 4. Close the WebSocket connection.
    try {
        webSocket?.close(1000, "Normal closure")
        // Brief pause to let the close handshake go out.
        Thread.sleep(100)
    } catch (e: Exception) {
        Log.e("stopAsr", "Error closing WebSocket", e)
    }
    webSocket = null

    // 5. Reset all session state.
    isInitialized.set(false)
    sequence = 0
    retryCount = 0
    lastVoiceTime = 0L
    silenceDuration = 0L

    // 6. Leave the foreground and stop the service.
    stopForeground(true)
    stopSelf()

    Log.d("stopAsr", "ASR service stopped successfully")
}
/**
 * Builds one audio-only protocol frame.
 *
 * Increments the running [sequence], gzip-compresses [audioData] and returns
 * the concatenation `header | sequence | payload size | payload`. For the
 * final chunk ([isLast] is true) the header carries NEG_WITH_SEQUENCE and the
 * sequence number is written negated; otherwise POS_SEQUENCE and the positive
 * sequence number are used.
 *
 * @param audioData raw audio bytes to send
 * @param isLast whether this is the last chunk of the utterance
 * @return the complete frame, ready to be written to the WebSocket
 */
private fun sendAudioOnlyRequest(audioData: ByteArray, isLast: Boolean): ByteArray {
    sequence++

    val flags = if (isLast) NEG_WITH_SEQUENCE else POS_SEQUENCE
    val headerPart = getHeader(AUDIO_ONLY_REQUEST, flags, JSON, GZIP, 0)
    val sequencePart = intToBytes(if (isLast) -sequence else sequence)
    val payloadPart = gzipCompress(audioData)
    val sizePart = intToBytes(payloadPart.size)

    // Lay the four sections out back to back in wire order.
    val sections = arrayOf(headerPart, sequencePart, sizePart, payloadPart)
    val frame = ByteArray(sections.sumOf { it.size })
    var offset = 0
    for (section in sections) {
        section.copyInto(frame, offset)
        offset += section.size
    }
    return frame
}
/**
 * Stops and releases [mp] and restores the playback-related state.
 *
 * `release()` is always attempted, even when `isPlaying`, `stop()` or
 * `reset()` throw because the player is in an error or already-released
 * state; otherwise the native player would leak. The [mediaPlayer] field is
 * cleared only when it refers to the instance being cleaned up, so a late
 * callback from an old player cannot drop the reference to a newer one.
 *
 * Regardless of the outcome, the playing/processing flags are reset and the
 * audio mode, speakerphone setting and voice-call stream volume are
 * re-applied.
 *
 * @param mp the player to dispose of; null is allowed and only triggers the
 *           state reset
 */
private fun cleanupMediaPlayer(mp: MediaPlayer?) {
    try {
        if (mp != null) {
            try {
                if (mp.isPlaying) {
                    mp.stop()
                }
                mp.reset()
            } catch (e: IllegalStateException) {
                Log.e("AsrService", "Error cleaning up MediaPlayer", e)
            } finally {
                mp.release()
            }
        }
        if (mediaPlayer === mp) {
            mediaPlayer = null
        }
    } catch (e: Exception) {
        Log.e("AsrService", "Error cleaning up MediaPlayer", e)
    } finally {
        // Reset state flags.
        isPlayingAudio = false
        isProcessingRequest = false
        // Re-apply the audio routing used for capture.
        audioManager?.mode = AudioManager.MODE_IN_COMMUNICATION
        audioManager?.isSpeakerphoneOn = false
        // Restore the volume saved before playback.
        audioManager?.setStreamVolume(AudioManager.STREAM_VOICE_CALL, originalVolume, 0)
    }
}
"bot-20241114164326-xlcc91") + put("stream", false) + put("messages", JSONArray().apply { + put(JSONObject().apply { + put("role", "user") + put("content", JSONArray().apply { + put(JSONObject().apply { + put("type", "text") + put("text", text) + }) + }) + }) + }) + } + + val request = Request.Builder() + .url(UPLOAD_URL.format(serverIp)) + .post(jsonRequest.toString().toRequestBody("application/json".toMediaTypeOrNull())) + .addHeader("X-Context-Id", contextId) + .addHeader("Connection", "close") + .build() + Log.d("AsrRequest", "AsrRequest: " + contextId) + + val response = okHttpClient.newCall(request).execute() + val responseCode = response.code + Log.d("TAG", "Upload response code: $responseCode") + + if (!response.isSuccessful) { + Log.e("TAG", "Upload failed with response code: $responseCode") + val errorBody = response.body?.string() + Log.e("TAG", "Error response: $errorBody") + isProcessingRequest = false + return@launch + } + + val responseBody = response.body?.string() + Log.d("TAG", "Success response received") + + // 解析响应并播放语音 + try { + val jsonResponse = JSONObject(responseBody) + val choices = jsonResponse.getJSONArray("choices") + if (choices.length() > 0) { + val firstChoice = choices.getJSONObject(0) + val message = firstChoice.getJSONObject("message") + val audio = message.optJSONObject("audio") + if (audio != null) { + val audioData = audio.optString("data") + if (audioData.isNotEmpty()) { + // 截断音频数据用于日志显示 + val truncatedAudio = + if (audioData.length > 20) { + audioData.substring(0, 20) + "..." 
/**
 * Parses one binary frame received from the ASR server and applies its
 * effects to the session.
 *
 * Frame layout as read here: byte 0 = protocol version (high nibble) and
 * header size (low nibble); byte 1 = message type / type-specific flags;
 * byte 2 = serialization method / compression; byte 3 = reserved;
 * bytes 4..7 = big-endian sequence; bytes 8..11 = big-endian payload size;
 * the remainder is the payload.
 *
 * Effects:
 *  - sequence == 45000081: finalizes the current text via [definate];
 *  - negative sequence: closes the socket and clears the connection flags;
 *  - FULL_SERVER_RESPONSE / SERVER_ACK: decodes the (optionally gzip'ed) JSON
 *    payload, marks the session initialized on a full response, updates
 *    [currentText] / [lastAsrTime], and calls [definate] when the first
 *    utterance is flagged definite.
 *
 * @param res the raw frame
 * @return the sequence field of the frame, or -1 when the frame is too short
 *         to contain one
 */
private fun parseResponse(res: ByteArray): Int {
    // A frame needs at least the 4-byte header plus the 4-byte sequence field.
    if (res.size < 8) {
        if (res.isNotEmpty()) {
            Log.e("AsrService", "Response too short: ${res.size} bytes")
        }
        return -1
    }

    val nibbleMask = 0x0f
    val result = HashMap<String, Any>()

    val protocolVersion = (res[0].toInt() ushr 4) and nibbleMask
    result["protocol_version"] = protocolVersion
    val headerSize = res[0].toInt() and nibbleMask
    result["header_size"] = headerSize

    val messageType = (res[1].toInt() ushr 4) and nibbleMask
    result["message_type"] = messageType
    val messageTypeSpecificFlags = res[1].toInt() and nibbleMask
    result["message_type_specific_flags"] = messageTypeSpecificFlags
    // High nibble of byte 2: shift by 4 bits, then mask off the sign extension.
    val serializationMethod = (res[2].toInt() ushr 4) and nibbleMask
    result["serialization_method"] = serializationMethod
    val messageCompression = res[2].toInt() and nibbleMask
    result["message_compression"] = messageCompression
    val reserved = res[3]
    result["reserved"] = reserved

    val sequence = bytesToInt(res.copyOfRange(4, 8))

    Log.i("hg", "seq: "+sequence)

    if (sequence == 45000081){
        definate(currentText)
        return sequence
    }
    if (sequence < 0) {
        Log.i("AsrService", "Received negative sequence $sequence, closing connection")
        // Close the current connection.
        webSocket?.close(1000, "Reconnecting due to status code -")
        // Reset connection state.
        isInitialized.set(false)
        isConnecting.set(false)
        isSpeaking = false
        return sequence
    }

    // The payload size field and the payload itself start at offsets 8 and 12.
    if (res.size < 12) {
        Log.e("AsrService", "Response has no payload: ${res.size} bytes")
        return sequence
    }
    val payloadSize = bytesToInt(res.copyOfRange(8, 12))
    val payload = res.copyOfRange(12, res.size)

    Log.i("AsrService", "messageType: $messageType")

    if (messageType == FULL_SERVER_RESPONSE || messageType == SERVER_ACK) {
        val payloadStr = if (messageCompression == GZIP) {
            String(gzipDecompress(payload))
        } else {
            String(payload)
        }
        Log.d("AsrService", "Payload: $payloadStr")
        result["payload_size"] = payloadSize
        Log.d("AsrService", "Response: ${Gson().toJson(result)}")

        try {
            val jsonResponse = JSONObject(payloadStr)

            // A full server response confirms the session is initialized.
            if (messageType == FULL_SERVER_RESPONSE) {
                Log.d("AsrService", "Received server response")
                isInitialized.set(true)
            }

            var text = ""
            // Check whether the response carries a recognition result.
            if (jsonResponse.has("result")) {
                val asrResult = jsonResponse.getJSONObject("result")

                if (asrResult.has("text")) {
                    text = asrResult.getString("text")
                    if (text.isNotEmpty()) {
                        Log.d("AsrService", "Recognized text: $text")
                        currentText = text
                        lastAsrTime = System.currentTimeMillis()
                    } else {
                        Log.d("AsrService", "Empty text in response")
                    }
                } else {
                    Log.d("AsrService", "No text field in result")
                }

                // Finalize as soon as the first utterance is marked definite.
                if (asrResult.has("utterances") &&
                    asrResult.getJSONArray("utterances").length() > 0) {
                    val definiteFlag = asrResult
                        .getJSONArray("utterances")
                        .getJSONObject(0)
                        .getBoolean("definite")
                    if (definiteFlag) {
                        definate(text)
                    }
                }
            } else {
                Log.d("AsrService", "No result field in response")
            }
        } catch (e: Exception) {
            Log.e("AsrService", "Error parsing ASR result", e)
        }
    }

    return sequence
}
/**
 * Inflates a gzip-compressed byte array.
 *
 * The GZIP stream is closed via `use`, which also frees the underlying
 * Inflater promptly instead of leaving it to finalization.
 *
 * @param src a complete gzip stream
 * @return the decompressed bytes
 * @throws java.io.IOException if [src] is not valid gzip data (including
 *         empty or truncated input)
 */
private fun gzipDecompress(src: ByteArray): ByteArray {
    return GZIPInputStream(ByteArrayInputStream(src)).use { gzip -> gzip.readBytes() }
}
javax.net.ssl.X509TrustManager +import java.security.cert.X509Certificate +import android.graphics.Bitmap + +class ScreenshotService : Service() { + companion object { + const val ACTION_START = "com.example.android.action.START_SCREENSHOT" + const val ACTION_STOP = "com.example.android.action.STOP_SCREENSHOT" + const val EXTRA_RESULT_CODE = "result_code" + const val EXTRA_RESULT_DATA = "result_data" + const val SERVER_IP = "server_ip" + const val CONTEXT_ID = "context_id" + private const val NOTIFICATION_ID = 1 + private const val CHANNEL_ID = "screenshot_channel" + private const val TAG = "ScreenshotService" + private const val UPLOAD_URL = "http://%s/api/v3/bots/chat/completions" + private const val SCREENSHOT_INTERVAL = 3000L + private const val VIRTUAL_DISPLAY_NAME = "ScreenshotService" + private const val MAX_UPLOAD_RETRIES = 3 + private const val UPLOAD_RETRY_DELAY = 1000L + } + + private var resultCode: Int = 0 + private var resultData: Intent? = null + private var imageReader: ImageReader? = null + private var virtualDisplay: VirtualDisplay? = null + private var mediaProjection: MediaProjection? 
= null + private val handler = Handler(Looper.getMainLooper()) + private var isCapturing = false + private var isProcessingRequest = false + private var isPlayingAudio = false + private var screenWidth = 0 + private var screenHeight = 0 + private var screenDensity = 0 + private var serverIp = "" + private var contextId = "" + private val okHttpClient: OkHttpClient by lazy { + val trustAllCerts = arrayOf(object : X509TrustManager { + override fun getAcceptedIssuers(): Array = arrayOf() + override fun checkClientTrusted(chain: Array, authType: String) {} + override fun checkServerTrusted(chain: Array, authType: String) {} + }) + + val sslContext = SSLContext.getInstance("TLS") + sslContext.init(null, trustAllCerts, java.security.SecureRandom()) + + val loggingInterceptor = HttpLoggingInterceptor().apply { + setLevel(HttpLoggingInterceptor.Level.BODY) + } + + OkHttpClient.Builder() + .sslSocketFactory(sslContext.socketFactory, trustAllCerts[0] as X509TrustManager) + .hostnameVerifier { _, _ -> true } + .addInterceptor(loggingInterceptor) + .connectTimeout(30, TimeUnit.SECONDS) + .readTimeout(30, TimeUnit.SECONDS) + .writeTimeout(30, TimeUnit.SECONDS) + .build() + } + + private val mediaProjectionCallback = object : MediaProjection.Callback() { + override fun onStop() { + Log.d(TAG, "MediaProjection stopped") + cleanup() + stopSelf() + } + } + + private var mediaPlayer: MediaPlayer? 
/**
 * Entry point for start/stop intents.
 *
 * ACTION_START stores the media-projection grant (result code and data), the
 * server address and the context id from the intent extras, then sets up
 * capture and starts the periodic screenshot loop; when the projection data
 * is missing only an error is logged. ACTION_STOP releases resources and
 * stops the service. Any other action is ignored.
 *
 * @return START_NOT_STICKY in every case
 */
override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int {
    Log.i("intent", intent?.getStringExtra(SERVER_IP).toString())
    val action = intent?.action
    Log.d(TAG, "Service started with action: $action")

    if (intent != null && action == ACTION_START) {
        resultCode = intent.getIntExtra(EXTRA_RESULT_CODE, 0)
        resultData = intent.getParcelableExtra(EXTRA_RESULT_DATA)
        serverIp = intent.getStringExtra(SERVER_IP) ?: ""
        contextId = intent.getStringExtra(CONTEXT_ID) ?: ""

        if (resultData == null) {
            Log.e(TAG, "Result data is null")
        } else {
            Log.d(TAG, "Starting screenshot capture")
            setupScreenCapture()
            startPeriodicScreenshot()
        }
    } else if (action == ACTION_STOP) {
        Log.d(TAG, "Stopping screenshot capture")
        cleanup()
        stopSelf()
    }

    return START_NOT_STICKY
}
try { + val windowManager = getSystemService(WINDOW_SERVICE) as WindowManager + val metrics = DisplayMetrics() + windowManager.defaultDisplay.getMetrics(metrics) + screenWidth = metrics.widthPixels + screenHeight = metrics.heightPixels + screenDensity = metrics.densityDpi + + // 只在这里创建一次 ImageReader + if (imageReader == null) { + imageReader = ImageReader.newInstance(screenWidth, screenHeight, PixelFormat.RGBA_8888, 2) + imageReader?.setOnImageAvailableListener({ reader -> + // 再次检查状态,确保没有正在处理的请求和播放的音频 + if (isProcessingRequest || isPlayingAudio) { + Log.d(TAG, "Skipping image processing: request in progress or audio playing") + return@setOnImageAvailableListener + } + + val image = reader.acquireLatestImage() + if (image != null) { + try { + Log.d(TAG, "Processing captured image") + val planes = image.planes + val buffer = planes[0].buffer + val pixelStride = planes[0].pixelStride + val rowStride = planes[0].rowStride + val rowPadding = rowStride - pixelStride * screenWidth + + val bitmap = Bitmap.createBitmap( + screenWidth + rowPadding / pixelStride, + screenHeight, + Bitmap.Config.ARGB_8888 + ) + bitmap.copyPixelsFromBuffer(buffer) + + // 裁剪掉多余的部分 + val croppedBitmap = Bitmap.createBitmap( + bitmap, + 0, + 0, + screenWidth, + screenHeight + ) + + // 保存截图 + val outputStream = ByteArrayOutputStream() + croppedBitmap.compress(Bitmap.CompressFormat.JPEG, 100, outputStream) + val imageBytes = outputStream.toByteArray() + Log.d(TAG, "Image captured, size: ${imageBytes.size} bytes") + + // 上传截图 + uploadScreenshot(imageBytes) + + // 释放资源 + croppedBitmap.recycle() + bitmap.recycle() + } finally { + image.close() + } + } + }, handler) + } + } catch (e: Exception) { + Log.e(TAG, "Error setting up screen capture", e) + cleanup() + stopSelf() + } + } + + private fun takeScreenshot() { + try { + // 双重检查,确保没有正在处理的请求和播放的音频 + if (isProcessingRequest || isPlayingAudio) { + Log.d(TAG, "Skipping screenshot: request in progress or audio playing") + return + } + + val imageReader = 
/**
 * Starts the repeating screenshot loop; a second call while the loop is
 * already running is ignored.
 *
 * Makes sure the ImageReader and the VirtualDisplay exist, and shuts the
 * service down when the display could not be created. Otherwise a
 * self-rescheduling task is posted on [handler]: every SCREENSHOT_INTERVAL ms
 * it takes a screenshot unless a request is still being processed, and it
 * stops rescheduling once [isCapturing] is cleared.
 */
private fun startPeriodicScreenshot() {
    if (isCapturing) {
        return
    }
    isCapturing = true

    // Make sure both the ImageReader and the VirtualDisplay are in place.
    setupScreenCapture()
    createVirtualDisplay()

    if (virtualDisplay == null) {
        Log.e(TAG, "Failed to create VirtualDisplay")
        cleanup()
        stopSelf()
        return
    }

    // A Handler task (rather than a Timer) keeps control over the spacing of requests.
    val ticker = object : Runnable {
        override fun run() {
            if (isCapturing) {
                // Only capture when no earlier request is still in flight.
                if (!isProcessingRequest) {
                    takeScreenshot()
                }
                // Re-check at the fixed interval whether or not a capture happened.
                handler.postDelayed(this, SCREENSHOT_INTERVAL)
            }
        }
    }
    handler.post(ticker)
    Log.d(TAG, "Periodic screenshot started")
}
/**
 * Releases everything the capture pipeline holds and resets the state flags.
 *
 * Pending handler callbacks are removed, then the VirtualDisplay, the
 * ImageReader and the MediaProjection are released. Each release runs in its
 * own guarded step so a failure in one does not leave the others allocated.
 *
 * The projection callback is unregistered *before* the projection is stopped:
 * the callback's onStop() calls cleanup() and stopSelf() itself, and must not
 * be re-entered from here. Stopping the projection is what actually ends the
 * screen-capture session; merely dropping the reference would leave it
 * running.
 */
private fun cleanup() {
    // Runs one release step, logging instead of propagating any failure.
    fun guarded(step: () -> Unit) {
        try {
            step()
        } catch (e: Exception) {
            Log.e(TAG, "Error during cleanup", e)
        }
    }

    try {
        Log.d(TAG, "Cleaning up resources")
        // Drop every pending callback, including the periodic screenshot task.
        guarded { handler.removeCallbacksAndMessages(null) }

        val display = virtualDisplay
        virtualDisplay = null
        guarded { display?.release() }

        val reader = imageReader
        imageReader = null
        guarded { reader?.close() }

        val projection = mediaProjection
        mediaProjection = null
        guarded { projection?.unregisterCallback(mediaProjectionCallback) }
        guarded { projection?.stop() }
    } finally {
        isCapturing = false
        isProcessingRequest = false
        isPlayingAudio = false
    }
}
= null
+
+    /**
+     * Uploads one JPEG screenshot to the backend on a background thread,
+     * retrying up to [MAX_UPLOAD_RETRIES] times with [UPLOAD_RETRY_DELAY] between attempts.
+     *
+     * [isProcessingRequest] stays true for the whole upload, including the retry
+     * delays, and is cleared once the worker finishes.
+     *
+     * @param imageBytes JPEG-encoded screenshot data.
+     */
+    private fun uploadScreenshot(imageBytes: ByteArray) {
+        // Mark the request as in progress before the worker starts so the periodic
+        // loop cannot start another capture in between.
+        isProcessingRequest = true
+
+        Thread {
+            try {
+                // The request does not change between attempts, so build it once.
+                val request = buildUploadRequest(imageBytes)
+                var attempt = 0
+                var success = false
+
+                while (attempt < MAX_UPLOAD_RETRIES && !success) {
+                    attempt++
+                    Log.d(TAG, "Starting upload to $UPLOAD_URL, attempt $attempt")
+                    success = try {
+                        executeUpload(request)
+                    } catch (e: Exception) {
+                        Log.e(TAG, "Error uploading screenshot", e)
+                        false
+                    }
+                    if (!success && attempt < MAX_UPLOAD_RETRIES) {
+                        Thread.sleep(UPLOAD_RETRY_DELAY)
+                    }
+                }
+
+                if (!success) {
+                    Log.e(TAG, "Failed to upload screenshot after $MAX_UPLOAD_RETRIES attempts")
+                }
+            } catch (e: InterruptedException) {
+                // Interrupted while waiting to retry: keep the interrupt flag and give up.
+                Thread.currentThread().interrupt()
+                Log.w(TAG, "Screenshot upload interrupted", e)
+            } catch (e: Exception) {
+                Log.e(TAG, "Error preparing screenshot upload", e)
+            } finally {
+                isProcessingRequest = false
+            }
+        }.start()
+    }
+
+    /**
+     * Builds the chat-completion style POST request that carries the screenshot
+     * as a base64 `data:` URL together with an empty text part.
+     */
+    private fun buildUploadRequest(imageBytes: ByteArray): Request {
+        val base64Image = Base64.encodeToString(imageBytes, Base64.NO_WRAP)
+
+        // Truncate the base64 image data for log output.
+        val truncatedImage = if (base64Image.length > 20) {
+            base64Image.substring(0, 20) + "..."
+        } else {
+            base64Image
+        }
+        Log.d(TAG, "Image data length: ${base64Image.length}, content: $truncatedImage")
+
+        val textPart = JSONObject().apply {
+            put("type", "text")
+            put("text", "")
+        }
+        val imagePart = JSONObject().apply {
+            put("type", "image_url")
+            put("image_url", JSONObject().apply {
+                put("url", "data:image/jpeg;base64,$base64Image")
+            })
+        }
+        val userMessage = JSONObject().apply {
+            put("role", "user")
+            put("content", JSONArray().apply {
+                put(textPart)
+                put(imagePart)
+            })
+        }
+        val jsonRequest = JSONObject().apply {
+            put("model", "bot-20241114164326-xlcc91")
+            put("stream", false)
+            put("messages", JSONArray().apply { put(userMessage) })
+        }
+
+        Log.d("ScreenshotService", "ScreenshotService: " + contextId)
+        return Request.Builder()
+            .url(UPLOAD_URL.format(serverIp))
+            .post(jsonRequest.toString().toRequestBody("application/json".toMediaTypeOrNull()))
+            .addHeader("X-Context-Id", contextId)
+            .addHeader("Connection", "close")
+            .build()
+    }
+
+    /**
+     * Executes [request] once and always closes the response.
+     *
+     * @return true when the server answered with a successful status code.
+     */
+    private fun executeUpload(request: Request): Boolean =
+        okHttpClient.newCall(request).execute().use { response ->
+            if (response.isSuccessful) {
+                Log.d(TAG, "Upload successful")
+                true
+            } else {
+                Log.e(TAG, "Upload failed with response code: ${response.code}")
+                val errorBody = response.body?.string()
+                Log.e(TAG, "Error response: $errorBody")
+                false
+            }
+        }
+}
diff --git a/demohouse/hgdoll/android/app/src/main/java/com/example/android/ui/theme/Color.kt b/demohouse/hgdoll/android/app/src/main/java/com/example/android/ui/theme/Color.kt
new file mode 100644
index 00000000..9597aba2
--- /dev/null
+++ b/demohouse/hgdoll/android/app/src/main/java/com/example/android/ui/theme/Color.kt
@@ -0,0 +1,11 @@
+package com.example.android.ui.theme
+
+import androidx.compose.ui.graphics.Color
+
+val Purple80 = Color(0xFFD0BCFF)
+val PurpleGrey80 = Color(0xFFCCC2DC)
+val Pink80 = Color(0xFFEFB8C8)
+
+val Purple40 = Color(0xFF6650a4)
+val PurpleGrey40 = Color(0xFF625b71)
+val Pink40 = Color(0xFF7D5260)
\ No newline at end of file
diff --git a/demohouse/hgdoll/android/app/src/main/java/com/example/android/ui/theme/Theme.kt b/demohouse/hgdoll/android/app/src/main/java/com/example/android/ui/theme/Theme.kt
new file mode 100644
index 00000000..9044a9ce
--- /dev/null
+++ b/demohouse/hgdoll/android/app/src/main/java/com/example/android/ui/theme/Theme.kt
@@ -0,0 +1,49 @@
+package com.example.android.ui.theme
+
+import android.app.Activity
+import android.os.Build
+import androidx.compose.foundation.isSystemInDarkTheme
+import androidx.compose.material3.MaterialTheme
+import androidx.compose.material3.darkColorScheme
+import androidx.compose.material3.dynamicDarkColorScheme
+import androidx.compose.material3.dynamicLightColorScheme
+import androidx.compose.material3.lightColorScheme
+import androidx.compose.runtime.Composable
+import androidx.compose.ui.platform.LocalContext
+
+private val DarkColorScheme = darkColorScheme(
+    primary = Purple80,
+    secondary = PurpleGrey80,
+    tertiary = Pink80
+)
+
+private val LightColorScheme = lightColorScheme(
+    primary = Purple40,
+    secondary = PurpleGrey40,
+    tertiary = Pink40
+
+)
+
+@Composable
+fun AndroidTheme(
+    darkTheme:
Boolean = isSystemInDarkTheme(), + // Dynamic color is available on Android 12+ + dynamicColor: Boolean = true, + content: @Composable () -> Unit +) { + // Scheme selection: dynamic colors win when requested and the SDK level is at least Build.VERSION_CODES.S; otherwise the static dark or light scheme is used. + val colorScheme = when { + dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S -> { + val context = LocalContext.current + if (darkTheme) dynamicDarkColorScheme(context) else dynamicLightColorScheme(context) + } + + darkTheme -> DarkColorScheme + else -> LightColorScheme + } + + // Apply the selected scheme and the app Typography to the wrapped content. + MaterialTheme( + colorScheme = colorScheme, + typography = Typography, + content = content + ) +} \ No newline at end of file diff --git a/demohouse/hgdoll/android/app/src/main/java/com/example/android/ui/theme/Type.kt b/demohouse/hgdoll/android/app/src/main/java/com/example/android/ui/theme/Type.kt new file mode 100644 index 00000000..a7717539 --- /dev/null +++ b/demohouse/hgdoll/android/app/src/main/java/com/example/android/ui/theme/Type.kt @@ -0,0 +1,18 @@ +package com.example.android.ui.theme + +import androidx.compose.material3.Typography +import androidx.compose.ui.text.TextStyle +import androidx.compose.ui.text.font.FontFamily +import androidx.compose.ui.text.font.FontWeight +import androidx.compose.ui.unit.sp + +// Set of Material typography styles to start with +// Only bodyLarge is passed explicitly; the remaining styles use the Typography() constructor defaults. +val Typography = Typography( + bodyLarge = TextStyle( + fontFamily = FontFamily.Default, + fontWeight = FontWeight.Normal, + fontSize = 16.sp, + lineHeight = 24.sp, + letterSpacing = 0.5.sp + ) +) \ No newline at end of file diff --git a/demohouse/hgdoll/android/app/src/main/java/com/example/android/utils/PermissionUtils.kt b/demohouse/hgdoll/android/app/src/main/java/com/example/android/utils/PermissionUtils.kt new file mode 100644 index 00000000..a805a9c3 --- /dev/null +++ b/demohouse/hgdoll/android/app/src/main/java/com/example/android/utils/PermissionUtils.kt @@ -0,0 +1,17 @@ +package com.example.android.utils + +import android.content.Context +import android.content.pm.PackageManager + +/** Runtime-permission helpers; the members of this object only deal with RECORD_AUDIO. */ +object PermissionUtils { + // Log tag; not referenced by any member of this object. + private const val TAG = "PermissionUtils" + + 
/** Returns true when RECORD_AUDIO, the only permission checked here, is granted to [context]. */ + fun hasAllPermissions(context: Context): Boolean { + return context.checkSelfPermission(android.Manifest.permission.RECORD_AUDIO) == + PackageManager.PERMISSION_GRANTED + } + + /** Returns a fixed user-facing message saying the recording permission is needed for speech recognition; [context] is not used. */ + fun getPermissionMessage(context: Context): String { + return "需要录音权限才能使用语音识别功能" + } +} \ No newline at end of file diff --git a/demohouse/hgdoll/android/app/src/main/res/drawable/ic_launcher_background.xml b/demohouse/hgdoll/android/app/src/main/res/drawable/ic_launcher_background.xml new file mode 100644 index 00000000..07d5da9c --- /dev/null +++ b/demohouse/hgdoll/android/app/src/main/res/drawable/ic_launcher_background.xml @@ -0,0 +1,170 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/demohouse/hgdoll/android/app/src/main/res/drawable/ic_launcher_foreground.xml b/demohouse/hgdoll/android/app/src/main/res/drawable/ic_launcher_foreground.xml new file mode 100644 index 00000000..2b068d11 --- /dev/null +++ b/demohouse/hgdoll/android/app/src/main/res/drawable/ic_launcher_foreground.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml b/demohouse/hgdoll/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml new file mode 100644 index 00000000..036d09bc --- /dev/null +++ b/demohouse/hgdoll/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml b/demohouse/hgdoll/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml new file mode 100644 index 00000000..036d09bc --- /dev/null +++ b/demohouse/hgdoll/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-hdpi/ic_launcher.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-hdpi/ic_launcher.webp new file mode 
100644 index 00000000..e98979ec Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-hdpi/ic_launcher.webp differ diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-hdpi/ic_launcher_foreground.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-hdpi/ic_launcher_foreground.webp new file mode 100644 index 00000000..e46f31b7 Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-hdpi/ic_launcher_foreground.webp differ diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp new file mode 100644 index 00000000..b572245c Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp differ diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-mdpi/ic_launcher.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-mdpi/ic_launcher.webp new file mode 100644 index 00000000..0ecbad5f Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-mdpi/ic_launcher.webp differ diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-mdpi/ic_launcher_foreground.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-mdpi/ic_launcher_foreground.webp new file mode 100644 index 00000000..4ba2aa80 Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-mdpi/ic_launcher_foreground.webp differ diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp new file mode 100644 index 00000000..c7cb8106 Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp differ diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-xhdpi/ic_launcher.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-xhdpi/ic_launcher.webp new file mode 100644 index 00000000..b15f8179 
Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-xhdpi/ic_launcher.webp differ diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-xhdpi/ic_launcher_foreground.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-xhdpi/ic_launcher_foreground.webp new file mode 100644 index 00000000..b3ce67e6 Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-xhdpi/ic_launcher_foreground.webp differ diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp new file mode 100644 index 00000000..0dfe4ca5 Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp differ diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp new file mode 100644 index 00000000..01666e0a Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.webp differ diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_foreground.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_foreground.webp new file mode 100644 index 00000000..a435f8cf Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_foreground.webp differ diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp new file mode 100644 index 00000000..27e9d377 Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp differ diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp new file mode 100644 index 00000000..dcc5c2a2 Binary 
files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.webp differ diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_foreground.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_foreground.webp new file mode 100644 index 00000000..9156af0d Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_foreground.webp differ diff --git a/demohouse/hgdoll/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp new file mode 100644 index 00000000..d9e9a380 Binary files /dev/null and b/demohouse/hgdoll/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp differ diff --git a/demohouse/hgdoll/android/app/src/main/res/values/colors.xml b/demohouse/hgdoll/android/app/src/main/res/values/colors.xml new file mode 100644 index 00000000..f8c6127d --- /dev/null +++ b/demohouse/hgdoll/android/app/src/main/res/values/colors.xml @@ -0,0 +1,10 @@ + + + #FFBB86FC + #FF6200EE + #FF3700B3 + #FF03DAC5 + #FF018786 + #FF000000 + #FFFFFFFF + \ No newline at end of file diff --git a/demohouse/hgdoll/android/app/src/main/res/values/ic_launcher_background.xml b/demohouse/hgdoll/android/app/src/main/res/values/ic_launcher_background.xml new file mode 100644 index 00000000..580fcb50 --- /dev/null +++ b/demohouse/hgdoll/android/app/src/main/res/values/ic_launcher_background.xml @@ -0,0 +1,4 @@ + + + #88B0EF + \ No newline at end of file diff --git a/demohouse/hgdoll/android/app/src/main/res/values/strings.xml b/demohouse/hgdoll/android/app/src/main/res/values/strings.xml new file mode 100644 index 00000000..0a7c39b8 --- /dev/null +++ b/demohouse/hgdoll/android/app/src/main/res/values/strings.xml @@ -0,0 +1,12 @@ + + + HGDoll + 截图服务 + 用于显示截图服务的通知 + 正在运行截图服务 + 点击停止截图服务 + 语音识别服务 + 用于显示语音识别服务的通知 + 正在运行语音识别服务 + 点击停止语音识别服务 + \ No newline at end of file diff --git 
a/demohouse/hgdoll/android/app/src/main/res/values/themes.xml b/demohouse/hgdoll/android/app/src/main/res/values/themes.xml new file mode 100644 index 00000000..bc6743f3 --- /dev/null +++ b/demohouse/hgdoll/android/app/src/main/res/values/themes.xml @@ -0,0 +1,5 @@ + + + +