Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
f93c9d3
make qwen model configurable
haixuanTao Jul 23, 2025
f7eddb2
Default to llama cpp
haixuanTao Jul 23, 2025
3484287
Make whisper better by making it output punctuation
haixuanTao Jul 29, 2025
d36159f
Making command public
haixuanTao Jul 30, 2025
c35bc70
Add dora-openai-websocket
haixuanTao Jul 30, 2025
b33950e
Add an example using whisper
haixuanTao Jul 30, 2025
f48a342
Minor fix and improvements
haixuanTao Jul 30, 2025
d6edb85
Improvement of the overall language pipeline to be more resilient
haixuanTao Aug 1, 2025
ab678cd
make tracing optional
haixuanTao Aug 1, 2025
b18d816
improve dataflow configuration for whisper
haixuanTao Aug 1, 2025
456796e
Improved readme
haixuanTao Aug 2, 2025
34233ce
add dora install within the readme
haixuanTao Aug 2, 2025
78c2c7c
Use quantized model instead of fp16 for faster response and lower mem…
haixuanTao Aug 2, 2025
b8c714e
Adding model selection
haixuanTao Aug 2, 2025
1de576c
Add en-core-web-sm as kokoro dependency
haixuanTao Aug 10, 2025
ef9db39
Add history to the conversation
haixuanTao Aug 10, 2025
c1edb29
Add speech started topic
haixuanTao Aug 10, 2025
ab0a29d
Improve speech detection messages
haixuanTao Aug 15, 2025
d96ce3a
Improve chinese support
haixuanTao Aug 15, 2025
09a6af4
Make server host and port configurable
haixuanTao Aug 28, 2025
e531c1b
Add dora-openai-websocket as build specification
haixuanTao Aug 28, 2025
f5017d2
Make openai websocket server a pip library
haixuanTao Aug 29, 2025
167604a
pushing whisper-1-template
haixuanTao Aug 29, 2025
012facb
Use pip instead of cargo
haixuanTao Aug 29, 2025
f4c08f3
Fixing formatting
haixuanTao Aug 29, 2025
7b2aa7c
Remove licensing
haixuanTao Aug 29, 2025
2d66081
Accumulate audio in vad to avoid getting oversaturated by audio chunks
haixuanTao Sep 9, 2025
3541f18
Remove 24khz downsampling
haixuanTao Sep 9, 2025
8a0a6b6
Fix template with tool call
haixuanTao Sep 10, 2025
a7b21dd
Add max token
haixuanTao Sep 10, 2025
6e3fa62
Add better responsedone output
haixuanTao Sep 10, 2025
047a1ea
fix whisper to support tool call
haixuanTao Sep 10, 2025
34e88db
add qwen tooling
haixuanTao Sep 10, 2025
007af2f
fix tool call and remove tool call on function output
haixuanTao Sep 11, 2025
bc38f19
Revert whisper logic
haixuanTao Sep 16, 2025
feb53e2
use $ sign escaping in tool call
haixuanTao Sep 17, 2025
fa192c3
Re-adding "..." to further improve whisper
haixuanTao Sep 17, 2025
aa0a60c
Remove queue from whisper
haixuanTao Sep 18, 2025
38e8adf
Adding system prompt into dataflow
haixuanTao Sep 20, 2025
20de6af
Improve websocket support
haixuanTao Sep 24, 2025
5edad66
annotate item_id with unique value
haixuanTao Sep 24, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
401 changes: 372 additions & 29 deletions Cargo.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ members = [
"node-hub/dora-rerun",
"node-hub/terminal-print",
"node-hub/openai-proxy-server",
"node-hub/dora-openai-websocket",
"node-hub/dora-kit-car",
"node-hub/dora-object-to-pose",
"node-hub/dora-mistral-rs",
Expand Down
1 change: 1 addition & 0 deletions binaries/cli/src/command/coordinator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use dora_tracing::TracingBuilder;
use eyre::Context;
use std::net::{IpAddr, SocketAddr};
use tokio::runtime::Builder;
#[cfg(feature = "tracing")]
use tracing::level_filters::LevelFilter;

#[derive(Debug, clap::Args)]
Expand Down
1 change: 1 addition & 0 deletions binaries/cli/src/command/daemon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use std::{
path::PathBuf,
};
use tokio::runtime::Builder;
#[cfg(feature = "tracing")]
use tracing::level_filters::LevelFilter;

#[derive(Debug, clap::Args)]
Expand Down
32 changes: 16 additions & 16 deletions binaries/cli/src/command/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,22 @@ mod up;

pub use run::run_func;

use build::Build;
use check::Check;
use coordinator::Coordinator;
use daemon::Daemon;
use destroy::Destroy;
use eyre::Context;
use graph::Graph;
use list::ListArgs;
use logs::LogsArgs;
use new::NewArgs;
use run::Run;
use runtime::Runtime;
use self_::SelfSubCommand;
use start::Start;
use stop::Stop;
use up::Up;
pub use build::Build;
pub use check::Check;
pub use coordinator::Coordinator;
pub use daemon::Daemon;
pub use destroy::Destroy;
pub use eyre::Context;
pub use graph::Graph;
pub use list::ListArgs;
pub use logs::LogsArgs;
pub use new::NewArgs;
pub use run::Run;
pub use runtime::Runtime;
pub use self_::SelfSubCommand;
pub use start::Start;
pub use stop::Stop;
pub use up::Up;

/// dora-rs cli client
#[derive(Debug, clap::Subcommand)]
Expand Down
1 change: 1 addition & 0 deletions binaries/cli/src/command/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use crate::{
session::DataflowSession,
};
use dora_daemon::{Daemon, LogDestination, flume};
#[cfg(feature = "tracing")]
use dora_tracing::TracingBuilder;
use eyre::Context;
use tokio::runtime::Builder;
Expand Down
16 changes: 8 additions & 8 deletions binaries/cli/src/command/start/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,28 +31,28 @@ mod attach;
pub struct Start {
/// Path to the dataflow descriptor file
#[clap(value_name = "PATH")]
dataflow: String,
pub dataflow: String,
/// Assign a name to the dataflow
#[clap(long)]
name: Option<String>,
pub name: Option<String>,
/// Address of the dora coordinator
#[clap(long, value_name = "IP", default_value_t = LOCALHOST)]
coordinator_addr: IpAddr,
pub coordinator_addr: IpAddr,
/// Port number of the coordinator control server
#[clap(long, value_name = "PORT", default_value_t = DORA_COORDINATOR_PORT_CONTROL_DEFAULT)]
coordinator_port: u16,
pub coordinator_port: u16,
/// Attach to the dataflow and wait for its completion
#[clap(long, action)]
attach: bool,
pub attach: bool,
/// Run the dataflow in background
#[clap(long, action)]
detach: bool,
pub detach: bool,
/// Enable hot reloading (Python only)
#[clap(long, action)]
hot_reload: bool,
pub hot_reload: bool,
// Use UV to run nodes.
#[clap(long, action)]
uv: bool,
pub uv: bool,
}

impl Executable for Start {
Expand Down
2 changes: 1 addition & 1 deletion binaries/cli/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::{
path::PathBuf,
};

mod command;
pub mod command;
mod common;
mod formatting;
pub mod output;
Expand Down
78 changes: 78 additions & 0 deletions examples/openai-realtime/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Dora-OpenAI-Realtime (ROOT Repo)

## Front End

### Build Client

```bash
git clone [email protected]:moxin-org/moly.git
cd moly
cargo build --release
```

### Run Client

```bash
cd moly
cargo run -r
```

## Server

### Build server

```bash
uv venv --seed -p 3.11
source .venv/bin/activate
uv pip install dora-rs-cli dora-rs
dora build whisper-template-metal.yml --uv ## very long process
```

### Run server

```bash
source .venv/bin/activate
dora up
cargo run --release -p dora-openai-websocket
```

## On finish

```bash
dora destroy
```

## GUI

- Go to MolyServer Tab
- Add a custom Provider
- In API Host, use:

- Name: dora-websocket
- API Host: ws://127.0.0.1:8123
- Type: OpenAI Realtime

- Then go to Chat Tab
- New Chat
- ( Make sure the server is running with: `cargo run --release -p dora-openai-websocket`)
- On bottom right, click on 🎧 icon.
> If nothing happens, the server could not be found.
- Click on start
- Wait for the first AI greeting
- Start speaking!
- You should get AI response!

### WIP: Moyoyo

## {Recommended} Install git-lfs

```bash
brew install git-lfs # MacOS
```

## Clone Moxin Voice Chat

```bash
git lfs install
git clone https://github.com/moxin-org/moxin-voice-chat.git
```
71 changes: 71 additions & 0 deletions examples/openai-realtime/whisper-1-template-metal.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Dora dataflow template: realtime speech pipeline (websocket -> VAD -> STT -> LLM -> TTS).
# NOTE(review): the scraped copy had lost all indentation (invalid YAML); nesting below is
# restored to the conventional dora dataflow layout — confirm against the repository file.
# Placeholders NODE_ID, LLM_ID, TOOLS_ID and SYSTEM_PROMPT_ID are substituted by tooling.
nodes:
  # Dynamic websocket bridge: receives client audio, forwards TTS audio / transcripts back.
  - id: NODE_ID
    build: pip install -e ../../node-hub/dora-openai-websocket
    path: dynamic
    inputs:
      audio: tts/audio
      transcript: stt/text
      text: llm/text
      speech_started: dora-vad/timestamp_start
      speech_stopped: dora-vad/timestamp_end
    outputs:
      - audio
      - text
      - function_call_output

  # Voice-activity detection: segments the incoming audio stream into speech chunks.
  - id: dora-vad
    build: pip install -e ../../node-hub/dora-vad
    path: dora-vad
    inputs:
      audio:
        source: NODE_ID/audio
        queue_size: 10
    outputs:
      - audio
      - timestamp_start
      - timestamp_end
    env:
      MIN_SPEECH_DURATION_MS: 500
      MIN_SILENCE_DURATION_MS: 500
      THRESHOLD: 0.6

  # Speech-to-text via distil-whisper on the VAD speech segments.
  - id: stt
    build: pip install -e ../../node-hub/dora-distil-whisper
    path: dora-distil-whisper
    inputs:
      audio:
        source: dora-vad/audio
        queue_size: 10
    outputs:
      - text
      - word
      - speech_started
    env:
      TARGET_LANGUAGE: english

  # LLM (qwen) generating responses from transcripts; also handles tool-call results.
  - id: llm
    build: pip install -e ../../node-hub/dora-qwen
    path: dora-qwen
    inputs:
      text: stt/text
      text_to_audio: NODE_ID/text
      text_tool_response: NODE_ID/function_call_output
    outputs:
      - text
    env:
      MODEL_NAME_OR_PATH: LLM_ID
      # Case-insensitive glob matching a Q6_K quantized GGUF file.
      MODEL_FILE_PATTERN: "*[qQ]6_[kK].[gG][gG][uU][fF]"
      # NOTE(review): underscore-separated numerals are integers in YAML 1.1 but plain
      # strings in YAML 1.2 — confirm the consumer parses "10_000" as intended.
      MAX_TOKENS: 10_000
      CONTEXT_SIZE: 10_000
      TOOLS_JSON: |
        TOOLS_ID
      SYSTEM_PROMPT: |
        SYSTEM_PROMPT_ID

  # Text-to-speech: converts LLM text back into audio for the websocket client.
  - id: tts
    build: pip install -e ../../node-hub/dora-kokoro-tts
    path: dora-kokoro-tts
    inputs:
      text: llm/text
    outputs:
      - audio
70 changes: 70 additions & 0 deletions examples/openai-realtime/whisper-template-metal.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Dora dataflow template: realtime speech pipeline (websocket -> VAD -> STT -> LLM -> TTS),
# variant using an explicit response.create trigger instead of text_to_audio routing.
# NOTE(review): the scraped copy had lost all indentation (invalid YAML); nesting below is
# restored to the conventional dora dataflow layout — confirm against the repository file.
# Placeholders NODE_ID, LLM_ID, TOOLS_ID and SYSTEM_PROMPT_ID are substituted by tooling.
nodes:
  # Dynamic websocket bridge: receives client audio, forwards TTS audio / transcripts back.
  - id: NODE_ID
    build: pip install -e ../../node-hub/dora-openai-websocket
    path: dynamic
    inputs:
      audio: tts/audio
      transcript: stt/text
      text: llm/text
      speech_started: dora-vad/timestamp_start
      speech_stopped: dora-vad/timestamp_end
    outputs:
      - audio
      - response.create
      - function_call_output

  # Voice-activity detection: segments the incoming audio stream into speech chunks.
  - id: dora-vad
    build: pip install -e ../../node-hub/dora-vad
    path: dora-vad
    inputs:
      audio:
        source: NODE_ID/audio
        queue_size: 3
    outputs:
      - audio
      - timestamp_start
      - timestamp_end
    env:
      MIN_SPEECH_DURATION_MS: 1000
      MIN_SILENCE_DURATION_MS: 1000
      THRESHOLD: 0.5

  # Speech-to-text via distil-whisper; text_noise lets it discount the assistant's own speech.
  - id: stt
    build: pip install -e ../../node-hub/dora-distil-whisper
    path: dora-distil-whisper
    inputs:
      audio: dora-vad/audio
      text_noise: llm/text
    outputs:
      - text
      - word
      - speech_started
    env:
      TARGET_LANGUAGE: english

  # LLM (qwen) generating responses from transcripts; also handles tool-call results.
  - id: llm
    build: pip install -e ../../node-hub/dora-qwen
    path: dora-qwen
    inputs:
      text: stt/text
      response.create: NODE_ID/response.create
      text_tool_response: NODE_ID/function_call_output
    outputs:
      - text
    env:
      MODEL_NAME_OR_PATH: LLM_ID
      # Case-insensitive glob matching a Q6_K quantized GGUF file.
      MODEL_FILE_PATTERN: "*[qQ]6_[kK].[gG][gG][uU][fF]"
      # NOTE(review): underscore-separated numerals are integers in YAML 1.1 but plain
      # strings in YAML 1.2 — confirm the consumer parses "30_000" as intended.
      MAX_TOKENS: 30_000
      CONTEXT_SIZE: 30_000
      # NOTE(review): this file uses a folded scalar (>) where the sibling template uses
      # a literal scalar (|) — confirm the difference in newline handling is intentional.
      TOOLS_JSON: >
        TOOLS_ID
      SYSTEM_PROMPT: |
        SYSTEM_PROMPT_ID

  # Text-to-speech: converts LLM text back into audio for the websocket client.
  - id: tts
    build: pip install -e ../../node-hub/dora-kokoro-tts
    path: dora-kokoro-tts
    inputs:
      text: llm/text
    outputs:
      - audio
Loading