import { PreTrainedTokenizer } from "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]";

// import 'phonemize' code from Kokoro.js repo
import { phonemize } from "./phonemizer.js";

const { RawTensor, RawSession } = Supabase.ai;

const SAMPLE_RATE = 24000; // 24 kHz

/*
> The size of s and c is 256 × 1
*/
const STYLE_DIM = 256;
const MODEL_ID = "onnx-community/Kokoro-82M-ONNX";

// https://huggingface.co/onnx-community/Kokoro-82M-ONNX#samples
const ALLOWED_VOICES = [
  "af_bella",
  "af_nicole",
  "af_sarah",
  "af_sky",
  "am_adam",
  "am_michael",
  "bf_emma",
  "bf_isabella",
  "bm_george",
  "bm_lewis",
];
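
// The model session is created once at module load (worker cold start) and reused
// across all requests handled below.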
const session = await RawSession.fromHuggingFace(MODEL_ID);

Deno.serve(async (req) => {
  const params = new URL(req.url).searchParams;
  const text = params.get("text") ?? "Hello from Supabase!";
  const voice = params.get("voice") ?? "af_bella";

  if (!ALLOWED_VOICES.includes(voice)) {
    return Response.json({
      error: `invalid voice '${voice}'`,
      must_be_one_of: ALLOWED_VOICES,
    }, { status: 400 });
  }

  const tokenizer = await loadTokenizer();
  const language = voice.at(0); // 'a'merican | 'b'ritish
  const phonemes = await phonemize(text, language);
  const { input_ids } = tokenizer(phonemes, {
    truncation: true,
  });

  // Select the voice style based on the number of input tokens
  const num_tokens = Math.max(
    input_ids.dims.at(-1) - 2, // without the padding tokens
    0,
  );

  const voiceStyle = await loadVoiceStyle(voice, num_tokens);

  const { waveform } = await session.run({
    input_ids,
    style: voiceStyle,
    speed: new RawTensor("float32", [1], [1]),
  });

  // Do `wave` encoding in the Rust backend
  const audio = await waveform.tryEncodeAudio(SAMPLE_RATE);

  return new Response(audio, {
    headers: {
      "Content-Type": "audio/wav",
    },
  });
});
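
// Example client call (a minimal sketch; `text-to-speech` is a hypothetical function
// name and `<project-ref>` a placeholder, and depending on your JWT settings the
// request may also need an Authorization header):
//
//   const res = await fetch(
//     "https://<project-ref>.supabase.co/functions/v1/text-to-speech?text=Hello%20there&voice=af_bella",
//   );
//   const wav = new Uint8Array(await res.arrayBuffer());
//   await Deno.writeFile("hello.wav", wav);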

async function loadVoiceStyle(voice: string, num_tokens: number) {
  const voice_url =
    `https://huggingface.co/onnx-community/Kokoro-82M-ONNX/resolve/main/voices/${voice}.bin?download=true`;

  console.log("loading voice:", voice_url);

  const voiceBuffer = await fetch(voice_url).then(async (res) =>
    await res.arrayBuffer()
  );

  const offset = num_tokens * STYLE_DIM;
  const voiceData = new Float32Array(voiceBuffer).slice(
    offset,
    offset + STYLE_DIM,
  );

  return new RawTensor("float32", voiceData, [1, STYLE_DIM]);
}
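
// Note on the slice in loadVoiceStyle: each voice .bin appears to be a flat float32
// buffer holding one 256-value style vector per input length, so the vector for
// `num_tokens` tokens starts at element `num_tokens * STYLE_DIM`. For example,
// num_tokens = 10 gives offset 2560 and the slice covers elements [2560, 2816).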

async function loadTokenizer() {
  // BUG: AutoTokenizer.from_pretrained(MODEL_ID) fails with an "invalid 'h', not JSON"
  // parse error, so we fetch the tokenizer assets manually instead.
  // const tokenizer = await AutoTokenizer.from_pretrained(MODEL_ID);

  const tokenizerData = await fetch(
    "https://huggingface.co/onnx-community/Kokoro-82M-ONNX/resolve/main/tokenizer.json?download=true",
  ).then(async (res) => await res.json());

  const tokenizerConfig = await fetch(
    "https://huggingface.co/onnx-community/Kokoro-82M-ONNX/resolve/main/tokenizer_config.json?download=true",
  ).then(async (res) => await res.json());

  return new PreTrainedTokenizer(tokenizerData, tokenizerConfig);
}