From ff36415a86640200328ac35defe03c0f7d874051 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 24 Nov 2022 20:15:07 +0200 Subject: [PATCH] talk.wasm : update video link + some minor fixes --- examples/talk.wasm/emscripten.cpp | 22 ++++++++++++---------- examples/talk.wasm/gpt-2.cpp | 6 +++--- examples/talk.wasm/index-tmpl.html | 4 ++-- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/examples/talk.wasm/emscripten.cpp b/examples/talk.wasm/emscripten.cpp index 6bd3564..f0add29 100644 --- a/examples/talk.wasm/emscripten.cpp +++ b/examples/talk.wasm/emscripten.cpp @@ -135,11 +135,9 @@ void talk_main(size_t index) { talk_set_status("processing ..."); - g_force_speak = false; - t_last = t_now; - { + if (!g_force_speak) { const auto t_start = std::chrono::high_resolution_clock::now(); int ret = whisper_full(ctx, wparams, pcmf32.data(), pcmf32.size()); @@ -156,18 +154,22 @@ void talk_main(size_t index) { { std::string text_heard; - const int n_segments = whisper_full_n_segments(ctx); - for (int i = n_segments - 1; i < n_segments; ++i) { - const char * text = whisper_full_get_segment_text(ctx, i); + if (!g_force_speak) { + const int n_segments = whisper_full_n_segments(ctx); + for (int i = n_segments - 1; i < n_segments; ++i) { + const char * text = whisper_full_get_segment_text(ctx, i); - const int64_t t0 = whisper_full_get_segment_t0(ctx, i); - const int64_t t1 = whisper_full_get_segment_t1(ctx, i); + const int64_t t0 = whisper_full_get_segment_t0(ctx, i); + const int64_t t1 = whisper_full_get_segment_t1(ctx, i); - printf ("[%s --> %s] %s\n", to_timestamp(t0).c_str(), to_timestamp(t1).c_str(), text); + printf ("[%s --> %s] %s\n", to_timestamp(t0).c_str(), to_timestamp(t1).c_str(), text); - text_heard += text; + text_heard += text; + } } + g_force_speak = false; + // remove text between brackets using regex { std::regex re("\\[.*?\\]"); diff --git a/examples/talk.wasm/gpt-2.cpp b/examples/talk.wasm/gpt-2.cpp index c6526e4..0bffe72 100644 --- a/examples/talk.wasm/gpt-2.cpp +++ b/examples/talk.wasm/gpt-2.cpp @@ -812,9 +812,9 @@ I'm fine, thanks. How are you? Thanks, I'm fine too. What are you doing? I'm just sitting here. It's a lovely day, isn't it? -Yes, it is. -Did you know that I'm a robot? -I wasn't aware of that. +Yes, it is. I love the weather this time of year. +I wish it would rain a little bit. +Me too. )"; std::mt19937 rng; diff --git a/examples/talk.wasm/index-tmpl.html b/examples/talk.wasm/index-tmpl.html index ec1e3b3..be95b1d 100644 --- a/examples/talk.wasm/index-tmpl.html +++ b/examples/talk.wasm/index-tmpl.html @@ -143,7 +143,7 @@

- Here is a short video of the demo in action: https://youtu.be/2om-7tFMaNs + Here is a short video of the demo in action: https://youtu.be/LeWKl8t1-Hc

@@ -507,7 +507,7 @@ context = new AudioContext({ sampleRate: 16000, channelCount: 1, - echoCancellation: true, + echoCancellation: false, autoGainControl: true, noiseSuppression: true, });