Add a "Speaker Change" tag to the txt and vtt outputs, as in the json output, when the tinydiarize model is used

2023-07-07 09:00:09 -04:00 · 2023-07-07 09:00:09 -04:00 · d029175359
parent 4774d2feb0
commit d029175359
1 changed files with 16 additions and 0 deletions
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@ -332,6 +332,7 @@ bool output_txt(struct whisper_context * ctx, const char * fname, const whisper_
    fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);

    const int n_segments = whisper_full_n_segments(ctx);
+    bool speaker_turned = false;
    for (int i = 0; i < n_segments; ++i) {
        const char * text = whisper_full_get_segment_text(ctx, i);
        std::string speaker = "";
@ -343,6 +344,13 @@ bool output_txt(struct whisper_context * ctx, const char * fname, const whisper_
            speaker = estimate_diarization_speaker(pcmf32s, t0, t1);
        }

+        if (params.tinydiarize) {
+            if (speaker_turned) {
+                speaker.insert(0, "Speaker Change: ");
+            }
+            speaker_turned = whisper_full_get_segment_speaker_turn_next(ctx, i);
+        }
+
        fout << speaker << text << "\n";
    }

@ -361,6 +369,7 @@ bool output_vtt(struct whisper_context * ctx, const char * fname, const whisper_
    fout << "WEBVTT\n\n";

    const int n_segments = whisper_full_n_segments(ctx);
+    bool speaker_turned = false;
    for (int i = 0; i < n_segments; ++i) {
        const char * text = whisper_full_get_segment_text(ctx, i);
        const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
@ -374,6 +383,13 @@ bool output_vtt(struct whisper_context * ctx, const char * fname, const whisper_
            speaker.append(">");
        }

+        if (params.tinydiarize) {
+            if (speaker_turned) {
+                speaker.insert(0, "Speaker Change: ");
+            }
+            speaker_turned = whisper_full_get_segment_speaker_turn_next(ctx, i);
+        }
+
        fout << to_timestamp(t0) << " --> " << to_timestamp(t1) << "\n";
        fout << speaker << text << "\n\n";
    }