Add a Speaker Change tag to the txt and vtt outputs, as in the json output, when the tiny diarization model is used

pull/1086/head
Aniket 2023-07-07 09:00:09 -04:00
parent 4774d2feb0
commit d029175359
1 changed file with 16 additions and 0 deletions

View File

@ -332,6 +332,7 @@ bool output_txt(struct whisper_context * ctx, const char * fname, const whisper_
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);
const int n_segments = whisper_full_n_segments(ctx);
bool speaker_turned = false;
for (int i = 0; i < n_segments; ++i) {
const char * text = whisper_full_get_segment_text(ctx, i);
std::string speaker = "";
@ -343,6 +344,13 @@ bool output_txt(struct whisper_context * ctx, const char * fname, const whisper_
speaker = estimate_diarization_speaker(pcmf32s, t0, t1);
}
if (params.tinydiarize) {
if (speaker_turned) {
speaker.insert(0, "Speaker Change: ");
}
speaker_turned = whisper_full_get_segment_speaker_turn_next(ctx, i);
}
fout << speaker << text << "\n";
}
@ -361,6 +369,7 @@ bool output_vtt(struct whisper_context * ctx, const char * fname, const whisper_
fout << "WEBVTT\n\n";
const int n_segments = whisper_full_n_segments(ctx);
bool speaker_turned = false;
for (int i = 0; i < n_segments; ++i) {
const char * text = whisper_full_get_segment_text(ctx, i);
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
@ -374,6 +383,13 @@ bool output_vtt(struct whisper_context * ctx, const char * fname, const whisper_
speaker.append(">");
}
if (params.tinydiarize) {
if (speaker_turned) {
speaker.insert(0, "Speaker Change: ");
}
speaker_turned = whisper_full_get_segment_speaker_turn_next(ctx, i);
}
fout << to_timestamp(t0) << " --> " << to_timestamp(t1) << "\n";
fout << speaker << text << "\n\n";
}