From 571083f508266c4eb5cb5457d836df5dd3c173ce Mon Sep 17 00:00:00 2001
From: Jhen-Jie Hong
Date: Sat, 2 Sep 2023 08:31:46 +0800
Subject: [PATCH] server : avoid aniprompt in probabilities of final response
 (#2849)

---
 examples/server/server.cpp | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 09eac2ec2..94def943b 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1379,7 +1379,13 @@ int main(int argc, char **argv)
                 }
             }
 
-            const json data = format_final_response(llama, llama.generated_text, llama.generated_token_probs);
+            auto probs = llama.generated_token_probs;
+            if (llama.params.n_probs > 0 && llama.stopped_word) {
+                const std::vector<llama_token> stop_word_toks = llama_tokenize(llama.ctx, llama.stopping_word, false);
+                probs = std::vector<completion_token_output>(llama.generated_token_probs.begin(), llama.generated_token_probs.end() - stop_word_toks.size());
+            }
+
+            const json data = format_final_response(llama, llama.generated_text, probs);
 
             llama_print_timings(llama.ctx);
 
@@ -1456,7 +1462,11 @@ int main(int argc, char **argv)
                 if (!llama.has_next_token) {
                     // Generation is done, send extra information.
-                    const json data = format_final_response(llama, "", llama.generated_token_probs);
+                    const json data = format_final_response(
+                        llama,
+                        "",
+                        std::vector<completion_token_output>(llama.generated_token_probs.begin(), llama.generated_token_probs.begin() + sent_token_probs_index)
+                    );
 
                     const std::string str =
                         "data: " +