From 1638757767072a4957f52b9e3594f0b67610631b Mon Sep 17 00:00:00 2001
From: Martin Krasser
Date: Thu, 10 Aug 2023 12:16:38 +0200
Subject: [PATCH] Fix grammar-based sampling issue in server (#2566)

---
 examples/server/server.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 10ae264f5..637f6d6c2 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -196,6 +196,7 @@ struct llama_server_context
     llama_context *ctx = nullptr;
     gpt_params params;
 
+    grammar_parser::parse_state parsed_grammar;
     llama_grammar *grammar = nullptr;
 
     bool truncated = false;
@@ -241,10 +242,13 @@ struct llama_server_context
         stopped_limit = false;
         stopping_word = "";
         multibyte_pending = 0;
-        grammar = nullptr;
-
         n_remain = 0;
         n_past = 0;
+
+        if (grammar != nullptr) {
+            llama_grammar_free(grammar);
+            grammar = nullptr;
+        }
     }
 
     bool loadModel(const gpt_params &params_)
@@ -265,8 +269,6 @@ struct llama_server_context
     bool loadGrammar()
    {
         if (!params.grammar.empty()) {
-            grammar_parser::parse_state parsed_grammar;
-
             parsed_grammar = grammar_parser::parse(params.grammar.c_str());
             // will be empty (default) if there are parse errors
             if (parsed_grammar.rules.empty()) {
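For context, a minimal sketch (not part of the patch) of the grammar lifecycle this change implies: the parse_state is kept alive alongside the llama_grammar built from it, any grammar left over from a previous request is freed before the next one, and the grammar is rebuilt from the request's grammar string. It assumes the llama.cpp grammar API of that period (grammar_parser::parse, parse_state::c_rules, llama_grammar_init, llama_grammar_free); the grammar_holder struct and its reset/rebuild helpers are hypothetical stand-ins for the server context above.

// Sketch only (assumed API, hypothetical helper struct) -- not code from the patch.
#include <cstdio>
#include <string>
#include <vector>

#include "llama.h"
#include "grammar-parser.h"

struct grammar_holder {
    grammar_parser::parse_state parsed_grammar; // owns the parsed rules
    llama_grammar *grammar = nullptr;           // sampling state built from them

    // Mirrors the change to the reset path above: free the grammar from the
    // previous request instead of just dropping the pointer.
    void reset() {
        if (grammar != nullptr) {
            llama_grammar_free(grammar);
            grammar = nullptr;
        }
    }

    // Hypothetical helper: parse the grammar text and build a llama_grammar.
    bool rebuild(const std::string &grammar_text) {
        if (grammar_text.empty()) {
            return true; // no grammar requested for this completion
        }
        parsed_grammar = grammar_parser::parse(grammar_text.c_str());
        if (parsed_grammar.rules.empty()) {
            // parse errors leave the rule set empty
            fprintf(stderr, "failed to parse grammar\n");
            return false;
        }
        std::vector<const llama_grammar_element *> rules = parsed_grammar.c_rules();
        grammar = llama_grammar_init(rules.data(), rules.size(),
                                     parsed_grammar.symbol_ids.at("root"));
        return grammar != nullptr;
    }
};

Freeing in the reset path rather than only overwriting the pointer avoids leaking the previous request's grammar, and holding parsed_grammar as a member keeps the parsed rules available for as long as the context lives.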