From 5dd1f45e1d5c017af22c209ff4d6f35f2fa8d5e3 Mon Sep 17 00:00:00 2001
From: marcus <marcus.s.dunn@gmail.com>
Date: Mon, 4 Dec 2023 13:30:27 -0800
Subject: [PATCH] use precomputed token text for grammar sampling

---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index fd905ade7..c16819a48 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -7361,7 +7361,7 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
 
     for (size_t i = 0; i < candidates->size; ++i) {
         const llama_token id    = candidates->data[i].id;
-        const std::string piece = llama_token_to_piece(ctx, id);
+        const std::string piece = ctx->model.vocab.id_to_token[id].text;
         if (id == eos) {
             if (!allow_eos) {
                 candidates->data[i].logit = -INFINITY;