server : fix crash when prompt exceeds context size (#3996)

This commit was authored by:
Alexey Parfenov — 2023-11-11 05:48:21 +00:00 (committed by GitHub)
parent 34b0a08207
commit d96ca7ded7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -1557,15 +1557,6 @@ struct llama_server_context
slot.num_prompt_tokens = prompt_tokens.size();
if (!slot.params.cache_prompt)
{
llama_sampling_reset(slot.ctx_sampling);
slot.n_past = 0;
slot.num_prompt_tokens_processed = slot.num_prompt_tokens;
}
else
{
if (slot.params.n_keep < 0)
{
slot.params.n_keep = slot.num_prompt_tokens;
@ -1595,6 +1586,15 @@ struct llama_server_context
GGML_ASSERT(slot.num_prompt_tokens < slot.n_ctx);
}
if (!slot.params.cache_prompt)
{
llama_sampling_reset(slot.ctx_sampling);
slot.n_past = 0;
slot.num_prompt_tokens_processed = slot.num_prompt_tokens;
}
else
{
// push the prompt into the sampling context (do not apply grammar)
for (auto &token : prompt_tokens)
{