diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 1393f0b08..cb8747c2b 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -798,7 +798,8 @@ int main(int argc, char ** argv) { } // In interactive mode, respect the maximum number of tokens and drop back to user input when reached. - if (params.interactive && n_remain <= 0 && params.n_predict != -1) { + // We skip this logic when n_predict == -1 (infinite) or -2 (stop at context size). + if (params.interactive && n_remain <= 0 && params.n_predict >= 0) { n_remain = params.n_predict; is_interacting = true; }