llama : fix kv_cache n init (close #1903)

This commit is contained in:
Georgi Gerganov 2023-06-17 19:30:22 +03:00
parent 86c7571864
commit 051e1b0e6a
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
3 changed files with 4 additions and 0 deletions

1
.gitignore vendored
View file

@ -34,6 +34,7 @@ models/*
/perplexity
/embedding
/train-text-from-scratch
/simple
/benchmark-matmult
/vdot
/server

View file

@ -38,6 +38,7 @@ else()
add_subdirectory(benchmark)
add_subdirectory(baby-llama)
add_subdirectory(train-text-from-scratch)
add_subdirectory(simple)
if (LLAMA_METAL)
add_subdirectory(metal)
endif()

View file

@ -886,6 +886,7 @@ static bool kv_cache_init(
const int64_t n_elements = n_embd*n_mem;
cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*MB);
cache.n = 0;
struct ggml_init_params params;
params.mem_size = cache.buf.size;
@ -904,6 +905,7 @@ static bool kv_cache_init(
ggml_set_name(cache.k, "cache_k");
ggml_set_name(cache.v, "cache_v");
(void) n_gpu_layers;
#ifdef GGML_USE_CUBLAS
if (n_gpu_layers > n_layer + 1) {
ggml_cuda_assign_buffers_no_scratch(cache.v);