From a872a2b28eaefc8d464eaa535c94deeb501666f9 Mon Sep 17 00:00:00 2001 From: Shouzheng Liu Date: Thu, 17 Aug 2023 03:35:53 -0400 Subject: [PATCH] ggml-alloc : fix discrepency between measure&eval (#2639) The GGML memory allocator consistently places a tensor within the optimal-fit memory block, which is the smallest block capable of accommodating the tensor's size. During the measurement phase, the final block is generously sized, ensuring it never qualifies as the optimal-fit block as long as there exists another block capable of accommodating the tensor. Nevertheless, in the evaluation phase, the last block is constrained in size and could potentially qualify as the optimal-fit block. Consequently, there exists the possibility of a tensor being allocated to a different region during evaluation, leading to more memory fragmentation in our scratch buffer. This recent commit guarantees uniform behavior of the allocator across both the measurement and evaluation phases, eliminating discrepancies between the two. --- ggml-alloc.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/ggml-alloc.c b/ggml-alloc.c index 8de28cf9d..3ee98d03d 100644 --- a/ggml-alloc.c +++ b/ggml-alloc.c @@ -113,10 +113,10 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor) size_t max_avail = 0; - // find the best fitting free block + // find the best fitting free block besides the last block int best_fit_block = -1; size_t best_fit_size = SIZE_MAX; - for (int i = 0; i < alloc->n_free_blocks; i++) { + for (int i = 0; i < alloc->n_free_blocks - 1; i++) { struct free_block * block = &alloc->free_blocks[i]; max_avail = MAX(max_avail, block->size); if (block->size >= size && block->size <= best_fit_size) { @@ -128,10 +128,17 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor) AT_PRINTF("block %d\n", best_fit_block); if (best_fit_block == -1) { - fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n", - __func__, size, max_avail); - GGML_ASSERT(!"not enough space in the buffer"); + // the last block is our last resort + struct free_block * block = &alloc->free_blocks[alloc->n_free_blocks - 1]; + if (block->size >= size) { + best_fit_block = alloc->n_free_blocks - 1; + max_avail = MAX(max_avail, block->size); + } else { + fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n", + __func__, size, max_avail); + GGML_ASSERT(!"not enough space in the buffer"); return; + } } struct free_block * block = &alloc->free_blocks[best_fit_block]; void * addr = block->addr;