From 48b7ff193e64c97ab174280ba0eb8d14b47c49ba Mon Sep 17 00:00:00 2001
From: slaren
Date: Fri, 22 Dec 2023 12:12:53 +0100
Subject: [PATCH] llama : fix platforms without mmap (#4578)

* llama : fix platforms without mmap

* win32 : limit prefetch size to the file size

* fix win32 error clobber, unnecessary std::string in std::runtime_error
---
 ggml-cuda.cu |  3 ++-
 ggml.c       |  6 ++++--
 llama.cpp    | 36 ++++++++++++++++++------------------
 3 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index ac91ee12e..37d7f2792 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -7702,7 +7702,8 @@ inline void ggml_cuda_op_scale(
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
 
-    const float scale = ((float *) dst->op_params)[0];
+    float scale;
+    memcpy(&scale, dst->op_params, sizeof(float));
 
     scale_f32_cuda(src0_dd, dst_dd, scale, ggml_nelements(src0), main_stream);
     CUDA_CHECK(cudaGetLastError());
diff --git a/ggml.c b/ggml.c
index 15e1984d1..3656422d7 100644
--- a/ggml.c
+++ b/ggml.c
@@ -10335,7 +10335,8 @@ static void ggml_compute_forward_scale_f32(
     }
 
     // scale factor
-    const float v = *(float *) dst->op_params;
+    float v;
+    memcpy(&v, dst->op_params, sizeof(float));
 
     const int ith = params->ith;
     const int nth = params->nth;
@@ -15152,7 +15153,8 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 // necessary for llama
                 if (src0->grad) {
-                    const float s = ((float *) tensor->op_params)[0];
+                    float s;
+                    memcpy(&s, tensor->op_params, sizeof(float));
 
                     src0->grad =
                         ggml_add_or_set(ctx,
diff --git a/llama.cpp b/llama.cpp
index cb0546c95..4e4495739 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -778,7 +778,7 @@ struct llama_file {
             throw std::runtime_error(format("read error: %s", strerror(errno)));
         }
         if (ret != 1) {
-            throw std::runtime_error(std::string("unexpectedly reached end of file"));
+            throw std::runtime_error("unexpectedly reached end of file");
         }
     }
 
@@ -931,29 +931,29 @@ struct llama_mmap {
 #elif defined(_WIN32)
     static constexpr bool SUPPORTED = true;
 
-    llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) {
-        (void) numa;
+    llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1, bool numa = false) {
+        GGML_UNUSED(numa);
 
         size = file->size;
 
         HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp));
 
         HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
-        DWORD error = GetLastError();
 
         if (hMapping == NULL) {
+            DWORD error = GetLastError();
             throw std::runtime_error(format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str()));
         }
 
         addr = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0);
-        error = GetLastError();
+        DWORD error = GetLastError();
         CloseHandle(hMapping);
 
         if (addr == NULL) {
             throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str()));
         }
 
-        if (prefetch) {
+        if (prefetch > 0) {
             // PrefetchVirtualMemory is only present on Windows 8 and above, so we dynamically load it
             BOOL (WINAPI *pPrefetchVirtualMemory) (HANDLE, ULONG_PTR, PWIN32_MEMORY_RANGE_ENTRY, ULONG);
             HMODULE hKernel32 = GetModuleHandleW(L"kernel32.dll");
@@ -965,9 +965,9 @@ struct llama_mmap {
                 // advise the kernel to preload the mapped memory
                 WIN32_MEMORY_RANGE_ENTRY range;
                 range.VirtualAddress = addr;
-                range.NumberOfBytes = (SIZE_T)size;
+                range.NumberOfBytes = (SIZE_T) std::min(size, prefetch);
                 if (!pPrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) {
-                    fprintf(stderr, "warning: PrefetchVirtualMemory failed: %s\n",
+                    LLAMA_LOG_WARN("warning: PrefetchVirtualMemory failed: %s\n",
                             llama_format_win_err(GetLastError()).c_str());
                 }
             }
@@ -982,26 +982,26 @@ struct llama_mmap {
 
     ~llama_mmap() {
         if (!UnmapViewOfFile(addr)) {
-            fprintf(stderr, "warning: UnmapViewOfFile failed: %s\n",
+            LLAMA_LOG_WARN("warning: UnmapViewOfFile failed: %s\n",
                     llama_format_win_err(GetLastError()).c_str());
         }
     }
 #else
     static constexpr bool SUPPORTED = false;
 
-    llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) {
-        (void) file;
-        (void) prefetch;
-        (void) numa;
+    llama_mmap(struct llama_file * file, size_t prefetch = -1, bool numa = false) {
+        GGML_UNUSED(file);
+        GGML_UNUSED(prefetch);
+        GGML_UNUSED(numa);
 
-        throw std::runtime_error(std::string("mmap not supported"));
+        throw std::runtime_error("mmap not supported");
    }
 
-    void unmap(size_t offset, size_t len) {
-        (void) offset;
-        (void) len;
+    void unmap_fragment(size_t first, size_t last) {
+        GGML_UNUSED(first);
+        GGML_UNUSED(last);
 
-        throw std::runtime_error(std::string("mmap not supported"));
+        throw std::runtime_error("mmap not supported");
    }
 #endif
 };
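
For context on the memcpy hunks above: they replace reads of float-valued
op_params through a pointer cast. Below is a minimal standalone sketch of
that pattern, not code from this patch; the params_buf struct is a
hypothetical stand-in for ggml's tensor, whose op_params is int32_t storage
reused to hold parameters of other types.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Hypothetical stand-in for ggml's tensor: op_params is raw int32
    // storage that ops reuse for parameters of other types, e.g. floats.
    struct params_buf {
        int32_t op_params[8];
    };

    int main() {
        params_buf dst = {};

        // An op stores its float scale factor into the int32 backing store.
        const float scale_in = 0.125f;
        std::memcpy(dst.op_params, &scale_in, sizeof(float));

        // Pre-patch pattern: ((float *) dst.op_params)[0] reads an int32_t
        // object through a float lvalue, violating strict aliasing.

        // Post-patch pattern: copy the bytes out instead; compilers lower
        // this memcpy to a plain load, with no undefined behavior.
        float scale;
        std::memcpy(&scale, dst.op_params, sizeof(float));

        std::printf("scale = %f\n", scale); // prints 0.125000
        return 0;
    }

The Windows hunks are in the same defensive spirit: GetLastError() is now
called only in the failure branch, immediately after the API call that
failed, so a later successful call cannot clobber the error code, and the
prefetch request is capped at std::min(size, prefetch) so a caller-supplied
prefetch size never exceeds the mapped file.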