llama : fix platforms without mmap (#4578)

* llama : fix platforms without mmap

* win32 : limit prefetch size to the file size

* fix win32 error clobber; remove unnecessary std::string in std::runtime_error
This commit is contained in:
slaren 2023-12-22 12:12:53 +01:00 committed by GitHub
parent 48b24b170e
commit 48b7ff193e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 24 additions and 21 deletions

View file

@ -7702,7 +7702,8 @@ inline void ggml_cuda_op_scale(
GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT(src0->type == GGML_TYPE_F32);
GGML_ASSERT( dst->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32);
const float scale = ((float *) dst->op_params)[0]; float scale;
memcpy(&scale, dst->op_params, sizeof(float));
scale_f32_cuda(src0_dd, dst_dd, scale, ggml_nelements(src0), main_stream); scale_f32_cuda(src0_dd, dst_dd, scale, ggml_nelements(src0), main_stream);
CUDA_CHECK(cudaGetLastError()); CUDA_CHECK(cudaGetLastError());

6
ggml.c
View file

@ -10335,7 +10335,8 @@ static void ggml_compute_forward_scale_f32(
} }
// scale factor // scale factor
const float v = *(float *) dst->op_params; float v;
memcpy(&v, dst->op_params, sizeof(float));
const int ith = params->ith; const int ith = params->ith;
const int nth = params->nth; const int nth = params->nth;
@ -15152,7 +15153,8 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
{ {
// necessary for llama // necessary for llama
if (src0->grad) { if (src0->grad) {
const float s = ((float *) tensor->op_params)[0]; float s;
memcpy(&s, tensor->op_params, sizeof(float));
src0->grad = src0->grad =
ggml_add_or_set(ctx, ggml_add_or_set(ctx,

View file

@ -778,7 +778,7 @@ struct llama_file {
throw std::runtime_error(format("read error: %s", strerror(errno))); throw std::runtime_error(format("read error: %s", strerror(errno)));
} }
if (ret != 1) { if (ret != 1) {
throw std::runtime_error(std::string("unexpectedly reached end of file")); throw std::runtime_error("unexpectedly reached end of file");
} }
} }
@ -931,29 +931,29 @@ struct llama_mmap {
#elif defined(_WIN32) #elif defined(_WIN32)
static constexpr bool SUPPORTED = true; static constexpr bool SUPPORTED = true;
llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) { llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1, bool numa = false) {
(void) numa; GGML_UNUSED(numa);
size = file->size; size = file->size;
HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp)); HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp));
HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL); HANDLE hMapping = CreateFileMappingA(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
DWORD error = GetLastError();
if (hMapping == NULL) { if (hMapping == NULL) {
DWORD error = GetLastError();
throw std::runtime_error(format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str())); throw std::runtime_error(format("CreateFileMappingA failed: %s", llama_format_win_err(error).c_str()));
} }
addr = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0); addr = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, 0);
error = GetLastError(); DWORD error = GetLastError();
CloseHandle(hMapping); CloseHandle(hMapping);
if (addr == NULL) { if (addr == NULL) {
throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str())); throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str()));
} }
if (prefetch) { if (prefetch > 0) {
// PrefetchVirtualMemory is only present on Windows 8 and above, so we dynamically load it // PrefetchVirtualMemory is only present on Windows 8 and above, so we dynamically load it
BOOL (WINAPI *pPrefetchVirtualMemory) (HANDLE, ULONG_PTR, PWIN32_MEMORY_RANGE_ENTRY, ULONG); BOOL (WINAPI *pPrefetchVirtualMemory) (HANDLE, ULONG_PTR, PWIN32_MEMORY_RANGE_ENTRY, ULONG);
HMODULE hKernel32 = GetModuleHandleW(L"kernel32.dll"); HMODULE hKernel32 = GetModuleHandleW(L"kernel32.dll");
@ -965,9 +965,9 @@ struct llama_mmap {
// advise the kernel to preload the mapped memory // advise the kernel to preload the mapped memory
WIN32_MEMORY_RANGE_ENTRY range; WIN32_MEMORY_RANGE_ENTRY range;
range.VirtualAddress = addr; range.VirtualAddress = addr;
range.NumberOfBytes = (SIZE_T)size; range.NumberOfBytes = (SIZE_T) std::min(size, prefetch);
if (!pPrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) { if (!pPrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) {
fprintf(stderr, "warning: PrefetchVirtualMemory failed: %s\n", LLAMA_LOG_WARN("warning: PrefetchVirtualMemory failed: %s\n",
llama_format_win_err(GetLastError()).c_str()); llama_format_win_err(GetLastError()).c_str());
} }
} }
@ -982,26 +982,26 @@ struct llama_mmap {
~llama_mmap() { ~llama_mmap() {
if (!UnmapViewOfFile(addr)) { if (!UnmapViewOfFile(addr)) {
fprintf(stderr, "warning: UnmapViewOfFile failed: %s\n", LLAMA_LOG_WARN("warning: UnmapViewOfFile failed: %s\n",
llama_format_win_err(GetLastError()).c_str()); llama_format_win_err(GetLastError()).c_str());
} }
} }
#else #else
static constexpr bool SUPPORTED = false; static constexpr bool SUPPORTED = false;
llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) { llama_mmap(struct llama_file * file, size_t prefetch = -1, bool numa = false) {
(void) file; GGML_UNUSED(file);
(void) prefetch; GGML_UNUSED(prefetch);
(void) numa; GGML_UNUSED(numa);
throw std::runtime_error(std::string("mmap not supported")); throw std::runtime_error("mmap not supported");
} }
void unmap(size_t offset, size_t len) { void unmap_fragment(size_t first, size_t last) {
(void) offset; GGML_UNUSED(first);
(void) len; GGML_UNUSED(last);
throw std::runtime_error(std::string("mmap not supported")); throw std::runtime_error("mmap not supported");
} }
#endif #endif
}; };