diff --git a/CMakeLists.txt b/CMakeLists.txt index 57ae4c2df..ce237cf45 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -177,27 +177,29 @@ if (LLAMA_METAL) if (LLAMA_METAL_SHADER_DEBUG) # custom command to do the following: # xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air - # xcrun -sdk macosx metallib ggml-metal.air -o ggml.metallib + # xcrun -sdk macosx metallib ggml-metal.air -o default.metallib # # note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works # disabling fast math is needed in order to pass tests/test-backend-ops # note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1 + # note: unfortunately, we have to call it default.metallib instead of ggml.metallib + # ref: https://github.com/ggerganov/whisper.cpp/issues/1720 set(XC_FLAGS -fno-fast-math -fno-inline -g) if (LLAMA_QKK_64) set(XC_FLAGS ${XC_FLAGS} -DQK_K=64) endif() add_custom_command( - OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml.metallib + OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air - COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml.metallib + COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib DEPENDS ggml-metal.metal COMMENT "Compiling Metal kernels" ) add_custom_target( ggml-metal ALL - DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml.metallib + DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib ) endif() diff --git a/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj b/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj index a70750a22..14b93f26c 100644 --- a/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj +++ b/examples/llama.swiftui/llama.swiftui.xcodeproj/project.pbxproj @@ -23,26 +23,10 @@ 8A3F84242AC4C891005E2EE8 /* models in Resources */ = {isa = PBXBuildFile; fileRef = 8A3F84232AC4C891005E2EE8 /* models */; }; 8A907F332AC7138A006146EA /* LibLlama.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A907F322AC7134E006146EA /* LibLlama.swift */; }; 8A9F7C4D2AC332EE008AE1EA /* LlamaState.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8A9F7C4C2AC332EE008AE1EA /* LlamaState.swift */; }; - F1FE20E22B465ECA00B45541 /* LoadCustomButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1FE20E12B465EC900B45541 /* LoadCustomButton.swift */; }; F1FE20DC2B465C4500B45541 /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 549479C82AC9E10B00E0F78B /* ggml-metal.metal */; }; + F1FE20E22B465ECA00B45541 /* LoadCustomButton.swift in Sources */ = {isa = PBXBuildFile; fileRef = F1FE20E12B465EC900B45541 /* LoadCustomButton.swift */; }; /* End PBXBuildFile section */ -/* Begin PBXBuildRule section */ - F1FE20DB2B465C2100B45541 /* PBXBuildRule */ = { - isa = PBXBuildRule; - compilerSpec = com.apple.compilers.proxy.script; - fileType = sourcecode.metal; - inputFiles = ( - ); - isEditable = 1; - outputFiles = ( - "${DERIVED_FILES_DIR}/ggml-metal.air", - "${DERIVED_FILES_DIR}/ggml.metallib", - ); - script = "# metal\nxcrun metal -c \"${INPUT_FILE_PATH}\" -o \"${DERIVED_FILES_DIR}/${INPUT_FILE_BASE}.air\"\nxcrun metallib -o \"${DERIVED_FILES_DIR}/${INPUT_FILE_BASE%-metal}.metallib\" \"${DERIVED_FILES_DIR}/${INPUT_FILE_BASE}.air\"\n"; - }; -/* End PBXBuildRule section */ - /* Begin PBXFileReference section */ 542376062B0D9BEA008E6A1C /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-quants.h"; path = "../../ggml-quants.h"; sourceTree = ""; }; 542376072B0D9BFB008E6A1C /* ggml-quants.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-quants.c"; path = "../../ggml-quants.c"; sourceTree = ""; }; @@ -209,7 +193,6 @@ 8A1C83712AC328BD0096AF73 /* Resources */, ); buildRules = ( - F1FE20DB2B465C2100B45541 /* PBXBuildRule */, ); dependencies = ( ); diff --git a/ggml-metal.m b/ggml-metal.m index 55cc1a872..fbbdcd8c4 100644 --- a/ggml-metal.m +++ b/ggml-metal.m @@ -258,14 +258,14 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) { bundle = [NSBundle bundleForClass:[GGMLMetalClass class]]; #endif NSError * error = nil; - NSString * libPath = [bundle pathForResource:@"ggml" ofType:@"metallib"]; + NSString * libPath = [bundle pathForResource:@"default" ofType:@"metallib"]; if (libPath != nil) { // pre-compiled library found NSURL * libURL = [NSURL fileURLWithPath:libPath]; GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [libPath UTF8String]); ctx->library = [ctx->device newLibraryWithURL:libURL error:&error]; } else { - GGML_METAL_LOG_INFO("%s: ggml.metallib not found, loading from source\n", __func__); + GGML_METAL_LOG_INFO("%s: default.metallib not found, loading from source\n", __func__); NSString * sourcePath; NSString * ggmlMetalPathResources = [[NSProcessInfo processInfo].environment objectForKey:@"GGML_METAL_PATH_RESOURCES"]; @@ -295,7 +295,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) { #endif // try to disable fast-math // NOTE: this seems to have no effect whatsoever - // instead, in order to disable fast-math, we have to build ggml.metallib from the command line + // instead, in order to disable fast-math, we have to build default.metallib from the command line // using xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air // and go through the "pre-compiled library found" path above //[options setFastMathEnabled:false]; diff --git a/scripts/sync-ggml.last b/scripts/sync-ggml.last index 354246a26..fe7f3202f 100644 --- a/scripts/sync-ggml.last +++ b/scripts/sync-ggml.last @@ -1 +1 @@ -3fd01e00e40583ccd4b393a7c6502d6a4455a1d5 +f96711108d55bdbbd277e6be07204dce6a94fb93