From 24a447e20af425fa44cf10feaa632b6bb596c80f Mon Sep 17 00:00:00 2001 From: automaticcat Date: Sat, 30 Dec 2023 15:07:48 +0700 Subject: [PATCH] ggml : add ggml_cpu_has_avx_vnni() (#4589) * feat: add avx_vnni based on intel documents * ggml: add avx vnni based on intel document * llama: add avx vnni information display * docs: add more details about using oneMKL and oneAPI for intel processors * docs: add more details about using oneMKL and oneAPI for intel processors * docs: add more details about using oneMKL and oneAPI for intel processors * docs: add more details about using oneMKL and oneAPI for intel processors * docs: add more details about using oneMKL and oneAPI for intel processors * Update ggml.c Fix indentation upgate Co-authored-by: Georgi Gerganov --------- Co-authored-by: Georgi Gerganov --- README.md | 30 ++++++++++++++++++++++-------- common/common.cpp | 1 + ggml.c | 8 ++++++++ ggml.h | 1 + llama.cpp | 1 + 5 files changed, 33 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 48dcd6464..ca6d14e17 100644 --- a/README.md +++ b/README.md @@ -385,16 +385,30 @@ Building the program with BLAS support may lead to some performance improvements Check [BLIS.md](docs/BLIS.md) for more information. -- #### Intel MKL +- #### Intel oneMKL + - Using manual oneAPI installation: + By default, `LLAMA_BLAS_VENDOR` is set to `Generic`, so if you already sourced intel environment script and assign `-DLLAMA_BLAS=ON` in cmake, the mkl version of Blas will automatically been selected. Otherwise please install oneAPI and follow the below steps: + ```bash + mkdir build + cd build + source /opt/intel/oneapi/setvars.sh # You can skip this step if in oneapi-runtime docker image, only required for manual installation + cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_NATIVE=ON + cmake --build . --config Release + ``` - By default, `LLAMA_BLAS_VENDOR` is set to `Generic`, so if you already sourced intel environment script and assign `-DLLAMA_BLAS=ON` in cmake, the mkl version of Blas will automatically been selected. You may also specify it by: + - Using oneAPI docker image: + If you do not want to source the environment vars and install oneAPI manually, you can also build the code using intel docker container: [oneAPI-runtime](https://hub.docker.com/r/intel/oneapi-runtime) - ```bash - mkdir build - cd build - cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx - cmake --build . --config Release - ``` + ```bash + mkdir build + cd build + cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_NATIVE=ON + cmake --build . --config Release + ``` + + Building through oneAPI compilers will make avx_vnni instruction set available for intel processors that do not support avx512 and avx512_vnni. + + Check [Optimizing and Running LLaMA2 on IntelĀ® CPU](https://www.intel.com/content/www/us/en/content-details/791610/optimizing-and-running-llama2-on-intel-cpu.html) for more information. - #### cuBLAS diff --git a/common/common.cpp b/common/common.cpp index b3425ab09..eacaee18e 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1394,6 +1394,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l fprintf(stream, "build_number: %d\n", LLAMA_BUILD_NUMBER); fprintf(stream, "cpu_has_arm_fma: %s\n", ggml_cpu_has_arm_fma() ? "true" : "false"); fprintf(stream, "cpu_has_avx: %s\n", ggml_cpu_has_avx() ? "true" : "false"); + fprintf(stream, "cpu_has_avx_vnni: %s\n", ggml_cpu_has_avx_vnni() ? "true" : "false"); fprintf(stream, "cpu_has_avx2: %s\n", ggml_cpu_has_avx2() ? "true" : "false"); fprintf(stream, "cpu_has_avx512: %s\n", ggml_cpu_has_avx512() ? "true" : "false"); fprintf(stream, "cpu_has_avx512_vbmi: %s\n", ggml_cpu_has_avx512_vbmi() ? "true" : "false"); diff --git a/ggml.c b/ggml.c index a9e1ea9b4..bcec200f6 100644 --- a/ggml.c +++ b/ggml.c @@ -19638,6 +19638,14 @@ int ggml_cpu_has_avx(void) { #endif } +int ggml_cpu_has_avx_vnni(void) { +#if defined(__AVXVNNI__) + return 1; +#else + return 0; +#endif +} + int ggml_cpu_has_avx2(void) { #if defined(__AVX2__) return 1; diff --git a/ggml.h b/ggml.h index 67d6bc4f1..64f4e45e8 100644 --- a/ggml.h +++ b/ggml.h @@ -2198,6 +2198,7 @@ extern "C" { // GGML_API int ggml_cpu_has_avx (void); + GGML_API int ggml_cpu_has_avx_vnni (void); GGML_API int ggml_cpu_has_avx2 (void); GGML_API int ggml_cpu_has_avx512 (void); GGML_API int ggml_cpu_has_avx512_vbmi(void); diff --git a/llama.cpp b/llama.cpp index 68c7cced6..a833d4c15 100644 --- a/llama.cpp +++ b/llama.cpp @@ -10780,6 +10780,7 @@ const char * llama_print_system_info(void) { s = ""; s += "AVX = " + std::to_string(ggml_cpu_has_avx()) + " | "; + s += "AVX_VNNI = " + std::to_string(ggml_cpu_has_avx_vnni()) + " | "; s += "AVX2 = " + std::to_string(ggml_cpu_has_avx2()) + " | "; s += "AVX512 = " + std::to_string(ggml_cpu_has_avx512()) + " | "; s += "AVX512_VBMI = " + std::to_string(ggml_cpu_has_avx512_vbmi()) + " | ";