From 8800226d65d5c98cd34eede6a6c05c78405c52da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Sun, 14 Apr 2024 13:12:59 +0200 Subject: [PATCH] Fix --split-max-size (#6655) * Fix --split-max-size Byte size calculation was done on int and overflowed. * add tests.sh * add examples test scripts to ci run Will autodiscover examples/*/tests.sh scripts and run them. * move WORK_PATH to a subdirectory * clean up before and after test * explicitly define which scripts to run * add --split-max-size to readme --- ci/run.sh | 49 ++++++++++++++++ examples/gguf-split/README.md | 1 + examples/gguf-split/gguf-split.cpp | 4 +- examples/gguf-split/tests.sh | 89 ++++++++++++++++++++++++++++++ 4 files changed, 141 insertions(+), 2 deletions(-) create mode 100644 examples/gguf-split/tests.sh diff --git a/ci/run.sh b/ci/run.sh index 19776b5f7..085dfd42f 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -153,6 +153,52 @@ function gg_sum_ctest_release { gg_printf '```\n' } +# test_scripts_debug + +function gg_run_test_scripts_debug { + cd ${SRC} + + set -e + + (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log + + set +e +} + +function gg_sum_test_scripts_debug { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs test scripts in debug mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)" + gg_printf '```\n' + gg_printf '\n' +} + +# test_scripts_release + +function gg_run_test_scripts_release { + cd ${SRC} + + set -e + + (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log + + set +e +} + +function gg_sum_test_scripts_release { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs test scripts in release mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)" + gg_printf '```\n' + gg_printf '\n' +} + function gg_get_model { local gguf_3b="$MNT/models/open-llama/3B-v2/ggml-model-f16.gguf" local gguf_7b="$MNT/models/open-llama/7B-v2/ggml-model-f16.gguf" @@ -642,6 +688,9 @@ test $ret -eq 0 && gg_run ctest_release if [ -z ${GG_BUILD_LOW_PERF} ]; then test $ret -eq 0 && gg_run embd_bge_small + test $ret -eq 0 && gg_run test_scripts_debug + test $ret -eq 0 && gg_run test_scripts_release + if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then if [ -z ${GG_BUILD_CUDA} ]; then test $ret -eq 0 && gg_run open_llama_3b_v2 diff --git a/examples/gguf-split/README.md b/examples/gguf-split/README.md index ddb1f7649..ad1d86651 100644 --- a/examples/gguf-split/README.md +++ b/examples/gguf-split/README.md @@ -5,5 +5,6 @@ CLI to split / merge GGUF files. **Command line options:** - `--split`: split GGUF to multiple GGUF, default operation. +- `--split-max-size`: max size per split in `M` or `G`, f.ex. `500M` or `2G`. - `--split-max-tensors`: maximum tensors in each split: default(128) - `--merge`: merge multiple GGUF to a single GGUF. diff --git a/examples/gguf-split/gguf-split.cpp b/examples/gguf-split/gguf-split.cpp index 24acbf02a..39c75e0a7 100644 --- a/examples/gguf-split/gguf-split.cpp +++ b/examples/gguf-split/gguf-split.cpp @@ -59,10 +59,10 @@ static size_t split_str_to_n_bytes(std::string str) { int n; if (str.back() == 'M') { sscanf(str.c_str(), "%d", &n); - n_bytes = n * 1024 * 1024; // megabytes + n_bytes = (size_t)n * 1024 * 1024; // megabytes } else if (str.back() == 'G') { sscanf(str.c_str(), "%d", &n); - n_bytes = n * 1024 * 1024 * 1024; // gigabytes + n_bytes = (size_t)n * 1024 * 1024 * 1024; // gigabytes } else { throw std::invalid_argument("error: supported units are M (megabytes) or G (gigabytes), but got: " + std::string(1, str.back())); } diff --git a/examples/gguf-split/tests.sh b/examples/gguf-split/tests.sh new file mode 100644 index 000000000..879522f7e --- /dev/null +++ b/examples/gguf-split/tests.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +set -eu + +if [ $# -lt 1 ] +then + echo "usage: $0 path_to_build_binary [path_to_temp_folder]" + echo "example: $0 ../../build/bin ../../tmp" + exit 1 +fi + +if [ $# -gt 1 ] +then + TMP_DIR=$2 +else + TMP_DIR=/tmp +fi + +set -x + +SPLIT=$1/gguf-split +MAIN=$1/main +WORK_PATH=$TMP_DIR/gguf-split +CUR_DIR=$(pwd) + +mkdir -p "$WORK_PATH" + +# Clean up in case of previously failed test +rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-merge*.gguf + +# 1. Get a model +( + cd $WORK_PATH + "$CUR_DIR"/../../scripts/hf.sh --repo ggml-org/gemma-1.1-2b-it-Q8_0-GGUF --file gemma-1.1-2b-it.Q8_0.gguf +) +echo PASS + +# 2. Split with max tensors strategy +$SPLIT --split-max-tensors 28 $WORK_PATH/gemma-1.1-2b-it.Q8_0.gguf $WORK_PATH/ggml-model-split +echo PASS +echo + +# 2b. Test the sharded model is loading properly +$MAIN --model $WORK_PATH/ggml-model-split-00001-of-00006.gguf --random-prompt --n-predict 32 +echo PASS +echo + +# 3. Merge +$SPLIT --merge $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-merge.gguf +echo PASS +echo + +# 3b. Test the merged model is loading properly +$MAIN --model $WORK_PATH/ggml-model-merge.gguf --random-prompt --n-predict 32 +echo PASS +echo + +# 4. Split with no tensor in metadata +#$SPLIT --split-max-tensors 32 --no-tensor-in-metadata $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-32-tensors +#echo PASS +#echo + +# 4b. Test the sharded model is loading properly +#$MAIN --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf --random-prompt --n-predict 32 +#echo PASS +#echo + +# 5. Merge +#$SPLIT --merge $WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf $WORK_PATH/ggml-model-merge-2.gguf +#echo PASS +#echo + +# 5b. Test the merged model is loading properly +#$MAIN --model $WORK_PATH/ggml-model-merge-2.gguf --random-prompt --n-predict 32 +#echo PASS +#echo + +# 6. Split with size strategy +$SPLIT --split-max-size 2G $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-2G +echo PASS +echo + +# 6b. Test the sharded model is loading properly +$MAIN --model $WORK_PATH/ggml-model-split-2G-00001-of-00002.gguf --random-prompt --n-predict 32 +echo PASS +echo + +# Clean up +rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-merge*.gguf