scripts : add script for measuring the time per token

This commit is contained in:
Georgi Gerganov 2023-05-08 22:06:54 +03:00
parent 8fbf7777ce
commit d155f0f865
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
5 changed files with 108 additions and 18 deletions

1
.gitignore vendored
View file

@ -44,5 +44,6 @@ zig-cache/
ppl-*.txt ppl-*.txt
qnt-*.txt qnt-*.txt
perf-*.txt
examples/jeopardy/results.txt examples/jeopardy/results.txt

View file

@ -330,18 +330,18 @@ As the models are currently fully loaded into memory, you will need adequate dis
Several quantization methods are supported. They differ in the resulting model disk size and inference speed. Several quantization methods are supported. They differ in the resulting model disk size and inference speed.
| Model | Measure | F16 | Q4_0 | Q4_1 | Q4_2 | Q5_0 | Q5_1 | Q8_0 | | Model | Measure | F16 | Q4_0 | Q4_1 | Q5_0 | Q5_1 | Q8_0 |
|------:|--------------|-------:|-------:|-------:|-------:|-------:|-------:|-------:| |------:|--------------|-------:|-------:|-------:|-------:|-------:|-------:|
| 7B | perplexity | 5.9066 | 6.1620 | 6.0910 | 6.1466 | 5.9862 | 5.9481 | 5.9069 | | 7B | perplexity | 5.9066 | 6.1620 | 6.0910 | 5.9862 | 5.9481 | 5.9069 |
| 7B | file size | 13.0G | 4.0G | 4.8G | 4.0G | 4.4G | 4.8G | 7.1G | | 7B | file size | 13.0G | 4.0G | 4.8G | 4.4G | 4.8G | 7.1G |
| 7B | ms/tok @ 4th | 128 | 56 | 61 | 84 | 91 | 95 | 75 | | 7B | ms/tok @ 4th | 128 | 56 | 61 | 91 | 95 | 75 |
| 7B | ms/tok @ 8th | 128 | 47 | 55 | 48 | 53 | 59 | 75 | | 7B | ms/tok @ 8th | 128 | 47 | 55 | 53 | 59 | 75 |
| 7B | bits/weight | 16.0 | 5.0 | 6.0 | 5.0 | 5.5 | 6.0 | 9.0 | | 7B | bits/weight | 16.0 | 5.0 | 6.0 | 5.5 | 6.0 | 9.0 |
| 13B | perplexity | 5.2543 | 5.3863 | 5.3607 | 5.3513 | 5.2856 | 5.2706 | 5.2548 | | 13B | perplexity | 5.2543 | 5.3863 | 5.3607 | 5.2856 | 5.2706 | 5.2548 |
| 13B | file size | 25.0G | 7.6G | 9.1G | 7.6G | 8.4G | 9.1G | 14G | | 13B | file size | 25.0G | 7.6G | 9.1G | 8.4G | 9.1G | 14G |
| 13B | ms/tok @ 4th | 239 | 104 | 113 | 160 | 176 | 185 | 141 | | 13B | ms/tok @ 4th | 239 | 104 | 113 | 176 | 185 | 141 |
| 13B | ms/tok @ 8th | 240 | 85 | 99 | 97 | 108 | 117 | 147 | | 13B | ms/tok @ 8th | 240 | 85 | 99 | 108 | 117 | 147 |
| 13B | bits/weight | 16.0 | 5.0 | 6.0 | 5.0 | 5.5 | 6.0 | 9.0 | | 13B | bits/weight | 16.0 | 5.0 | 6.0 | 5.5 | 6.0 | 9.0 |
### Perplexity (measuring model quality) ### Perplexity (measuring model quality)

View file

@ -2809,9 +2809,9 @@ void llama_print_timings(struct llama_context * ctx) {
fprintf(stderr, "\n"); fprintf(stderr, "\n");
fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, ctx->t_load_us / 1000.0); fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, ctx->t_load_us / 1000.0);
fprintf(stderr, "%s: sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3 * ctx->t_sample_us, n_sample, 1e-3 * ctx->t_sample_us / n_sample); fprintf(stderr, "%s: sample time = %8.2f ms / %5d runs (%8.2f ms per token)\n", __func__, 1e-3 * ctx->t_sample_us, n_sample, 1e-3 * ctx->t_sample_us / n_sample);
fprintf(stderr, "%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token)\n", __func__, 1e-3 * ctx->t_p_eval_us, n_p_eval, 1e-3 * ctx->t_p_eval_us / n_p_eval); fprintf(stderr, "%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token)\n", __func__, 1e-3 * ctx->t_p_eval_us, n_p_eval, 1e-3 * ctx->t_p_eval_us / n_p_eval);
fprintf(stderr, "%s: eval time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3 * ctx->t_eval_us, n_eval, 1e-3 * ctx->t_eval_us / n_eval); fprintf(stderr, "%s: eval time = %8.2f ms / %5d runs (%8.2f ms per token)\n", __func__, 1e-3 * ctx->t_eval_us, n_eval, 1e-3 * ctx->t_eval_us / n_eval);
fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0); fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0);
} }

93
scripts/perf-run-all.sh Executable file
View file

@ -0,0 +1,93 @@
#!/bin/bash
#
# Measure the performance (time per token) of the various quantization techniques
#
QUANTIZE=0
if [ "$1" != "" ]; then
echo "Quantizing"
QUANTIZE=1
fi
if [ "$QUANTIZE" != "0" ]; then
#
# quantize
#
# 7B
time ./bin/quantize ../models/7B/ggml-model-f16.bin ../models/7B/ggml-model-q4_0.bin q4_0 2>&1 | tee ../qnt-7b-q4_0.txt
time ./bin/quantize ../models/7B/ggml-model-f16.bin ../models/7B/ggml-model-q4_1.bin q4_1 2>&1 | tee ../qnt-7b-q4_1.txt
time ./bin/quantize ../models/7B/ggml-model-f16.bin ../models/7B/ggml-model-q5_0.bin q5_0 2>&1 | tee ../qnt-7b-q5_0.txt
time ./bin/quantize ../models/7B/ggml-model-f16.bin ../models/7B/ggml-model-q5_1.bin q5_1 2>&1 | tee ../qnt-7b-q5_1.txt
time ./bin/quantize ../models/7B/ggml-model-f16.bin ../models/7B/ggml-model-q8_0.bin q8_0 2>&1 | tee ../qnt-7b-q8_0.txt
# 13B
time ./bin/quantize ../models/13B/ggml-model-f16.bin ../models/13B/ggml-model-q4_0.bin q4_0 2>&1 | tee ../qnt-13b-q4_0.txt
time ./bin/quantize ../models/13B/ggml-model-f16.bin ../models/13B/ggml-model-q4_1.bin q4_1 2>&1 | tee ../qnt-13b-q4_1.txt
time ./bin/quantize ../models/13B/ggml-model-f16.bin ../models/13B/ggml-model-q5_0.bin q5_0 2>&1 | tee ../qnt-13b-q5_0.txt
time ./bin/quantize ../models/13B/ggml-model-f16.bin ../models/13B/ggml-model-q5_1.bin q5_1 2>&1 | tee ../qnt-13b-q5_1.txt
time ./bin/quantize ../models/13B/ggml-model-f16.bin ../models/13B/ggml-model-q8_0.bin q8_0 2>&1 | tee ../qnt-13b-q8_0.txt
fi
#
# perf
# run each command twice
#
set -x
# 7B - 4 threads
./bin/main -m ../models/7B/ggml-model-f16.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | grep "I believe"
time ./bin/main -m ../models/7B/ggml-model-f16.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | tee ../perf-7b-f16.txt | grep llama_print_timings
./bin/main -m ../models/7B/ggml-model-q4_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | grep "I believe"
time ./bin/main -m ../models/7B/ggml-model-q4_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | tee ../perf-7b-q4_0.txt | grep llama_print_timings
./bin/main -m ../models/7B/ggml-model-q4_1.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | grep "I believe"
time ./bin/main -m ../models/7B/ggml-model-q4_1.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | tee ../perf-7b-q4_1.txt | grep llama_print_timings
./bin/main -m ../models/7B/ggml-model-q5_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | grep "I believe"
time ./bin/main -m ../models/7B/ggml-model-q5_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | tee ../perf-7b-q5_0.txt | grep llama_print_timings
./bin/main -m ../models/7B/ggml-model-q5_1.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | grep "I believe"
time ./bin/main -m ../models/7B/ggml-model-q5_1.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | tee ../perf-7b-q5_1.txt | grep llama_print_timings
./bin/main -m ../models/7B/ggml-model-q8_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | grep "I believe"
time ./bin/main -m ../models/7B/ggml-model-q8_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | tee ../perf-7b-q8_0.txt | grep llama_print_timings
# 7B - 8 threads
./bin/main -m ../models/7B/ggml-model-f16.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | grep "I believe"
time ./bin/main -m ../models/7B/ggml-model-f16.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | tee ../perf-7b-f16.txt | grep llama_print_timings
./bin/main -m ../models/7B/ggml-model-q4_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | grep "I believe"
time ./bin/main -m ../models/7B/ggml-model-q4_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | tee ../perf-7b-q4_0.txt | grep llama_print_timings
./bin/main -m ../models/7B/ggml-model-q4_1.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | grep "I believe"
time ./bin/main -m ../models/7B/ggml-model-q4_1.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | tee ../perf-7b-q4_1.txt | grep llama_print_timings
./bin/main -m ../models/7B/ggml-model-q5_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | grep "I believe"
time ./bin/main -m ../models/7B/ggml-model-q5_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | tee ../perf-7b-q5_0.txt | grep llama_print_timings
./bin/main -m ../models/7B/ggml-model-q5_1.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | grep "I believe"
time ./bin/main -m ../models/7B/ggml-model-q5_1.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | tee ../perf-7b-q5_1.txt | grep llama_print_timings
./bin/main -m ../models/7B/ggml-model-q8_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | grep "I believe"
time ./bin/main -m ../models/7B/ggml-model-q8_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | tee ../perf-7b-q8_0.txt | grep llama_print_timings
# 13B - 4 threads
./bin/main -m ../models/13B/ggml-model-f16.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | grep "I believe"
time ./bin/main -m ../models/13B/ggml-model-f16.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | tee ../perf-13b-f16.txt | grep llama_print_timings
./bin/main -m ../models/13B/ggml-model-q4_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | grep "I believe"
time ./bin/main -m ../models/13B/ggml-model-q4_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | tee ../perf-13b-q4_0.txt | grep llama_print_timings
./bin/main -m ../models/13B/ggml-model-q4_1.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | grep "I believe"
time ./bin/main -m ../models/13B/ggml-model-q4_1.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | tee ../perf-13b-q4_1.txt | grep llama_print_timings
./bin/main -m ../models/13B/ggml-model-q5_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | grep "I believe"
time ./bin/main -m ../models/13B/ggml-model-q5_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | tee ../perf-13b-q5_0.txt | grep llama_print_timings
./bin/main -m ../models/13B/ggml-model-q5_1.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | grep "I believe"
time ./bin/main -m ../models/13B/ggml-model-q5_1.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | tee ../perf-13b-q5_1.txt | grep llama_print_timings
./bin/main -m ../models/13B/ggml-model-q8_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | grep "I believe"
time ./bin/main -m ../models/13B/ggml-model-q8_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 4 2>&1 | tee ../perf-13b-q8_0.txt | grep llama_print_timings
# 13B - 8 threads
./bin/main -m ../models/13B/ggml-model-f16.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | grep "I believe"
time ./bin/main -m ../models/13B/ggml-model-f16.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | tee ../perf-13b-f16.txt | grep llama_print_timings
./bin/main -m ../models/13B/ggml-model-q4_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | grep "I believe"
time ./bin/main -m ../models/13B/ggml-model-q4_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | tee ../perf-13b-q4_0.txt | grep llama_print_timings
./bin/main -m ../models/13B/ggml-model-q4_1.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | grep "I believe"
time ./bin/main -m ../models/13B/ggml-model-q4_1.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | tee ../perf-13b-q4_1.txt | grep llama_print_timings
./bin/main -m ../models/13B/ggml-model-q5_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | grep "I believe"
time ./bin/main -m ../models/13B/ggml-model-q5_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | tee ../perf-13b-q5_0.txt | grep llama_print_timings
./bin/main -m ../models/13B/ggml-model-q5_1.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | grep "I believe"
time ./bin/main -m ../models/13B/ggml-model-q5_1.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | tee ../perf-13b-q5_1.txt | grep llama_print_timings
./bin/main -m ../models/13B/ggml-model-q8_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | grep "I believe"
time ./bin/main -m ../models/13B/ggml-model-q8_0.bin -p "I believe the meaning of life is" --no-mmap -c 2048 --ignore-eos -s 1 -n 64 -t 8 2>&1 | tee ../perf-13b-q8_0.txt | grep llama_print_timings

View file

@ -7,7 +7,6 @@
# 7B # 7B
time ./bin/quantize ../models/7B/ggml-model-f16.bin ../models/7B/ggml-model-q4_0.bin q4_0 2>&1 | tee ../qnt-7b-q4_0.txt time ./bin/quantize ../models/7B/ggml-model-f16.bin ../models/7B/ggml-model-q4_0.bin q4_0 2>&1 | tee ../qnt-7b-q4_0.txt
time ./bin/quantize ../models/7B/ggml-model-f16.bin ../models/7B/ggml-model-q4_1.bin q4_1 2>&1 | tee ../qnt-7b-q4_1.txt time ./bin/quantize ../models/7B/ggml-model-f16.bin ../models/7B/ggml-model-q4_1.bin q4_1 2>&1 | tee ../qnt-7b-q4_1.txt
time ./bin/quantize ../models/7B/ggml-model-f16.bin ../models/7B/ggml-model-q4_2.bin q4_2 2>&1 | tee ../qnt-7b-q4_2.txt
time ./bin/quantize ../models/7B/ggml-model-f16.bin ../models/7B/ggml-model-q5_0.bin q5_0 2>&1 | tee ../qnt-7b-q5_0.txt time ./bin/quantize ../models/7B/ggml-model-f16.bin ../models/7B/ggml-model-q5_0.bin q5_0 2>&1 | tee ../qnt-7b-q5_0.txt
time ./bin/quantize ../models/7B/ggml-model-f16.bin ../models/7B/ggml-model-q5_1.bin q5_1 2>&1 | tee ../qnt-7b-q5_1.txt time ./bin/quantize ../models/7B/ggml-model-f16.bin ../models/7B/ggml-model-q5_1.bin q5_1 2>&1 | tee ../qnt-7b-q5_1.txt
time ./bin/quantize ../models/7B/ggml-model-f16.bin ../models/7B/ggml-model-q8_0.bin q8_0 2>&1 | tee ../qnt-7b-q8_0.txt time ./bin/quantize ../models/7B/ggml-model-f16.bin ../models/7B/ggml-model-q8_0.bin q8_0 2>&1 | tee ../qnt-7b-q8_0.txt
@ -15,7 +14,6 @@ time ./bin/quantize ../models/7B/ggml-model-f16.bin ../models/7B/ggml-model-q8_0
# 13B # 13B
time ./bin/quantize ../models/13B/ggml-model-f16.bin ../models/13B/ggml-model-q4_0.bin q4_0 2>&1 | tee ../qnt-13b-q4_0.txt time ./bin/quantize ../models/13B/ggml-model-f16.bin ../models/13B/ggml-model-q4_0.bin q4_0 2>&1 | tee ../qnt-13b-q4_0.txt
time ./bin/quantize ../models/13B/ggml-model-f16.bin ../models/13B/ggml-model-q4_1.bin q4_1 2>&1 | tee ../qnt-13b-q4_1.txt time ./bin/quantize ../models/13B/ggml-model-f16.bin ../models/13B/ggml-model-q4_1.bin q4_1 2>&1 | tee ../qnt-13b-q4_1.txt
time ./bin/quantize ../models/13B/ggml-model-f16.bin ../models/13B/ggml-model-q4_2.bin q4_2 2>&1 | tee ../qnt-13b-q4_2.txt
time ./bin/quantize ../models/13B/ggml-model-f16.bin ../models/13B/ggml-model-q5_0.bin q5_0 2>&1 | tee ../qnt-13b-q5_0.txt time ./bin/quantize ../models/13B/ggml-model-f16.bin ../models/13B/ggml-model-q5_0.bin q5_0 2>&1 | tee ../qnt-13b-q5_0.txt
time ./bin/quantize ../models/13B/ggml-model-f16.bin ../models/13B/ggml-model-q5_1.bin q5_1 2>&1 | tee ../qnt-13b-q5_1.txt time ./bin/quantize ../models/13B/ggml-model-f16.bin ../models/13B/ggml-model-q5_1.bin q5_1 2>&1 | tee ../qnt-13b-q5_1.txt
time ./bin/quantize ../models/13B/ggml-model-f16.bin ../models/13B/ggml-model-q8_0.bin q8_0 2>&1 | tee ../qnt-13b-q8_0.txt time ./bin/quantize ../models/13B/ggml-model-f16.bin ../models/13B/ggml-model-q8_0.bin q8_0 2>&1 | tee ../qnt-13b-q8_0.txt
@ -28,7 +26,6 @@ time ./bin/quantize ../models/13B/ggml-model-f16.bin ../models/13B/ggml-model-q8
time ./bin/perplexity -m ../models/7B/ggml-model-f16.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-7b-f16.txt time ./bin/perplexity -m ../models/7B/ggml-model-f16.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-7b-f16.txt
time ./bin/perplexity -m ../models/7B/ggml-model-q4_0.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-7b-q4_0.txt time ./bin/perplexity -m ../models/7B/ggml-model-q4_0.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-7b-q4_0.txt
time ./bin/perplexity -m ../models/7B/ggml-model-q4_1.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-7b-q4_1.txt time ./bin/perplexity -m ../models/7B/ggml-model-q4_1.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-7b-q4_1.txt
time ./bin/perplexity -m ../models/7B/ggml-model-q4_2.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-7b-q4_2.txt
time ./bin/perplexity -m ../models/7B/ggml-model-q5_0.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-7b-q5_0.txt time ./bin/perplexity -m ../models/7B/ggml-model-q5_0.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-7b-q5_0.txt
time ./bin/perplexity -m ../models/7B/ggml-model-q5_1.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-7b-q5_1.txt time ./bin/perplexity -m ../models/7B/ggml-model-q5_1.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-7b-q5_1.txt
time ./bin/perplexity -m ../models/7B/ggml-model-q8_0.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-7b-q8_0.txt time ./bin/perplexity -m ../models/7B/ggml-model-q8_0.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-7b-q8_0.txt
@ -37,7 +34,6 @@ time ./bin/perplexity -m ../models/7B/ggml-model-q8_0.bin -f ./wiki.test.raw --n
time ./bin/perplexity -m ../models/13B/ggml-model-f16.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-13b-f16.txt time ./bin/perplexity -m ../models/13B/ggml-model-f16.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-13b-f16.txt
time ./bin/perplexity -m ../models/13B/ggml-model-q4_0.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-13b-q4_0.txt time ./bin/perplexity -m ../models/13B/ggml-model-q4_0.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-13b-q4_0.txt
time ./bin/perplexity -m ../models/13B/ggml-model-q4_1.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-13b-q4_1.txt time ./bin/perplexity -m ../models/13B/ggml-model-q4_1.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-13b-q4_1.txt
time ./bin/perplexity -m ../models/13B/ggml-model-q4_2.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-13b-q4_2.txt
time ./bin/perplexity -m ../models/13B/ggml-model-q5_0.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-13b-q5_0.txt time ./bin/perplexity -m ../models/13B/ggml-model-q5_0.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-13b-q5_0.txt
time ./bin/perplexity -m ../models/13B/ggml-model-q5_1.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-13b-q5_1.txt time ./bin/perplexity -m ../models/13B/ggml-model-q5_1.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-13b-q5_1.txt
time ./bin/perplexity -m ../models/13B/ggml-model-q8_0.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-13b-q8_0.txt time ./bin/perplexity -m ../models/13B/ggml-model-q8_0.bin -f ./wiki.test.raw --no-mmap -t 12 2>&1 | tee ../ppl-13b-q8_0.txt