tests : fix quantize perf (#1990)

* fix test quantize perf

* avoid the global state
This commit is contained in:
katsu560 2023-06-27 01:47:02 +09:00 committed by GitHub
parent 5743ca8092
commit a84ab1da8d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -21,6 +21,7 @@
#define QK 32 #define QK 32
#define WARMUP 5 #define WARMUP 5
#define ITERATIONS 10 #define ITERATIONS 10
#define MAX_ITERATIONS 100000000
#define L1_SIZE 32*128 #define L1_SIZE 32*128
#define L2_SIZE 32*2048 #define L2_SIZE 32*2048
@ -36,9 +37,9 @@ struct quantize_perf_params {
bool op_dequantize_row_q = false; bool op_dequantize_row_q = false;
bool op_quantize_row_q_dot = false; bool op_quantize_row_q_dot = false;
bool op_vec_dot_q = false; bool op_vec_dot_q = false;
int64_t iterations = ITERATIONS;
}; };
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
#include <x86intrin.h> #include <x86intrin.h>
@ -75,7 +76,7 @@ void * align_with_offset(void * ptr, int offset) {
return (char *) std::align(MAX_ALIGNMENT, MAX_ALIGNMENT, ptr, dummy_size) + offset; return (char *) std::align(MAX_ALIGNMENT, MAX_ALIGNMENT, ptr, dummy_size) + offset;
} }
void benchmark_function(size_t size, size_t q_size, std::function<size_t(void)> function) { void benchmark_function(size_t size, size_t q_size, int64_t iterations, std::function<size_t(void)> function) {
int64_t min_time_us = INT64_MAX; int64_t min_time_us = INT64_MAX;
int64_t total_time_us = 0; int64_t total_time_us = 0;
int64_t min_time_cycles = INT64_MAX; int64_t min_time_cycles = INT64_MAX;
@ -86,7 +87,7 @@ void benchmark_function(size_t size, size_t q_size, std::function<size_t(void)>
} }
for (int i = 0; i < ITERATIONS; i++) { for (int i = 0; i < iterations; i++) {
const int64_t start_time = ggml_time_us(); const int64_t start_time = ggml_time_us();
const int64_t start_cycles = cpu_cycles(); const int64_t start_cycles = cpu_cycles();
@ -102,9 +103,38 @@ void benchmark_function(size_t size, size_t q_size, std::function<size_t(void)>
} }
printf(" min cycles/%d vals : %9.2f\n", QK, QK * min_time_cycles / (float) size); printf(" min cycles/%d vals : %9.2f\n", QK, QK * min_time_cycles / (float) size);
printf(" avg cycles/%d vals : %9.2f\n", QK, QK * total_time_cycles / (float) (size * ITERATIONS)); printf(" avg cycles/%d vals : %9.2f\n", QK, QK * total_time_cycles / (float) (size * iterations));
printf(" float32 throughput : %9.2f GB/s\n", gigabytes_per_second(4 * size * ITERATIONS, total_time_us)); printf(" float32 throughput : %9.2f GB/s\n", gigabytes_per_second(4 * size * iterations, total_time_us));
printf(" quantized throughput : %9.2f GB/s\n", gigabytes_per_second(q_size * ITERATIONS, total_time_us)); printf(" quantized throughput : %9.2f GB/s\n", gigabytes_per_second(q_size * iterations, total_time_us));
}
void usage(char * argv[]) {
printf("Benchmark quantization specific functions on synthetic data\n");
printf("\n");
printf("usage: %s [options]\n", argv[0]);
printf("\n");
printf("options: (default)\n");
printf(" -h, --help show this help message and exit\n");
printf(" --size SIZE set test size, divisible by 32 (L1_SIZE:%d)\n", L1_SIZE);
printf(" -3 use size as L1, L2, L3 sizes (L1:%d L2:%d L3:%d)\n", L1_SIZE, L2_SIZE, L3_SIZE);
printf(" -4 use size as L1, L2, L3, MEM sizes (L1:%d L2:%d L3:%d MEM:%d)\n", L1_SIZE, L2_SIZE, L3_SIZE, MEM_SIZE);
printf(" --op OP set test opration as quantize_row_q_reference, quantize_row_q, dequantize_row_q,\n");
printf(" quantize_row_q_dot, vec_dot_q (all)\n");
printf(" --type TYPE set test type as");
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
ggml_type type = (ggml_type) i;
quantize_fns_t qfns = ggml_internal_get_quantize_fn(type);
if (ggml_type_name(type) != NULL) {
if (qfns.quantize_row_q && qfns.dequantize_row_q) {
printf(" %s", ggml_type_name(type));
}
}
}
printf(" (all)\n");
printf(" --alignment-offset OFFSET\n");
printf(" set alignment offset as OFFSET (0)\n");
printf(" -i NUM, --iterations NUM\n");
printf(" set test iteration number (%d)\n", ITERATIONS);
} }
int main(int argc, char * argv[]) { int main(int argc, char * argv[]) {
@ -178,6 +208,21 @@ int main(int argc, char * argv[]) {
break; break;
} }
params.alignment_offset = alignment; params.alignment_offset = alignment;
} else if ((arg == "-i") || (arg == "--iterations")) {
if (++i >= argc) {
invalid_param = true;
break;
}
int number = std::stoi(argv[i]);
if (number < 0 || number > MAX_ITERATIONS) {
fprintf(stderr, "error: iterations must be less than %d\n", MAX_ITERATIONS);
invalid_param = true;
break;
}
params.iterations = number;
} else if ((arg == "-h") || (arg == "--help")) {
usage(argv);
return 1;
} else { } else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
return 1; return 1;
@ -213,6 +258,8 @@ int main(int argc, char * argv[]) {
generate_data(0, largest, test_data1); generate_data(0, largest, test_data1);
generate_data(1, largest, test_data2); generate_data(1, largest, test_data2);
int64_t iterations = params.iterations;
// Initialize GGML, ensures float conversion tables are initialized // Initialize GGML, ensures float conversion tables are initialized
struct ggml_init_params ggml_params = { struct ggml_init_params ggml_params = {
@ -225,7 +272,7 @@ int main(int argc, char * argv[]) {
for (int i = 0; i < GGML_TYPE_COUNT; i++) { for (int i = 0; i < GGML_TYPE_COUNT; i++) {
ggml_type type = (ggml_type) i; ggml_type type = (ggml_type) i;
quantize_fns_t qfns = ggml_internal_get_quantize_fn(i); quantize_fns_t qfns = ggml_internal_get_quantize_fn(i);
if (!params.include_types.empty() && std::find(params.include_types.begin(), params.include_types.end(), ggml_type_name(type)) == params.include_types.end()) { if (!params.include_types.empty() && ggml_type_name(type) && std::find(params.include_types.begin(), params.include_types.end(), ggml_type_name(type)) == params.include_types.end()) {
continue; continue;
} }
@ -241,7 +288,7 @@ int main(int argc, char * argv[]) {
return test_q1[0]; return test_q1[0];
}; };
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type); size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
benchmark_function(size, quantized_size, quantize_fn); benchmark_function(size, quantized_size, iterations, quantize_fn);
} }
printf("\n"); printf("\n");
} }
@ -255,7 +302,7 @@ int main(int argc, char * argv[]) {
return test_q1[0]; return test_q1[0];
}; };
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type); size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
benchmark_function(size, quantized_size, quantize_fn); benchmark_function(size, quantized_size, iterations, quantize_fn);
} }
printf("\n"); printf("\n");
} }
@ -270,7 +317,7 @@ int main(int argc, char * argv[]) {
return test_out[0]; return test_out[0];
}; };
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type); size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
benchmark_function(size, quantized_size, quantize_fn); benchmark_function(size, quantized_size, iterations, quantize_fn);
} }
printf("\n"); printf("\n");
} }
@ -284,7 +331,7 @@ int main(int argc, char * argv[]) {
return test_q1[0]; return test_q1[0];
}; };
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type); size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
benchmark_function(size, quantized_size, quantize_fn); benchmark_function(size, quantized_size, iterations, quantize_fn);
} }
printf("\n"); printf("\n");
} }
@ -301,7 +348,7 @@ int main(int argc, char * argv[]) {
return result; return result;
}; };
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type); size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
benchmark_function(size, quantized_size, quantize_fn); benchmark_function(size, quantized_size, iterations, quantize_fn);
} }
printf("\n"); printf("\n");
} }