From e85bb1a8e736228a1f0d965777de5f77f22834b8 Mon Sep 17 00:00:00 2001 From: slaren Date: Fri, 17 Nov 2023 16:17:37 +0100 Subject: [PATCH] llama : add functions to get the model's metadata (#4013) * llama : add functions to get the model's metadata * format -> std::to_string * better documentation --- ggml.c | 25 +++++++++++ ggml.h | 1 + llama.cpp | 131 +++++++++++++++++++++++++++++++++++++++++++++++++++--- llama.h | 17 +++++++ 4 files changed, 167 insertions(+), 7 deletions(-) diff --git a/ggml.c b/ggml.c index c7086ba84..f92292b39 100644 --- a/ggml.c +++ b/ggml.c @@ -18452,24 +18452,29 @@ int gguf_find_key(const struct gguf_context * ctx, const char * key) { } const char * gguf_get_key(const struct gguf_context * ctx, int key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); return ctx->kv[key_id].key.data; } enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); return ctx->kv[key_id].type; } enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY); return ctx->kv[key_id].value.arr.type; } const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY); return ctx->kv[key_id].value.arr.data; } const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY); struct gguf_kv * kv = &ctx->kv[key_id]; struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i]; @@ -18477,70 +18482,90 @@ const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i } int gguf_get_arr_n(const struct gguf_context * ctx, int key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY); return ctx->kv[key_id].value.arr.n; } uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT8); return ctx->kv[key_id].value.uint8; } int8_t gguf_get_val_i8(const struct gguf_context * ctx, int key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT8); return ctx->kv[key_id].value.int8; } uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT16); return ctx->kv[key_id].value.uint16; } int16_t gguf_get_val_i16(const struct gguf_context * ctx, int key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT16); return ctx->kv[key_id].value.int16; } uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT32); return ctx->kv[key_id].value.uint32; } int32_t gguf_get_val_i32(const struct gguf_context * ctx, int key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT32); return ctx->kv[key_id].value.int32; } float gguf_get_val_f32(const struct gguf_context * ctx, int key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT32); return ctx->kv[key_id].value.float32; } uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT64); return ctx->kv[key_id].value.uint64; } int64_t gguf_get_val_i64(const struct gguf_context * ctx, int key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT64); return ctx->kv[key_id].value.int64; } double gguf_get_val_f64(const struct gguf_context * ctx, int key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT64); return ctx->kv[key_id].value.float64; } bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_BOOL); return ctx->kv[key_id].value.bool_; } const char * gguf_get_val_str(const struct gguf_context * ctx, int key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_STRING); return ctx->kv[key_id].value.str.data; } +const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id) { + GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx)); + GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_ARRAY); + GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_STRING); + return &ctx->kv[key_id].value; +} + int gguf_get_n_tensors(const struct gguf_context * ctx) { return ctx->header.n_tensors; } diff --git a/ggml.h b/ggml.h index 8e6b64606..f2fce0f22 100644 --- a/ggml.h +++ b/ggml.h @@ -2045,6 +2045,7 @@ extern "C" { GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int key_id); GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id); GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id); + GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id); GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int key_id); GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id); GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i); diff --git a/llama.cpp b/llama.cpp index 8500b20cb..3cc3fc9f0 100644 --- a/llama.cpp +++ b/llama.cpp @@ -604,6 +604,60 @@ static int8_t llama_rope_scaling_type_from_string(const std::string & name) { return LLAMA_ROPE_SCALING_UNSPECIFIED; } +static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) { + switch (type) { + case GGUF_TYPE_UINT8: return std::to_string(((const uint8_t *)data)[i]); + case GGUF_TYPE_INT8: return std::to_string(((const int8_t *)data)[i]); + case GGUF_TYPE_UINT16: return std::to_string(((const uint16_t *)data)[i]); + case GGUF_TYPE_INT16: return std::to_string(((const int16_t *)data)[i]); + case GGUF_TYPE_UINT32: return std::to_string(((const uint32_t *)data)[i]); + case GGUF_TYPE_INT32: return std::to_string(((const int32_t *)data)[i]); + case GGUF_TYPE_UINT64: return std::to_string(((const uint64_t *)data)[i]); + case GGUF_TYPE_INT64: return std::to_string(((const int64_t *)data)[i]); + case GGUF_TYPE_FLOAT32: return std::to_string(((const float *)data)[i]); + case GGUF_TYPE_FLOAT64: return std::to_string(((const double *)data)[i]); + case GGUF_TYPE_BOOL: return ((const bool *)data)[i] ? "true" : "false"; + default: return format("unknown type %d", type); + } +} + +static std::string gguf_kv_to_str(struct gguf_context * ctx_gguf, int i) { + const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i); + + switch (type) { + case GGUF_TYPE_STRING: + return gguf_get_val_str(ctx_gguf, i); + case GGUF_TYPE_ARRAY: + { + const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i); + int arr_n = gguf_get_arr_n(ctx_gguf, i); + const void * data = gguf_get_arr_data(ctx_gguf, i); + std::stringstream ss; + ss << "["; + for (int j = 0; j < arr_n; j++) { + if (arr_type == GGUF_TYPE_STRING) { + std::string val = gguf_get_arr_str(ctx_gguf, i, j); + // escape quotes + replace_all(val, "\\", "\\\\"); + replace_all(val, "\"", "\\\""); + ss << '"' << val << '"'; + } else if (arr_type == GGUF_TYPE_ARRAY) { + ss << "???"; + } else { + ss << gguf_data_to_str(arr_type, data, j); + } + if (j < arr_n - 1) { + ss << ", "; + } + } + ss << "]"; + return ss.str(); + } + default: + return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0); + } +} + // // ggml helpers // @@ -1327,6 +1381,9 @@ struct llama_model { int n_gpu_layers; + // gguf metadata + std::unordered_map gguf_kv; + // context struct ggml_context * ctx = NULL; @@ -1785,10 +1842,10 @@ struct llama_model_loader { case GGML_TYPE_Q5_K: ftype = LLAMA_FTYPE_MOSTLY_Q5_K_M; break; case GGML_TYPE_Q6_K: ftype = LLAMA_FTYPE_MOSTLY_Q6_K; break; default: - { - LLAMA_LOG_WARN("%s: unknown type %s\n", __func__, ggml_type_name(type_max)); - ftype = LLAMA_FTYPE_ALL_F32; - } break; + { + LLAMA_LOG_WARN("%s: unknown type %s\n", __func__, ggml_type_name(type_max)); + ftype = LLAMA_FTYPE_ALL_F32; + } break; } // this is a way to mark that we have "guessed" the file type @@ -1802,10 +1859,20 @@ struct llama_model_loader { } for (int i = 0; i < n_kv; i++) { - const char * name = gguf_get_key(ctx_gguf, i); - const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i); + const char * name = gguf_get_key(ctx_gguf, i); + const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i); + const std::string type_name = + type == GGUF_TYPE_ARRAY + ? format("%s[%s,%d]", gguf_type_name(type), gguf_type_name(gguf_get_arr_type(ctx_gguf, i)), gguf_get_arr_n(ctx_gguf, i)) + : gguf_type_name(type); - LLAMA_LOG_INFO("%s: - kv %3d: %42s %-8s\n", __func__, i, name, gguf_type_name(type)); + std::string value = gguf_kv_to_str(ctx_gguf, i); + const size_t MAX_VALUE_LEN = 40; + if (value.size() > MAX_VALUE_LEN) { + value = format("%s...", value.substr(0, MAX_VALUE_LEN - 3).c_str()); + } + + LLAMA_LOG_INFO("%s: - kv %3d: %42s %-16s = %s\n", __func__, i, name, type_name.c_str(), value.c_str()); } // print type counts @@ -2100,6 +2167,17 @@ static void llm_load_hparams( auto & hparams = model.hparams; + // get metadata as string + for (int i = 0; i < gguf_get_n_kv(ctx); i++) { + enum gguf_type type = gguf_get_kv_type(ctx, i); + if (type == GGUF_TYPE_ARRAY) { + continue; + } + const char * name = gguf_get_key(ctx, i); + const std::string value = gguf_kv_to_str(ctx, i); + model.gguf_kv.emplace(name, value); + } + // get general kv GGUF_GET_KEY(ctx, model.name, gguf_get_val_str, GGUF_TYPE_STRING, false, kv(LLM_KV_GENERAL_NAME)); @@ -8671,6 +8749,45 @@ float llama_rope_freq_scale_train(const struct llama_model * model) { return model->hparams.rope_freq_scale_train; } +int llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size) { + const auto & it = model->gguf_kv.find(key); + if (it == model->gguf_kv.end()) { + if (buf_size > 0) { + buf[0] = '\0'; + } + return -1; + } + return snprintf(buf, buf_size, "%s", it->second.c_str()); +} + +int llama_model_meta_count(const struct llama_model * model) { + return (int)model->gguf_kv.size(); +} + +int llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) { + if (i < 0 || i >= (int)model->gguf_kv.size()) { + if (buf_size > 0) { + buf[0] = '\0'; + } + return -1; + } + auto it = model->gguf_kv.begin(); + std::advance(it, i); + return snprintf(buf, buf_size, "%s", it->first.c_str()); +} + +int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) { + if (i < 0 || i >= (int)model->gguf_kv.size()) { + if (buf_size > 0) { + buf[0] = '\0'; + } + return -1; + } + auto it = model->gguf_kv.begin(); + std::advance(it, i); + return snprintf(buf, buf_size, "%s", it->second.c_str()); +} + int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) { return snprintf(buf, buf_size, "%s %s %s", llama_model_arch_name(model->arch).c_str(), diff --git a/llama.h b/llama.h index 0a5d6c60f..70e8fda4b 100644 --- a/llama.h +++ b/llama.h @@ -301,6 +301,23 @@ extern "C" { // Get the model's RoPE frequency scaling factor LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model); + // Functions to access the model's GGUF metadata scalar values + // - The functions return the length of the string on success, or -1 on failure + // - The output string is always null-terminated and cleared on failure + // - GGUF array values are not supported by these functions + + // Get metadata value as a string by key name + LLAMA_API int llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size); + + // Get the number of metadata key/value pairs + LLAMA_API int llama_model_meta_count(const struct llama_model * model); + + // Get metadata key name by index + LLAMA_API int llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size); + + // Get metadata value as a string by index + LLAMA_API int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size); + // Get a string describing the model type LLAMA_API int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);