gguf : add 64-bit support (GGUF v2) (#2821)

* gguf : bump version to 2

* gguf : add support for 64-bit (no backwards comp yet)

* gguf : v1 backwards comp

* gguf.py : bump GGUF version

* gguf.py : uint64_t on all lengths, sizes and counts, enums still uint32_t

* gguf.py : string lengths uint32_t

* gguf : update all counts to 64-bit

* gguf.py : string len uint64_t and n_dims uint32_t

* gguf : fix typo

* llama.cpp : print gguf version

---------

Co-authored-by: klosax <131523366+klosax@users.noreply.github.com>
This commit is contained in:
Georgi Gerganov 2023-08-27 14:19:54 +03:00 committed by GitHub
parent edd4c14817
commit d0cee0d36d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 164 additions and 27 deletions

View file

@ -30,6 +30,9 @@ bool gguf_ex_write(const std::string & fname) {
gguf_set_val_u32 (ctx, "some.parameter.uint32", 0x12345678);
gguf_set_val_i32 (ctx, "some.parameter.int32", -0x12345679);
gguf_set_val_f32 (ctx, "some.parameter.float32", 0.123456789f);
gguf_set_val_u64 (ctx, "some.parameter.uint64", 0x123456789abcdef0ull);
gguf_set_val_i64 (ctx, "some.parameter.int64", -0x123456789abcdef1ll);
gguf_set_val_f64 (ctx, "some.parameter.float64", 0.1234567890123456789);
gguf_set_val_bool(ctx, "some.parameter.bool", true);
gguf_set_val_str (ctx, "some.parameter.string", "hello world");

137
ggml.c
View file

@ -19394,7 +19394,7 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
////////////////////////////////////////////////////////////////////////////////
struct gguf_str {
uint32_t n;
uint64_t n; // GGUFv2
char * data;
};
@ -19408,9 +19408,12 @@ static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_FLOAT32] = sizeof(float),
[GGUF_TYPE_BOOL] = sizeof(bool),
[GGUF_TYPE_STRING] = sizeof(struct gguf_str),
[GGUF_TYPE_UINT64] = sizeof(uint64_t),
[GGUF_TYPE_INT64] = sizeof(int64_t),
[GGUF_TYPE_FLOAT64] = sizeof(double),
[GGUF_TYPE_ARRAY] = 0, // undefined
};
static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_UINT8] = "u8",
@ -19423,8 +19426,11 @@ static const char * GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
[GGUF_TYPE_BOOL] = "bool",
[GGUF_TYPE_STRING] = "str",
[GGUF_TYPE_ARRAY] = "arr",
[GGUF_TYPE_UINT64] = "u64",
[GGUF_TYPE_INT64] = "i64",
[GGUF_TYPE_FLOAT64] = "f64",
};
static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
static_assert(GGUF_TYPE_COUNT == 13, "GGUF_TYPE_COUNT != 13");
union gguf_value {
uint8_t uint8;
@ -19434,6 +19440,9 @@ union gguf_value {
uint32_t uint32;
int32_t int32;
float float32;
uint64_t uint64;
int64_t int64;
double float64;
bool bool_;
struct gguf_str str;
@ -19441,7 +19450,7 @@ union gguf_value {
struct {
enum gguf_type type;
uint32_t n;
uint64_t n; // GGUFv2
void * data;
} arr;
};
@ -19449,8 +19458,6 @@ union gguf_value {
struct gguf_kv {
struct gguf_str key;
uint32_t n_bytes; // TODO: is this actually needed?
enum gguf_type type;
union gguf_value value;
};
@ -19458,15 +19465,15 @@ struct gguf_kv {
struct gguf_header {
uint32_t magic;
uint32_t version;
uint32_t n_tensors;
uint32_t n_kv;
uint64_t n_tensors; // GGUFv2
uint64_t n_kv; // GGUFv2
};
struct gguf_tensor_info {
struct gguf_str name;
uint32_t n_dims;
uint32_t ne[GGML_MAX_DIMS];
uint64_t ne[GGML_MAX_DIMS];
enum ggml_type type;
@ -19497,19 +19504,32 @@ static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset)
return n == size;
}
static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
// NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
static bool gguf_fread_str_cur(FILE * file, struct gguf_str * p, size_t * offset) {
p->n = 0;
p->data = NULL;
bool ok = true;
// TODO: how to avoid mallocs for strings?
ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset); p->data = calloc(p->n + 1, 1);
ok = ok && gguf_fread_el(file, p->data, p->n, offset);
return ok;
}
static bool gguf_fread_str_v1(FILE * file, struct gguf_str * p, size_t * offset) {
p->n = 0;
p->data = NULL;
bool ok = true;
uint32_t n = 0;
ok = ok && gguf_fread_el(file, &n, sizeof(n), offset); p->data = calloc(n + 1, 1); p->n = n;
ok = ok && gguf_fread_el(file, p->data, p->n, offset);
return ok;
}
struct gguf_context * gguf_init_empty(void) {
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
@ -19565,8 +19585,21 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
ctx->data = NULL;
ok = ok && gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset);
ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
if (ctx->header.version == 1) {
// NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
uint32_t n_tensors = 0;
uint32_t n_kv = 0;
ok = ok && gguf_fread_el(file, &n_tensors, sizeof(n_tensors), &offset);
ok = ok && gguf_fread_el(file, &n_kv, sizeof(n_kv), &offset);
ctx->header.n_tensors = n_tensors;
ctx->header.n_kv = n_kv;
} else {
ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
}
if (!ok) {
fprintf(stderr, "%s: failed to read header\n", __func__);
@ -19576,6 +19609,12 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
}
}
// NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
bool (* gguf_fread_str)(FILE *, struct gguf_str *, size_t *) = gguf_fread_str_cur;
if (ctx->header.version == 1) {
gguf_fread_str = gguf_fread_str_v1;
}
// read the kv pairs
{
ctx->kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
@ -19585,9 +19624,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
//fprintf(stderr, "%s: reading kv %d\n", __func__, i);
ok = ok && gguf_fread_str(file, &kv->key, &offset);
//ok = ok && gguf_fread_el (file, &kv->n_bytes, sizeof(kv->n_bytes), &offset);
ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
ok = ok && gguf_fread_str(file, &kv->key, &offset);
ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
//fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
@ -19599,12 +19637,23 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break;
case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break;
case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
case GGUF_TYPE_UINT64: ok = ok && gguf_fread_el (file, &kv->value.uint64, sizeof(kv->value.uint64), &offset); break;
case GGUF_TYPE_INT64: ok = ok && gguf_fread_el (file, &kv->value.int64, sizeof(kv->value.int64), &offset); break;
case GGUF_TYPE_FLOAT64: ok = ok && gguf_fread_el (file, &kv->value.float64, sizeof(kv->value.float64), &offset); break;
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break;
case GGUF_TYPE_ARRAY:
{
ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
if (ctx->header.version == 1) {
// NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
uint32_t n = 0;
ok = ok && gguf_fread_el(file, &n, sizeof(n), &offset);
kv->value.arr.n = n;
} else {
ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
}
switch (kv->value.arr.type) {
case GGUF_TYPE_UINT8:
@ -19614,6 +19663,9 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
case GGUF_TYPE_UINT32:
case GGUF_TYPE_INT32:
case GGUF_TYPE_FLOAT32:
case GGUF_TYPE_UINT64:
case GGUF_TYPE_INT64:
case GGUF_TYPE_FLOAT64:
case GGUF_TYPE_BOOL:
{
kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
@ -19660,7 +19712,14 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
ok = ok && gguf_fread_str(file, &info->name, &offset);
ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
for (uint32_t j = 0; j < info->n_dims; ++j) {
ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
if (ctx->header.version == 1) {
// NOTE: temporary handling of GGUFv1 >> remove after Oct 2023
uint32_t t = 0;
ok = ok && gguf_fread_el(file, &t, sizeof(t), &offset);
info->ne[j] = t;
} else {
ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
}
}
ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
@ -19954,6 +20013,18 @@ float gguf_get_val_f32(struct gguf_context * ctx, int i) {
return ctx->kv[i].value.float32;
}
uint64_t gguf_get_val_u64(struct gguf_context * ctx, int i) {
return ctx->kv[i].value.uint64;
}
int64_t gguf_get_val_i64(struct gguf_context * ctx, int i) {
return ctx->kv[i].value.int64;
}
double gguf_get_val_f64(struct gguf_context * ctx, int i) {
return ctx->kv[i].value.float64;
}
bool gguf_get_val_bool(struct gguf_context * ctx, int i) {
return ctx->kv[i].value.bool_;
}
@ -20056,6 +20127,27 @@ void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) {
ctx->kv[idx].value.float32 = val;
}
void gguf_set_val_u64(struct gguf_context * ctx, const char * key, uint64_t val) {
const int idx = gguf_get_or_add_key(ctx, key);
ctx->kv[idx].type = GGUF_TYPE_UINT64;
ctx->kv[idx].value.uint64 = val;
}
void gguf_set_val_i64(struct gguf_context * ctx, const char * key, int64_t val) {
const int idx = gguf_get_or_add_key(ctx, key);
ctx->kv[idx].type = GGUF_TYPE_INT64;
ctx->kv[idx].value.int64 = val;
}
void gguf_set_val_f64(struct gguf_context * ctx, const char * key, double val) {
const int idx = gguf_get_or_add_key(ctx, key);
ctx->kv[idx].type = GGUF_TYPE_FLOAT64;
ctx->kv[idx].value.float64 = val;
}
void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
const int idx = gguf_get_or_add_key(ctx, key);
@ -20106,6 +20198,9 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
case GGUF_TYPE_UINT32: gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32); break;
case GGUF_TYPE_INT32: gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32); break;
case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32); break;
case GGUF_TYPE_UINT64: gguf_set_val_u64 (ctx, src->kv[i].key.data, src->kv[i].value.uint64); break;
case GGUF_TYPE_INT64: gguf_set_val_i64 (ctx, src->kv[i].key.data, src->kv[i].value.int64); break;
case GGUF_TYPE_FLOAT64: gguf_set_val_f64 (ctx, src->kv[i].key.data, src->kv[i].value.float64); break;
case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break;
case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
case GGUF_TYPE_ARRAY:
@ -20267,6 +20362,9 @@ static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf,
case GGUF_TYPE_UINT32: gguf_bwrite_el (buf, &kv->value.uint32, sizeof(kv->value.uint32) ); break;
case GGUF_TYPE_INT32: gguf_bwrite_el (buf, &kv->value.int32, sizeof(kv->value.int32) ); break;
case GGUF_TYPE_FLOAT32: gguf_bwrite_el (buf, &kv->value.float32, sizeof(kv->value.float32)); break;
case GGUF_TYPE_UINT64: gguf_bwrite_el (buf, &kv->value.uint64, sizeof(kv->value.uint64) ); break;
case GGUF_TYPE_INT64: gguf_bwrite_el (buf, &kv->value.int64, sizeof(kv->value.int64) ); break;
case GGUF_TYPE_FLOAT64: gguf_bwrite_el (buf, &kv->value.float64, sizeof(kv->value.float64)); break;
case GGUF_TYPE_BOOL: gguf_bwrite_el (buf, &kv->value.bool_, sizeof(kv->value.bool_) ); break;
case GGUF_TYPE_STRING: gguf_bwrite_str(buf, &kv->value.str ); break;
case GGUF_TYPE_ARRAY:
@ -20282,6 +20380,9 @@ static void gguf_write_to_buf(struct gguf_context * ctx, struct gguf_buf * buf,
case GGUF_TYPE_UINT32:
case GGUF_TYPE_INT32:
case GGUF_TYPE_FLOAT32:
case GGUF_TYPE_UINT64:
case GGUF_TYPE_INT64:
case GGUF_TYPE_FLOAT64:
case GGUF_TYPE_BOOL:
{
gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);

11
ggml.h
View file

@ -216,7 +216,7 @@
#define GGML_EXIT_ABORTED 1
#define GGUF_MAGIC 0x46554747 // "GGUF"
#define GGUF_VERSION 1
#define GGUF_VERSION 2
#define GGUF_DEFAULT_ALIGNMENT 32
@ -1827,6 +1827,9 @@ extern "C" {
GGUF_TYPE_BOOL = 7,
GGUF_TYPE_STRING = 8,
GGUF_TYPE_ARRAY = 9,
GGUF_TYPE_UINT64 = 10,
GGUF_TYPE_INT64 = 11,
GGUF_TYPE_FLOAT64 = 12,
GGUF_TYPE_COUNT, // marks the end of the enum
};
@ -1867,6 +1870,9 @@ extern "C" {
GGML_API uint32_t gguf_get_val_u32 (struct gguf_context * ctx, int i);
GGML_API int32_t gguf_get_val_i32 (struct gguf_context * ctx, int i);
GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
GGML_API uint64_t gguf_get_val_u64 (struct gguf_context * ctx, int i);
GGML_API int64_t gguf_get_val_i64 (struct gguf_context * ctx, int i);
GGML_API double gguf_get_val_f64 (struct gguf_context * ctx, int i);
GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
GGML_API int gguf_get_arr_n (struct gguf_context * ctx, int i);
@ -1886,6 +1892,9 @@ extern "C" {
GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);

View file

@ -13,7 +13,7 @@ from typing import Any, IO, List, Optional
#
GGUF_MAGIC = 0x46554747
GGUF_VERSION = 1
GGUF_VERSION = 2
GGUF_DEFAULT_ALIGNMENT = 32
# general
@ -365,6 +365,9 @@ class GGUFValueType(IntEnum):
BOOL = 7
STRING = 8
ARRAY = 9
UINT64 = 10
INT64 = 11
FLOAT64 = 12
@staticmethod
def get_type(val):
@ -378,6 +381,7 @@ class GGUFValueType(IntEnum):
return GGUFValueType.BOOL
elif isinstance(val, int):
return GGUFValueType.INT32
# TODO: need help with 64-bit types in Python
else:
print("Unknown type: "+str(type(val)))
sys.exit()
@ -400,8 +404,8 @@ class GGUFWriter:
def write_header_to_file(self):
self.fout.write(struct.pack("<I", GGUF_MAGIC))
self.fout.write(struct.pack("<I", GGUF_VERSION))
self.fout.write(struct.pack("<I", self.ti_data_count))
self.fout.write(struct.pack("<I", self.kv_data_count))
self.fout.write(struct.pack("<Q", self.ti_data_count))
self.fout.write(struct.pack("<Q", self.kv_data_count))
self.flush()
# print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
@ -444,6 +448,18 @@ class GGUFWriter:
self.add_key(key)
self.add_val(val, GGUFValueType.FLOAT32)
def add_uint64(self, key: str, val: int):
self.add_key(key)
self.add_val(val, GGUFValueType.UINT64)
def add_int64(self, key: str, val: int):
self.add_key(key)
self.add_val(val, GGUFValueType.INT64)
def add_float64(self, key: str, val: float):
self.add_key(key)
self.add_val(val, GGUFValueType.FLOAT64)
def add_bool(self, key: str, val: bool):
self.add_key(key)
self.add_val(val, GGUFValueType.BOOL)
@ -483,17 +499,23 @@ class GGUFWriter:
self.kv_data += struct.pack("<i", val)
elif vtype == GGUFValueType.FLOAT32:
self.kv_data += struct.pack("<f", val)
elif vtype == GGUFValueType.UINT64:
self.kv_data += struct.pack("<Q", val)
elif vtype == GGUFValueType.INT64:
self.kv_data += struct.pack("<q", val)
elif vtype == GGUFValueType.FLOAT64:
self.kv_data += struct.pack("<d", val)
elif vtype == GGUFValueType.BOOL:
self.kv_data += struct.pack("?", val)
elif vtype == GGUFValueType.STRING:
encoded_val = val.encode("utf8") if isinstance(val, str) else val
self.kv_data += struct.pack("<I", len(encoded_val))
self.kv_data += struct.pack("<Q", len(encoded_val))
self.kv_data += encoded_val
elif vtype == GGUFValueType.ARRAY:
ltype = set([GGUFValueType.get_type(item) for item in val])
assert len(ltype) == 1, "All items in a GGUF array should be of the same type"
self.kv_data += struct.pack("<I", list(ltype)[0])
self.kv_data += struct.pack("<I", len(val))
self.kv_data += struct.pack("<Q", len(val))
for item in val:
self.add_val(item, add_vtype=False)
else:
@ -507,12 +529,12 @@ class GGUFWriter:
assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
encoded_name = name.encode("utf8")
self.ti_data += struct.pack("<I", len(encoded_name))
self.ti_data += struct.pack("<Q", len(encoded_name))
self.ti_data += encoded_name
n_dims = len(tensor_shape)
self.ti_data += struct.pack("<I", n_dims)
for i in range(n_dims):
self.ti_data += struct.pack("<I", tensor_shape[n_dims - 1 - i])
self.ti_data += struct.pack("<Q", tensor_shape[n_dims - 1 - i])
if raw_dtype is None:
dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
else:

View file

@ -1144,11 +1144,13 @@ static bool llama_kv_cache_init(
enum llama_fver {
GGUF_FILE_VERSION_V1 = 1,
GGUF_FILE_VERSION_V2 = 2,
};
static const char * llama_file_version_name(llama_fver version) {
switch (version) {
case GGUF_FILE_VERSION_V1: return "GGUF V1 (latest)";
case GGUF_FILE_VERSION_V1: return "GGUF V1 (support until nov 2023)";
case GGUF_FILE_VERSION_V2: return "GGUF V2 (latest)";
}
return "unknown";