From 4d698495eae6912db94dcdedb0c3b01c63143646 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 26 Jul 2023 11:16:07 +0300 Subject: [PATCH] gguf : init --- .gitignore | 1 + Makefile | 7 +++++-- examples/gguf/gguf.cpp | 34 ++++++++++++++++++++++++++++++++++ ggml.h | 25 ++++++++++++++++++++++++- 4 files changed, 64 insertions(+), 3 deletions(-) create mode 100644 examples/gguf/gguf.cpp diff --git a/.gitignore b/.gitignore index c1ab6bb6d..abe8e28cb 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ models-mnt /server /Pipfile /embd-input-test +/gguf /libllama.so build-info.h arm_neon.h diff --git a/Makefile b/Makefile index fb7c27cd9..e19acfbb2 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Define the default target now so that it is always the first target -BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch simple server embd-input-test +BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch simple server embd-input-test gguf # Binaries only useful for tests TEST_TARGETS = tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0 @@ -330,7 +330,7 @@ libllama.so: llama.o ggml.o $(OBJS) $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) clean: - rm -vf *.o *.so *.dll main quantize quantize-stats perplexity embedding benchmark-matmult save-load-state server simple vdot train-text-from-scratch embd-input-test build-info.h $(TEST_TARGETS) + rm -vf *.o *.so *.dll main quantize quantize-stats perplexity embedding benchmark-matmult save-load-state server simple vdot train-text-from-scratch embd-input-test gguf build-info.h $(TEST_TARGETS) # # Examples @@ -370,6 +370,9 @@ $(LIB_PRE)embdinput$(DSO_EXT): examples/embd-input/embd-input.h examples/embd-in embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-test.cpp build-info.h ggml.o llama.o common.o 
$(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %$(DSO_EXT),$(filter-out %.h,$(filter-out %.hpp,$^))) -o $@ $(LDFLAGS) -L. -lembdinput +gguf: examples/gguf/gguf.cpp build-info.h ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp build-info.h ggml.o llama.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp new file mode 100644 index 000000000..602de519a --- /dev/null +++ b/examples/gguf/gguf.cpp @@ -0,0 +1,34 @@ +#include "ggml.h" + +#include <cstdio> +#include <string> + +bool gguf_write(const std::string & fname) { + + + return true; +} + +bool gguf_read(const std::string & fname) { + return true; +} + +int main(int argc, char ** argv) { + if (argc < 3) { + fprintf(stdout, "usage: %s data.gguf r|w\n", argv[0]); + return -1; + } + + const std::string fname(argv[1]); + const std::string mode(argv[2]); + + GGML_ASSERT((mode == "r" || mode == "w") && "mode must be r or w"); + + if (mode == "w") { + GGML_ASSERT(gguf_write(fname) && "failed to write gguf file"); + } else if (mode == "r") { + GGML_ASSERT(gguf_read(fname) && "failed to read gguf file"); + } + + return 0; +} diff --git a/ggml.h b/ggml.h index 9919cce7c..2e700c9a0 100644 --- a/ggml.h +++ b/ggml.h @@ -190,6 +190,9 @@ #define GGML_FILE_MAGIC 0x67676d6c // "ggml" #define GGML_FILE_VERSION 1 +#define GGUF_FILE_MAGIC 0x47475546 // "GGUF" +#define GGUF_FILE_VERSION 1 + #define GGML_QNT_VERSION 2 // bump this on quantization format changes #define GGML_QNT_VERSION_FACTOR 1000 // do not change this @@ -202,7 +205,6 @@ #define GGML_MAX_OP_PARAMS 32 #define GGML_DEFAULT_N_THREADS 4 - #define GGML_EXIT_SUCCESS 0 #define GGML_EXIT_ABORTED 1 @@ -1611,6 +1613,27 @@ extern "C" { GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist); + // + // gguf + // + + enum gguf_metadata_value_type { + 
GGUF_METADATA_VALUE_TYPE_UINT8 = 0, + GGUF_METADATA_VALUE_TYPE_INT8 = 1, + GGUF_METADATA_VALUE_TYPE_UINT16 = 2, + GGUF_METADATA_VALUE_TYPE_INT16 = 3, + GGUF_METADATA_VALUE_TYPE_UINT32 = 4, + GGUF_METADATA_VALUE_TYPE_INT32 = 5, + GGUF_METADATA_VALUE_TYPE_FLOAT32 = 6, + GGUF_METADATA_VALUE_TYPE_BOOL = 7, + GGUF_METADATA_VALUE_TYPE_STRING = 8, + GGUF_METADATA_VALUE_TYPE_ARRAY = 9, + }; + + struct gguf_string { + uint32_t n; + char * data; + }; // // system info //