From 4d698495eae6912db94dcdedb0c3b01c63143646 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 26 Jul 2023 11:16:07 +0300 Subject: [PATCH] gguf : init --- .gitignore | 1 + Makefile | 7 +++++-- examples/gguf/gguf.cpp | 34 ++++++++++++++++++++++++++++++++++ ggml.h | 25 ++++++++++++++++++++++++- 4 files changed, 64 insertions(+), 3 deletions(-) create mode 100644 examples/gguf/gguf.cpp diff --git a/.gitignore b/.gitignore index c1ab6bb6d..abe8e28cb 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ models-mnt /server /Pipfile /embd-input-test +/gguf /libllama.so build-info.h arm_neon.h diff --git a/Makefile b/Makefile index fb7c27cd9..e19acfbb2 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Define the default target now so that it is always the first target -BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch simple server embd-input-test +BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch simple server embd-input-test gguf # Binaries only useful for tests TEST_TARGETS = tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0 @@ -330,7 +330,7 @@ libllama.so: llama.o ggml.o $(OBJS) $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) clean: - rm -vf *.o *.so *.dll main quantize quantize-stats perplexity embedding benchmark-matmult save-load-state server simple vdot train-text-from-scratch embd-input-test build-info.h $(TEST_TARGETS) + rm -vf *.o *.so *.dll main quantize quantize-stats perplexity embedding benchmark-matmult save-load-state server simple vdot train-text-from-scratch embd-input-test gguf build-info.h $(TEST_TARGETS) # # Examples @@ -370,6 +370,9 @@ $(LIB_PRE)embdinput$(DSO_EXT): examples/embd-input/embd-input.h examples/embd-in embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-test.cpp build-info.h ggml.o llama.o common.o 
$(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %$(DSO_EXT),$(filter-out %.h,$(filter-out %.hpp,$^))) -o $@ $(LDFLAGS) -L. -lembdinput +gguf: examples/gguf/gguf.cpp build-info.h ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) + train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp build-info.h ggml.o llama.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp new file mode 100644 index 000000000..602de519a --- /dev/null +++ b/examples/gguf/gguf.cpp @@ -0,0 +1,34 @@ +#include "ggml.h" + +#include <cstdio> +#include <string> + +bool gguf_write(const std::string & fname) { + + + return true; +} + +bool gguf_read(const std::string & fname) { + return true; +} + +int main(int argc, char ** argv) { + if (argc < 3) { + fprintf(stdout, "usage: %s data.gguf r|w\n", argv[0]); + return -1; + } + + const std::string fname(argv[1]); + const std::string mode(argv[2]); + + GGML_ASSERT((mode == "r" || mode == "w") && "mode must be r or w"); + + if (mode == "w") { + GGML_ASSERT(gguf_write(fname) && "failed to write gguf file"); + } else if (mode == "r") { + GGML_ASSERT(gguf_read(fname) && "failed to read gguf file"); + } + + return 0; +} diff --git a/ggml.h b/ggml.h index 9919cce7c..2e700c9a0 100644 --- a/ggml.h +++ b/ggml.h @@ -190,6 +190,9 @@ #define GGML_FILE_MAGIC 0x67676d6c // "ggml" #define GGML_FILE_VERSION 1 +#define GGUF_FILE_MAGIC 0x47475546 // "GGUF" +#define GGUF_FILE_VERSION 1 + #define GGML_QNT_VERSION 2 // bump this on quantization format changes #define GGML_QNT_VERSION_FACTOR 1000 // do not change this @@ -202,7 +205,6 @@ #define GGML_MAX_OP_PARAMS 32 #define GGML_DEFAULT_N_THREADS 4 - #define GGML_EXIT_SUCCESS 0 #define GGML_EXIT_ABORTED 1 @@ -1611,6 +1613,27 @@ extern "C" { GGML_API size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int n, int64_t * hist); + // + // gguf + // + + enum gguf_metadata_value_type { + 
GGUF_METADATA_VALUE_TYPE_UINT8 = 0, + GGUF_METADATA_VALUE_TYPE_INT8 = 1, + GGUF_METADATA_VALUE_TYPE_UINT16 = 2, + GGUF_METADATA_VALUE_TYPE_INT16 = 3, + GGUF_METADATA_VALUE_TYPE_UINT32 = 4, + GGUF_METADATA_VALUE_TYPE_INT32 = 5, + GGUF_METADATA_VALUE_TYPE_FLOAT32 = 6, + GGUF_METADATA_VALUE_TYPE_BOOL = 7, + GGUF_METADATA_VALUE_TYPE_STRING = 8, + GGUF_METADATA_VALUE_TYPE_ARRAY = 9, + }; + + struct gguf_string { + uint32_t n; + char * data; + }; // // system info //