"""TODOs 1. Implement writing tensor data with alignment. 2. Implement writers for known architectures, LLaMA in particular. 3. Add docstrings from the format specs. 4. After development is done, Convert it to a proper pip-installable Python package, and possibly move it to its own repo under ggml-org. """ import struct from enum import IntEnum from typing import List, Any import constants class GGMLQuantizationType(IntEnum): F32 = 0 F16 = 1 QR_0 = 2 Q4_1 = 3 # Q4_2 = 4 # support has been removed # Q4_3 = 5 # support has been removed Q5_0 = 6 Q5_1 = 7 Q8_0 = 8 Q8_1 = 9 Q2_K = 10 Q3_K = 11 Q4_K = 12 Q5_K = 13 Q6_K = 14 Q8_K = 15 class GGUFValueType(IntEnum): UINT8 = 0 INT8 = 1 UINT16 = 2 INT16 = 3 UINT32 = 4 INT32 = 5 FLOAT32 = 6 BOOL = 7 STRING = 8 ARRAY = 9 @staticmethod def get_type(value): if isinstance(value, str): return GGUFValueType.STRING elif isinstance(value, list): return GGUFValueType.ARRAY elif isinstance(value, float): return GGUFValueType.FLOAT32 elif isinstance(value, bool): return GGUFValueType.BOOL else: return GGUFValueType.INT32 class GGUFWriter: def __init__(self, buffered_writer): self.buffered_writer = buffered_writer def write_header(self, tensor_count: int, metadata_kv_count: int): self.buffered_writer.write(struct.pack(" "GGUFWriter": f = open(path, "wb") return cls(f) def write_key(self, key: str, value_type: GGUFValueType): encoded_key = key.encode("utf8") self.buffered_writer.write(struct.pack("