gguf : general usability improvements (#3409)

This commit is contained in:
cebtenzzre 2023-10-02 14:58:46 -04:00 committed by GitHub
parent 9476b01226
commit 0fe321031a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 120 additions and 101 deletions

View file

@ -41,8 +41,7 @@ if hasattr(faulthandler, 'register') and hasattr(signal, 'SIGUSR1'):
NDArray: TypeAlias = 'np.ndarray[Any, Any]' NDArray: TypeAlias = 'np.ndarray[Any, Any]'
ARCH=gguf.MODEL_ARCH.LLAMA ARCH = gguf.MODEL_ARCH.LLAMA
NAMES=gguf.MODEL_TENSOR_NAMES[ARCH]
DEFAULT_CONCURRENCY = 8 DEFAULT_CONCURRENCY = 8
# #
@ -953,7 +952,7 @@ class OutputFile:
of.close() of.close()
def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileType: def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileType:
wq_type = model[NAMES[gguf.MODEL_TENSOR.ATTN_Q].format(bid=0)+".weight"].data_type wq_type = model[gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ATTN_Q].format(bid=0)+".weight"].data_type
if output_type_str == "f32" or (output_type_str is None and wq_type == DT_F32): if output_type_str == "f32" or (output_type_str is None and wq_type == DT_F32):
return GGMLFileType.AllF32 return GGMLFileType.AllF32

View file

@ -313,7 +313,7 @@ class ModelParams:
gguf_writer.add_feed_forward_length(self.get_n_ff()) gguf_writer.add_feed_forward_length(self.get_n_ff())
def tensor_name(key, bid=None, suffix=".weight"): def tensor_name(key, bid=None, suffix=".weight"):
return gguf.MODEL_TENSOR_NAMES[gguf.MODEL_ARCH.LLAMA][key].format(bid=bid) + suffix return gguf.TENSOR_NAMES[key].format(bid=bid) + suffix
class Layer: class Layer:
def __init__(self, params, lora_params, bid): def __init__(self, params, lora_params, bid):

View file

@ -364,7 +364,7 @@ class ModelParams:
gguf_writer.add_feed_forward_length(self.get_n_ff()) gguf_writer.add_feed_forward_length(self.get_n_ff())
def tensor_name(key, bid=None): def tensor_name(key, bid=None):
return gguf.MODEL_TENSOR_NAMES[gguf.MODEL_ARCH.LLAMA][key].format(bid=bid) + ".weight" return gguf.TENSOR_NAMES[key].format(bid=bid) + ".weight"
class Layer: class Layer:
def __init__(self, params, bid): def __init__(self, params, bid):

View file

@ -118,76 +118,97 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
MODEL_ARCH.STARCODER: "starcoder", MODEL_ARCH.STARCODER: "starcoder",
} }
MODEL_TENSOR_NAMES: dict[MODEL_ARCH, dict[MODEL_TENSOR, str]] = { TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
MODEL_ARCH.LLAMA: { MODEL_TENSOR.TOKEN_EMBD: "token_embd",
MODEL_TENSOR.TOKEN_EMBD: "token_embd", MODEL_TENSOR.POS_EMBD: "position_embd",
MODEL_TENSOR.OUTPUT_NORM: "output_norm", MODEL_TENSOR.OUTPUT_NORM: "output_norm",
MODEL_TENSOR.OUTPUT: "output", MODEL_TENSOR.OUTPUT: "output",
MODEL_TENSOR.ROPE_FREQS: "rope_freqs", MODEL_TENSOR.ROPE_FREQS: "rope_freqs",
MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q", MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k", MODEL_TENSOR.ATTN_NORM_2: "blk.{bid}.attn_norm_2",
MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v", MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output", MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q",
MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd", MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k",
MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm", MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v",
MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate", MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down", MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up", MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
}, MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
MODEL_ARCH.GPTNEOX: { MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
MODEL_TENSOR.TOKEN_EMBD: "token_embd", MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
MODEL_TENSOR.OUTPUT_NORM: "output_norm", }
MODEL_TENSOR.OUTPUT: "output",
MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm", MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv", MODEL_ARCH.LLAMA: [
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output", MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm", MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down", MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up", MODEL_TENSOR.ROPE_FREQS,
}, MODEL_TENSOR.ATTN_NORM,
MODEL_ARCH.FALCON: { MODEL_TENSOR.ATTN_Q,
MODEL_TENSOR.TOKEN_EMBD: "token_embd", MODEL_TENSOR.ATTN_K,
MODEL_TENSOR.OUTPUT_NORM: "output_norm", MODEL_TENSOR.ATTN_V,
MODEL_TENSOR.OUTPUT: "output", MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm", MODEL_TENSOR.ATTN_ROT_EMBD,
MODEL_TENSOR.ATTN_NORM_2: "blk.{bid}.attn_norm_2", MODEL_TENSOR.FFN_NORM,
MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv", MODEL_TENSOR.FFN_GATE,
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output", MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down", MODEL_TENSOR.FFN_UP,
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up", ],
}, MODEL_ARCH.GPTNEOX: [
MODEL_ARCH.BAICHUAN: { MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.TOKEN_EMBD: "token_embd", MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.OUTPUT_NORM: "output_norm", MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.OUTPUT: "output", MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ROPE_FREQS: "rope_freqs", MODEL_TENSOR.ATTN_QKV,
MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm", MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q", MODEL_TENSOR.FFN_NORM,
MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k", MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v", MODEL_TENSOR.FFN_UP,
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output", ],
MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd", MODEL_ARCH.FALCON: [
MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm", MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate", MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down", MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up", MODEL_TENSOR.ATTN_NORM,
}, MODEL_TENSOR.ATTN_NORM_2,
MODEL_ARCH.STARCODER: { MODEL_TENSOR.ATTN_QKV,
MODEL_TENSOR.TOKEN_EMBD: "token_embd", MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.POS_EMBD: "position_embd", MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.OUTPUT_NORM: "output_norm", MODEL_TENSOR.FFN_UP,
MODEL_TENSOR.OUTPUT: "output", ],
MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm", MODEL_ARCH.BAICHUAN: [
MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv", MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output", MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm", MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down", MODEL_TENSOR.ROPE_FREQS,
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up", MODEL_TENSOR.ATTN_NORM,
}, MODEL_TENSOR.ATTN_Q,
MODEL_ARCH.GPT2: { MODEL_TENSOR.ATTN_K,
MODEL_TENSOR.ATTN_V,
MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.ATTN_ROT_EMBD,
MODEL_TENSOR.FFN_NORM,
MODEL_TENSOR.FFN_GATE,
MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.FFN_UP,
],
MODEL_ARCH.STARCODER: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.POS_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ATTN_QKV,
MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.FFN_NORM,
MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.FFN_UP,
],
MODEL_ARCH.GPT2: [
# TODO # TODO
}, ],
# TODO # TODO
} }
@ -338,28 +359,24 @@ class TensorNameMap:
mapping: dict[str, tuple[MODEL_TENSOR, str]] mapping: dict[str, tuple[MODEL_TENSOR, str]]
tensor_names: dict[MODEL_TENSOR, str]
def __init__(self, arch: MODEL_ARCH, n_blocks: int): def __init__(self, arch: MODEL_ARCH, n_blocks: int):
mapping = self.mapping = {} self.mapping = {}
tensor_names = self.tensor_names = MODEL_TENSOR_NAMES[arch]
for tensor, keys in self.mappings_cfg.items(): for tensor, keys in self.mappings_cfg.items():
tensor_name = tensor_names.get(tensor) if tensor not in MODEL_TENSORS[arch]:
if tensor_name is None:
continue continue
mapping[tensor_name] = (tensor, tensor_name) tensor_name = TENSOR_NAMES[tensor]
self.mapping[tensor_name] = (tensor, tensor_name)
for key in keys: for key in keys:
mapping[key] = (tensor, tensor_name) self.mapping[key] = (tensor, tensor_name)
for bid in range(n_blocks): for bid in range(n_blocks):
for tensor, keys in self.block_mappings_cfg.items(): for tensor, keys in self.block_mappings_cfg.items():
tensor_name = tensor_names.get(tensor) if tensor not in MODEL_TENSORS[arch]:
if tensor_name is None:
continue continue
tensor_name = tensor_name.format(bid = bid) tensor_name = TENSOR_NAMES[tensor].format(bid = bid)
mapping[tensor_name] = (tensor, tensor_name) self.mapping[tensor_name] = (tensor, tensor_name)
for key in keys: for key in keys:
key = key.format(bid = bid) key = key.format(bid = bid)
mapping[key] = (tensor, tensor_name) self.mapping[key] = (tensor, tensor_name)
def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None: def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None:
result = self.mapping.get(key) result = self.mapping.get(key)
@ -800,22 +817,25 @@ class SpecialVocab:
special_token_types: tuple[str, ...] = ('bos', 'eos', 'unk', 'sep', 'pad') special_token_types: tuple[str, ...] = ('bos', 'eos', 'unk', 'sep', 'pad')
special_token_ids: dict[str, int] = {} special_token_ids: dict[str, int] = {}
def __init__(self, path: Path, load_merges: bool = False, special_token_types: tuple[str, ...] | None = None): def __init__(
self, path: str | os.PathLike[str], load_merges: bool = False,
special_token_types: tuple[str, ...] | None = None,
):
self.special_token_ids = {} self.special_token_ids = {}
self.load_merges = load_merges self.load_merges = load_merges
if special_token_types is not None: if special_token_types is not None:
self.special_token_types = special_token_types self.special_token_types = special_token_types
self.load(path) self._load(Path(path))
def load(self, path: Path): def _load(self, path: Path) -> None:
if not self.try_load_from_tokenizer_json(path): if not self._try_load_from_tokenizer_json(path):
self.try_load_from_config_json(path) self._try_load_from_config_json(path)
def try_load_from_tokenizer_json(self, path: Path) -> bool: def _try_load_from_tokenizer_json(self, path: Path) -> bool:
tokenizer_file = path / 'tokenizer.json' tokenizer_file = path / 'tokenizer.json'
if not tokenizer_file.is_file(): if not tokenizer_file.is_file():
return False return False
with open(tokenizer_file, 'r', encoding = 'utf-8') as f: with open(tokenizer_file, encoding = 'utf-8') as f:
tokenizer = json.load(f) tokenizer = json.load(f)
if self.load_merges: if self.load_merges:
merges = tokenizer.get('model', {}).get('merges') merges = tokenizer.get('model', {}).get('merges')
@ -825,7 +845,7 @@ class SpecialVocab:
added_tokens = tokenizer.get('added_tokens') added_tokens = tokenizer.get('added_tokens')
if added_tokens is None or not tokenizer_config_file.is_file(): if added_tokens is None or not tokenizer_config_file.is_file():
return True return True
with open(tokenizer_config_file, 'r', encoding = 'utf-8') as f: with open(tokenizer_config_file, encoding = 'utf-8') as f:
tokenizer_config = json.load(f) tokenizer_config = json.load(f)
for typ in self.special_token_types: for typ in self.special_token_types:
entry = tokenizer_config.get(f'{typ}_token') entry = tokenizer_config.get(f'{typ}_token')
@ -844,11 +864,11 @@ class SpecialVocab:
break break
return True return True
def try_load_from_config_json(self, path: Path) -> bool: def _try_load_from_config_json(self, path: Path) -> bool:
config_file = path / 'config.json' config_file = path / 'config.json'
if not config_file.is_file(): if not config_file.is_file():
return False return False
with open(config_file, 'r', encoding = 'utf-8') as f: with open(config_file, encoding = 'utf-8') as f:
config = json.load(f) config = json.load(f)
for typ in self.special_token_types: for typ in self.special_token_types:
maybe_token_id = config.get(f'{typ}_token_id') maybe_token_id = config.get(f'{typ}_token_id')
@ -856,7 +876,7 @@ class SpecialVocab:
self.special_token_ids[typ] = maybe_token_id self.special_token_ids[typ] = maybe_token_id
return True return True
def add_to_gguf(self, gw: GGUFWriter): def add_to_gguf(self, gw: GGUFWriter) -> None:
if len(self.merges) > 0: if len(self.merges) > 0:
print(f'gguf: Adding {len(self.merges)} merge(s).') print(f'gguf: Adding {len(self.merges)} merge(s).')
gw.add_token_merges(self.merges) gw.add_token_merges(self.merges)
@ -868,8 +888,8 @@ class SpecialVocab:
print(f'gguf: Setting special token type {typ} to {tokid}') print(f'gguf: Setting special token type {typ} to {tokid}')
handler(tokid) handler(tokid)
def __repr__(self): def __repr__(self) -> str:
return f'<SpecialVocab with {len(self.merges)} merges and special tokens {self.special_token_ids if self.special_token_ids else "unset"}>' return f'<SpecialVocab with {len(self.merges)} merges and special tokens {self.special_token_ids or "unset"}>'
# Example usage: # Example usage:

View file

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "gguf" name = "gguf"
version = "0.3.3" version = "0.4.0"
description = "Write ML models in GGUF for GGML" description = "Write ML models in GGUF for GGML"
authors = ["GGML <ggml@ggml.ai>"] authors = ["GGML <ggml@ggml.ai>"]
packages = [ packages = [