diff --git a/convert-lora-to-ggml.py b/convert-lora-to-ggml.py
index 04a7b8bbf..a94a7d0af 100755
--- a/convert-lora-to-ggml.py
+++ b/convert-lora-to-ggml.py
@@ -6,23 +6,22 @@ import struct
 import sys
 from typing import Any, Dict, Sequence, TextIO
 
+import numpy as np
 import torch
 
-from convert import DATA_TYPE_TO_FTYPE, NUMPY_TYPE_TO_DATA_TYPE, DataType
+NUMPY_TYPE_TO_FTYPE: Dict[str, int] = {"float32": 0, "float16": 1}
+
 
 HF_SUBLAYER_TO_GGML = {
-    "self_attn.q_proj": "attention.wq",
-    "self_attn.k_proj": "attention.wk",
-    "self_attn.v_proj": "attention.wv",
-    "self_attn.o_proj": "attention.wo",
-    "mlp.gate_proj": "feed_forward.w1",
-    "mlp.down_proj": "feed_forward.w2",
-    "mlp.up_proj": "feed_forward.w3",
-    "input_layernorm": "attention_norm",
+    "self_attn.q_proj": "attn_q",
+    "self_attn.k_proj": "attn_k",
+    "self_attn.v_proj": "attn_v",
+    "self_attn.o_proj": "attn_output",
+    "mlp.gate_proj": "ffn_gate",
+    "mlp.down_proj": "ffn_down",
+    "mlp.up_proj": "ffn_up",
+    "input_layernorm": "attn_norm",
     "post_attention_layernorm": "ffn_norm",
-    # "norm": "norm",
-    # "embed_tokens": "tok_embeddings",
-    # "lm_head": "output",
 }
 
 
@@ -39,7 +38,7 @@ def translate_tensor_name(t: str) -> str:
             sys.exit(1)
 
         output_string = (
-            f"layers.{nn}.{HF_SUBLAYER_TO_GGML[sub_layer]}.weight.lora{lora_type}"
+            f"blk.{nn}.{HF_SUBLAYER_TO_GGML[sub_layer]}.weight.lora{lora_type}"
         )
         return output_string
     else:
@@ -54,12 +53,14 @@ def write_file_header(fout: TextIO, params: Dict[str, Any]) -> None:
     # https://opendelta.readthedocs.io/en/latest/modules/deltas.html says that `lora_alpha` is an int
     # but some models ship a float value instead
     # let's convert to int, but fail if lossless conversion is not possible
-    assert int(params["lora_alpha"]) == params["lora_alpha"], "cannot convert float to int losslessly"
+    assert (
+        int(params["lora_alpha"]) == params["lora_alpha"]
+    ), "cannot convert float to int losslessly"
     fout.write(struct.pack("i", int(params["lora_alpha"])))
 
 
 def write_tensor_header(
-    self, name: str, shape: Sequence[int], data_type: DataType
+    self, name: str, shape: Sequence[int], data_type: np.dtype
 ) -> None:
     sname = name.encode("utf-8")
     fout.write(
@@ -67,7 +68,7 @@
             "iii",
             len(shape),
             len(sname),
-            DATA_TYPE_TO_FTYPE[NUMPY_TYPE_TO_DATA_TYPE[data_type]],
+            NUMPY_TYPE_TO_FTYPE[data_type.name],
         )
     )
     fout.write(struct.pack("i" * len(shape), *shape[::-1]))
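
For reference, here is a minimal standalone sketch (not part of the patch) of what the converter emits after this change: LoRA tensor names now follow the GGUF-style block naming, e.g. blk.0.attn_q.weight.loraA instead of layers.0.attention.wq.weight.loraA, and each tensor header packs the dimension count, name length, and the ftype looked up via NUMPY_TYPE_TO_FTYPE, followed by the shape in reverse order. The helper name pack_tensor_header and the example shape are hypothetical, chosen only for illustration; in the script itself the name bytes are written right after the packed shape.

# Usage sketch (assumption: pack_tensor_header is a hypothetical helper, not a
# function from the patch). It mirrors the fields written by write_tensor_header
# after this change: dim count, name length, ftype from NUMPY_TYPE_TO_FTYPE,
# then the shape in reversed order, then the UTF-8 name bytes.
import struct

import numpy as np

NUMPY_TYPE_TO_FTYPE = {"float32": 0, "float16": 1}


def pack_tensor_header(name: str, shape, data_type: np.dtype) -> bytes:
    sname = name.encode("utf-8")
    header = struct.pack(
        "iii", len(shape), len(sname), NUMPY_TYPE_TO_FTYPE[data_type.name]
    )
    header += struct.pack("i" * len(shape), *shape[::-1])  # dims are stored reversed
    return header + sname  # the script writes the name bytes right after the shape


# A LoRA "A" matrix for layer 0's query projection, under the new GGUF-style naming:
blob = pack_tensor_header("blk.0.attn_q.weight.loraA", (8, 4096), np.dtype(np.float16))
print(len(blob))  # 12 header bytes + 8 shape bytes + 25 name bytes = 45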