32 changes: 22 additions & 10 deletions convert-falcon-hf-to-gguf.py
@@ -80,7 +80,7 @@ def count_model_parts(dir_model: str) -> int:
 with open(dir_model + "/config.json", "r", encoding="utf-8") as f:
     hparams = json.load(f)
 
-if hparams["architectures"][0] != "RWForCausalLM":
+if hparams["architectures"][0] not in ("RWForCausalLM", "FalconForCausalLM"):
     print("Model architecture not supported: " + hparams["architectures"][0])
 
     sys.exit()
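For context (illustrative, not part of the patch): the gate above reads the first entry of "architectures" from the model's config.json. The original custom-code Falcon checkpoints report "RWForCausalLM" and use RW-style hyperparameter names, while checkpoints from the transformers port report "FalconForCausalLM" with HF-style names; that renaming is also what the fallbacks in the next hunk handle. Roughly (field values are illustrative and vary by model):

import json

# Trimmed, illustrative config.json contents for the two supported classes.
# The RW-style config may omit "n_head_kv"; the converter defaults it to 1.
rw_config = '{"architectures": ["RWForCausalLM"], "n_layer": 32, "n_head": 71, "hidden_size": 4544}'
hf_config = '{"architectures": ["FalconForCausalLM"], "num_hidden_layers": 32, "num_attention_heads": 71, "hidden_size": 4544}'

for raw in (rw_config, hf_config):
    hparams = json.loads(raw)
    assert hparams["architectures"][0] in ("RWForCausalLM", "FalconForCausalLM")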
@@ -93,19 +93,34 @@ def count_model_parts(dir_model: str) -> int:
 
 print("gguf: get model metadata")
 
-block_count = hparams["n_layer"]
+if "n_layer" in hparams:
+    block_count = hparams["n_layer"]
+elif "num_hidden_layers" in hparams:
+    block_count = hparams["num_hidden_layers"]
+else:
+    print("No block count found")
+
+    sys.exit()
+
+if "n_head" in hparams:
+    n_head = hparams["n_head"]
+elif "num_attention_heads" in hparams:
+    n_head = hparams["num_attention_heads"]
+else:
+    print("No head count found")
+
+    sys.exit()
+
+n_head_kv = hparams["n_head_kv"] if "n_head_kv" in hparams else 1
 
 gguf_writer.add_name("Falcon")
 gguf_writer.add_context_length(2048) # not in config.json
 gguf_writer.add_tensor_data_layout("jploski") # qkv tensor transform
 gguf_writer.add_embedding_length(hparams["hidden_size"])
 gguf_writer.add_feed_forward_length(4 * hparams["hidden_size"])
 gguf_writer.add_block_count(block_count)
-gguf_writer.add_head_count(hparams["n_head"])
-if "n_head_kv" in hparams:
-    gguf_writer.add_head_count_kv(hparams["n_head_kv"])
-else:
-    gguf_writer.add_head_count_kv(1)
+gguf_writer.add_head_count(n_head)
+gguf_writer.add_head_count_kv(n_head_kv)
 gguf_writer.add_layer_norm_eps(hparams["layer_norm_epsilon"])
 gguf_writer.add_file_type(ftype)
 
@@ -190,9 +205,6 @@ def count_model_parts(dir_model: str) -> int:
 tensor_map = gguf.get_tensor_name_map(ARCH,block_count)
 
 # params for qkv transform
-n_head = hparams["n_head"]
-n_head_kv = hparams["n_head_kv"] if "n_head_kv" in hparams else 1
-
 head_dim = hparams["hidden_size"] // n_head
 
 # tensor info
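The hunks above all deal with the same rename: RW-style configs say "n_layer"/"n_head", transformers-style configs say "num_hidden_layers"/"num_attention_heads", and "n_head_kv" may be absent entirely (the converter defaults it to 1, i.e. multi-query attention). A compact way to express that fallback chain, as a hypothetical helper rather than the patch's actual code:

import sys

# Illustrative hparams, as if loaded from a transformers-style config.json.
hparams = {"num_hidden_layers": 32, "num_attention_heads": 71, "hidden_size": 4544}

def read_hparam(hparams, names, label):
    # Return the value of the first known key spelling that is present;
    # bail out, as the converter does, if none of them exist.
    for name in names:
        if name in hparams:
            return hparams[name]
    print("No " + label + " found")
    sys.exit()

block_count = read_hparam(hparams, ["n_layer", "num_hidden_layers"], "block count")
n_head = read_hparam(hparams, ["n_head", "num_attention_heads"], "head count")
n_head_kv = hparams.get("n_head_kv", 1)  # absent on multi-query checkpoints

# head_dim is derived rather than read; the script's qkv transform uses it.
head_dim = hparams["hidden_size"] // n_head  # e.g. 4544 // 71 == 64

The patch instead spells the lookups out as inline if/elif chains, which keeps each fallback explicit at the point of use; a helper like the above would only pay off if more config dialects show up.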