
feat: add potential to run Jina Embeddings architecture #6826


Merged
merged 39 commits on May 11, 2024
Changes from all commits (39 commits)
86a5d96  feat: first things to do (Apr 11, 2024)
747d17a  feat: create tensors for Jina architecture (Apr 12, 2024)
a40156a  fix: use other tensors (Apr 12, 2024)
b00d38b  feat: embedding gets results (Apr 16, 2024)
cf1c144  fix: fix usage of ALIBI (Apr 22, 2024)
63a1d7c  fix: clean prints (Apr 22, 2024)
c229e48  fix: do some cleanup unused vars (Apr 22, 2024)
e232370  fix: revert changes to Makefile and CMakeLists (Apr 22, 2024)
795ff1d  fix: revert some changes (Apr 22, 2024)
d6ac931  fix: fix small detail (Apr 22, 2024)
db7e8ce  Merge branch 'master' into feat-jina-embeddings (JoanFM, Apr 22, 2024)
c1c0f4d  fix: fix convert formatting (Apr 22, 2024)
64cd4b1  fix: fix linting and editor (Apr 22, 2024)
71ff763  feat: set proper vocab settings (Apr 22, 2024)
d7d6a4e  fix: JinaBertForMaskedLM registration (Apr 23, 2024)
cde49b7  feat: support q_normalization and k_normalization in Jina arch (Apr 23, 2024)
dd060a2  feat: handle gpt2 tokenizer with Jina architecture (Apr 24, 2024)
dfa0676  feat: example comments in embedding (Apr 24, 2024)
c3f4b1f  feat: rename Jina Bert to Jina Bert V2 (Apr 24, 2024)
f8d1709  Merge branch 'master' into feat-jina-embeddings (JoanFM, Apr 30, 2024)
d9b8dd6  fix: add some changes as per review (Apr 30, 2024)
14073a2  feat: proper KQ_pos for Jina embeddings (Apr 30, 2024)
76436c1  Merge branch 'master' of https://github.com/JoanFM/llama.cpp into fea… (May 6, 2024)
cf9fcd8  feat: add capacity to load models ES and DE for Spanish (May 8, 2024)
e59b546  Merge branch 'master' into feat-jina-embeddings (JoanFM, May 8, 2024)
b7ede48  llama : fix pre-tokenizers (ggerganov, May 8, 2024)
8e36fd5  Merge branch 'master' of https://github.com/JoanFM/llama.cpp into fea… (May 8, 2024)
849aeda  Merge branch 'master' of https://github.com/JoanFM/llama.cpp into fea… (May 9, 2024)
ee3250d  Merge branch 'master' of https://github.com/JoanFM/llama.cpp into fea… (May 9, 2024)
7fdca33  ggml : full ALiBi support (ggerganov, May 10, 2024)
d0592d4  ggml : update ggml_soft_max_ext() CUDA, SYCL (ggerganov, May 10, 2024)
166e60b  ggml : ggml_flash_attn_ext() support ALiBi (CPU) (ggerganov, May 10, 2024)
97c27f5  ggml : ggml_flash_attn_ext() support ALiBi (Metal) (ggerganov, May 10, 2024)
f7055d3  ggml : fix warning (ggerganov, May 10, 2024)
865af99  ggml : ggml_flash_attn_ext() support ALiBi (CUDA) (ggerganov, May 10, 2024)
d9adb88  Merge remote-tracking branch 'origin/gg/refactor-alibi-2' into HEAD (ggerganov, May 10, 2024)
a1278f1  minor : clean-up (ggerganov, May 10, 2024)
23499b8  Merge branch 'master' into HEAD (ggerganov, May 11, 2024)
49b3dbb  embedding : add warning about missing SEP (ggerganov, May 11, 2024)
3 changes: 3 additions & 0 deletions convert-hf-to-gguf-update.py
@@ -74,6 +74,9 @@ class TOKENIZER_TYPE(IntEnum):
{"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen1.5-7B", },
{"name": "olmo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/allenai/OLMo-1.7-7B-hf", },
{"name": "dbrx", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/databricks/dbrx-base", },
{"name": "jina-en", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-en", }, # WPM!
{"name": "jina-es", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-es", },
{"name": "jina-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-de", },
]

# make directory "models/tokenizers" if it doesn't exist
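The new entries register the three Jina checkpoints with the tokenizer-update script, so it can fetch each tokenizer and record its pre-tokenizer fingerprint; note that the English model uses a WordPiece (WPM) tokenizer while the Spanish and German models use BPE. A minimal sketch of how such a fingerprint can be derived (the `chkhsh` digest that `get_vocab_base_pre` in convert-hf-to-gguf.py matches against, see the next file), assuming `transformers` is installed and using a placeholder test string instead of the script's full `chktxt`:

```python
# Minimal sketch: derive a pre-tokenizer fingerprint for a Jina checkpoint.
# Assumes `transformers` is installed; the real script defines a much longer test string (chktxt).
from hashlib import sha256
from transformers import AutoTokenizer

repo = "jinaai/jina-embeddings-v2-base-es"  # one of the repos registered above
tokenizer = AutoTokenizer.from_pretrained(repo)

chktxt = "Hello World \n\n\t 3.14 🚀 ..."  # placeholder; the update script fixes the canonical string
chktok = tokenizer.encode(chktxt)

# Hash the token-id sequence; the converter compares this digest (chkhsh)
# against known values to pick a pre-tokenizer name such as "jina-es".
chkhsh = sha256(str(chktok).encode()).hexdigest()
print(chkhsh)
```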
48 changes: 47 additions & 1 deletion convert-hf-to-gguf.py
@@ -404,8 +404,17 @@ def get_vocab_base_pre(self, tokenizer) -> str:
# ref: https://huggingface.co/allenai/OLMo-1.7-7B-hf
res = "olmo"
if chkhsh == "a8594e3edff7c29c003940395316294b2c623e09894deebbc65f33f1515df79e":
# ref: https://huggingface.co/databricks/dbrx-instruct
# ref: https://huggingface.co/databricks/dbrx-base
res = "dbrx"
if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
# ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-en
res = "jina-en"
if chkhsh == "171aeeedd6fb548d418a7461d053f11b6f1f1fc9b387bd66640d28a4b9f5c643":
# ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-es
res = "jina-es"
if chkhsh == "27949a2493fc4a9f53f5b9b029c82689cfbe5d3a1929bb25e043089e28466de6":
# ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-de
res = "jina-de"

if res is None:
logger.warning("\n")
@@ -2289,6 +2298,43 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
return [(self.map_tensor_name(name), data_torch)]


@Model.register("JinaBertModel", "JinaBertForMaskedLM")
class JinaBertV2Model(BertModel):
model_arch = gguf.MODEL_ARCH.JINA_BERT_V2

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.intermediate_size = self.hparams["intermediate_size"]

def get_tensors(self):
for name, data in super().get_tensors():
if 'gated_layers' in name:
d1 = data[:self.intermediate_size, :]
name1 = name.replace('gated_layers', 'gated_layers_w')
d2 = data[self.intermediate_size:, :]
name2 = name.replace('gated_layers', 'gated_layers_v')
yield name1, d1
yield name2, d2
continue

yield name, data

def set_vocab(self, *args, **kwargs):
tokenizer_class = 'BertTokenizer'
with open(self.dir_model / "tokenizer_config.json", "r", encoding="utf-8") as f:
tokenizer_class = json.load(f)['tokenizer_class']

if tokenizer_class == 'BertTokenizer':
super().set_vocab()
elif tokenizer_class == 'RobertaTokenizer':
self._set_vocab_gpt2()
self.gguf_writer.add_token_type_count(2)
else:
raise NotImplementedError(f'Tokenizer {tokenizer_class} is not supported for JinaBertModel')
self.gguf_writer.add_add_bos_token(True)
self.gguf_writer.add_add_eos_token(True)


###### CONVERSION LOGIC ######


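`JinaBertV2Model.get_tensors` splits the fused GLU weight: the checkpoint stores both feed-forward projections stacked in one `gated_layers` matrix whose first dimension is `2 * intermediate_size`, and the converter emits the first half as `gated_layers_w` and the second half as `gated_layers_v` (mapped to FFN_GATE and FFN_UP in the tensor_mapping.py hunk further down). A small self-contained sketch of that split, using a toy tensor rather than real checkpoint weights:

```python
# Toy illustration of the gated_layers split performed in get_tensors().
# Shapes are made up for the example; the real Jina BERT v2 models are much larger.
import numpy as np

hidden_size = 8
intermediate_size = 16

name = "encoder.layer.0.mlp.gated_layers.weight"
data = np.arange(2 * intermediate_size * hidden_size, dtype=np.float32).reshape(
    2 * intermediate_size, hidden_size
)

# First half -> gated_layers_w (gate projection), second half -> gated_layers_v (up projection).
w = data[:intermediate_size, :]
v = data[intermediate_size:, :]

print(name.replace("gated_layers", "gated_layers_w"), w.shape)  # (16, 8)
print(name.replace("gated_layers", "gated_layers_v"), v.shape)  # (16, 8)
```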
12 changes: 10 additions & 2 deletions examples/embedding/embedding.cpp
@@ -49,6 +49,12 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu
}

float * out = output + batch.seq_id[i][0] * n_embd;
//TODO: I would also add a parameter here to enable normalization or not.
/*fprintf(stdout, "unnormalized_embedding:");
for (int hh = 0; hh < n_embd; hh++) {
fprintf(stdout, "%9.6f ", embd[hh]);
}
fprintf(stdout, "\n");*/
llama_embd_normalize(embd, out, n_embd);
}
}
@@ -123,10 +129,12 @@ int main(int argc, char ** argv) {
inputs.push_back(inp);
}

// add SEP if not present
// check if the last token is SEP
// it should be automatically added by the tokenizer when 'tokenizer.ggml.add_eos_token' is set to 'true'
for (auto & inp : inputs) {
if (inp.empty() || inp.back() != llama_token_sep(model)) {
inp.push_back(llama_token_sep(model));
fprintf(stderr, "%s: warning: last token in the prompt is not SEP\n", __func__);
fprintf(stderr, "%s: 'tokenizer.ggml.add_eos_token' should be set to 'true' in the GGUF header\n", __func__);
}
}

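Two things change in the embedding example: each pooled embedding is still passed through `llama_embd_normalize` before being written out, and a warning is now printed when the last token of a prompt is not SEP (commit 49b3dbb), since the separator should be appended automatically when `tokenizer.ggml.add_eos_token` is set to `true` in the GGUF header. A rough Python sketch of the normalization step, under the assumption that `llama_embd_normalize` applies plain L2 normalization:

```python
# Rough sketch of the embedding post-processing in embedding.cpp,
# assuming llama_embd_normalize performs standard L2 normalization.
import numpy as np

def embd_normalize(embd: np.ndarray, eps: float = 1e-12) -> np.ndarray:
    """Return an L2-normalized copy of a single embedding vector."""
    norm = np.linalg.norm(embd)
    return embd / max(norm, eps)

raw = np.array([0.3, -1.2, 0.7, 2.0], dtype=np.float32)  # stand-in for one pooled embedding
out = embd_normalize(raw)
print(out, np.linalg.norm(out))  # unit-length vector
```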
18 changes: 18 additions & 0 deletions gguf-py/gguf/constants.py
@@ -118,6 +118,7 @@ class MODEL_ARCH(IntEnum):
REFACT = auto()
BERT = auto()
NOMIC_BERT = auto()
JINA_BERT_V2 = auto()
BLOOM = auto()
STABLELM = auto()
QWEN = auto()
@@ -195,6 +196,7 @@ class MODEL_TENSOR(IntEnum):
MODEL_ARCH.REFACT: "refact",
MODEL_ARCH.BERT: "bert",
MODEL_ARCH.NOMIC_BERT: "nomic-bert",
MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
MODEL_ARCH.BLOOM: "bloom",
MODEL_ARCH.STABLELM: "stablelm",
MODEL_ARCH.QWEN: "qwen",
@@ -380,6 +382,22 @@ class MODEL_TENSOR(IntEnum):
MODEL_TENSOR.FFN_UP,
MODEL_TENSOR.LAYER_OUT_NORM,
],
MODEL_ARCH.JINA_BERT_V2: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.TOKEN_EMBD_NORM,
MODEL_TENSOR.TOKEN_TYPES,
MODEL_TENSOR.ATTN_OUT_NORM,
MODEL_TENSOR.ATTN_Q,
MODEL_TENSOR.ATTN_Q_NORM,
MODEL_TENSOR.ATTN_K,
MODEL_TENSOR.ATTN_K_NORM,
MODEL_TENSOR.ATTN_V,
MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.FFN_UP,
MODEL_TENSOR.FFN_GATE,
MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.LAYER_OUT_NORM,
],
MODEL_ARCH.MPT: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
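With the new `MODEL_ARCH.JINA_BERT_V2` entry, gguf-py knows both the architecture name string ("jina-bert-v2") and the set of tensors the converter is expected to emit for it. A small sketch of how that registry can be inspected, assuming the in-repo gguf-py package is importable and exposes the `MODEL_ARCH_NAMES` and `MODEL_TENSORS` dicts defined in gguf/constants.py:

```python
# Sketch: inspect the tensors registered for the new architecture.
# Assumes the in-repo gguf-py package is on PYTHONPATH and exposes these dicts.
from gguf.constants import MODEL_ARCH, MODEL_ARCH_NAMES, MODEL_TENSORS

arch = MODEL_ARCH.JINA_BERT_V2
print(MODEL_ARCH_NAMES[arch])   # "jina-bert-v2"
for tensor in MODEL_TENSORS[arch]:
    print(tensor.name)          # TOKEN_EMBD, ATTN_Q_NORM, FFN_GATE, ...
```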
6 changes: 6 additions & 0 deletions gguf-py/gguf/tensor_mapping.py
@@ -243,6 +243,7 @@ class TensorNameMap:
"model.layers.{bid}.feed_forward.w3", # internlm2
"encoder.layers.{bid}.mlp.fc11", # nomic-bert
"model.layers.{bid}.mlp.c_fc", # starcoder2
"encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
),

MODEL_TENSOR.FFN_UP_EXP: (
@@ -269,6 +270,7 @@ class TensorNameMap:
"model.layers.layers.{bid}.mlp.gate_proj", # plamo
"model.layers.{bid}.feed_forward.w1", # internlm2
"encoder.layers.{bid}.mlp.fc12", # nomic-bert
"encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2
"transformer.h.{bid}.mlp.linear_1", # refact
),

@@ -303,6 +305,7 @@ class TensorNameMap:
"model.layers.{bid}.feed_forward.w2", # internlm2
"encoder.layers.{bid}.mlp.fc2", # nomic-bert
"model.layers.{bid}.mlp.c_proj", # starcoder2
"encoder.layer.{bid}.mlp.wo", # jina-bert-v2
),

MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -321,13 +324,15 @@ class TensorNameMap:
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
"model.layers.{bid}.self_attn.q_norm", # cohere
"transformer.blocks.{bid}.attn.q_ln", # sea-lion
"encoder.layer.{bid}.attention.self.layer_norm_q" # jina-bert-v2
),

MODEL_TENSOR.ATTN_K_NORM: (
"language_model.encoder.layers.{bid}.self_attention.k_layernorm",
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
"model.layers.{bid}.self_attn.k_norm", # cohere
"transformer.blocks.{bid}.attn.k_ln", # sea-lion
"encoder.layer.{bid}.attention.self.layer_norm_k" # jina-bert-v2
),

MODEL_TENSOR.ROPE_FREQS: (
@@ -338,6 +343,7 @@ class TensorNameMap:
"encoder.layer.{bid}.output.LayerNorm", # bert
"encoder.layers.{bid}.norm2", # nomic-bert
"transformer.decoder_layer.{bid}.rms_norm_3", # Grok
"encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
),

MODEL_TENSOR.SSM_IN: (
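These tensor_mapping.py additions let `TensorNameMap` translate the Hugging Face tensor names found in jina-bert-v2 checkpoints into the canonical GGUF names. A short lookup sketch, assuming the usual `TensorNameMap(arch, n_blocks)` constructor and `get_name` API from gguf-py, and a hypothetical block count:

```python
# Sketch: map a Hugging Face tensor name from a Jina BERT v2 checkpoint to its GGUF name.
# Assumes gguf-py's TensorNameMap(arch, n_blocks) and get_name(..., try_suffixes=...) API.
import gguf

name_map = gguf.TensorNameMap(gguf.MODEL_ARCH.JINA_BERT_V2, 12)  # 12 = hypothetical block count

hf_name = "encoder.layer.0.mlp.gated_layers_v.weight"
gguf_name = name_map.get_name(hf_name, try_suffixes=(".weight", ".bias"))
print(gguf_name)  # expected to resolve to something like "blk.0.ffn_up.weight"
```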