From d0a6b35e357534bfd7a489ea118f99181190f911 Mon Sep 17 00:00:00 2001 From: SaileshP97 Date: Sun, 22 Jun 2025 19:58:46 +0000 Subject: [PATCH 1/4] Adding Hinvec Model's Meta data. --- mteb/models/overview.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mteb/models/overview.py b/mteb/models/overview.py index 52a034adb3..5d1dae51db 100644 --- a/mteb/models/overview.py +++ b/mteb/models/overview.py @@ -43,6 +43,7 @@ google_models, gritlm_models, gte_models, + hinvec_models, hit_tmg_models, ibm_granite_models, inf_models, @@ -123,6 +124,7 @@ google_models, gritlm_models, gte_models, + hinvec_models, hit_tmg_models, ibm_granite_models, inf_models, From 6a482ca2ca83bedd3075d07bafc67410bc6b579f Mon Sep 17 00:00:00 2001 From: SaileshP97 Date: Sun, 22 Jun 2025 20:16:04 +0000 Subject: [PATCH 2/4] Adding hinvec_model.py --- mteb/models/hinvec_models.py | 62 ++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 mteb/models/hinvec_models.py diff --git a/mteb/models/hinvec_models.py b/mteb/models/hinvec_models.py new file mode 100644 index 0000000000..09bd987432 --- /dev/null +++ b/mteb/models/hinvec_models.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +import logging +from functools import partial + +from mteb.encoder_interface import PromptType +from mteb.model_meta import ModelMeta +from mteb.models.instruct_wrapper import sentence_transformers_loader + +logger = logging.getLogger(__name__) + + +def instruction_template( + instruction: str, prompt_type: PromptType | None = None +) -> str: + return f"Instruct: {instruction}\nQuery: " if instruction else "" + + +hinvec_training_datasets = { + + "MintakaRetrieval": ["train"], + "HindiDiscourseClassification": ["train"], + "SentimentAnalysisHindi": ["train"], + "MassiveScenarioClassification": ["train"], + "MTOPIntentClassification": ["train"], + "LinceMTBitextMining": ["train"], + "PhincBitextMining": ["train"], + "XNLI": ["train"], + "MLQARetrieval": ["validation"], + "FloresBitextMining": ["dev"], + "AmazonReviewsClassification": ["train"], +} + +Hinvec_bidir = ModelMeta( + loader=partial( # type: ignore + sentence_transformers_loader, + model_name="Sailesh97/Hinvec", + revision="d4fc678720cc1b8c5d18599ce2d9a4d6090c8b6b", + instruction_template=instruction_template, + trust_remote_code=True, + max_seq_length=2048, + padding_side="left", + add_eos_token=True, + ), + name="Sailesh97/Hinvec", + languages=["eng-Latn", "hin-Deva"], + open_weights=True, + revision="d4fc678720cc1b8c5d18599ce2d9a4d6090c8b6b", + release_date="2025-06-19", + n_parameters=939_591_680, + memory_usage_mb=3715, + embed_dim=2048, + license="cc-by-nc-4.0", + max_tokens=2048, + reference="https://huggingface.co/Sailesh97/Hinvec", + similarity_fn_name="cosine", + framework=["Sentence Transformers", "PyTorch"], + use_instructions=True, + training_datasets=hinvec_training_datasets, + public_training_code=None, + public_training_data=None, +) \ No newline at end of file From b7ea696248297e814dddbfa33946b906a3f6e59c Mon Sep 17 00:00:00 2001 From: Sailesh Panda Date: Tue, 24 Jun 2025 15:33:40 +0530 Subject: [PATCH 3/4] Update mteb/models/hinvec_models.py Co-authored-by: Kenneth Enevoldsen --- mteb/models/hinvec_models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mteb/models/hinvec_models.py b/mteb/models/hinvec_models.py index 09bd987432..a4f1188a0a 100644 --- a/mteb/models/hinvec_models.py +++ b/mteb/models/hinvec_models.py @@ -4,8 +4,7 @@ from functools import partial from mteb.encoder_interface import PromptType -from mteb.model_meta import ModelMeta -from mteb.models.instruct_wrapper import sentence_transformers_loader +from mteb.model_meta import ModelMeta, sentence_transformers_loader logger = logging.getLogger(__name__) From f8685a0410fea03b57ed049fabd8910a6d02f968 Mon Sep 17 00:00:00 2001 From: SaileshP97 Date: Wed, 25 Jun 2025 13:50:52 +0000 Subject: [PATCH 4/4] formated code with Black and lint with Ruff --- mteb/models/hinvec_models.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mteb/models/hinvec_models.py b/mteb/models/hinvec_models.py index a4f1188a0a..df2cd41528 100644 --- a/mteb/models/hinvec_models.py +++ b/mteb/models/hinvec_models.py @@ -16,7 +16,6 @@ def instruction_template( hinvec_training_datasets = { - "MintakaRetrieval": ["train"], "HindiDiscourseClassification": ["train"], "SentimentAnalysisHindi": ["train"], @@ -58,4 +57,4 @@ def instruction_template( training_datasets=hinvec_training_datasets, public_training_code=None, public_training_data=None, -) \ No newline at end of file +)