embeddings-benchmark · Samoed · Jun 20, 2025 · Jun 18, 2025 · Jun 19, 2025 · Jun 19, 2025
diff --git a/mteb/models/ru_sentence_models.py b/mteb/models/ru_sentence_models.py
@@ -14,6 +14,47 @@
     nomic_training_data,
 )
 
+GIGA_task_prompts = {  # Task name -> instruction prompt; handed to the giga_embeddings loader as prompts_dict.
+    "TERRa": "Given a premise, retrieve a hypothesis that is entailed by the premise\nquery: ",  # Flat entries: one prompt string per task.
+    "STS22": "Retrieve semantically similar text\nquery: ",
+    "RuSTSBenchmarkSTS": "Retrieve semantically similar text\nquery: ",
+    "RUParaPhraserSTS": "Retrieve semantically similar text\nquery: ",
+    "CEDRClassification": "Дан комментарий, определи выраженную в нем эмоцию (радость, грусть, удивление, страх, гнев или нейтрально) \nкомментарий: ",  # Russian prompts end with a task-specific field label rather than "query: ".
+    "GeoreviewClassification": "Classify the organization rating based on the reviews\nquery: ",
+    "GeoreviewClusteringP2P": "Классифицируй рейтинг организации на основе отзыва \nотзыв: ",  # NOTE(review): clustering task uses a "classify"-worded prompt - confirm this is intentional.
+    "HeadlineClassification": "Классифицируй тему данного новостного заголовка \nзаголовок: ",
+    "InappropriatenessClassification": "Классифицируй данный комментарий как токсичный или не токсичный \nкомментарий: ",
+    "KinopoiskClassification": "Classify the sentiment expressed in the given movie review text\nquery: ",
+    "MassiveIntentClassification": "Given a user utterance as query, find the user intents\nquery: ",
+    "MassiveScenarioClassification": "Given a user utterance as query, find the user scenarios\nquery: ",
+    "RuReviewsClassification": "Classify product reviews into positive, negative or neutral sentiment\nquery: ",
+    "RuSciBenchGRNTIClassification": "Classify the category of scientific papers based on the titles and abstracts\nquery: ",
+    "RuSciBenchGRNTIClusteringP2P": "Классифицируй категорию научной статьи основываясь на аннотации \nаннотация: ",  # NOTE(review): same "classify" wording on a clustering task - confirm.
+    "RuSciBenchOECDClassification": "Classify the category of scientific papers based on the titles and abstracts\nquery: ",
+    "RuSciBenchOECDClusteringP2P": "Классифицируй категорию научной статьи основываясь на аннотации \nаннотация: ",
+    "SensitiveTopicsClassification": "Классифицируй чувствительную тему по запросу \nзапрос: ",
+    "RuBQRetrieval": {  # Nested entries: separate prompts keyed by "query" and "passage".
+        "query": "Given a question, retrieve Wikipedia passages that answer the question\nquery: ",
+        "passage": "",  # Passage side gets an empty prompt in every nested entry below as well.
+    },
+    "RuBQReranking": {
+        "query": "Given a question, retrieve Wikipedia passages that answer the question\nquery: ",
+        "passage": "",
+    },
+    "RiaNewsRetrieval": {
+        "query": "Given a news title, retrieve relevant news article\nquery: ",
+        "passage": "",
+    },
+    "MIRACLReranking": {
+        "query": "Given a question, retrieve Wikipedia passages that answer the question\nquery: ",
+        "passage": "",
+    },
+    "MIRACLRetrieval": {
+        "query": "Given a question, retrieve Wikipedia passages that answer the question\nquery: ",
+        "passage": "",
+    },
+}  # End of GIGA_task_prompts.
+
 rubert_tiny = ModelMeta(
     name="cointegrated/rubert-tiny",
     languages=["rus-Cyrl"],
@@ -589,13 +630,15 @@
 )
 
 giga_embeddings = ModelMeta(
-    loader=partial(
+    loader=partial(  # type: ignore
         InstructSentenceTransformerWrapper,
         model_name="ai-sage/Giga-Embeddings-instruct",
         revision="40b27667b9ad586d7812675df76e5062ccc80b0e",
-        trust_remote_code=True,
-        instruction_template="{instruction}\nquery: ",
+        instruction_template="{instruction}",
+        max_seq_length=512,
         apply_instruction_to_passages=False,
+        prompts_dict=GIGA_task_prompts,
+        trust_remote_code=True,
         model_kwargs={
             "torch_dtype": torch.bfloat16,
         },