diff --git a/mteb/models/kalm_models.py b/mteb/models/kalm_models.py
new file mode 100644
index 0000000000..c26b0cc92d
--- /dev/null
+++ b/mteb/models/kalm_models.py
@@ -0,0 +1,865 @@
+from __future__ import annotations
+
+import logging
+from collections.abc import Sequence
+from functools import partial
+from typing import Any
+
+import numpy as np
+import torch
+
+from mteb.encoder_interface import PromptType
+from mteb.model_meta import ModelMeta
+from mteb.models.instruct_wrapper import InstructSentenceTransformerWrapper
+
+logger = logging.getLogger(__name__)
+
+
+class KALMWrapper(InstructSentenceTransformerWrapper):
+    def encode(
+        self,
+        sentences: Sequence[str],
+        *,
+        task_name: str,
+        prompt_type: PromptType | None = None,
+        **kwargs: Any,
+    ) -> np.ndarray:
+        if self.add_eos_token:
+            sentences = [
+                example + self.model.tokenizer.eos_token for example in sentences
+            ]
+
+        instruction = self.get_task_instruction(
+            task_name, prompt_type, self.prompts_dict
+        )
+        # import there due to circular imports
+        from mteb import get_task
+
+        task = get_task(task_name)
+
+        # to passage prompts won't be applied to passages
+        if (
+            not self.apply_instruction_to_passages
+            and prompt_type == PromptType.document
+        ):
+            instruction = None
+            logger.info(
+                f"No instruction used, because prompt type = {prompt_type.document}"
+            )
+
+        if task.metadata.type in ["STS", "PairClassification", "Summarization"]:
+            logger.info(
+                f"No instruction used, because task type = {task.metadata.type}"
+            )
+            instruction = None
+
+        if instruction:
+            logger.info(f"Using instruction: '{instruction}' for task: '{task_name}'")
+
+        embeddings = self.model.encode(
+            sentences,
+            prompt=instruction,
+            **kwargs,
+        )
+
+        if isinstance(embeddings, torch.Tensor):
+            # sometimes in kwargs can be return_tensors=True
+            embeddings = embeddings.cpu().detach().float().numpy()
+        return embeddings
+
+
+kalm_training_data = {
+    # from technical report
+    # not in MTEB:
+    # ExpertQA
+    # MEDI2BGE
+    # OpenOrca
+    # PAQ
+    # PubMedQA
+    # SearchQA
+    # arxiv_qa
+    # rag-dataset-12000
+    # CC-News
+    # SQuAD 2.0
+    # TriviaQA
+    # WebGPT Comparisons
+    # MultiNLI
+    # NLLB
+    # WikiAnswers
+    # SimCSE NLI
+    # SNLI
+    # Aya Dataset
+    # eli5
+    # ----
+    # in MTEB:
+    "CodeFeedbackMT": ["train"],
+    "CodeFeedbackST": ["train"],
+    "ArxivClusteringP2P": ["train"],
+    "ArxivClusteringS2S": ["train"],
+    "ArxivClusteringP2P.v2": ["train"],
+    "TRECCOVID": ["train"],
+    "DBPedia": ["train"],
+    "ESCIReranking": ["train"],
+    "FEVER": ["train"],
+    "FiQA2018": ["train"],
+    "FEVERHardNegatives": ["train"],
+    "NanoFEVERRetrieval": ["train"],
+    "FEVER-NL": ["train"],  # translation not trained on
+    "FiQA2018-NL": ["train"],  # translation not trained on
+    "HotpotQA-PL": ["train"],  # translation not trained on
+    "HotpotQA-NL": ["train"],  # translation not trained on
+    "HotpotQAHardNegatives": ["train"],
+    "MultiLongDocRetrieval": ["train"],
+    "MSMARCO": ["train"],
+    "MSMARCOHardNegatives": ["train"],
+    "NanoMSMARCORetrieval": ["train"],
+    "MSMARCO-PL": ["train"],  # translation not trained on
+    "mMARCO-NL": ["train"],  # translation not trained on
+    "MSMARCOv2": ["train"],
+    "NFCorpus": ["train"],
+    "SciFact": ["train"],
+    "NQ": ["train"],
+    "NQHardNegatives": ["train"],
+    "NanoNQRetrieval": ["train"],
+    "NQ-PL": ["train"],  # translation not trained on
+    "NQ-NL": ["train"],  # translation not trained on
+    "YahooAnswersTopicsClassification": ["train"],
+    "ContractNLIConfidentialityOfAgreementLegalBenchClassification": ["train"],
+    "ContractNLIExplicitIdentificationLegalBenchClassification": ["train"],
+    "ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification": [
+        "train"
+    ],
+    "ContractNLILimitedUseLegalBenchClassification": ["train"],
+    "ContractNLINoLicensingLegalBenchClassification": ["train"],
+    "ContractNLINoticeOnCompelledDisclosureLegalBenchClassification": ["train"],
+    "ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification": [
+        "train"
+    ],
+    "ContractNLIPermissibleCopyLegalBenchClassification": ["train"],
+    "ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification": [
+        "train"
+    ],
+    "ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification": ["train"],
+    "ContractNLIReturnOfConfidentialInformationLegalBenchClassification": ["train"],
+    "ContractNLISharingWithEmployeesLegalBenchClassification": ["train"],
+    "ContractNLISharingWithThirdPartiesLegalBenchClassification": ["train"],
+    "ContractNLISurvivalOfObligationsLegalBenchClassification": ["train"],
+    "QuoraRetrieval": ["train"],
+    "NanoQuoraRetrieval": ["train"],
+    "BiorxivClusteringP2P.v2": ["train"],
+    "BiorxivClusteringS2S.v2": ["train"],
+    "MedrxivClusteringP2P.v2": ["train"],
+    "MedrxivClusteringS2S.v2": ["train"],
+    "Banking77Classification": ["train"],
+    "AmazonPolarityClassification": ["train"],
+    "ImdbClassification": ["train"],
+    "EmotionClassification": ["train"],
+    "TweetSentimentExtractionClassification": ["train"],
+    "ToxicConversationsClassification": ["train"],
+    "MIRACLRetrieval": ["train"],
+    "MIRACLRetrievalHardNegatives": ["train"],
+    "MIRACLReranking": ["train"],
+    "MrTidyRetrieval": ["train"],
+    "PawsXPairClassification": ["train"],
+    "AmazonReviewsClassification": ["train"],
+    "AmazonCounterfactualClassification": ["train"],
+    "MultilingualSentiment": ["train"],
+    "MassiveIntentClassification": ["train"],
+    "MassiveScenarioClassification": ["train"],
+    "MTOPDomainClassification": ["train"],
+    "MTOPIntentClassification": ["train"],
+}
+
+
+kalm_v2_training_data = {
+    # from technical report
+    # not in MTEB:
+    # ExpertQA
+    # MEDI2BGE
+    # OpenOrca
+    # PAQ
+    # PubMedQA
+    # SearchQA
+    # arxiv_qa
+    # rag-dataset-12000
+    # CC-News
+    # SQuAD 2.0
+    # TriviaQA
+    # WebGPT Comparisons
+    # MultiNLI
+    # NLLB
+    # WikiAnswers
+    # SimCSE NLI
+    # SNLI
+    # Aya Dataset
+    # eli5
+    # ----
+    # in MTEB:
+    "CodeFeedbackMT": ["train"],
+    "CodeFeedbackST": ["train"],
+    "ArxivClusteringP2P": ["train"],
+    "ArxivClusteringS2S": ["train"],
+    "ArxivClusteringP2P.v2": ["train"],
+    "TRECCOVID": ["train"],
+    "DBPedia": ["train"],
+    "ESCIReranking": ["train"],
+    "FEVER": ["train"],
+    "FiQA2018": ["train"],
+    "FEVERHardNegatives": ["train"],
+    "NanoFEVERRetrieval": ["train"],
+    "FEVER-NL": ["train"],  # translation not trained on
+    "FiQA2018-NL": ["train"],  # translation not trained on
+    "HotpotQA-PL": ["train"],  # translation not trained on
+    "HotpotQA-NL": ["train"],  # translation not trained on
+    "HotpotQAHardNegatives": ["train"],
+    "MultiLongDocRetrieval": ["train"],
+    "MSMARCO": ["train"],
+    "MSMARCOHardNegatives": ["train"],
+    "NanoMSMARCORetrieval": ["train"],
+    "MSMARCO-PL": ["train"],  # translation not trained on
+    "mMARCO-NL": ["train"],  # translation not trained on
+    "MSMARCOv2": ["train"],
+    "NFCorpus": ["train"],
+    "SciFact": ["train"],
+    "NQ": ["train"],
+    "NQHardNegatives": ["train"],
+    "NanoNQRetrieval": ["train"],
+    "NQ-PL": ["train"],  # translation not trained on
+    "NQ-NL": ["train"],  # translation not trained on
+    "YahooAnswersTopicsClassification": ["train"],
+    "ContractNLIConfidentialityOfAgreementLegalBenchClassification": ["train"],
+    "ContractNLIExplicitIdentificationLegalBenchClassification": ["train"],
+    "ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification": [
+        "train"
+    ],
+    "ContractNLILimitedUseLegalBenchClassification": ["train"],
+    "ContractNLINoLicensingLegalBenchClassification": ["train"],
+    "ContractNLINoticeOnCompelledDisclosureLegalBenchClassification": ["train"],
+    "ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification": [
+        "train"
+    ],
+    "ContractNLIPermissibleCopyLegalBenchClassification": ["train"],
+    "ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification": [
+        "train"
+    ],
+    "ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification": ["train"],
+    "ContractNLIReturnOfConfidentialInformationLegalBenchClassification": ["train"],
+    "ContractNLISharingWithEmployeesLegalBenchClassification": ["train"],
+    "ContractNLISharingWithThirdPartiesLegalBenchClassification": ["train"],
+    "ContractNLISurvivalOfObligationsLegalBenchClassification": ["train"],
+    "QuoraRetrieval": ["train"],
+    "NanoQuoraRetrieval": ["train"],
+    "BiorxivClusteringP2P.v2": ["train"],
+    "BiorxivClusteringS2S.v2": ["train"],
+    "MedrxivClusteringP2P.v2": ["train"],
+    "MedrxivClusteringS2S.v2": ["train"],
+    "Banking77Classification": ["train"],
+    "AmazonPolarityClassification": ["train"],
+    "ImdbClassification": ["train"],
+    "EmotionClassification": ["train"],
+    "TweetSentimentExtractionClassification": ["train"],
+    "ToxicConversationsClassification": ["train"],
+    "MIRACLRetrieval": ["train"],
+    "MIRACLRetrievalHardNegatives": ["train"],
+    "MIRACLReranking": ["train"],
+    "MrTidyRetrieval": ["train"],
+    "PawsXPairClassification": ["train"],
+    "AmazonReviewsClassification": ["train"],
+    "AmazonCounterfactualClassification": ["train"],
+    "MultilingualSentiment": ["train"],
+    "MassiveIntentClassification": ["train"],
+    "MassiveScenarioClassification": ["train"],
+    "MTOPDomainClassification": ["train"],
+    "MTOPIntentClassification": ["train"],
+    "Reddit-Clustering": ["train"],
+    "Reddit-Clustering-P2P": ["train"],
+    "Stackexchange-Clustering": ["train"],
+    "Stackexchange-Clustering-P2P": ["train"],
+    "TwentyNewsgroups-Clustering": ["train"],
+    "ATEC": ["train"],
+    "BQ": ["train"],
+    "CQADupstack": ["train"],
+}
+
+
+KaLM_task_prompts = {
+    "AmazonCounterfactualClassification": "Given an Amazon review, judge whether it is counterfactual.",
+    "AmazonPolarityClassification": "Classifying Amazon reviews into positive or negative sentiment",
+    "AmazonReviewsClassification": "Classifying the given Amazon review into its appropriate rating category",
+    "Banking77Classification": "Given an online banking query, find the corresponding intents",
+    "EmotionClassification": "Classifying the emotion expressed in the given Twitter message into one of the six emotions: anger, fear, joy, love, sadness, and surprise",
+    "ImdbClassification": "Classifying the sentiment expressed in the given movie review text from the IMDB dataset",
+    "MassiveIntentClassification": "Given a user utterance as query, find the user intents",
+    "MassiveScenarioClassification": "Given a user utterance as query, find the user scenarios",
+    "MTOPDomainClassification": "Classifying the intent domain of the given utterance in task-oriented conversation",
+    "MTOPIntentClassification": "Classifying the intent of the given utterance in task-oriented conversation",
+    "ToxicConversationsClassification": "Classifying the given comments as either toxic or not toxic",
+    "TweetSentimentExtractionClassification": "Classifying the sentiment of a given tweet as either positive, negative, or neutral",
+    "TNews": "Categorizing the given news title",
+    "IFlyTek": "Given an App description text, find the appropriate fine-grained category",
+    "MultilingualSentiment": "Classifying sentiment of the customer review into positive, neutral, or negative",
+    "JDReview": "Classifying sentiment of the customer review for iPhone into positive or negative",
+    "OnlineShopping": "Classifying sentiment of the customer review into positive or negative",
+    "Waimai": "Classify the customer review from a food takeaway platform into positive or negative",
+    "MasakhaNEWSClassification": "Classifying the category of french news.",
+    "CBD": "Classifying the sentiment of polish tweet reviews",
+    "PolEmo2.0-IN": "Classifying the sentiment of in-domain (medicine and hotels) online reviews",
+    "PolEmo2.0-OUT": "Classifying the sentiment of out-of-domain (products and school) online reviews",
+    "AllegroReviews": "Classifying the sentiment of reviews from e-commerce marketplace Allegro",
+    "PAC": 'Classifying the sentence into one of the two types: "BEZPIECZNE_POSTANOWIENIE_UMOWNE" and "KLAUZULA_ABUZYWNA"',
+    "GeoreviewClassification": "Classifying the sentiment of Russian reviews.",
+    "HeadlineClassification": "Classifying the topic of Russian headlines.",
+    "InappropriatenessClassification": "Detecting inappropriate messages on sensitive topics",
+    "KinopoiskClassification": "Classifying the sentiment of Kinopoisk reviews.",
+    "RuReviewsClassification": "Classifying the sentiment of Russian product reviews.",
+    "RuSciBenchGRNTIClassification": "Classifying the topic of Russian scientific papers.",
+    "RuSciBenchOECDClassification": "Classifying the topic of Russian scientific papers.",
+    "CEDRClassification": "Classification of sentences by emotions.",
+    "SensitiveTopicsClassification": "Detecting inappropriate messages on sensitive topics.",
+    "ArxivClusteringP2P": "Identify the main and secondary category of Arxiv papers based on the titles and abstracts",
+    "ArxivClusteringS2S": "Identify the main and secondary category of Arxiv papers based on the titles",
+    "BiorxivClusteringP2P": "Identify the main category of Biorxiv papers based on the titles and abstracts",
+    "BiorxivClusteringS2S": "Identify the main category of Biorxiv papers based on the titles",
+    "MedrxivClusteringP2P": "Identify the main category of Medrxiv papers based on the titles and abstracts",
+    "MedrxivClusteringS2S": "Identify the main category of Medrxiv papers based on the titles",
+    "RedditClustering": "Identify the topic or theme of Reddit posts based on the titles and posts",
+    "RedditClusteringP2P": "Identify the topic or theme of Reddit posts based on the titles and posts",
+    "StackExchangeClustering": "Identify the topic or theme of StackExchange posts based on the given paragraphs",
+    "StackExchangeClusteringP2P": "Identify the topic or theme of StackExchange posts based on the given paragraphs",
+    "TwentyNewsgroupsClustering": "Identify the topic or theme of the given news articles",
+    "CLSClusteringS2S": "Identify the main category of scholar papers based on the titles",
+    "CLSClusteringP2P": "Identify the main category of scholar papers based on the titles and abstracts",
+    "ThuNewsClusteringS2S": "Identify the topic or theme of the given news articles based on the titles",
+    "ThuNewsClusteringP2P": "Identify the topic or theme of the given news articles based on the titles and contents",
+    "AlloProfClusteringP2P": "Identify the main category of Allo Prof document based on the titles and descriptions",
+    "AlloProfClusteringS2S": "Identify the main category of Allo Prof document based on the titles",
+    "HALClusteringS2S": "Identify the main category of academic passage based on the titles and contents",
+    "MasakhaNEWSClusteringP2P": "Identify the topic or theme of the given news articles based on the titles and contents",
+    "MasakhaNEWSClusteringS2S": "Identify the topic or theme of the given news articles based on the titles",
+    "MLSUMClusteringP2P": "Identify the topic or theme of the given articles based on the titles and contents",
+    "MLSUMClusteringS2S": "Identify the topic or theme of the given articles based on the titles",
+    "EightTagsClustering": "Identify of headlines from social media posts in Polish into 8 categories: film, history, food, medicine, motorization, work, sport and technology",
+    "GeoreviewClusteringP2P": "Identify the topic or theme of the Russian reviews.",
+    "RuSciBenchGRNTIClusteringP2P": "Identify the topic or theme of the Russian articles.",
+    "RuSciBenchOECDClusteringP2P": "Identify the topic or theme of the Russian articles.",
+}
+
+KaLM_v2_task_prompts = {
+    "AmazonCounterfactualClassification": "Given an Amazon review, judge whether it is counterfactual.",
+    "AmazonPolarityClassification": "Classifying Amazon reviews into positive or negative sentiment",
+    "AmazonReviewsClassification": "Classifying the given Amazon review into its appropriate rating category",
+    "Banking77Classification": "Given an online banking query, find the corresponding intents",
+    "EmotionClassification": "Classifying the emotion expressed in the given Twitter message into one of the six emotions: anger, fear, joy, love, sadness, and surprise",
+    "ImdbClassification": "Classifying the sentiment expressed in the given movie review text from the IMDB dataset",
+    "MassiveIntentClassification": "Given a user utterance as query, find the user intents",
+    "MassiveScenarioClassification": "Given a user utterance as query, find the user scenarios",
+    "MTOPDomainClassification": "Classifying the intent domain of the given utterance in task-oriented conversation",
+    "MTOPIntentClassification": "Classifying the intent of the given utterance in task-oriented conversation",
+    "ToxicConversationsClassification": "Classifying the given comments as either toxic or not toxic",
+    "TweetSentimentExtractionClassification": "Classifying the sentiment of a given tweet as either positive, negative, or neutral",
+    "TNews": "Categorizing the given news title",
+    "IFlyTek": "Given an App description text, find the appropriate fine-grained category",
+    "MultilingualSentiment": "Classifying sentiment of the customer review into positive, neutral, or negative",
+    "JDReview": "Classifying sentiment of the customer review for iPhone into positive or negative",
+    "OnlineShopping": "Classifying sentiment of the customer review into positive or negative",
+    "Waimai": "Classify the customer review from a food takeaway platform into positive or negative",
+    "ArxivClusteringP2P": "Identify the main and secondary category of Arxiv papers based on the titles and abstracts",
+    "ArxivClusteringS2S": "Identify the main and secondary category of Arxiv papers based on the titles",
+    "BiorxivClusteringP2P": "Identify the main category of Biorxiv papers based on the titles and abstracts",
+    "BiorxivClusteringS2S": "Identify the main category of Biorxiv papers based on the titles",
+    "MedrxivClusteringP2P": "Identify the main category of Medrxiv papers based on the titles and abstracts",
+    "MedrxivClusteringS2S": "Identify the main category of Medrxiv papers based on the titles",
+    "RedditClustering": "Identify the topic or theme of Reddit posts based on the titles",
+    "RedditClusteringP2P": "Identify the topic or theme of Reddit posts based on the titles and posts",
+    "StackExchangeClustering": "Identify the topic or theme of StackExchange posts based on the titles",
+    "StackExchangeClusteringP2P": "Identify the topic or theme of StackExchange posts based on the given paragraphs",
+    "TwentyNewsgroupsClustering": "Identify the topic or theme of the given news articles",
+    "CLSClusteringS2S": "Identify the main category of scholar papers based on the titles",
+    "CLSClusteringP2P": "Identify the main category of scholar papers based on the titles and abstracts",
+    "ThuNewsClusteringS2S": "Identify the topic or theme of the given news articles based on the titles",
+    "ThuNewsClusteringP2P": "Identify the topic or theme of the given news articles based on the titles and contents",
+    "Cmnli-query": "Retrieve semantically similar text",
+    "Cmnli-document": "Retrieve semantically similar text",
+    "Ocnli-query": "Retrieve semantically similar text",
+    "Ocnli-document": "Retrieve semantically similar text",
+    "SprintDuplicateQuestions-query": "Retrieve semantically similar questions",
+    "SprintDuplicateQuestions-document": "Retrieve semantically similar questions",
+    "TwitterSemEval2015-query": "Retrieve semantically similar text",
+    "TwitterSemEval2015-document": "Retrieve semantically similar text",
+    "TwitterURLCorpus-query": "Retrieve semantically similar text",
+    "TwitterURLCorpus-document": "Retrieve semantically similar text",
+    "CMedQAv1-reranking": "Given a query, retrieve documents that answer the query",
+    "CMedQAv2-reranking": "Given a query, retrieve documents that answer the query",
+    "MMarcoReranking": "Given a query, retrieve documents that answer the query",
+    "T2Reranking": "Given a query, retrieve documents that answer the query",
+    "AskUbuntuDupQuestions-query": "Retrieve semantically similar questions",
+    "AskUbuntuDupQuestions-document": "Retrieve semantically similar questions",
+    "MindSmallReranking": "Given a query, retrieve documents that answer the query",
+    "SciDocsRR-query": "Retrieve relevant paper titles",
+    "SciDocsRR-document": "Retrieve relevant paper titles",
+    "StackOverflowDupQuestions-query": "Retrieve semantically similar questions",
+    "StackOverflowDupQuestions-document": "Retrieve semantically similar questions",
+    "CmedqaRetrieval": "Given a query, retrieve documents that answer the query",
+    "CovidRetrieval": "Given a query, retrieve documents that answer the query",
+    "DuRetrieval": "Given a query, retrieve documents that answer the query",
+    "EcomRetrieval": "Given a query, retrieve documents that answer the query",
+    "MedicalRetrieval": "Given a query, retrieve documents that answer the query",
+    "MMarcoRetrieval": "Given a query, retrieve documents that answer the query",
+    "T2Retrieval": "Given a query, retrieve documents that answer the query",
+    "VideoRetrieval": "Given a query, retrieve documents that answer the query",
+    "MSMARCO": "Given a query, retrieve documents that answer the query",
+    "ArguAna": "Given a query, retrieve documents that answer the query",
+    "ClimateFEVER": "Given a query, retrieve documents that answer the query",
+    "CQADupstackAndroidRetrieval-query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackAndroidRetrieval-document": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackEnglishRetrieval-query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackEnglishRetrieval-document": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackGamingRetrieval-query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackGamingRetrieval-document": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackGisRetrieval-query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackGisRetrieval-document": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackMathematicaRetrieval-query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackMathematicaRetrieval-document": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackPhysicsRetrieval-query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackPhysicsRetrieval-document": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackProgrammersRetrieval-query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackProgrammersRetrieval-document": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackStatsRetrieval-query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackStatsRetrieval-document": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackTexRetrieval-query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackTexRetrieval-document": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackUnixRetrieval-query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackUnixRetrieval-document": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackWebmastersRetrieval-query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackWebmastersRetrieval-document": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackWordpressRetrieval-query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackWordpressRetrieval-document": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "DBPedia": "Given a query, retrieve documents that answer the query",
+    "FEVER": "Given a query, retrieve documents that answer the query",
+    "FiQA2018": "Given a query, retrieve documents that answer the query",
+    "HotpotQA": "Given a query, retrieve documents that answer the query",
+    "NFCorpus": "Given a query, retrieve documents that answer the query",
+    "NQ": "Given a query, retrieve documents that answer the query",
+    "QuoraRetrieval-query": "Retrieve semantically similar questions",
+    "QuoraRetrieval-document": "Retrieve semantically similar questions",
+    "SCIDOCS-query": "Given a query, retrieve documents that answer the query",
+    "SCIDOCS-document": "Given a query, retrieve documents that answer the query",
+    "SciFact": "Given a query, retrieve documents that answer the query",
+    "Touche2020": "Given a query, retrieve documents that answer the query",
+    "TRECCOVID": "Given a query, retrieve documents that answer the query",
+    "AFQMC-query": "Retrieve semantically similar text",
+    "AFQMC-document": "Retrieve semantically similar text",
+    "ATEC-query": "Retrieve semantically similar text",
+    "ATEC-document": "Retrieve semantically similar text",
+    "BQ-query": "Retrieve semantically similar text",
+    "BQ-document": "Retrieve semantically similar text",
+    "LCQMC-query": "Retrieve semantically similar text",
+    "LCQMC-document": "Retrieve semantically similar text",
+    "PAWSX-query": "Retrieve semantically similar text",
+    "PAWSX-document": "Retrieve semantically similar text",
+    "QBQTC-query": "Retrieve semantically similar text",
+    "QBQTC-document": "Retrieve semantically similar text",
+    "STSB-query": "Retrieve semantically similar text",
+    "STSB-document": "Retrieve semantically similar text",
+    "BIOSSES-query": "Retrieve semantically similar text",
+    "BIOSSES-document": "Retrieve semantically similar text",
+    "SICK-R-query": "Retrieve semantically similar text",
+    "SICK-R-document": "Retrieve semantically similar text",
+    "STS12-query": "Retrieve semantically similar text",
+    "STS12-document": "Retrieve semantically similar text",
+    "STS13-query": "Retrieve semantically similar text",
+    "STS13-document": "Retrieve semantically similar text",
+    "STS14-query": "Retrieve semantically similar text",
+    "STS14-document": "Retrieve semantically similar text",
+    "STS15-query": "Retrieve semantically similar text",
+    "STS15-document": "Retrieve semantically similar text",
+    "STS16-query": "Retrieve semantically similar text",
+    "STS16-document": "Retrieve semantically similar text",
+    "STS17-query": "Retrieve semantically similar text",
+    "STS17-document": "Retrieve semantically similar text",
+    "STS22-query": "Retrieve semantically similar text",
+    "STS22-document": "Retrieve semantically similar text",
+    "STSBenchmark-query": "Retrieve semantically similar text",
+    "STSBenchmark-document": "Retrieve semantically similar text",
+    "SummEval-query": "Retrieve semantically similar summaries",
+    "SummEval-document": "Retrieve semantically similar summaries",
+}
+
+KaLM_X_task_prompts = {
+    "Classification": "classify the query into different classes.",
+    "MultilabelClassification": "Instruct: classify the query into different classes.",
+    "Clustering": "classify the query into different classes.",
+    "Reranking-query": "Given a query, retrieve documents that answer the query.",
+    "Retrieval-query": "Given a query, retrieve documents that answer the query.",
+    "InstructionRetrieval-query": "Given a query, retrieve documents that answer the query.",
+    "AmazonCounterfactualClassification": "Classify a given Amazon customer review text as either counterfactual or not-counterfactual",
+    "AmazonPolarityClassification": "Classify Amazon reviews into positive or negative sentiment",
+    "AmazonReviewsClassification": "Classify the given Amazon review into its appropriate rating category",
+    "Banking77Classification": "Given a online banking query, find the corresponding intents",
+    "EmotionClassification": "Classify the emotion expressed in the given Twitter message into one of the six emotions: anger, fear, joy, love, sadness, and surprise",
+    "ImdbClassification": "Classify the sentiment expressed in the given movie review text from the IMDB dataset",
+    "MassiveIntentClassification": "Given a user utterance as query, find the user intents",
+    "MassiveScenarioClassification": "Given a user utterance as query, find the user scenarios",
+    "MTOPDomainClassification": "Classify the intent domain of the given utterance in task-oriented conversation",
+    "MTOPIntentClassification": "Classify the intent of the given utterance in task-oriented conversation",
+    "ToxicConversationsClassification": "Classify the given comments as either toxic or not toxic",
+    "TweetSentimentExtractionClassification": "Classify the sentiment of a given tweet as either positive, negative, or neutral",
+    "TNews": "Classify the fine-grained category of the given news title",
+    "IFlyTek": "Given an App description text, find the appropriate fine-grained category",
+    "MultilingualSentiment": "Classify sentiment of the customer review into positive, neutral, or negative",
+    "JDReview": "Classify the customer review for iPhone on e-commerce platform into positive or negative",
+    "OnlineShopping": "Classify the customer review for online shopping into positive or negative",
+    "Waimai": "Classify the customer review from a food takeaway platform into positive or negative",
+    "ArxivClusteringP2P": "Identify the main and secondary category of Arxiv papers based on the titles and abstracts",
+    "ArxivClusteringS2S": "Identify the main and secondary category of Arxiv papers based on the titles",
+    "BiorxivClusteringP2P": "Identify the main category of Biorxiv papers based on the titles and abstracts",
+    "BiorxivClusteringS2S": "Identify the main category of Biorxiv papers based on the titles",
+    "MedrxivClusteringP2P": "Identify the main category of Medrxiv papers based on the titles and abstracts",
+    "MedrxivClusteringS2S": "Identify the main category of Medrxiv papers based on the titles",
+    "RedditClustering": "Identify the topic or theme of Reddit posts based on the titles",
+    "RedditClusteringP2P": "Identify the topic or theme of Reddit posts based on the titles and posts",
+    "StackExchangeClustering": "Identify the topic or theme of StackExchange posts based on the titles",
+    "StackExchangeClusteringP2P": "Identify the topic or theme of StackExchange posts based on the given paragraphs",
+    "TwentyNewsgroupsClustering": "Identify the topic or theme of the given news articles",
+    "CLSClusteringS2S": "Identify the main category of scholar papers based on the titles",
+    "CLSClusteringP2P": "Identify the main category of scholar papers based on the titles and abstracts",
+    "ThuNewsClusteringS2S": "Identify the topic or theme of the given news articles based on the titles",
+    "ThuNewsClusteringP2P": "Identify the topic or theme of the given news articles based on the titles and contents",
+    "AskUbuntuDupQuestions-query": "Retrieve duplicate questions from AskUbuntu forum",
+    "MindSmallReranking-query": "Retrieve relevant news articles based on user browsing history",
+    "SciDocsRR-query": "Given a title of a scientific paper, retrieve the titles of other relevant papers",
+    "StackOverflowDupQuestions-query": "Retrieve duplicate questions from StackOverflow forum",
+    "T2Reranking-query": "Given a Chinese search query, retrieve web passages that answer the question",
+    "MMarcoReranking-query": "Given a Chinese search query, retrieve web passages that answer the question",
+    "CMedQAv1-reranking-query": "Given a Chinese community medical question, retrieve replies that best answer the question",
+    "CMedQAv2-reranking-query": "Given a Chinese community medical question, retrieve replies that best answer the question",
+    "ArguAna-query": "Given a claim, find documents that refute the claim",
+    "ArguAna-document": "Given a claim, find documents that refute the claim",
+    "ClimateFEVER-query": "Given a claim about climate change, retrieve documents that support or refute the claim",
+    "ClimateFEVERHardNegatives-query": "Given a claim about climate change, retrieve documents that support or refute the claim",
+    "DBPedia-query": "Given a query, retrieve relevant entity descriptions from DBPedia",
+    "FEVER-query": "Given a claim, retrieve documents that support or refute the claim",
+    "FEVERHardNegatives-query": "Given a claim, retrieve documents that support or refute the claim",
+    "FiQA2018-query": "Given a financial question, retrieve user replies that best answer the question",
+    "HotpotQA-query": "Given a multi-hop question, retrieve documents that can help answer the question",
+    "HotpotQAHardNegatives-query": "Given a multi-hop question, retrieve documents that can help answer the question",
+    "MSMARCO-query": "Given a web search query, retrieve relevant passages that answer the query",
+    "NFCorpus-query": "Given a question, retrieve relevant documents that best answer the question",
+    "NQ-query": "Given a question, retrieve Wikipedia passages that answer the question",
+    "QuoraRetrieval-query": "Given a question, retrieve questions that are semantically equivalent to the given question",
+    "SCIDOCS-query": "Given a scientific paper title, retrieve paper abstracts that are cited by the given paper",
+    "SciFact-query": "Given a scientific claim, retrieve documents that support or refute the claim",
+    "Touche2020-query": "Given a question, retrieve detailed and persuasive arguments that answer the question",
+    "Touche2020Retrieval.v3-query": "Given a question, retrieve detailed and persuasive arguments that answer the question",
+    "TRECCOVID-query": "Given a query on COVID-19, retrieve documents that answer the query",
+    "T2Retrieval-query": "Given a Chinese search query, retrieve web passages that answer the question",
+    "MMarcoRetrieval-query": "Given a web search query, retrieve relevant passages that answer the query",
+    "DuRetrieval-query": "Given a Chinese search query, retrieve web passages that answer the question",
+    "CovidRetrieval-query": "Given a question on COVID-19, retrieve news articles that answer the question",
+    "CmedqaRetrieval-query": "Given a Chinese community medical question, retrieve replies that best answer the question",
+    "EcomRetrieval-query": "Given a user query from an e-commerce website, retrieve description sentences of relevant products",
+    "MedicalRetrieval-query": "Given a medical question, retrieve user replies that best answer the question",
+    "VideoRetrieval-query": "Given a video search query, retrieve the titles of relevant videos",
+    "MasakhaNEWSClassification": "Classify the News in the given texts into one of the seven category: politics,sports,health,business,entertainment,technology,religion ",
+    "AlloProfClusteringP2P": "Identify the main category of Allo Prof document based on the titles and descriptions",
+    "AlloProfClusteringS2S": "Identify the topic of document titles from Allo Prof dataset",
+    "HALClusteringS2S": "Identify the main category of academic passage based on the titles and contents",
+    "MasakhaNEWSClusteringP2P": "Identify the topic or theme of the given news articles based on the titles and contents",
+    "MasakhaNEWSClusteringS2S": "Identify the topic or theme of the given news articles based on the titles",
+    "MLSUMClusteringP2P": "Identify the topic or theme of the given articles based on the titles and contents",
+    "MLSUMClusteringS2S": "Identify the topic or theme of the given articles based on the titles",
+    "SyntecReranking-query": "Given a question, retrieve passages that answer the question",
+    "AlloprofReranking-query": "Given a question, retrieve passages that answer the question",
+    "AlloprofRetrieval-query": "Given a question, retrieve passages that answer the question",
+    "BSARDRetrieval-query": "Given a question, retrieve passages that answer the question",
+    "SyntecRetrieval-query": "Given a question, retrieve passages that answer the question",
+    "XPQARetrieval-query": "Given a question, retrieve passages that answer the question",
+    "MintakaRetrieval-query": "Given a question, retrieve passages that answer the question",
+    "CBD": "Classify the sentiment of polish tweet reviews",
+    "PolEmo2.0-IN": "Classify the sentiment of in-domain (medicine and hotels) online reviews",
+    "PolEmo2.0-OUT": "Classify the sentiment of out-of-domain (products and school) online reviews",
+    "AllegroReviews": "Classify the sentiment of reviews from e-commerce marketplace Allegro",
+    "PAC": 'Classify the sentence into one of the two types: "BEZPIECZNE_POSTANOWIENIE_UMOWNE" and "KLAUZULA_ABUZYWNA"',
+    "EightTagsClustering": "Identify of headlines from social media posts in Polish  into 8 categories: film, history, food, medicine, motorization, work, sport and technology",
+    "ArguAna-PL-query": "Given a claim, find documents that refute the claim",
+    "DBPedia-PL-query": "Given a query, retrieve relevant entity descriptions from DBPedia",
+    "FiQA-PL-query": "Given a financial question, retrieve user replies that best answer the question",
+    "HotpotQA-PL-query": "Given a multi-hop question, retrieve documents that can help answer the question",
+    "MSMARCO-PL-query": "Given a web search query, retrieve relevant passages that answer the query",
+    "NFCorpus-PL-query": "Given a question, retrieve relevant documents that best answer the question",
+    "NQ-PL-query": "Given a question, retrieve Wikipedia passages that answer the question",
+    "Quora-PL-query": "Given a question, retrieve questions that are semantically equivalent to the given question",
+    "SCIDOCS-PL-query": "Given a scientific paper title, retrieve paper abstracts that are cited by the given paper",
+    "SciFact-PL-query": "Given a scientific claim, retrieve documents that support or refute the claim",
+    "TRECCOVID-PL-query": "Given a query on COVID-19, retrieve documents that answer the query",
+    "GeoreviewClassification": "Classify the organization rating based on the reviews",
+    "HeadlineClassification": "Classify the topic or theme of the given news headline",
+    "InappropriatenessClassification": "Classify the given message as either sensitive topic or not",
+    "KinopoiskClassification": "Classify the sentiment expressed in the given movie review text",
+    "RuReviewsClassification": "Classify product reviews into positive, negative or neutral sentiment",
+    "RuSciBenchGRNTIClassification": "Classify the category of scientific papers based on the titles and abstracts",
+    "RuSciBenchOECDClassification": "Classify the category of scientific papers based on the titles and abstracts",
+    "GeoreviewClusteringP2P": "Identify the organization category based on the reviews",
+    "RuSciBenchGRNTIClusteringP2P": "Identify the category of scientific papers based on the titles and abstracts",
+    "RuSciBenchOECDClusteringP2P": "Identify the category of scientific papers based on the titles and abstracts",
+    "RuBQReranking-query": "Given a question, retrieve Wikipedia passages that answer the question",
+    "RiaNewsRetrieval-query": "Given a headline, retrieval relevant articles",
+    "RuBQRetrieval-query": "Given a question, retrieve Wikipedia passages that answer the question",
+    "AppsRetrieval-query": "Given a question about code problem, retrieval code that can solve user's problem",
+    "COIRCodeSearchNetRetrieval-query": "Given a code snippet, retrieve the comment corresponding to that code.",
+    "CodeEditSearchRetrieval-query": "Given a piece of code, retrieval code that in the ",
+    "CodeFeedbackMT-query": "Given a question about coding, retrieval code or passage that can solve user's question",
+    "CodeFeedbackST-query": "Given a question about coding, retrieval code or passage that can solve user's question",
+    "CodeSearchNetCCRetrieval-query": "Given a code comment, retrieve the code snippet corresponding to that comment.",
+    "CodeSearchNetRetrieval-query": "Given a code snippet, retrieve the comment corresponding to that code.",
+    "CodeTransOceanContest-query": "Given a piece for code, retrieval semantically similar code",
+    "CodeTransOceanDL-query": "Given a piece for code, retrieval semantically similar code",
+    "CosQA-query": "Given a question about coding, retrieval code or passage that can solve user's question",
+    "StackOverflowQA-query": "Given a question about coding, retrieval code or passage that can solve user's question",
+    "SyntheticText2SQL-query": "Given a user's question, retrieve SQL queries that are appropriate responses to the question",
+    "BulgarianStoreReviewSentimentClassfication": "Classify user reviews into positive or negative sentiment",
+    "CzechProductReviewSentimentClassification": "Classify product reviews into positive or negative sentiment",
+    "GreekLegalCodeClassification": "Given a greek legal text, classify its topic",
+    "DBpediaClassification": "Given a Wikipedia articles, categorized it into classes based on its DBpedia ontology",
+    "FinancialPhrasebankClassification": "Given financial news, categorized by sentiment into positive, negative, or neutral",
+    "PoemSentimentClassification": "Gvien a poem, categorized by sentiment into positive, no_impact, negative or mixed",
+    "TweetTopicSingleClassification": "Gvien a twitter, classify its topic",
+    "EstonianValenceClassification": "Given a news article, categorized by sentiment into negatiivne, positiivne, neutraalne or vastuolulin",
+    "FilipinoShopeeReviewsClassification": "Given a shop review, classify its rating on a scale from 1 to 5",
+    "GujaratiNewsClassification": "Given a Gujarati news articles, classify ist topic",
+    "SentimentAnalysisHindi": "Given a hindi text, categorized by sentiment into positive, negative or neutral",
+    "IndonesianIdClickbaitClassification": "Given an Indonesian news headlines, classify its into clickbait or non-clickbait",
+    "ItaCaseholdClassification": "Given a judgments, classify its topic",
+    "KorSarcasmClassification": "Given a twitter, categorized it into sarcasm or not_sarcasm",
+    "KurdishSentimentClassification": "Given a text, categorized by sentiment into positive or negative",
+    "MacedonianTweetSentimentClassification": "Given a Macedonian tweet, categorized by sentiment into positive, negative, or neutral",
+    "AfriSentiClassification": "Given a text, categorized by sentiment into positive, negative, or neutral",
+    "CataloniaTweetClassification": "Given a tweet, categorized by sentiment into AGAINST, FAVOR or NEUTRAL",
+    "CyrillicTurkicLangClassification": "Given a text, classify its language",
+    "IndicLangClassification": "Given a text, classify its language",
+    "MultiHateClassification": "Given a text, categorized by sentiment into hate or non-hate",
+    "NusaParagraphEmotionClassification": "Given a paragraph, classify its emotion",
+    "NusaX-senti": "Given a text, categorized by sentiment into positive or negative",
+    "SwissJudgementClassification": "Given a news article, categorized it into approval or dismissal",
+    "NepaliNewsClassification": "Given a news article, categorized it into business, entertainment or sports",
+    "OdiaNewsClassification": "Given a news article, categorized it into business, entertainment or sports",
+    "PunjabiNewsClassification": "Given a news article, categorized it into two-classes",
+    "SinhalaNewsClassification": "Given a news article, categorized it into political, business, technology, sports and Entertainment",
+    "CSFDSKMovieReviewSentimentClassification": "Given a movie review, classify its rating on a scale from 0 to 5",
+    "SiswatiNewsClassification": "Given a news article, classify its topic",
+    "SlovakMovieReviewSentimentClassification": "Given a movie review, categorized it into positive or negative",
+    "SwahiliNewsClassification": "Given a news article, classify its domain",
+    "TswanaNewsClassification": "Given a news article, classify its topic",
+    "IsiZuluNewsClassification": "Given a news article, classify its topic",
+    "WikiCitiesClustering": "Identify of Wikipedia articles of cities by country",
+    "RomaniBibleClustering": "Identify verses from the Bible in Kalderash Romani by book.",
+    "ArXivHierarchicalClusteringP2P": "Identify the main and secondary category of Arxiv papers based on the titles and abstracts",
+    "ArXivHierarchicalClusteringS2S": "Identify the main and secondary category of Arxiv papers based on the titles",
+    "BigPatentClustering.v2": "Identify the category of documents from the Big Patent dataset",
+    "AlloProfClusteringS2S.v2": "Identify the topic of document titles from Allo Prof dataset",
+    "HALClusteringS2S.v2": "Identify the topic of titles from HAL",
+    "SIB200ClusteringS2S": "Identify the category of documents",
+    "WikiClusteringP2P.v2": "Identify the category of wiki passages",
+    "PlscClusteringP2P.v2": "Identify the category of titles+abstracts from Library of Science",
+    "KorHateSpeechMLClassification": "Given a Korean online news comments, classify its fine-grained hate speech classes",
+    "MalteseNewsClassification": "Given a maltese new, classify its topic",
+    "MultiEURLEXMultilabelClassification": "Given a text, classify its topic",
+    "BrazilianToxicTweetsClassification": "Given a tweet, classify its topic",
+    "AILAStatutes-query": "Identifying the most relevant statutes for a given situation",
+    "HagridRetrieval-query": "Retrieval the relevant passage for the given query",
+    "LegalBenchCorporateLobbying-query": "Retrieval the relevant passage for the given query",
+    "LEMBPasskeyRetrieval-query": "Retrieval the relevant passage for the given query",
+    "BelebeleRetrieval-query": "Retrieval the relevant passage for the given query",
+    "MLQARetrieval-query": "Retrieval the relevant passage for the given query",
+    "StatcanDialogueDatasetRetrieval-query": "Retrieval the relevant passage for the given query",
+    "WikipediaRetrievalMultilingual-query": "Retrieval the relevant passage for the given query",
+    "Core17InstructionRetrieval-query": "Retrieval the relevant passage for the given query",
+    "News21InstructionRetrieval-query": "Retrieval the relevant passage for the given query",
+    "Robust04InstructionRetrieval-query": "Retrieval the relevant passage for the given query",
+    "WebLINXCandidatesReranking-query": "Retrieval the relevant passage for the given query",
+    "WikipediaRerankingMultilingual-query": "Retrieval the relevant passage for the given query",
+    "MIRACLRetrievalHardNegatives-query": "Retrieval relevant passage for the given query",
+    "CQADupstackRetrieval-query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackGamingRetrieval-query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackGamingRetrieval-document": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackUnixRetrieval-query": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+    "CQADupstackUnixRetrieval-document": "Given a question, retrieve detailed question descriptions from Stackexchange that are duplicates to the given question",
+}
+
+KaLM_INSTRUCTION = "Instruct: {instruction} \n Query: "
+
+HIT_TMG__KaLM_embedding_multilingual_mini_instruct_v1 = ModelMeta(
+    loader=partial(  # type: ignore
+        KALMWrapper,
+        model_name="HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1",
+        revision="45e42c89990c40aca042659133fc8b13c28634b5",
+        instruction_template=KaLM_INSTRUCTION,
+        max_seq_length=512,
+        apply_instruction_to_passages=False,
+        prompts_dict=KaLM_task_prompts,
+    ),
+    name="HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1",
+    revision="45e42c89990c40aca042659133fc8b13c28634b5",
+    release_date="2024-10-23",
+    languages=["eng-Latn", "zho-Hans"],
+    n_parameters=494032768,
+    memory_usage_mb=1885,
+    max_tokens=512,
+    embed_dim=896,
+    license="mit",
+    open_weights=True,
+    public_training_code=None,
+    public_training_data=None,
+    framework=["PyTorch", "Sentence Transformers"],
+    reference="https://huggingface.co/HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1",
+    similarity_fn_name="cosine",
+    use_instructions=True,
+    training_datasets=kalm_training_data,  # Replace with actual dataset if available
+    adapted_from="Qwen/Qwen2-0.5B",
+    superseded_by=None,
+)
+
+HIT_TMG__KaLM_embedding_multilingual_mini_v1 = ModelMeta(
+    name="HIT-TMG/KaLM-embedding-multilingual-mini-v1",
+    revision="8a82a0cd2b322b91723e252486f7cce6fd8ac9d3",
+    release_date="2024-08-27",
+    languages=["eng-Latn", "zho-Hans"],
+    n_parameters=494032768,
+    memory_usage_mb=1885,
+    max_tokens=512,
+    embed_dim=896,
+    license="mit",
+    open_weights=True,
+    public_training_code=None,
+    public_training_data=None,
+    framework=["PyTorch"],
+    reference="https://huggingface.co/HIT-TMG/KaLM-embedding-multilingual-mini-v1",
+    similarity_fn_name="cosine",
+    use_instructions=None,
+    training_datasets=kalm_training_data,
+    adapted_from="Qwen/Qwen2-0.5B",
+    superseded_by=None,
+)
+
+HIT_TMG__KaLM_embedding_multilingual_mini_instruct_v1_5 = ModelMeta(
+    loader=partial(  # type: ignore
+        KALMWrapper,
+        model_name="HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5",
+        revision="fcff2f8a54e4cd96b7766fef1ee960a43d42bb3c",
+        instruction_template=KaLM_INSTRUCTION,
+        max_seq_length=512,
+        apply_instruction_to_passages=False,
+        prompts_dict=KaLM_task_prompts,
+    ),
+    name="HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5",
+    revision="fcff2f8a54e4cd96b7766fef1ee960a43d42bb3c",
+    release_date="2024-12-26",
+    languages=["eng-Latn", "zho-Hans"],
+    n_parameters=494032768,
+    memory_usage_mb=1885,
+    max_tokens=512,
+    embed_dim=896,
+    license="mit",
+    open_weights=True,
+    public_training_code=None,
+    public_training_data=None,
+    framework=["PyTorch", "Sentence Transformers"],
+    reference="https://huggingface.co/HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5",
+    similarity_fn_name="cosine",
+    use_instructions=True,
+    training_datasets=kalm_training_data,
+    adapted_from="HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1",
+    superseded_by=None,
+)
+
+HIT_TMG__KaLM_embedding_multilingual_mini_instruct_v2 = ModelMeta(
+    loader=partial(  # type: ignore
+        InstructSentenceTransformerWrapper,
+        model_name="HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v2",
+        revision="d2a21c232dc712ae8230af56d1027cf21b7864bf",
+        instruction_template=KaLM_INSTRUCTION,
+        max_seq_length=512,
+        apply_instruction_to_passages=False,
+        prompts_dict=KaLM_v2_task_prompts,
+    ),
+    name="HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v2",
+    revision="d2a21c232dc712ae8230af56d1027cf21b7864bf",
+    release_date="2025-06-25",
+    languages=["eng-Latn", "zho-Hans"],
+    n_parameters=494032768,
+    memory_usage_mb=942,
+    max_tokens=512,
+    embed_dim=896,
+    license="mit",
+    open_weights=True,
+    public_training_code=None,
+    public_training_data=None,
+    framework=["PyTorch", "Sentence Transformers"],
+    reference="https://huggingface.co/HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v2",
+    similarity_fn_name="cosine",
+    use_instructions=True,
+    training_datasets=kalm_v2_training_data,
+    adapted_from="HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v1.5",
+    superseded_by=None,
+)
+
+KaLM_Embedding_KaLM_embedding_multilingual_mini_instruct_v2_5 = ModelMeta(
+    loader=partial(
+        InstructSentenceTransformerWrapper,
+        model_name="KaLM-Embedding/KaLM-embedding-multilingual-mini-instruct-v2.5",
+        revision="6a4cfc1084cb459ebd4729b53a8656a61448c720",
+        instruction_template=KaLM_INSTRUCTION,
+        max_seq_length=512,
+        apply_instruction_to_passages=False,
+        prompts_dict=KaLM_v2_task_prompts,
+    ),
+    name="KaLM-Embedding/KaLM-embedding-multilingual-mini-instruct-v2.5",
+    revision="6a4cfc1084cb459ebd4729b53a8656a61448c720",
+    release_date="2025-09-30",
+    languages=["eng-Latn", "zho-Hans"],
+    n_parameters=494032768,
+    memory_usage_mb=1885,
+    max_tokens=512,
+    embed_dim=896,
+    license="apache-2.0",
+    open_weights=True,
+    public_training_code=None,
+    public_training_data="https://huggingface.co/datasets/KaLM-Embedding/KaLM-embedding-finetuning-data",
+    framework=["PyTorch", "Sentence Transformers"],
+    reference="https://huggingface.co/KaLM-Embedding/KaLM-embedding-multilingual-mini-instruct-v2.5",
+    similarity_fn_name="cosine",
+    use_instructions=True,
+    training_datasets=kalm_v2_training_data,
+    adapted_from="HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v2",
+    superseded_by=None,
+)
+
+
+# KaLM_Embedding_X_0605 = ModelMeta(
+#     loader=partial(
+#         KALMWrapper,
+#         model_name="KaLM-Team/KaLM-Embedding-X-0605",
+#         revision="1",
+#         instruction_template=KaLM_INSTRUCTION,
+#         max_seq_length=512,
+#         apply_instruction_to_passages=True,
+#         prompts_dict=KaLM_X_task_prompts,
+#     ),
+#     name="KaLM-Team/KaLM-Embedding-X-0605",
+#     revision="1",
+#     languages=None,
+#     open_weights=False,
+#     release_date="2025-06-05",
+#     n_parameters=9.24 * 1e9,
+#     memory_usage_mb=35254,
+#     max_tokens=8192,
+#     embed_dim=3584,
+#     license=None,
+#     reference="https://github.com/KaLM-Team/KaLM-Embedding-X",
+#     similarity_fn_name="cosine",
+#     framework=["Sentence Transformers", "PyTorch"],
+#     use_instructions=True,
+#     public_training_code="https://github.com/HITsz-TMG/KaLM-Embedding",
+#     public_training_data=None,
+#     training_datasets=kalm_training_data,
+# )