diff --git a/docs/create_tasks_table.py b/docs/create_tasks_table.py index 33dca958cb..f7e79331ec 100644 --- a/docs/create_tasks_table.py +++ b/docs/create_tasks_table.py @@ -106,13 +106,14 @@ def create_task_lang_table(tasks: list[mteb.AbsTask], sort_by_sum=False) -> str: df = df.sort(by="sum", descending=True) total = df.sum() - - task_names_md = " | ".join(sorted(get_args(TASK_TYPE))) - horizontal_line_md = "---|---" * (len(sorted(get_args(TASK_TYPE))) + 1) - table = f""" -| ISO Code | Language | Family | {task_names_md} | Sum | -|{horizontal_line_md}| -""" + task_names = sorted(get_args(TASK_TYPE)) + headers = ["ISO Code", "Language", "Family"] + task_names + ["Sum"] + table_header = "| " + " | ".join(headers) + " |" + separator_line = "|" + for header in headers: + width = len(header) + 2 + separator_line += "-" * width + "|" + table = table_header + "\n" + separator_line + "\n" for row in df.iter_rows(): table += f"| {row[0]} " @@ -138,7 +139,11 @@ def insert_tables( for table, tag in zip(tables, tags): start = f"" end = f"" - md = md.replace(md[md.index(start) + len(start) : md.index(end)], table) + # Ensure a newline after the start tag + md = md.replace( + md[md.index(start) + len(start) : md.index(end)], + f"\n{table}\n", + ) Path(file_path).write_text(md, encoding="utf-8") diff --git a/docs/tasks.md b/docs/tasks.md index 97f75c7d98..a544748912 100644 --- a/docs/tasks.md +++ b/docs/tasks.md @@ -5,6 +5,7 @@ The following tables give you an overview of the tasks in MTEB. + | Name | Languages | Type | Category | Domains | # Samples | Dataset statistics | |------|-----------|------|----------|---------|-----------|--------------------| | [AFQMC](https://aclanthology.org/2021.emnlp-main.357) | ['cmn'] | STS | s2s | | None | None | @@ -893,6 +894,7 @@ The following tables give you an overview of the tasks in MTEB. | [mFollowIRCrossLingualInstructionRetrieval](https://neuclir.github.io/) (Weller et al., 2024) | ['eng', 'fas', 'rus', 'zho'] | Retrieval | s2p | [News, Written] | {'test': 121758} | {'test': {'num_samples': 121758, 'num_docs': 121635, 'num_queries': 123, 'number_of_characters': 283654099, 'min_document_length': 74, 'average_document_length': 2331.08, 'max_document_length': 24179, 'unique_docs': 121635, 'min_query_length': 32, 'average_query_length': 81.88, 'max_query_length': 173, 'unique_queries': 75, 'min_instruction_length': 93, 'average_instruction_length': 389.95, 'max_instruction_length': 887, 'unique_instructions': 75, 'min_changed_instruction_length': 180, 'average_changed_instruction_length': 450.55, 'max_changed_instruction_length': 974, 'unique_changed_instructions': 123, 'min_average_relevant_docs_per_query': 0, 'average_relevant_docs_per_query': 10.43, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000, 'hf_subset_descriptive_stats': {'eng-fas': {'num_samples': 41229, 'num_docs': 41189, 'num_queries': 40, 'number_of_characters': 129597567, 'min_document_length': 99, 'average_document_length': 3145.5, 'max_document_length': 24179, 'unique_docs': 41189, 'min_query_length': 34, 'average_query_length': 80.08, 'max_query_length': 124, 'unique_queries': 40, 'min_instruction_length': 150, 'average_instruction_length': 396.88, 'max_instruction_length': 887, 'unique_instructions': 40, 'min_changed_instruction_length': 205, 'average_changed_instruction_length': 463.18, 'max_changed_instruction_length': 974, 'unique_changed_instructions': 40, 'min_average_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 10.85, 'max_average_relevant_docs_per_query': 22, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}, 'eng-rus': {'num_samples': 39366, 'num_docs': 39326, 'num_queries': 40, 'number_of_characters': 109522175, 'min_document_length': 75, 'average_document_length': 2784.08, 'max_document_length': 24061, 'unique_docs': 39326, 'min_query_length': 32, 'average_query_length': 81.88, 'max_query_length': 173, 'unique_queries': 40, 'min_instruction_length': 93, 'average_instruction_length': 371.12, 'max_instruction_length': 887, 'unique_instructions': 40, 'min_changed_instruction_length': 180, 'average_changed_instruction_length': 431.8, 'max_changed_instruction_length': 957, 'unique_changed_instructions': 40, 'min_average_relevant_docs_per_query': 0, 'average_relevant_docs_per_query': 9.78, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}, 'eng-zho': {'num_samples': 41163, 'num_docs': 41120, 'num_queries': 43, 'number_of_characters': 44534357, 'min_document_length': 74, 'average_document_length': 1082.05, 'max_document_length': 23840, 'unique_docs': 41120, 'min_query_length': 32, 'average_query_length': 83.56, 'max_query_length': 159, 'unique_queries': 43, 'min_instruction_length': 157, 'average_instruction_length': 401.02, 'max_instruction_length': 731, 'unique_instructions': 43, 'min_changed_instruction_length': 209, 'average_changed_instruction_length': 456.26, 'max_changed_instruction_length': 822, 'unique_changed_instructions': 43, 'min_average_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 10.65, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}}}} | | [mFollowIRInstructionRetrieval](https://neuclir.github.io/) (Weller et al., 2024) | ['fas', 'rus', 'zho'] | Retrieval | s2p | [News, Written] | {'test': 121758} | {'test': {'num_samples': 121758, 'num_docs': 121635, 'num_queries': 123, 'number_of_characters': 283622456, 'min_document_length': 74, 'average_document_length': 2331.08, 'max_document_length': 24179, 'unique_docs': 121635, 'min_query_length': 10, 'average_query_length': 57.11, 'max_query_length': 136, 'unique_queries': 123, 'min_instruction_length': 37, 'average_instruction_length': 281.07, 'max_instruction_length': 1009, 'unique_instructions': 123, 'min_changed_instruction_length': 44, 'average_changed_instruction_length': 326.94, 'max_changed_instruction_length': 1083, 'unique_changed_instructions': 123, 'min_average_relevant_docs_per_query': 0, 'average_relevant_docs_per_query': 10.43, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000, 'hf_subset_descriptive_stats': {'fas': {'num_samples': 41229, 'num_docs': 41189, 'num_queries': 40, 'number_of_characters': 129593838, 'min_document_length': 99, 'average_document_length': 3145.5, 'max_document_length': 24179, 'unique_docs': 41189, 'min_query_length': 34, 'average_query_length': 72.65, 'max_query_length': 124, 'unique_queries': 40, 'min_instruction_length': 121, 'average_instruction_length': 358.93, 'max_instruction_length': 759, 'unique_instructions': 40, 'min_changed_instruction_length': 163, 'average_changed_instruction_length': 415.32, 'max_changed_instruction_length': 842, 'unique_changed_instructions': 40, 'min_average_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 10.85, 'max_average_relevant_docs_per_query': 22, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}, 'rus': {'num_samples': 39366, 'num_docs': 39326, 'num_queries': 40, 'number_of_characters': 109523683, 'min_document_length': 75, 'average_document_length': 2784.08, 'max_document_length': 24061, 'unique_docs': 39326, 'min_query_length': 26, 'average_query_length': 77.5, 'max_query_length': 136, 'unique_queries': 40, 'min_instruction_length': 78, 'average_instruction_length': 387.0, 'max_instruction_length': 1009, 'unique_instructions': 40, 'min_changed_instruction_length': 187, 'average_changed_instruction_length': 458.0, 'max_changed_instruction_length': 1083, 'unique_changed_instructions': 40, 'min_average_relevant_docs_per_query': 0, 'average_relevant_docs_per_query': 9.78, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}, 'zho': {'num_samples': 41163, 'num_docs': 41120, 'num_queries': 43, 'number_of_characters': 44504935, 'min_document_length': 74, 'average_document_length': 1082.05, 'max_document_length': 23840, 'unique_docs': 41120, 'min_query_length': 10, 'average_query_length': 23.7, 'max_query_length': 44, 'unique_queries': 43, 'min_instruction_length': 37, 'average_instruction_length': 110.09, 'max_instruction_length': 209, 'unique_instructions': 43, 'min_changed_instruction_length': 44, 'average_changed_instruction_length': 122.81, 'max_changed_instruction_length': 229, 'unique_changed_instructions': 43, 'min_average_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 10.65, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}}}} | | [mMARCO-NL](https://github.com/unicamp-dl/mMARCO) (Luiz Bonifacio and Israel Campiotti and Roberto de Alencar Lotufo and Rodrigo Frassetto Nogueira, 2021) | ['nld'] | Retrieval | s2p | [Web, Written] | None | None | + @@ -905,7 +907,7 @@ The following tables give you an overview of the tasks in MTEB. | ISO Code | Language | Family | Any2AnyMultiChoice | Any2AnyMultilingualRetrieval | Any2AnyRetrieval | BitextMining | Classification | Clustering | Compositionality | DocumentUnderstanding | ImageClassification | ImageClustering | ImageMultilabelClassification | InstructionRetrieval | MultilabelClassification | PairClassification | Reranking | Retrieval | STS | Speed | Summarization | VisionCentricQA | VisualSTS(eng) | VisualSTS(multi) | ZeroShotClassification | Sum | -|---|------|------|------|------|------|------|------|------|------|------|------|------|------|------|------|------|------|------|------|------|------|------|------|---| +|----------|----------|--------|--------------------|------------------------------|------------------|--------------|----------------|------------|------------------|-----------------------|---------------------|-----------------|-------------------------------|----------------------|--------------------------|--------------------|-----------|-----------|-----|-------|---------------|-----------------|----------------|------------------|------------------------|-----| | aai | Arifama-Miniafia | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | aak | Ankave | Angan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | aau | Abau | Sepik | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | @@ -1959,6 +1961,7 @@ The following tables give you an overview of the tasks in MTEB. | zul | Zulu | Atlantic-Congo | 0 | 0 | 0 | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | | zyp | Zyphe Chin | Sino-Tibetan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | Total | None | None | None | 0 | 55 | 49 | 1492 | 836 | 316 | 7 | 10 | 22 | 5 | 0 | 3 | 28 | 91 | 56 | 591 | 88 | 2 | 2 | 6 | 7 | 37 | 24 | +