diff --git a/mteb/benchmarks/benchmarks/__init__.py b/mteb/benchmarks/benchmarks/__init__.py index 3988cd972a..862ecaa66d 100644 --- a/mteb/benchmarks/benchmarks/__init__.py +++ b/mteb/benchmarks/benchmarks/__init__.py @@ -14,6 +14,7 @@ JINA_VDR, JMTEB_LITE_V1, JMTEB_V2, + KOVIDORE_V2, LONG_EMBED, MIEB_ENG, MIEB_IMG, @@ -79,6 +80,7 @@ "JINA_VDR", "JMTEB_LITE_V1", "JMTEB_V2", + "KOVIDORE_V2", "LONG_EMBED", "MIEB_ENG", "MIEB_IMG", diff --git a/mteb/benchmarks/benchmarks/benchmarks.py b/mteb/benchmarks/benchmarks/benchmarks.py index 16a84b3893..6b06e44122 100644 --- a/mteb/benchmarks/benchmarks/benchmarks.py +++ b/mteb/benchmarks/benchmarks/benchmarks.py @@ -2728,3 +2728,27 @@ """, contacts=["lsz05"], ) + +KOVIDORE_V2 = Benchmark( + name="KoViDoRe(v2)", + display_name="KoViDoRe v2", + tasks=get_tasks( + tasks=[ + "KoVidore2CybersecurityRetrieval", + "KoVidore2EconomicRetrieval", + "KoVidore2EnergyRetrieval", + "KoVidore2HrRetrieval", + ] + ), + description="KoViDoRe v2 sets a new industry gold standard for multi-modal, enterprise document visual retrieval evaluation. 
It addresses a critical challenge in production RAG systems: retrieving accurate information from complex, visually-rich documents.", + reference="https://github.com/whybe-choi/kovidore-data-generator", + citation=r""" +@misc{choi2026kovidorev2, + author = {Yongbin Choi}, + note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains}, + title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases}, + url = {https://github.com/whybe-choi/kovidore-data-generator}, + year = {2026}, +} +""", +) diff --git a/mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json b/mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json new file mode 100644 index 0000000000..c175631c22 --- /dev/null +++ b/mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json @@ -0,0 +1,32 @@ +{ + "test": { + "num_samples": 1299, + "number_of_characters": 9254, + "documents_text_statistics": null, + "documents_image_statistics": { + "min_image_width": 2245, + "average_image_width": 2370.324347826087, + "max_image_width": 3508, + "min_image_height": 2481, + "average_image_height": 3289.8060869565215, + "max_image_height": 3580, + "unique_images": 1132 + }, + "queries_text_statistics": { + "total_text_length": 9254, + "min_text_length": 15, + "average_text_length": 62.10738255033557, + "max_text_length": 108, + "unique_texts": 149 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 409, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.7449664429530203, + "max_relevant_docs_per_query": 7, + "unique_relevant_docs": 316 + }, + "top_ranked_statistics": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json 
b/mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json new file mode 100644 index 0000000000..1e64ebda1e --- /dev/null +++ b/mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json @@ -0,0 +1,32 @@ +{ + "test": { + "num_samples": 1640, + "number_of_characters": 8331, + "documents_text_statistics": null, + "documents_image_statistics": { + "min_image_width": 2313, + "average_image_width": 2347.5321597833445, + "max_image_width": 2481, + "min_image_height": 3138, + "average_image_height": 3214.301963439404, + "max_image_height": 3508, + "unique_images": 1442 + }, + "queries_text_statistics": { + "total_text_length": 8331, + "min_text_length": 23, + "average_text_length": 51.11042944785276, + "max_text_length": 110, + "unique_texts": 163 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 413, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 2.5337423312883436, + "max_relevant_docs_per_query": 6, + "unique_relevant_docs": 349 + }, + "top_ranked_statistics": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json b/mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json new file mode 100644 index 0000000000..3e35d9ce02 --- /dev/null +++ b/mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json @@ -0,0 +1,32 @@ +{ + "test": { + "num_samples": 2101, + "number_of_characters": 10417, + "documents_text_statistics": null, + "documents_image_statistics": { + "min_image_width": 2221, + "average_image_width": 2305.0387231815803, + "max_image_width": 2480, + "min_image_height": 3036, + "average_image_height": 3186.1962323390894, + "max_image_height": 3508, + "unique_images": 1900 + }, + "queries_text_statistics": { + "total_text_length": 10417, + "min_text_length": 22, + "average_text_length": 54.82631578947368, + 
"max_text_length": 103, + "unique_texts": 189 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 571, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 3.0052631578947366, + "max_relevant_docs_per_query": 7, + "unique_relevant_docs": 464 + }, + "top_ranked_statistics": null + } +} \ No newline at end of file diff --git a/mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json b/mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json new file mode 100644 index 0000000000..c837283bfb --- /dev/null +++ b/mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json @@ -0,0 +1,32 @@ +{ + "test": { + "num_samples": 2330, + "number_of_characters": 13131, + "documents_text_statistics": null, + "documents_image_statistics": { + "min_image_width": 1949, + "average_image_width": 2430.1152204836417, + "max_image_width": 3505, + "min_image_height": 2480, + "average_image_height": 3350.3921289710765, + "max_image_height": 3626, + "unique_images": 2096 + }, + "queries_text_statistics": { + "total_text_length": 13131, + "min_text_length": 21, + "average_text_length": 59.41628959276018, + "max_text_length": 112, + "unique_texts": 221 + }, + "queries_image_statistics": null, + "relevant_docs_statistics": { + "num_relevant_docs": 726, + "min_relevant_docs_per_query": 1, + "average_relevant_docs_per_query": 3.2850678733031673, + "max_relevant_docs_per_query": 7, + "unique_relevant_docs": 575 + }, + "top_ranked_statistics": null + } +} \ No newline at end of file diff --git a/mteb/tasks/retrieval/kor/__init__.py b/mteb/tasks/retrieval/kor/__init__.py index 9f680addc0..ae6254e752 100644 --- a/mteb/tasks/retrieval/kor/__init__.py +++ b/mteb/tasks/retrieval/kor/__init__.py @@ -1,5 +1,19 @@ from .auto_rag_retrieval import AutoRAGRetrieval from .ko_strategy_qa import KoStrategyQA +from .kovidore2_bench_retrieval import ( + KoVidore2CybersecurityRetrieval, + 
KoVidore2EconomicRetrieval, + KoVidore2EnergyRetrieval, + KoVidore2HrRetrieval, +) from .squad_kor_v1_retrieval import SQuADKorV1Retrieval -__all__ = ["AutoRAGRetrieval", "KoStrategyQA", "SQuADKorV1Retrieval"] +__all__ = [ + "AutoRAGRetrieval", + "KoStrategyQA", + "KoVidore2CybersecurityRetrieval", + "KoVidore2EconomicRetrieval", + "KoVidore2EnergyRetrieval", + "KoVidore2HrRetrieval", + "SQuADKorV1Retrieval", +] diff --git a/mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py b/mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py new file mode 100644 index 0000000000..1e904a92bf --- /dev/null +++ b/mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py @@ -0,0 +1,142 @@ +from mteb.abstasks.retrieval import AbsTaskRetrieval +from mteb.abstasks.task_metadata import TaskMetadata + + +class KoVidore2CybersecurityRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="KoVidore2CybersecurityRetrieval", + description="Retrieve associated pages according to questions. This dataset, Cybersecurity, is a corpus of technical reports on cyber threat trends and security incident responses in Korea, intended for complex-document understanding tasks.", + reference="https://github.com/whybe-choi/kovidore-data-generator", + dataset={ + "path": "whybe-choi/kovidore-v2-cybersecurity-mteb", + "revision": "577d7c45f79d8eb4e7584db3990f91daa7e47956", + }, + type="DocumentUnderstanding", + category="t2i", + eval_splits=["test"], + eval_langs=["kor-Hang"], + main_score="ndcg_at_10", + date=("2025-12-21", "2026-01-06"), + domains=["Social"], + task_subtypes=["Image Text Retrieval"], + license="cc-by-4.0", + annotations_creators="derived", + dialect=[], + modalities=["text", "image"], + sample_creation="created", + bibtex_citation=""" +@misc{choi2026kovidorev2, + author = {Yongbin Choi}, + note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains}, + title = {KoViDoRe v2: a comprehensive evaluation of vision document 
retrieval for enterprise use-cases}, + url = {https://github.com/whybe-choi/kovidore-data-generator}, + year = {2026}, +} +""", + prompt={"query": "Find a screenshot that is relevant to the user's question."}, + ) + + +class KoVidore2EconomicRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="KoVidore2EconomicRetrieval", + description="Retrieve associated pages according to questions. This dataset, Economic trends, is a corpus of periodic reports on major economic indicators in Korea, intended for complex-document understanding tasks.", + reference="https://github.com/whybe-choi/kovidore-data-generator", + dataset={ + "path": "whybe-choi/kovidore-v2-economic-mteb", + "revision": "0189c26211290a902cd9d41a0db932808a54c0a8", + }, + type="DocumentUnderstanding", + category="t2i", + eval_splits=["test"], + eval_langs=["kor-Hang"], + main_score="ndcg_at_10", + date=("2025-12-21", "2026-01-06"), + domains=["Social"], + task_subtypes=["Image Text Retrieval"], + license="cc-by-4.0", + annotations_creators="derived", + dialect=[], + modalities=["text", "image"], + sample_creation="created", + bibtex_citation=""" +@misc{choi2026kovidorev2, + author = {Yongbin Choi}, + note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains}, + title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases}, + url = {https://github.com/whybe-choi/kovidore-data-generator}, + year = {2026}, +} +""", + prompt={"query": "Find a screenshot that is relevant to the user's question."}, + ) + + +class KoVidore2EnergyRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="KoVidore2EnergyRetrieval", + description="Retrieve associated pages according to questions. 
This dataset, Energy, is a corpus of reports on energy market trends, policy planning, and industry statistics, intended for complex-document understanding tasks.", + reference="https://github.com/whybe-choi/kovidore-data-generator", + dataset={ + "path": "whybe-choi/kovidore-v2-energy-mteb", + "revision": "f967fa70b5cf287d6d39ec16520786cb78e971a4", + }, + type="DocumentUnderstanding", + category="t2i", + eval_splits=["test"], + eval_langs=["kor-Hang"], + main_score="ndcg_at_10", + date=("2025-12-21", "2026-01-06"), + domains=["Social"], + task_subtypes=["Image Text Retrieval"], + license="cc-by-4.0", + annotations_creators="derived", + dialect=[], + modalities=["text", "image"], + sample_creation="created", + bibtex_citation=""" +@misc{choi2026kovidorev2, + author = {Yongbin Choi}, + note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains}, + title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases}, + url = {https://github.com/whybe-choi/kovidore-data-generator}, + year = {2026}, +} +""", + prompt={"query": "Find a screenshot that is relevant to the user's question."}, + ) + + +class KoVidore2HrRetrieval(AbsTaskRetrieval): + metadata = TaskMetadata( + name="KoVidore2HrRetrieval", + description="Retrieve associated pages according to questions. 
This dataset, HR, is a corpus of reports on workforce outlook and employment policy in Korea, intended for complex-document understanding tasks.", + reference="https://github.com/whybe-choi/kovidore-data-generator", + dataset={ + "path": "whybe-choi/kovidore-v2-hr-mteb", + "revision": "d9432c782a9a3e2eed064f6fac08b4c967d92b99", + }, + type="DocumentUnderstanding", + category="t2i", + eval_splits=["test"], + eval_langs=["kor-Hang"], + main_score="ndcg_at_10", + date=("2025-12-21", "2026-01-06"), + domains=["Social"], + task_subtypes=["Image Text Retrieval"], + license="cc-by-4.0", + annotations_creators="derived", + dialect=[], + modalities=["text", "image"], + sample_creation="created", + bibtex_citation=""" +@misc{choi2026kovidorev2, + author = {Yongbin Choi}, + note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains}, + title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases}, + url = {https://github.com/whybe-choi/kovidore-data-generator}, + year = {2026}, +} +""", + prompt={"query": "Find a screenshot that is relevant to the user's question."}, + )