Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions mteb/benchmarks/benchmarks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
JINA_VDR,
JMTEB_LITE_V1,
JMTEB_V2,
KOVIDORE_V2,
LONG_EMBED,
MIEB_ENG,
MIEB_IMG,
Expand Down Expand Up @@ -79,6 +80,7 @@
"JINA_VDR",
"JMTEB_LITE_V1",
"JMTEB_V2",
"KOVIDORE_V2",
"LONG_EMBED",
"MIEB_ENG",
"MIEB_IMG",
Expand Down
24 changes: 24 additions & 0 deletions mteb/benchmarks/benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2728,3 +2728,27 @@
""",
contacts=["lsz05"],
)

# KoViDoRe v2 benchmark entry: bundles the four KoVidore2 retrieval tasks
# (cybersecurity, economic, energy, HR) under one benchmark name.
KOVIDORE_V2 = Benchmark(
    name="KoViDoRe(v2)",
    display_name="KoViDoRe v2",
    tasks=get_tasks(
        tasks=[
            "KoVidore2CybersecurityRetrieval",
            "KoVidore2EconomicRetrieval",
            "KoVidore2EnergyRetrieval",
            "KoVidore2HrRetrieval",
        ]
    ),
    # Adjacent literals are joined by the parser into the same single string.
    description=(
        "KoViDoRe v2 sets a new industry gold standard for multi-modal, "
        "enterprise document visual retrieval evaluation. "
        "It addresses a critical challenge in production RAG systems: "
        "retrieving accurate information from complex, visually-rich documents."
    ),
    reference="https://github.com/whybe-choi/kovidore-data-generator",
    citation=r"""
@misc{choi2026kovidorev2,
author = {Yongbin Choi},
note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
url = {https://github.com/whybe-choi/kovidore-data-generator},
year = {2026},
}
""",
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"test": {
"num_samples": 1299,
"number_of_characters": 9254,
"documents_text_statistics": null,
"documents_image_statistics": {
"min_image_width": 2245,
"average_image_width": 2370.324347826087,
"max_image_width": 3508,
"min_image_height": 2481,
"average_image_height": 3289.8060869565215,
"max_image_height": 3580,
"unique_images": 1132
},
"queries_text_statistics": {
"total_text_length": 9254,
"min_text_length": 15,
"average_text_length": 62.10738255033557,
"max_text_length": 108,
"unique_texts": 149
},
"queries_image_statistics": null,
"relevant_docs_statistics": {
"num_relevant_docs": 409,
"min_relevant_docs_per_query": 1,
"average_relevant_docs_per_query": 2.7449664429530203,
"max_relevant_docs_per_query": 7,
"unique_relevant_docs": 316
},
"top_ranked_statistics": null
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"test": {
"num_samples": 1640,
"number_of_characters": 8331,
"documents_text_statistics": null,
"documents_image_statistics": {
"min_image_width": 2313,
"average_image_width": 2347.5321597833445,
"max_image_width": 2481,
"min_image_height": 3138,
"average_image_height": 3214.301963439404,
"max_image_height": 3508,
"unique_images": 1442
},
"queries_text_statistics": {
"total_text_length": 8331,
"min_text_length": 23,
"average_text_length": 51.11042944785276,
"max_text_length": 110,
"unique_texts": 163
},
"queries_image_statistics": null,
"relevant_docs_statistics": {
"num_relevant_docs": 413,
"min_relevant_docs_per_query": 1,
"average_relevant_docs_per_query": 2.5337423312883436,
"max_relevant_docs_per_query": 6,
"unique_relevant_docs": 349
},
"top_ranked_statistics": null
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"test": {
"num_samples": 2101,
"number_of_characters": 10417,
"documents_text_statistics": null,
"documents_image_statistics": {
"min_image_width": 2221,
"average_image_width": 2305.0387231815803,
"max_image_width": 2480,
"min_image_height": 3036,
"average_image_height": 3186.1962323390894,
"max_image_height": 3508,
"unique_images": 1900
},
"queries_text_statistics": {
"total_text_length": 10417,
"min_text_length": 22,
"average_text_length": 54.82631578947368,
"max_text_length": 103,
"unique_texts": 189
},
"queries_image_statistics": null,
"relevant_docs_statistics": {
"num_relevant_docs": 571,
"min_relevant_docs_per_query": 1,
"average_relevant_docs_per_query": 3.0052631578947366,
"max_relevant_docs_per_query": 7,
"unique_relevant_docs": 464
},
"top_ranked_statistics": null
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"test": {
"num_samples": 2330,
"number_of_characters": 13131,
"documents_text_statistics": null,
"documents_image_statistics": {
"min_image_width": 1949,
"average_image_width": 2430.1152204836417,
"max_image_width": 3505,
"min_image_height": 2480,
"average_image_height": 3350.3921289710765,
"max_image_height": 3626,
"unique_images": 2096
},
"queries_text_statistics": {
"total_text_length": 13131,
"min_text_length": 21,
"average_text_length": 59.41628959276018,
"max_text_length": 112,
"unique_texts": 221
},
"queries_image_statistics": null,
"relevant_docs_statistics": {
"num_relevant_docs": 726,
"min_relevant_docs_per_query": 1,
"average_relevant_docs_per_query": 3.2850678733031673,
"max_relevant_docs_per_query": 7,
"unique_relevant_docs": 575
},
"top_ranked_statistics": null
}
}
16 changes: 15 additions & 1 deletion mteb/tasks/retrieval/kor/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
from .auto_rag_retrieval import AutoRAGRetrieval
from .ko_strategy_qa import KoStrategyQA
from .kovidore2_bench_retrieval import (
KoVidore2CybersecurityRetrieval,
KoVidore2EconomicRetrieval,
KoVidore2EnergyRetrieval,
KoVidore2HrRetrieval,
)
from .squad_kor_v1_retrieval import SQuADKorV1Retrieval

# Public names re-exported by this package, kept in alphabetical order.
# A single assignment only: an earlier one-line `__all__` that was
# immediately overwritten by this list has been dropped as a dead store.
__all__ = [
    "AutoRAGRetrieval",
    "KoStrategyQA",
    "KoVidore2CybersecurityRetrieval",
    "KoVidore2EconomicRetrieval",
    "KoVidore2EnergyRetrieval",
    "KoVidore2HrRetrieval",
    "SQuADKorV1Retrieval",
]
142 changes: 142 additions & 0 deletions mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
from mteb.abstasks.retrieval import AbsTaskRetrieval
from mteb.abstasks.task_metadata import TaskMetadata


class KoVidore2CybersecurityRetrieval(AbsTaskRetrieval):
    # KoViDoRe v2, cybersecurity subset: Korean text queries are matched
    # against document pages (modalities text + image), evaluated on the
    # "test" split with nDCG@10 as the main score.
    metadata = TaskMetadata(
        name="KoVidore2CybersecurityRetrieval",
        # Adjacent literals are joined by the parser into one string.
        description=(
            "Retrieve associated pages according to questions. "
            "This dataset, Cybersecurity, is a corpus of technical reports on cyber "
            "threat trends and security incident responses in Korea, intended for "
            "complex-document understanding tasks."
        ),
        reference="https://github.com/whybe-choi/kovidore-data-generator",
        # Dataset location plus the exact revision to load.
        dataset={
            "path": "whybe-choi/kovidore-v2-cybersecurity-mteb",
            "revision": "577d7c45f79d8eb4e7584db3990f91daa7e47956",
        },
        type="DocumentUnderstanding",
        category="t2i",  # presumably text query -> image document; confirm
        eval_splits=["test"],
        eval_langs=["kor-Hang"],
        main_score="ndcg_at_10",
        date=("2025-12-21", "2026-01-06"),
        domains=["Social"],
        task_subtypes=["Image Text Retrieval"],
        license="cc-by-4.0",
        annotations_creators="derived",
        dialect=[],
        modalities=["text", "image"],
        sample_creation="created",
        bibtex_citation="""
@misc{choi2026kovidorev2,
author = {Yongbin Choi},
note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
url = {https://github.com/whybe-choi/kovidore-data-generator},
year = {2026},
}
""",
        # Instruction attached to the query side only.
        prompt={"query": "Find a screenshot that is relevant to the user's question."},
    )


class KoVidore2EconomicRetrieval(AbsTaskRetrieval):
    # KoViDoRe v2, economic-trends subset: Korean text queries are matched
    # against document pages (modalities text + image), evaluated on the
    # "test" split with nDCG@10 as the main score.
    metadata = TaskMetadata(
        name="KoVidore2EconomicRetrieval",
        # Adjacent literals are joined by the parser into one string.
        description=(
            "Retrieve associated pages according to questions. "
            "This dataset, Economic trends, is a corpus of periodic reports on major "
            "economic indicators in Korea, intended for complex-document "
            "understanding tasks."
        ),
        reference="https://github.com/whybe-choi/kovidore-data-generator",
        # Dataset location plus the exact revision to load.
        dataset={
            "path": "whybe-choi/kovidore-v2-economic-mteb",
            "revision": "0189c26211290a902cd9d41a0db932808a54c0a8",
        },
        type="DocumentUnderstanding",
        category="t2i",  # presumably text query -> image document; confirm
        eval_splits=["test"],
        eval_langs=["kor-Hang"],
        main_score="ndcg_at_10",
        date=("2025-12-21", "2026-01-06"),
        domains=["Social"],
        task_subtypes=["Image Text Retrieval"],
        license="cc-by-4.0",
        annotations_creators="derived",
        dialect=[],
        modalities=["text", "image"],
        sample_creation="created",
        bibtex_citation="""
@misc{choi2026kovidorev2,
author = {Yongbin Choi},
note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
url = {https://github.com/whybe-choi/kovidore-data-generator},
year = {2026},
}
""",
        # Instruction attached to the query side only.
        prompt={"query": "Find a screenshot that is relevant to the user's question."},
    )


class KoVidore2EnergyRetrieval(AbsTaskRetrieval):
    # KoViDoRe v2, energy subset: Korean text queries are matched against
    # document pages (modalities text + image), evaluated on the "test"
    # split with nDCG@10 as the main score.
    metadata = TaskMetadata(
        name="KoVidore2EnergyRetrieval",
        # Adjacent literals are joined by the parser into one string.
        description=(
            "Retrieve associated pages according to questions. "
            "This dataset, Energy, is a corpus of reports on energy market trends, "
            "policy planning, and industry statistics, intended for "
            "complex-document understanding tasks."
        ),
        reference="https://github.com/whybe-choi/kovidore-data-generator",
        # Dataset location plus the exact revision to load.
        dataset={
            "path": "whybe-choi/kovidore-v2-energy-mteb",
            "revision": "f967fa70b5cf287d6d39ec16520786cb78e971a4",
        },
        type="DocumentUnderstanding",
        category="t2i",  # presumably text query -> image document; confirm
        eval_splits=["test"],
        eval_langs=["kor-Hang"],
        main_score="ndcg_at_10",
        date=("2025-12-21", "2026-01-06"),
        domains=["Social"],
        task_subtypes=["Image Text Retrieval"],
        license="cc-by-4.0",
        annotations_creators="derived",
        dialect=[],
        modalities=["text", "image"],
        sample_creation="created",
        bibtex_citation="""
@misc{choi2026kovidorev2,
author = {Yongbin Choi},
note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
url = {https://github.com/whybe-choi/kovidore-data-generator},
year = {2026},
}
""",
        # Instruction attached to the query side only.
        prompt={"query": "Find a screenshot that is relevant to the user's question."},
    )


class KoVidore2HrRetrieval(AbsTaskRetrieval):
    # KoViDoRe v2, HR subset: Korean text queries are matched against
    # document pages (modalities text + image), evaluated on the "test"
    # split with nDCG@10 as the main score.
    metadata = TaskMetadata(
        name="KoVidore2HrRetrieval",
        # "Korea" is capitalised here to match the sibling task descriptions;
        # the text is user-facing, so the proper noun should be spelled correctly.
        description="Retrieve associated pages according to questions. This dataset, HR, is a corpus of reports on workforce outlook and employment policy in Korea, intended for complex-document understanding tasks.",
        reference="https://github.com/whybe-choi/kovidore-data-generator",
        # Dataset location plus the exact revision to load.
        dataset={
            "path": "whybe-choi/kovidore-v2-hr-mteb",
            "revision": "d9432c782a9a3e2eed064f6fac08b4c967d92b99",
        },
        type="DocumentUnderstanding",
        category="t2i",  # presumably text query -> image document; confirm
        eval_splits=["test"],
        eval_langs=["kor-Hang"],
        main_score="ndcg_at_10",
        date=("2025-12-21", "2026-01-06"),
        domains=["Social"],
        task_subtypes=["Image Text Retrieval"],
        license="cc-by-4.0",
        annotations_creators="derived",
        dialect=[],
        modalities=["text", "image"],
        sample_creation="created",
        bibtex_citation="""
@misc{choi2026kovidorev2,
author = {Yongbin Choi},
note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
url = {https://github.com/whybe-choi/kovidore-data-generator},
year = {2026},
}
""",
        # Instruction attached to the query side only.
        prompt={"query": "Find a screenshot that is relevant to the user's question."},
    )