Skip to content
11 changes: 11 additions & 0 deletions mteb/descriptive_stats/Retrieval/AutoRAGRetrieval.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"test": {
"number_of_characters": 894.2168128654971,
"num_samples": 834,
"num_queries": 114,
"num_documents": 720,
"average_document_length": 1.1452816358024691,
"average_query_length": 0.610649430594029,
"average_relevant_docs_per_query": 1.0
}
}
1 change: 1 addition & 0 deletions mteb/tasks/Retrieval/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@
from .jpn.NLPJournalTitleAbsRetrieval import *
from .jpn.NLPJournalTitleIntroRetrieval import *
from .kat.GeorgianFAQRetrieval import *
from .kor.AutoRAGRetrieval import *
from .kor.KoStrategyQA import *
from .multilingual.BelebeleRetrieval import *
from .multilingual.CrossLingualSemanticDiscriminationWMT19 import *
Expand Down
40 changes: 40 additions & 0 deletions mteb/tasks/Retrieval/kor/AutoRAGRetrieval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from __future__ import annotations

from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval


class AutoRAGRetrieval(AbsTaskRetrieval):
    """Retrieval task definition registered under the name ``AutoRAGRetrieval``.

    The class is purely declarative: it defines no methods of its own and only
    attaches a ``TaskMetadata`` record (dataset location, evaluated split and
    language, main score, provenance and citation) to the
    ``AbsTaskRetrieval`` base class.
    """

    metadata = TaskMetadata(
        # Identity and human-readable summary of the task.
        name="AutoRAGRetrieval",
        description="This dataset enables the evaluation of Korean RAG performance across various domains—finance, public sector, healthcare, legal, and commerce—by providing publicly accessible documents, questions, and answers.",
        reference="https://arxiv.org/abs/2410.20878",
        # Dataset location; the revision is pinned to a specific commit hash.
        dataset={
            "path": "yjoonjang/markers_bm",
            "revision": "fd7df84ac089bbec763b1c6bb1b56e985df5cc5c",
        },
        # Task typing and evaluation configuration.
        type="Retrieval",
        prompt="Retrieve text based on user query.",
        category="s2p",
        modalities=["text"],
        eval_splits=["test"],
        eval_langs=["kor-Hang"],
        main_score="ndcg_at_10",
        # Provenance. Start and end date are the same day.
        date=("2024-08-03", "2024-08-03"),
        # NOTE(review): the description above also mentions finance and
        # commerce, which have no counterpart in this list - confirm whether a
        # matching domain value exists in TaskMetadata and should be added.
        domains=["Government", "Medical", "Legal", "Social"],
        task_subtypes=["Article retrieval"],
        license="mit",
        annotations_creators="human-annotated",
        dialect=[],
        sample_creation="created",
        # The continuation lines of the citation stay at the column they
        # occupy in the original so the string content is unchanged.
        bibtex_citation="""@misc{kim2024autoragautomatedframeworkoptimization,
title={AutoRAG: Automated Framework for optimization of Retrieval Augmented Generation Pipeline},
author={Dongkyu Kim and Byoungwook Kim and Donggeon Han and Matouš Eibich},
year={2024},
eprint={2410.20878},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2410.20878},
}""",
    )