Skip to content

Commit

Permalink
Improve documentation to resolve #1
Browse files Browse the repository at this point in the history
  • Loading branch information
mam10eks authored May 2, 2023
1 parent 8bf8fb4 commit 22e3e3a
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions ir-datasets/tutorial/pangrams.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
"""This python file registers a new ir_datasets class 'pangrams'.
See https://github.com/allenai/ir_datasets/ for the ir_datasets documentation.
This file is intended to run inside the Docker image built during this tutorial; the Dockerfile copies this file and the data files loaded below to the correct locations.
"""
import ir_datasets
from ir_datasets.formats import JsonlDocs, TrecXmlQueries, TrecQrels
from typing import NamedTuple, Dict
from ir_datasets.util.download import RequestsDownload
from ir_datasets.datasets.base import Dataset

# Base URL of the tutorial directory; the data file names (e.g.
# 'pangram-documents.jsonl') are appended to it where the files are downloaded.
DATASET_URL = 'https://raw.githubusercontent.com/tira-io/ir-experiment-platform/main/ir-datasets/tutorial/'

class PangramDocument(NamedTuple):
doc_id: str
text: str
Expand All @@ -15,8 +16,7 @@ def default_text(self):
return self.text

# Register the dataset under the id 'pangrams'. The Dataset is built from three
# components: documents (JsonlDocs), queries (TrecXmlQueries) and relevance
# judgments (TrecQrels, with labels 0 = 'Not Relevant' and 1 = 'Relevant').
ir_datasets.registry.register('pangrams', Dataset(
# NOTE(review): this listing is a rendered diff without +/- markers. The three
# Download/RequestsDownload lines below appear to be the removed variant, which
# fetched each file from DATASET_URL with an expected MD5 -- TODO confirm.
JsonlDocs(ir_datasets.util.Download([RequestsDownload(DATASET_URL + 'pangram-documents.jsonl')], expected_md5='3f67adc5d99a7b6b7a410d4aefc8fe3b'), doc_cls=PangramDocument, lang='en'),
TrecXmlQueries(ir_datasets.util.Download([RequestsDownload(DATASET_URL + 'pangram-topics.xml')], expected_md5='411647769eabf8dbcaac85cdb734c50d'), lang='en'),
TrecQrels(ir_datasets.util.Download([RequestsDownload(DATASET_URL + 'pangram-qrels.txt')], expected_md5='2ef82edf2e8c1f6724e92d9f422b3f5f'), {0: 'Not Relevant', 1: 'Relevant'})
# NOTE(review): the three PackageDataFile lines below appear to be the added
# variant, which reads the same files from 'datasets_in_progress/' instead of
# downloading them -- TODO confirm.
JsonlDocs(ir_datasets.util.PackageDataFile(path='datasets_in_progress/pangram-documents.jsonl'), doc_cls=PangramDocument, lang='en'),
TrecXmlQueries(ir_datasets.util.PackageDataFile(path='datasets_in_progress/pangram-topics.xml'), lang='en'),
TrecQrels(ir_datasets.util.PackageDataFile(path='datasets_in_progress/pangram-qrels.txt'), {0: 'Not Relevant', 1: 'Relevant'})
))

0 comments on commit 22e3e3a

Please sign in to comment.