diff --git a/flash/text/question_answering/cli.py b/flash/text/question_answering/cli.py index 0433a068c8..471cf13eca 100644 --- a/flash/text/question_answering/cli.py +++ b/flash/text/question_answering/cli.py @@ -25,12 +25,12 @@ def from_squad( num_workers: int = 0, **preprocess_kwargs, ) -> QuestionAnsweringData: - """Downloads and loads the XSum data set.""" - download_data("https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json", "./data/") - download_data("https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json", "./data/") + """Downloads and loads a tiny subset of the squad V2 data set.""" + download_data("https://pl-flash-data.s3.amazonaws.com/squad_tiny.zip", "./data/") + return QuestionAnsweringData.from_squad_v2( - train_file="./data/train-v2.0.json", - val_file="./data/dev-v2.0.json", + train_file="./data/squad_tiny/train.json", + val_file="./data/squad_tiny/val.json", backbone=backbone, batch_size=batch_size, num_workers=num_workers, diff --git a/flash_examples/question_answering.py b/flash_examples/question_answering.py index 78e119d108..8620e5aed1 100644 --- a/flash_examples/question_answering.py +++ b/flash_examples/question_answering.py @@ -16,12 +16,11 @@ from flash.text import QuestionAnsweringData, QuestionAnsweringTask # 1. Create the DataModule -download_data("https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json", "./data/") -download_data("https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json", "./data/") +download_data("https://pl-flash-data.s3.amazonaws.com/squad_tiny.zip", "./data/") datamodule = QuestionAnsweringData.from_squad_v2( - train_file="./data/train-v2.0.json", - val_file="./data/dev-v2.0.json", + train_file="./data/squad_tiny/train.json", + val_file="./data/squad_tiny/val.json", ) # 2. Build the task