Skip to content

Commit

Permalink
Add some more French lemmas
Browse files: browse the repository at this point in the history
  • Loading branch information
AngledLuffa committed Jan 11, 2025
1 parent 56fc83e commit 37b0416
Showing 1 changed file with 5 additions and 0 deletions.
5 changes: 5 additions & 0 deletions stanza/utils/datasets/prepare_tokenizer_treebank.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -888,6 +888,11 @@ def build_extra_combined_french_dataset(paths, model_type, dataset):
handparsed_sentences = read_sentences_from_conllu(handparsed_path)
print("Loaded %d sentences from %s" % (len(handparsed_sentences), handparsed_path))
sents.extend(handparsed_sentences)

handparsed_path = os.path.join(handparsed_dir, "french-lemmas", "french1st_6thGrade.conllu")
handparsed_sentences = read_sentences_from_conllu(handparsed_path)
print("Loaded %d sentences from %s" % (len(handparsed_sentences), handparsed_path))
sents.extend(handparsed_sentences)
return sents

def build_extra_combined_german_dataset(paths, model_type, dataset):
Expand Down

0 comments on commit 37b0416

Please sign in to comment.