diff --git a/brown_gd_to_conll.py b/brown_gd_to_conll.py index 4d1df97..ea96bd7 100644 --- a/brown_gd_to_conll.py +++ b/brown_gd_to_conll.py @@ -7,8 +7,8 @@ import re import sys from collections import namedtuple -from gdtools.acainn import Lemmatizer -from gdtools.acainn import Features +from gd_tools.acainn import Lemmatizer +from gd_tools.acainn import Features from pyconll.unit import Conll Split = namedtuple("split", "form1 upos1 xpos1 form2 upos2 xpos2") diff --git a/brown_gd_to_dot_ccg.py b/brown_gd_to_dot_ccg.py index 9a8932b..8495860 100644 --- a/brown_gd_to_dot_ccg.py +++ b/brown_gd_to_dot_ccg.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import pickle import sys -from gdtools.acainn import Lemmatizer, Retagger, Subcat, Typer +from gd_tools.acainn import Lemmatizer, Retagger, Subcat, Typer def tidy_word(string): """outputs string suitable for XMLification further down the pipeline""" diff --git a/checker.py b/checker.py index 4ff6420..6cb9146 100644 --- a/checker.py +++ b/checker.py @@ -1,6 +1,6 @@ import numpy as np import pandas as pd -from gdtools.acainn import Morphology +from gd_tools.acainn import Morphology class Checker(): # for simple matches diff --git a/fix_feats.py b/fix_feats.py index 90e4918..5eaef7b 100644 --- a/fix_feats.py +++ b/fix_feats.py @@ -1,7 +1,7 @@ import re import sys import pyconll -from gdtools.acainn import Features +from gd_tools.acainn import Features f = Features() diff --git a/lemmatise.py b/lemmatise.py index 56db424..c8f58b3 100644 --- a/lemmatise.py +++ b/lemmatise.py @@ -1,7 +1,7 @@ """Overwrites the lemmata in a CoNLL-U file based on the form and XPOS.""" import sys import pyconll -from gdtools.acainn import Lemmatizer +from gd_tools.acainn import Lemmatizer corpus = pyconll.load_from_file(sys.argv[1]) l = Lemmatizer() diff --git a/readme.md b/readme.md index 64e58f5..572ef16 100644 --- a/readme.md +++ b/readme.md @@ -19,6 +19,9 @@ In practice I have postprocessed the results with the following Python 3 scripts There is 
one small test tree bank in `ud`: * `gd_iomasgladh-ud-test.conllu` is a hand-built corpus from 2014 which has been converted to UD. +The lemmatiser, the code to convert ARCOSG parts of speech to UD features, and the categorial grammar code are now in the https://github.com/colinbatchelor/gd_tools repository. + + Earlier work -- ### gramaran @@ -37,8 +40,6 @@ Each sentence has three lines beginning with hashes preceding it. These are an I The guidelines used for the construction of the corpus in LaTeX format. Currently no special packages are used for it. - - * `brown_gd_to_dot_ccg.py` takes a Brown-format corpus assuming ARCOSG tags and outputs a .ccg file * `mend_xml.py` fixes the output of OpenCCG's ccg2xml. * `prepareARCOSG.py` takes a local installation of the Annotated Reference Corpus of Scottish Gaelic (ARCOSG), replaces spaces within tokens with underscores and puts the results in `arcosg.pkl`. @@ -61,5 +62,5 @@ The citation for the material in `ccg` and `gramaran` is: Colin Batchelor -2024-02-06 +2024-02-07 diff --git a/test_checker.py b/test_checker.py index 801aeb8..f236552 100644 --- a/test_checker.py +++ b/test_checker.py @@ -1,5 +1,5 @@ import unittest -from gdtools.acainn import Morphology +from gd_tools.acainn import Morphology from checker import Checker import numpy as np