diff --git a/hazm/dependency_parser.py b/hazm/dependency_parser.py index 572f2a88..8910415b 100644 --- a/hazm/dependency_parser.py +++ b/hazm/dependency_parser.py @@ -16,6 +16,10 @@ from tqdm import tqdm from typing import List, Tuple import os +import spacy +from spacy.tokens import Doc +from tqdm import tqdm +from typing import List, Tuple class MaltParser(NLTKMaltParser): """این کلاس شامل توابعی برای شناسایی وابستگی‌های دستوری است. @@ -133,7 +137,7 @@ def parse_tagged_sents( raise Exception("MaltParser parsing failed: %s" % " ".join(cmd)) return ( - DependencyGraph(item, top_relation_label='root') + DependencyGraph(item,top_relation_label='root') for item in open(output_file.name, encoding="utf8").read().split("\n\n") # noqa: SIM115, PTH123 if item.strip() ) @@ -318,7 +322,7 @@ def parse_sents(self: MaltParser, sentences: str, verbose: bool = False) -> str: for sentence in sentences: self._add_sentence2dict(sentence) - tagged_sentences = self.tagger.tag_sents(sentences,universal_tag=True) + tagged_sentences = self.tagger.tag_sents(sentences) return self.parse_tagged_sents(tagged_sentences, verbose) @@ -332,8 +336,8 @@ def _spacy_to_conll(self,doc): token.i + 1, token.text.replace(" ", "_"), self.lemmatize(token.text, token.pos_).replace(" ", "_"), - token.pos_, - token.pos_, + token.tag_, + token.tag_, "_", head_id, token.dep_, @@ -357,12 +361,13 @@ def parse_tagged_sents(self: "SpacyDependencyParser", sentences: List[List[Tuple for doc_id , doc in enumerate(docs): pos_tags = [tag for w , tag in sentences[doc_id]] for i in range(len(doc)): - docs[doc_id][i].pos_ = pos_tags[i] + docs[doc_id][i].pos_ = pos_tags[i].split(',')[0] + docs[doc_id][i].tag_ = pos_tags[i] conll_sample = self._spacy_to_conll(docs[doc_id]) conll_list.append(conll_sample) return ( - DependencyGraph(item) + DependencyGraph(item, top_relation_label='root') for item in conll_list if item.strip() ) \ No newline at end of file