Skip to content

Commit

Permalink
added test and call for stemmer
Browse files Browse the repository at this point in the history
  • Loading branch information
bdalal authored and miso-belica committed Oct 28, 2019
1 parent b00a177 commit a4dff96
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 3 deletions.
6 changes: 5 additions & 1 deletion sumy/summarizers/sum_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@ def _get_all_words_in_doc(sentences):
def _get_content_words_in_sentence(self, sentence):
    """Return the content words of *sentence*, normalized and stemmed.

    The words of the sentence go through three steps, in this order:
    ``self._normalize_words``, ``self._filter_out_stop_words`` and
    ``self._stem_words``. The result of the last step is returned.
    """
    normalized_words = self._normalize_words(sentence.words)
    normalized_content_words = self._filter_out_stop_words(normalized_words)
    # Stemming must be the final step; returning before it (as the stale
    # early ``return`` did) would leave the stemmer unused.
    return self._stem_words(normalized_content_words)

def _stem_words(self, words):
    """Apply ``self.stem_word`` to each word and return the results as a list.

    The order of *words* is preserved.
    """
    stemmed = []
    for word in words:
        stemmed.append(self.stem_word(word))
    return stemmed

def _normalize_words(self, words):
    """Apply ``self.normalize_word`` to each word and return the results as a list.

    The order of *words* is preserved.
    """
    return list(self.normalize_word(word) for word in words)
Expand Down
13 changes: 11 additions & 2 deletions tests/test_summarizers/test_sum_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,16 @@
from sumy.models.dom._sentence import Sentence
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.sum_basic import SumBasicSummarizer
from sumy.nlp.stemmers import Stemmer
from ..utils import build_document


EMPTY_STOP_WORDS = []
COMMON_STOP_WORDS = ["the", "and", "i"]


def _build_summarizer(stop_words, stemmer=None):
    """Create a ``SumBasicSummarizer`` configured with *stop_words*.

    When *stemmer* is ``None`` the summarizer is constructed without
    arguments; otherwise *stemmer* is passed to the constructor. Callers
    that pass only *stop_words* keep working unchanged.
    """
    if stemmer is None:
        summarizer = SumBasicSummarizer()
    else:
        summarizer = SumBasicSummarizer(stemmer)
    summarizer.stop_words = stop_words
    return summarizer

Expand Down Expand Up @@ -48,6 +49,14 @@ def test_normalize_words():
assert words_normalized == words_correctly_normalized


def test_stemmer():
    """Check that content words are stemmed only when a stemmer is supplied.

    With an English ``Stemmer`` the word 'testing' is expected to come back
    as 'test'; a summarizer built without a stemmer is expected to return
    it unchanged.
    """
    sentence = Sentence('testing', Tokenizer('english'))
    stemming_summarizer = _build_summarizer(EMPTY_STOP_WORDS, Stemmer('english'))
    plain_summarizer = _build_summarizer(EMPTY_STOP_WORDS)

    stemmed_words = stemming_summarizer._get_content_words_in_sentence(sentence)
    assert stemmed_words == ['test']

    unstemmed_words = plain_summarizer._get_content_words_in_sentence(sentence)
    assert unstemmed_words == ['testing']


def test_filter_out_stop_words():
summarizer = _build_summarizer(COMMON_STOP_WORDS)
sentence = "the dog and i went on a walk"
Expand Down

0 comments on commit a4dff96

Please sign in to comment.