Skip to content

Commit 65235fb

Browse files
fix language detection and tweak tests
1 parent 0ed8a37 commit 65235fb

File tree

2 files changed

+12
-11
lines changed

2 files changed

+12
-11
lines changed

lib/treat/workers/extractors/language/what_language.rb

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
# Language detection using a probabilistic algorithm
2-
# that checks for the presence of words with Bloom
2+
# that checks for the presence of words with Bloom
33
# filters built from dictionaries for each language.
44
#
5-
# Original paper: Grothoff. 2007. A Quick Introduction to
6-
# Bloom Filters. Department of Computer Sciences, Purdue
5+
# Original paper: Grothoff. 2007. A Quick Introduction to
6+
# Bloom Filters. Department of Computer Sciences, Purdue
77
# University.
88
class Treat::Workers::Extractors::Language::WhatLanguage
99

@@ -35,7 +35,7 @@ def self.language(entity, options = {})
3535

3636
options = DefaultOptions.merge(options)
3737

38-
@@detector ||= ::WhatLanguage.new(:possibilities)
38+
@@detector ||= ::WhatLanguage.new(:all)
3939
possibilities = @@detector.process_text(entity.to_s)
4040
lang = {}
4141

spec/entities/entity.rb

+8-7
Original file line numberDiff line numberDiff line change
@@ -434,14 +434,15 @@ module Treat::Specs::Entities
434434
it "guesses the language of the entity" do
435435

436436
Treat.core.language.detect = true
437-
a = 'I want to know God\'s thoughts; the rest are details. - Albert Einstein'
438-
b = 'El mundo de hoy no tiene sentido, asi que por que deberia pintar cuadros que lo tuvieran? - Pablo Picasso'
439-
c = 'Un bon Allemand ne peut souffrir les Francais, mais il boit volontiers les vins de France. - Goethe'
440-
d = 'Wir haben die Kunst, damit wir nicht an der Wahrheit zugrunde gehen. - Friedrich Nietzsche'
437+
a = 'I want to know God\'s thoughts; the rest are details.' # Albert Einstein
438+
b = 'El mundo de hoy no tiene sentido, asi que por que deberia pintar cuadros que lo tuvieran?' # Pablo Picasso
439+
c = 'Un bon Allemand ne peut souffrir les Francais, mais il boit volontiers les vins de France.' # Goethe
440+
d = 'Wir haben die Kunst, damit wir nicht an der Wahrheit zugrunde gehen.' # Friedrich Nietzsche
441+
441442
a.language.should eql :english
442-
#b.language.should eql :spanish
443-
#c.language.should eql :french
444-
#d.language.should eql :german
443+
b.language.should eql :spanish
444+
c.language.should eql :french
445+
d.language.should eql :german
445446

446447
# Reset default
447448
Treat.core.language.detect = false

0 commit comments

Comments
 (0)