Skip to content

Commit

Permalink
rebase to original fork
Browse files: browse the repository at this point in the history
  • Loading branch information
souvikg10 committed Oct 18, 2023
1 parent 7520daa commit a0cd873
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 2 deletions.
5 changes: 5 additions & 0 deletions Duckling/Numeral/DE/Corpus.hs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ allExamples = concat
, "Eine"
, "einen"
]
, examples (NumeralValue 2)
[ "2"
, "Zwei"
, "Zwo"
]
, examples (NumeralValue 3)
[ "3"
, "Drei"
Expand Down
6 changes: 5 additions & 1 deletion Duckling/Numeral/DE/NumParser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ two =
, times10 = [assign 20 "zwanzig"]
}

-- | Lexicon entry mapping the alternative word "zwo" to the value 2.
--
-- NOTE(review): 'two' overrides @times10@ explicitly, whereas this entry
-- keeps every field exactly as 'defaultNumItem' builds it for the stem
-- "zwo" -- confirm that those defaults do not admit unintended compound
-- forms.
two_alternative :: NumItem
two_alternative = defaultNumItem 2 "zwo"


three :: NumItem
three =
(defaultNumItem 3 "drei")
Expand Down Expand Up @@ -139,7 +143,7 @@ nine :: NumItem
nine = defaultNumItem 9 "neun"

-- | Lexicon entries for the number words 'one' through 'nine'; 'from1to9'
-- folds this list into a single parser.
--
-- NOTE(review): 'from1to9' combines the entries with a right fold over
-- '<|>', so an entry listed earlier becomes a left operand of '<|>' --
-- presumably it is tried first; confirm before reordering entries.
digitLexicon :: [NumItem]
digitLexicon = [one, two, three, four, five, six, seven, eight, nine]
digitLexicon = [one, two_alternative, two, three, four, five, six, seven, eight, nine]

-- | Parser built from the @base@ parser of every entry in 'digitLexicon'.
--
-- The entries are chained right-to-left with '<|>', with 'empty' as the
-- innermost alternative.
from1to9 :: NumParser
from1to9 = foldr orBase empty digitLexicon
  where
    -- Put the entry's base parser in front of the alternatives built so far.
    orBase item rest = base item <|> rest
Expand Down
11 changes: 10 additions & 1 deletion Duckling/Numeral/DE/Rules.hs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,15 @@ ruleDecimalNumeral = Rule
_ -> Nothing
}

-- | Rule that yields the integer 2 for text matching "zwei" or "zwo".
--
-- The matched tokens are ignored; the produced value is always 2.
ruleTwo :: Rule
ruleTwo = Rule
  { name = "integer 2"
  , pattern = [regex "(zwei|zwo)"]
  , prod = const (integer 2)
  }

ruleNumeralsUnd :: Rule
ruleNumeralsUnd = Rule
{ name = "numbers und"
Expand Down Expand Up @@ -218,7 +227,7 @@ ruleIntegerWithThousandsSeparator = Rule
ruleAllNumeralWords :: Rule
ruleAllNumeralWords = Rule
{ name = "simple and complex numerals written as one word"
, pattern = [regex "(ein|zwei|drei|vier|fünf|sech|sieb|acht|neun|zehn|elf|zwölf|hundert|tausend)?([^\\s]+)?(eine[m|n|r|s]?|eins?|zwei|drei|vier|fünf|sechs|sieben|acht|neun|zehn|elf|zwölf|[s|ß|z]ig|hundert|tausend)"]
, pattern = [regex "(ein|zwei|zwo|drei|vier|fünf|sech|sieb|acht|neun|zehn|elf|zwölf|hundert|tausend)?([^\\s]+)?(eine[m|n|r|s]?|eins?|zwei|drei|vier|fünf|sechs|sieben|acht|neun|zehn|elf|zwölf|[s|ß|z]ig|hundert|tausend)"]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch matches) : _) ->
(parseNumeral $ concat $ Text.unpack . Text.toLower <$> matches)
Expand Down

0 comments on commit a0cd873

Please sign in to comment.