diff --git a/.deepsource.toml b/.deepsource.toml index 1037570..8c179c3 100644 --- a/.deepsource.toml +++ b/.deepsource.toml @@ -1,10 +1,9 @@ version = 1 +test_patterns = ["**/*_test.go"] + [[analyzers]] name = "go" [analyzers.meta] - import_root = "github.com/pablodz/itn" - -[[transformers]] -name = "gofumpt" \ No newline at end of file + import_root = "github.com/sopro-dev/sopro-core" diff --git a/.github/workflows/tagger.yml b/.github/workflows/tagger.yml new file mode 100644 index 0000000..5648743 --- /dev/null +++ b/.github/workflows/tagger.yml @@ -0,0 +1,31 @@ +name: tagger +on: + push: + branches: + - main +permissions: + contents: write + +jobs: + tagger: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Create tag + id: tag + run: | + VERSION_FILE=".version" + VERSION_VALUE=$(cat $VERSION_FILE) + MAX_BRANCH_LENGTH=40 + FIXED_BRANCH=$(echo ${GITHUB_REF:11:${MAX_BRANCH_LENGTH}} | sed 's/[^[:alnum:]]/-/g') + NEW_TAG=$(echo "$VERSION_VALUE-${FIXED_BRANCH}.$(date +%Y%m%d-%H%M%S)") + echo "NEW_TAG=$NEW_TAG" >> $GITHUB_ENV + + - name: Push tag + run: | + git config --local user.email "actions@github.com" + git config --local user.name "GitHub Actions" + git tag ${{ env.NEW_TAG }} + git push origin ${{ env.NEW_TAG }} diff --git a/.version b/.version new file mode 100644 index 0000000..a1c2c6a --- /dev/null +++ b/.version @@ -0,0 +1 @@ +v0.1.1 \ No newline at end of file diff --git a/LICENSE b/LICENSE index 4b7df60..3ef92ac 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2024 Pablo +Copyright (c) 2024 Pablo & text2num developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 0a77692..a080f18 100644 --- a/README.md +++ b/README.md @@ -1 +1,18 @@ -# itn \ No newline at end of file +# Inverse Text Normalization + +## Installation + +```bash +go get 
// Language bundles the word tables and separators that the parsers consult
// when turning spelled-out numbers of one natural language into digits.
type Language struct {
	// Multipliers maps scale words (e.g. "mil") to their numeric factor.
	Multipliers map[string]int
	// Units maps the words for 1-9 to their value.
	Units map[string]int
	// STens maps the single-word numbers from 10 upwards (10-29 in Spanish).
	STens map[string]int
	// MTens maps the words for the round tens (30, 40, ... 90).
	MTens map[string]int
	// MTensWSTens lists the MTens words that may be followed by an STens word.
	MTensWSTens []string
	// Hundred maps the words for hundreds to their value.
	Hundred map[string]int
	// MHundreds is looked up by the value parser; NewLanguageES leaves it unset.
	MHundreds map[string]int
	// Numbers is the union of Multipliers, Units, STens, MTens and Hundred.
	Numbers map[string]int
	// Sign maps sign words to the symbol emitted in the output.
	Sign map[string]string
	// Zero lists the words that stand for the digit zero.
	Zero []string
	// DecimalSep is the spoken decimal separator; DecimalSYM is what is written.
	DecimalSep string
	DecimalSYM string
	// AndNums lists the words that may follow And inside a number.
	AndNums []string
	// And is the conjunction accepted inside a number ("treinta y uno").
	And string
	// NeverIfAlone lists words that are left untouched when they stand alone.
	NeverIfAlone []string
	// Relaxed maps a word to its RelaxTuple for the relaxed parsing mode.
	Relaxed map[string]RelaxTuple

	// Simplify_check_coef_appliable switches isCoefAppliable to the simple
	// "coef != current value" rule.
	Simplify_check_coef_appliable bool
}

// NewLanguageES returns the Spanish word tables.
//
// Numbers is derived from the other tables instead of being spelled out a
// second time, so a word added to Multipliers, Units, STens, MTens or Hundred
// is automatically recognised as a number.
func NewLanguageES() *Language {
	l := &Language{
		Multipliers: map[string]int{
			"mil":      1000,
			"miles":    1000,
			"millon":   1000000,
			"millón":   1000000,
			"millones": 1000000,
		},
		Units: map[string]int{
			"uno":    1,
			"dos":    2,
			"tres":   3,
			"cuatro": 4,
			"cinco":  5,
			"seis":   6,
			"siete":  7,
			"ocho":   8,
			"nueve":  9,
			"un":     1, // optional
			"una":    1, // optional
		},
		STens: map[string]int{
			"diez":         10,
			"once":         11,
			"doce":         12,
			"trece":        13,
			"catorce":      14,
			"quince":       15,
			"dieciseis":    16,
			"diecisiete":   17,
			"dieciocho":    18,
			"diecinueve":   19,
			"veinte":       20,
			"veintiuno":    21,
			"veintidos":    22,
			"veintitres":   23,
			"veinticuatro": 24,
			"veinticinco":  25,
			"veintiseis":   26,
			"veintisiete":  27,
			"veintiocho":   28,
			"veintinueve":  29,
			"dieciséis":    16, // with accent
			"veintidós":    22, // with accent
			"veintitrés":   23, // with accent
			"veintiséis":   26, // with accent
		},
		MTens: map[string]int{
			"treinta":   30,
			"cuarenta":  40,
			"cincuenta": 50,
			"sesenta":   60,
			"setenta":   70,
			"ochenta":   80,
			"noventa":   90,
		},
		MTensWSTens: []string{},
		Hundred: map[string]int{
			"cien":          100,
			"ciento":        100,
			"cienta":        100,
			"doscientos":    200,
			"trescientos":   300,
			"cuatrocientos": 400,
			"quinientos":    500,
			"seiscientos":   600,
			"setecientos":   700,
			"ochocientos":   800,
			"novecientos":   900,
			"doscientas":    200, // with feminine
			"trescientas":   300, // with feminine
			"cuatrocientas": 400, // with feminine
			"quinientas":    500, // with feminine
			"seiscientas":   600, // with feminine
			"setecientas":   700, // with feminine
			"ochocientas":   800, // with feminine
			"novecientas":   900, // with feminine
		},
		Sign: map[string]string{
			"mas":   "+",
			"más":   "+", // with accent
			"menos": "-",
		},
		Zero: []string{
			"cero",
		},
		DecimalSep: "coma",
		DecimalSYM: ".",
		AndNums: []string{
			"un",
			"uno",
			"una",
			"dos",
			"tres",
			"cuatro",
			"cinco",
			"seis",
			"siete",
			"ocho",
			"nueve",
		},
		And: "y",
		NeverIfAlone: []string{
			"un",
			"uno",
			"una",
		},
		Relaxed: map[string]RelaxTuple{},
	}

	// Build the union table from the individual ones so they cannot drift apart.
	groups := []map[string]int{l.Multipliers, l.Units, l.STens, l.MTens, l.Hundred}
	size := 0
	for _, group := range groups {
		size += len(group)
	}
	l.Numbers = make(map[string]int, size)
	for _, group := range groups {
		for word, value := range group {
			l.Numbers[word] = value
		}
	}

	return l
}

// RelaxTuple is the value type of Language.Relaxed. The value parser compares
// Zero with the look-ahead word and stores it as the word to skip, and looks
// One up in Language.Numbers.
type RelaxTuple struct {
	Zero string
	One  string
}
// LookAhead pairs a token with the token that immediately follows it.
type LookAhead struct {
	Word  string
	Ahead string
}

// lookAhead returns one LookAhead per token, in order. Ahead holds the next
// token, or "" for the last one. The result is always non-nil, and empty when
// tokens is empty.
func lookAhead(tokens []string) []LookAhead {
	pairs := make([]LookAhead, 0, len(tokens))
	for i, token := range tokens {
		next := ""
		if i+1 < len(tokens) {
			next = tokens[i+1]
		}
		pairs = append(pairs, LookAhead{Word: token, Ahead: next})
	}
	return pairs
}
numBuilder.push(strings.ToLower(couple.Word), strings.ToLower(couple.Ahead)) + if pushed { + log.Printf("> condition 1: word %s ahead %s", couple.Word, couple.Ahead) + inNumber = true + } else if inNumber { + log.Printf("> condition 2: word %s ahead %s", couple.Word, couple.Ahead) + outTokens = append(outTokens, numBuilder.GetValue()) + numBuilder = NewWordToDigitParser(lg, relaxed, signed, ordinalThreshold, lastWord) + inNumber = numBuilder.push(strings.ToLower(couple.Word), strings.ToLower(couple.Ahead)) + } + + if !inNumber { + log.Printf("> condition 3: word %s ahead %s", couple.Word, couple.Ahead) + outTokens = append(outTokens, couple.Word) + } + + lastWord = strings.ToLower(couple.Word) + + log.Printf("... lastWord %s, inNumber %t, outTokens %v", lastWord, inNumber, outTokens) + + } + + log.Printf("---") + numBuilder.close() + if numBuilder.GetValue() != "" { + outTokens = append(outTokens, numBuilder.GetValue()) + } + + outSegments = append(outSegments, strings.Join(outTokens, " ")) + outSegments = append(outSegments, sp.punct) + + } + text = strings.Join(outSegments, "") + + return text +} diff --git a/itn/es_test.go b/itn/es_test.go new file mode 100644 index 0000000..b8b6014 --- /dev/null +++ b/itn/es_test.go @@ -0,0 +1,113 @@ +package itn + +import ( + "testing" +) + +func TestAlpha2Digit(t *testing.T) { + type test struct { + input string + output string + } + + tests := []test{ + { + input: "uno coma uno", + output: "1.1", + }, + { + input: "uno coma cuatrocientos uno", + output: "1.401", + }, + { + input: "veinticinco vacas, doce gallinas y ciento veinticinco kg de patatas.", + output: "25 vacas, 12 gallinas y 125 kg de patatas.", + }, + { + input: "Habían trescientos hombres y quinientas mujeres", + output: "Habían 300 hombres y 500 mujeres", + }, + { + input: "mil doscientos sesenta y seis dolares.", + output: "1266 dolares.", + }, + { + input: "un dos tres cuatro veinte quince", + output: "1 2 3 4 20 15", + }, + { + input: "veintiuno, treinta y 
uno.", + output: "21, 31.", + }, + { + input: "un dos tres cuatro treinta cinco.", + output: "1 2 3 4 35.", + }, + { + input: "un dos tres cuatro veinte, cinco.", + output: "1 2 3 4 20, 5.", + }, + { + input: "treinta y cuatro = treinta cuatro", + output: "34 = 34", + }, + { + input: "mas treinta y tres nueve sesenta cero seis doce veintiuno", + output: "+33 9 60 06 12 21", + }, + { + input: "cero nueve sesenta cero seis doce veintiuno", + output: "09 60 06 12 21", + }, + { + input: "cincuenta sesenta treinta y once", + output: "50 60 30 y 11", + }, + { + input: "trece mil cero noventa", + output: "13000 090", + }, + { + input: "cero", + output: "0", + }, + { + input: "doce coma noventa y nueve, ciento veinte coma cero cinco, uno coma doscientos treinta y seis, uno coma dos tres seis.", + output: "12.99, 120.05, 1.236, 1.2 3 6.", + }, + { + input: "coma quince", + output: "0.15", + }, + { + input: "Tenemos mas veinte grados dentro y menos quince fuera.", + output: "Tenemos +20 grados dentro y -15 fuera.", + }, + { + input: "Un momento por favor! treinta y un gatos. Uno dos tres cuatro!", + output: "Un momento por favor! 31 gatos. 1 2 3 4!", + }, + { + input: "Ni uno. Uno uno. Treinta y uno", + output: "Ni uno. 1 1. 
31", + }, + { + input: "un millon", + output: "1000000", + }, + { + input: "un millón", + output: "1000000", + }, + } + + for _, tt := range tests { + processor := NewLanguageES() + new_string := processor.Alpha2Digit(tt.input, false, true, 3) + if new_string != tt.output { + t.Errorf("❌ Expected <%s>, got <%s>", tt.output, new_string) + } else { + t.Logf("✅ Expected <%s>, got <%s>", tt.output, new_string) + } + } +} diff --git a/itn/parsers.go b/itn/parsers.go new file mode 100644 index 0000000..629077c --- /dev/null +++ b/itn/parsers.go @@ -0,0 +1,287 @@ +package itn + +import ( + "fmt" + "log" + "strings" +) + +type WordStreamValueParser struct { + Skip string + n000Val int + grpVal int + lastWord string + lang Language + relaxed bool +} + +func NewWordStreamValueParser(lang Language, relaxed bool) *WordStreamValueParser { + return &WordStreamValueParser{ + lang: lang, + relaxed: relaxed, + } +} + +func (w *WordStreamValueParser) GetValue() int { + log.Printf("+ WordStreamValueParser.GetValue") + return w.n000Val + w.grpVal +} + +func (w *WordStreamValueParser) groupExpects(word string, update bool) bool { + log.Printf("+ WordStreamValueParser.groupExpects.word %s [lastWord] %s [update] %t", word, w.lastWord, update) + expected := false + if w.lastWord == "" { + log.Printf(">> WordStreamValueParser.groupExpects.condition 0: [word]%s [lastWord] %s [update] %t", word, w.lastWord, update) + expected = true + } else if containsKey(w.lang.Units, w.lastWord) && w.grpVal < 10 || containsKey(w.lang.STens, w.lastWord) && w.grpVal < 20 { + log.Printf(">> WordStreamValueParser.groupExpects.condition 1: [word]%s [lastWord] %s [update] %t", word, w.lastWord, update) + expected = containsKey(w.lang.Hundred, word) + } else if containsKey(w.lang.MHundreds, w.lastWord) { + log.Printf(">> WordStreamValueParser.groupExpects.condition 2: [word]%s [lastWord] %s [update] %t", word, w.lastWord, update) + expected = true + } else if containsKey(w.lang.MTens, w.lastWord) { + 
log.Printf(">> WordStreamValueParser.groupExpects.condition 3: [word]%s [lastWord] %s [update] %t", word, w.lastWord, update) + expected = containsKey(w.lang.Units, word) || containsKey(w.lang.STens, word) && contains(w.lang.MTensWSTens, w.lastWord) + } else if containsKey(w.lang.Hundred, w.lastWord) { + log.Printf(">> WordStreamValueParser.groupExpects.condition 4: [word]%s [lastWord] %s [update] %t", word, w.lastWord, update) + expected = !containsKey(w.lang.Hundred, word) + } + + if update { + log.Printf(">> WordStreamValueParser.groupExpects.condition 5: [word]%s [lastWord] %s [update] %t", word, w.lastWord, update) + w.lastWord = word + } + + return expected +} + +func (w *WordStreamValueParser) isCoefAppliable(coef int) bool { + log.Printf("+ WordStreamValueParser.isCoefAppliable.coef %d", coef) + if w.lang.Simplify_check_coef_appliable { + return coef != w.GetValue() + } + + if coef > w.GetValue() && (w.GetValue() > 0 || coef >= 100) { + return true + } + + if coef*1000 <= w.n000Val || coef == 100 && 100 > w.grpVal { + return (w.grpVal > 0 || coef == 1000 || coef == 100) + } + + return false +} + +func (w *WordStreamValueParser) push(word string, lookAhead string) bool { + log.Printf("+ WordStreamValueParser.push.word %s [ahead] %s [lastWord] %s", word, lookAhead, w.lastWord) + + if word == "" { + log.Printf(">> WordStreamValueParser.push.condition 0: [word]%s [ahead] %s", word, lookAhead) + return false + } + + if word == w.lang.And && contains(w.lang.AndNums, lookAhead) { + log.Printf(">> WordStreamValueParser.push.condition 1: [word]%s [ahead] %s", word, lookAhead) + return true + } + + word = w.lang.Normalize(word) + if !containsKey(w.lang.Numbers, word) { + log.Printf(">> WordStreamValueParser.push.condition 2: [word]%s [ahead] %s", word, lookAhead) + return false + } + + RELAXED := w.lang.Relaxed + if containsKey(w.lang.Multipliers, word) { + log.Printf(">> WordStreamValueParser.push.condition 3: [word]%s [ahead] %s", word, lookAhead) + coef := 
w.lang.Multipliers[word] + log.Printf(">>> WordStreamValueParser.push.coef %d", coef) + if !w.isCoefAppliable(coef) { + log.Printf(">> WordStreamValueParser.push.condition 3.1: [word]%s [ahead] %s", word, lookAhead) + return false + } + + if coef < 1000 { + value := w.grpVal + if value == 0 { + value = 1 + } + w.grpVal = value * coef + w.lastWord = "" + log.Printf(">> WordStreamValueParser.push.condition 3.2: [word]%s [ahead] %s", word, lookAhead) + return true + } + if coef < w.n000Val { + value := w.n000Val + if value == 0 { + value = 1 + } + w.n000Val = w.n000Val + coef*(value) + } else { + value := w.GetValue() + if value == 0 { + value = 1 + } + w.n000Val = value * coef + } + w.grpVal = 0 + w.lastWord = "" + } else if w.relaxed && containsKey(RELAXED, word) && lookAhead != "" && strings.HasPrefix(RELAXED[word].Zero, lookAhead) && w.groupExpects(RELAXED[word].One, false) { + log.Printf(">> WordStreamValueParser.push.condition 4: [word]%s [ahead] %s", word, lookAhead) + w.Skip = RELAXED[word].Zero + w.grpVal = w.grpVal + w.lang.Numbers[RELAXED[word].One] + } else if w.Skip != "" && strings.HasPrefix(w.Skip, word) { + log.Printf(">> WordStreamValueParser.push.condition 5: [word]%s [ahead] %s", word, lookAhead) + w.Skip = "" + } else if w.groupExpects(word, true) { + log.Printf(">> WordStreamValueParser.push.condition 6: [word]%s [ahead] %s", word, lookAhead) + if containsKey(w.lang.Hundred, word) { + log.Printf(">> WordStreamValueParser.push.condition 6.1: [word]%s [ahead] %s", word, lookAhead) + if w.grpVal != 0 { + w.grpVal = 100 * w.grpVal + } else { + w.grpVal = w.lang.Hundred[word] + } + } else if containsKey(w.lang.MHundreds, word) { + log.Printf(">> WordStreamValueParser.push.condition 6.2: [word]%s [ahead] %s", word, lookAhead) + w.grpVal = w.lang.MHundreds[word] + } else { + log.Printf(">> WordStreamValueParser.push.condition 6.3: [word]%s [ahead] %s", word, lookAhead) + w.grpVal = w.grpVal + w.lang.Numbers[word] + log.Printf(">>> 
WordStreamValueParser.push.grpVal %d", w.grpVal) + } + } else { + log.Printf(">> WordStreamValueParser.push.condition 7: [word]%s [ahead] %s", word, lookAhead) + w.Skip = "" + return false + } + + log.Printf(">> WordStreamValueParser.push.condition 8: [word]%s [ahead] %s", word, lookAhead) + return true +} + +type WordToDigitParser struct { + Lang Language + the_value []string + IntBuilder *WordStreamValueParser + FracBuilder *WordStreamValueParser + Signed bool + InFrac bool + Closed bool + Open bool + lastWord string + OrdinalThreshold int +} + +func NewWordToDigitParser(lang Language, relaxed bool, signed bool, ordinalThreshold int, precedingWord string) WordToDigitParser { + return WordToDigitParser{ + Lang: lang, + the_value: []string{}, + IntBuilder: NewWordStreamValueParser(lang, relaxed), + FracBuilder: NewWordStreamValueParser(lang, relaxed), + Signed: signed, + InFrac: false, + Closed: false, + Open: false, + lastWord: precedingWord, + OrdinalThreshold: ordinalThreshold, + } +} + +func (w *WordToDigitParser) GetValue() string { + log.Printf("+ WordToDigitParser.GetValue") + return strings.Join(w.the_value, "") +} + +func (w *WordToDigitParser) close() { + log.Printf("+ WordToDigitParser.close") + if !w.Closed { + if w.InFrac && w.FracBuilder.GetValue() != 0 { + log.Printf(">> WordToDigitParser.close.condition 0: adding FracBuilder %d", w.FracBuilder.GetValue()) + w.the_value = append(w.the_value, fmt.Sprint(w.FracBuilder.GetValue())) + } else if !w.InFrac && w.IntBuilder.GetValue() != 0 { + log.Printf(">> WordToDigitParser.close.condition 1: adding IntBuilder %d", w.IntBuilder.GetValue()) + w.the_value = append(w.the_value, fmt.Sprint(w.IntBuilder.GetValue())) + } + w.Closed = true + } +} + +func (w *WordToDigitParser) atStartOfSeq() bool { + print(">> WordToDigitParser.atStartOfSeq") + return w.InFrac && w.FracBuilder.GetValue() == 0 || !w.InFrac && w.IntBuilder.GetValue() == 0 +} + +func (w *WordToDigitParser) atStart() bool { + print(">> 
WordToDigitParser.atStart") + return !w.Open +} + +func (w *WordToDigitParser) the_push(word string, lookAhead string) bool { + log.Printf("🌀 >> inFrac %v [word] %s [lookAhead] %s [lastWord] %s", w.InFrac, word, lookAhead, w.lastWord) + if w.InFrac { + builder := w.FracBuilder + return builder.push(word, lookAhead) + } else { + builder := w.IntBuilder + return builder.push(word, lookAhead) + } +} + +func (w *WordToDigitParser) isAlone(word string, nextWord string) bool { + return !w.Open && contains(w.Lang.NeverIfAlone, word) && w.Lang.NotNumericWord(nextWord) && w.Lang.NotNumericWord(w.lastWord) && !(nextWord == "" && w.lastWord == "") +} + +func (w *WordToDigitParser) push(word string, lookAhead string) bool { + if w.Closed || w.isAlone(word, lookAhead) { + log.Printf(">> WordToDigitParser.push.condition 0:[word]%s [ahead] %s [lastWord] %s", word, lookAhead, w.lastWord) + w.lastWord = word + return false + } + + if w.Signed && containsKey(w.Lang.Sign, word) && containsKey(w.Lang.Numbers, lookAhead) && w.atStart() { + log.Printf(">> WordToDigitParser.push.condition 1:[word]%s [ahead] %s [lastWord] %s", word, lookAhead, w.lastWord) + w.the_value = append(w.the_value, w.Lang.Sign[word]) + } else if contains(w.Lang.Zero, word) && w.atStartOfSeq() && lookAhead != "" && strings.Contains(w.Lang.DecimalSep, lookAhead) { + log.Printf(">> WordToDigitParser.push.condition 2:[word]%s [ahead] %s [lastWord] %s", word, lookAhead, w.lastWord) + } else if contains(w.Lang.Zero, word) && w.atStartOfSeq() { + log.Printf(">> WordToDigitParser.push.condition 3:[word]%s [ahead] %s [lastWord] %s", word, lookAhead, w.lastWord) + w.the_value = append(w.the_value, "0") + } else if w.the_push(w.Lang.Ord2Card(word), lookAhead) { + log.Printf(">> WordToDigitParser.push.condition 4:[word]%s [ahead] %s [lastWord] %s", word, lookAhead, w.lastWord) + value2Add := "" + if w.IntBuilder.GetValue() > w.OrdinalThreshold { + digits := 0 + if w.InFrac { + digits = w.FracBuilder.GetValue() + } else { + 
// contains reports whether word is an element of slice. A nil or empty slice
// contains nothing.
func contains(slice []string, word string) bool {
	for _, v := range slice {
		if v == word {
			return true
		}
	}
	return false
}

// containsKey reports whether key is present in dict, regardless of the value
// stored under it. It accepts a map with any value type; a nil map has no keys.
func containsKey[T any](dict map[string]T, key string) bool {
	_, ok := dict[key]
	return ok
}