Skip to content

Commit ff6eedb

Browse files
authored
Fix: sort words in descending order of length before regex generation (#496)
* Fix: sort words in descending order of length before regex generation
* Avoid code duplication in Raku lexer
1 parent 225e186 commit ff6eedb

File tree

2 files changed

+9
-15
lines changed

2 files changed

+9
-15
lines changed

Diff for: lexers/r/raku.go

+5-15
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package r
22

33
import (
44
"regexp"
5-
"sort"
65
"strings"
76
"unicode/utf8"
87

@@ -70,7 +69,7 @@ func rakuRules() Rules {
7069
`dynamic-scope`, `built`, `temp`,
7170
}
7271

73-
keywordsPattern := Words(`(?<!['\w:-])`, `(?!['\w:-])`, sortWords(keywords)...)
72+
keywordsPattern := Words(`(?<!['\w:-])`, `(?!['\w:-])`, keywords...)
7473

7574
wordOperators := []string{
7675
`X`, `Z`, `R`, `after`, `and`, `andthen`, `before`, `cmp`, `div`, `eq`, `eqv`, `extra`, `ge`,
@@ -80,7 +79,7 @@ func rakuRules() Rules {
8079
`(cont)`, `(<)`, `(<=)`, `(>)`, `(>=)`, `minmax`, `notandthen`, `S`,
8180
}
8281

83-
wordOperatorsPattern := Words(`(?<=^|\b|\s)`, `(?=$|\b|\s)`, sortWords(wordOperators)...)
82+
wordOperatorsPattern := Words(`(?<=^|\b|\s)`, `(?=$|\b|\s)`, wordOperators...)
8483

8584
operators := []string{
8685
`++`, `--`, `-`, `**`, `!`, `+`, `~`, `?`, `+^`, `~^`, `?^`, `^`, `*`, `/`, `%`, `%%`, `+&`,
@@ -93,7 +92,7 @@ func rakuRules() Rules {
9392
`⊃`, `⊅`, `⊇`, `⊉`, `:`, `!!!`, `???`, `¯`, `×`, `÷`, `−`, `⁺`, `⁻`,
9493
}
9594

96-
operatorsPattern := Words(``, ``, sortWords(operators)...)
95+
operatorsPattern := Words(``, ``, operators...)
9796

9897
builtinTypes := []string{
9998
`False`, `True`, `Order`, `More`, `Less`, `Same`, `Any`, `Array`, `Associative`, `AST`,
@@ -142,7 +141,7 @@ func rakuRules() Rules {
142141
`strict`, `trace`, `variables`,
143142
}
144143

145-
builtinTypesPattern := Words(`(?<!['\w:-])`, `(?::[_UD])?(?!['\w:-])`, sortWords(builtinTypes)...)
144+
builtinTypesPattern := Words(`(?<!['\w:-])`, `(?::[_UD])?(?!['\w:-])`, builtinTypes...)
146145

147146
builtinRoutines := []string{
148147
`ACCEPTS`, `abs`, `abs2rel`, `absolute`, `accept`, `accepts_type`, `accessed`, `acos`,
@@ -266,7 +265,7 @@ func rakuRules() Rules {
266265
`yyyy-mm-dd`, `z`, `zip`, `zip-latest`, `HOW`, `s`, `DEPRECATED`, `trait_mod`,
267266
}
268267

269-
builtinRoutinesPattern := Words(`(?<!['\w:-])`, `(?!['\w-])`, sortWords(builtinRoutines)...)
268+
builtinRoutinesPattern := Words(`(?<!['\w:-])`, `(?!['\w-])`, builtinRoutines...)
270269

271270
// A map of opening and closing brackets
272271
brackets := map[rune]rune{
@@ -1197,15 +1196,6 @@ func joinRuneMap(m map[rune]rune) string {
11971196
return string(runes)
11981197
}
11991198

1200-
// Sorts words in descending order
1201-
func sortWords(words []string) []string {
1202-
sort.Slice(words, func(i, j int) bool {
1203-
return len([]rune(words[i])) > len([]rune(words[j]))
1204-
})
1205-
1206-
return words
1207-
}
1208-
12091199
// Finds the index of substring in the string starting at position n
12101200
func indexAt(str []rune, substr []rune, pos int) int {
12111201
text := string(str[pos:])

Diff for: regexp.go

+4
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"fmt"
55
"os"
66
"regexp"
7+
"sort"
78
"strings"
89
"sync"
910
"time"
@@ -141,6 +142,9 @@ func UsingSelf(stateName string) Emitter {
141142

142143
// Words creates a regex that matches any of the given literal words.
143144
func Words(prefix, suffix string, words ...string) string {
145+
sort.Slice(words, func(i, j int) bool {
146+
return len(words[j]) < len(words[i])
147+
})
144148
for i, word := range words {
145149
words[i] = regexp.QuoteMeta(word)
146150
}

0 commit comments

Comments
 (0)