Commit 10329f8

Add ByGroupNames function, same as ByGroups but use named groups (#519)
For named groups that are not given an Emitter, an Error token is emitted. This also handles the case where an Emitter is (or isn't) provided for group `0`, since numbers can also be used as names. That may be overkill, though: why would anyone use ByGroupNames if they wanted to assign a token to the whole match?!
1 parent 22cbca5 commit 10329f8
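
For context before the diffs: the sketch below shows how a rule wires up ByGroupNames. It is a minimal example lifted from the first case of the new TestByGroupNames further down, wrapped in a hypothetical helper (`sketchByGroupNames`) and written in the same in-package test style, so it assumes the test file's existing imports (the assert package) rather than being a standalone program.

func sketchByGroupNames(t *testing.T) {
	l := Coalesce(MustNewLexer(nil, Rules{ // nolint: forbidigo
		"root": {
			{
				// Three named groups; each name below maps to a token type.
				`(?<key>\w+)(?<operator>=)(?<value>\w+)`,
				ByGroupNames(map[string]Emitter{
					`key`:      String,
					`operator`: Operator,
					`value`:    String,
				}),
				nil,
			},
		},
	}))
	it, err := l.Tokenise(nil, `abc=123`)
	assert.NoError(t, err)
	// A named group with no entry in the map is emitted as Error instead.
	assert.Equal(t, []Token{{String, `abc`}, {Operator, `=`}, {String, `123`}}, it.Tokens())
}

If the regex has no named groups at all, the whole match goes through the `0` entry of the map when present, and is emitted as Error otherwise (the last case in the test exercises this).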

File tree

2 files changed: +113 -0 lines changed

Diff for: regexp.go (+28 lines)

@@ -52,6 +52,34 @@ func ByGroups(emitters ...Emitter) Emitter {
 	})
 }
 
+// ByGroupNames emits a token for each named matching group in the rule's regex.
+func ByGroupNames(emitters map[string]Emitter) Emitter {
+	return EmitterFunc(func(groups []string, state *LexerState) Iterator {
+		iterators := make([]Iterator, 0, len(state.NamedGroups)-1)
+		if len(state.NamedGroups)-1 == 0 {
+			if emitter, ok := emitters[`0`]; ok {
+				iterators = append(iterators, emitter.Emit(groups, state))
+			} else {
+				iterators = append(iterators, Error.Emit(groups, state))
+			}
+		} else {
+			ruleRegex := state.Rules[state.State][state.Rule].Regexp
+			for i := 1; i < len(state.NamedGroups); i++ {
+				groupName := ruleRegex.GroupNameFromNumber(i)
+				group := state.NamedGroups[groupName]
+				if emitter, ok := emitters[groupName]; ok {
+					if emitter != nil {
+						iterators = append(iterators, emitter.Emit([]string{group}, state))
+					}
+				} else {
+					iterators = append(iterators, Error.Emit([]string{group}, state))
+				}
+			}
+		}
+		return Concaterator(iterators...)
+	})
+}
+
 // UsingByGroup emits tokens for the matched groups in the regex using a
 // "sublexer". Used when lexing code blocks where the name of a sublexer is
 // contained within the block, for example on a Markdown text block or SQL

Diff for: regexp_test.go (+85 lines)

@@ -99,3 +99,88 @@ func TestEnsureLFFunc(t *testing.T) {
 		assert.Equal(t, out, test.out)
 	}
 }
+
+func TestByGroupNames(t *testing.T) {
+	l := Coalesce(MustNewLexer(nil, Rules{ // nolint: forbidigo
+		"root": {
+			{
+				`(?<key>\w+)(?<operator>=)(?<value>\w+)`,
+				ByGroupNames(map[string]Emitter{
+					`key`:      String,
+					`operator`: Operator,
+					`value`:    String,
+				}),
+				nil,
+			},
+		},
+	}))
+	it, err := l.Tokenise(nil, `abc=123`)
+	assert.NoError(t, err)
+	assert.Equal(t, []Token{{String, `abc`}, {Operator, `=`}, {String, `123`}}, it.Tokens())
+
+	l = Coalesce(MustNewLexer(nil, Rules{ // nolint: forbidigo
+		"root": {
+			{
+				`(?<key>\w+)(?<operator>=)(?<value>\w+)`,
+				ByGroupNames(map[string]Emitter{
+					`key`:   String,
+					`value`: String,
+				}),
+				nil,
+			},
+		},
+	}))
+	it, err = l.Tokenise(nil, `abc=123`)
+	assert.NoError(t, err)
+	assert.Equal(t, []Token{{String, `abc`}, {Error, `=`}, {String, `123`}}, it.Tokens())
+
+	l = Coalesce(MustNewLexer(nil, Rules{ // nolint: forbidigo
+		"root": {
+			{
+				`(?<key>\w+)=(?<value>\w+)`,
+				ByGroupNames(map[string]Emitter{
+					`key`:   String,
+					`value`: String,
+				}),
+				nil,
+			},
+		},
+	}))
+	it, err = l.Tokenise(nil, `abc=123`)
+	assert.NoError(t, err)
+	assert.Equal(t, []Token{{String, `abc123`}}, it.Tokens())
+
+	l = Coalesce(MustNewLexer(nil, Rules{ // nolint: forbidigo
+		"root": {
+			{
+				`(?<key>\w+)(?<op>=)(?<value>\w+)`,
+				ByGroupNames(map[string]Emitter{
+					`key`:      String,
+					`operator`: Operator,
+					`value`:    String,
+				}),
+				nil,
+			},
+		},
+	}))
+	it, err = l.Tokenise(nil, `abc=123`)
+	assert.NoError(t, err)
+	assert.Equal(t, []Token{{String, `abc`}, {Error, `=`}, {String, `123`}}, it.Tokens())
+
+	l = Coalesce(MustNewLexer(nil, Rules{ // nolint: forbidigo
+		"root": {
+			{
+				`\w+=\w+`,
+				ByGroupNames(map[string]Emitter{
+					`key`:      String,
+					`operator`: Operator,
+					`value`:    String,
+				}),
+				nil,
+			},
+		},
+	}))
+	it, err = l.Tokenise(nil, `abc=123`)
+	assert.NoError(t, err)
+	assert.Equal(t, []Token{{Error, `abc=123`}}, it.Tokens())
+}
