Skip to content

Commit 2cfca2e

Browse files
committed
bug: Make exists:false work
closes: #115 Signed-off-by: Tim Bray <[email protected]>
1 parent 9435a5e commit 2cfca2e

File tree

3 files changed

+304
-107
lines changed

3 files changed

+304
-107
lines changed

Diff for: core_matcher.go

+118-79
Original file line numberDiff line numberDiff line change
@@ -19,34 +19,48 @@ import (
1919
)
2020

2121
// coreMatcher uses a finite automaton to implement the matchesForJSONEvent and MatchesForFields functions.
22-
// state is the start of the automaton
23-
// namesUsed is a map of field names that are used in any of the patterns that this automaton encodes. Typically,
24-
// patterns only consider a subset of the fields in an incoming data object, and there is no reason to consider
25-
// fields that do not appear in patterns when using the automaton for matching
26-
// the updateable fields are grouped into the coreStart member so they can be updated atomically using atomic.Load()
22+
// The updateable fields are grouped into the coreFields member so they can be updated atomically using atomic.Load()
2723
// and atomic.Store(). This is necessary for coreMatcher to be thread-safe.
2824
type coreMatcher struct {
29-
updateable atomic.Value // always holds a *coreStart
25+
updateable atomic.Value // always holds a *coreFields
3026
lock sync.Mutex
3127
}
32-
type coreStart struct {
33-
state *fieldMatcher
34-
namesUsed map[string]bool
35-
presumedExistFalseMatches *matchSet
28+
29+
// coreFields groups the updateable fields in coreMatcher.
30+
// state is the start of the automaton.
31+
// namesUsed is a map of field names that are used in any of the patterns that this automaton encodes. Typically,
32+
// patterns only consider a subset of the fields in an incoming data object, and there is no reason to consider
33+
// fields that do not appear in patterns when using the automaton for matching.
34+
// fakeField is used when the flattener for an event returns no fields, because it could still match if
35+
// there were patterns with "exists":false. So in this case we run one fake field through the matcher
36+
// which will cause it to notice that any "exists":false patterns should match.
37+
type coreFields struct {
38+
state *fieldMatcher
39+
namesUsed map[string]bool
40+
fakeField []Field
3641
}
3742

3843
func newCoreMatcher() *coreMatcher {
44+
// because of the way the matcher works, to serve its purpose of ensuring that "exists":false matches
45+
// will be detected, the Path has to be lexically greater than any field path that appears in
46+
// "exists":false. The value with byteCeiling works because that byte can't actually appear in any
47+
// user-supplied path-name because it's not valid in UTF-8
48+
fake := Field{
49+
Path: []byte{byte(byteCeiling)},
50+
Val: []byte(""),
51+
ArrayTrail: []ArrayPos{{0, 0}},
52+
}
3953
m := coreMatcher{}
40-
m.updateable.Store(&coreStart{
41-
state: newFieldMatcher(),
42-
namesUsed: make(map[string]bool),
43-
presumedExistFalseMatches: newMatchSet(),
54+
m.updateable.Store(&coreFields{
55+
state: newFieldMatcher(),
56+
namesUsed: make(map[string]bool),
57+
fakeField: []Field{fake},
4458
})
4559
return &m
4660
}
4761

48-
func (m *coreMatcher) start() *coreStart {
49-
return m.updateable.Load().(*coreStart)
62+
func (m *coreMatcher) start() *coreFields {
63+
return m.updateable.Load().(*coreFields)
5064
}
5165

5266
// addPattern - the patternJSON is a JSON object. The X is what the matcher returns to indicate that the
@@ -67,36 +81,39 @@ func (m *coreMatcher) addPattern(x X, patternJSON string) error {
6781
defer m.lock.Unlock()
6882

6983
// we build up the new coreMatcher state in freshStart so we can atomically switch it in once complete
70-
freshStart := &coreStart{}
84+
freshStart := &coreFields{}
7185
freshStart.namesUsed = make(map[string]bool)
7286
current := m.start()
7387
freshStart.state = current.state
88+
freshStart.fakeField = current.fakeField
7489

7590
for k := range current.namesUsed {
7691
freshStart.namesUsed[k] = true
7792
}
7893
for used := range patternNamesUsed {
7994
freshStart.namesUsed[used] = true
8095
}
81-
freshStart.presumedExistFalseMatches = newMatchSet()
82-
for presumedExistsFalseMatch := range current.presumedExistFalseMatches.set {
83-
freshStart.presumedExistFalseMatches = freshStart.presumedExistFalseMatches.addX(presumedExistsFalseMatch)
84-
}
8596

8697
// now we add each of the name/value pairs in fields slice to the automaton, starting with the start state -
8798
// the addTransition for a field returns a list of the fieldMatchers transitioned to for that name/val
8899
// combo.
89100
states := []*fieldMatcher{current.state}
90101
for _, field := range patternFields {
91102
var nextStates []*fieldMatcher
92-
for _, state := range states {
93-
ns := state.addTransition(field)
94103

95-
// special handling for exists:false, in which case there can be only one val and one next state
96-
if field.vals[0].vType == existsFalseType {
97-
ns[0].addExistsFalseFailure(x)
98-
freshStart.presumedExistFalseMatches = freshStart.presumedExistFalseMatches.addX(x)
104+
// separate handling for field exists:true/false and regular field name/val matches. Since the exists
105+
// true/false are only allowed one value, we can test vals[0] to figure out which type
106+
for _, state := range states {
107+
var ns []*fieldMatcher
108+
switch field.vals[0].vType {
109+
case existsTrueType:
110+
ns = state.addExists(true, field)
111+
case existsFalseType:
112+
ns = state.addExists(false, field)
113+
default:
114+
ns = state.addTransition(field)
99115
}
116+
100117
nextStates = append(nextStates, ns...)
101118
}
102119
states = nextStates
@@ -106,9 +123,7 @@ func (m *coreMatcher) addPattern(x X, patternJSON string) error {
106123
// by matching each field in the pattern so update the matches value to indicate this (every end state
107124
// records the match; exists:false patterns no longer get special skipping here)
108125
for _, endState := range states {
109-
if !endState.fields().existsFalseFailures.contains(x) {
110-
endState.addMatch(x)
111-
}
126+
endState.addMatch(x)
112127
}
113128
m.updateable.Store(freshStart)
114129

@@ -129,76 +144,100 @@ func (m *coreMatcher) matchesForJSONEvent(event []byte) ([]X, error) {
129144
if err != nil {
130145
return nil, err
131146
}
147+
148+
// see the commentary on coreFields.fakeField and in newCoreMatcher for an explanation of this.
149+
// tl;dr: If the flattener returns no fields because there's nothing in the event that's mentioned in
150+
// any patterns, the event could still match if there are only "exists":false patterns.
151+
if len(fields) == 0 {
152+
fields = m.start().fakeField
153+
}
154+
132155
return m.matchesForFields(fields)
133156
}
134157

135-
// matchesForFields takes a list of Field structures and sorts them by pathname; the fields in a pattern to
136-
// matched are similarly sorted; thus running an automaton over them works
158+
// matchesForFields takes a list of Field structures, sorts them by pathname, and launches the field-matching
159+
// process. The fields in a pattern to match are similarly sorted; thus running an automaton over them works
137160
func (m *coreMatcher) matchesForFields(fields []Field) ([]X, error) {
138161
sort.Slice(fields, func(i, j int) bool { return string(fields[i].Path) < string(fields[j].Path) })
139-
return m.matchesForSortedFields(fields).matches(), nil
140-
}
162+
matches := newMatchSet()
141163

142-
// proposedTransition represents a suggestion that the name/value pair at fields[fieldIndex] might allow a transition
143-
// in the indicated state
144-
type proposedTransition struct {
145-
matcher *fieldMatcher
146-
fieldIndex int
164+
// for each of the fields, we'll try to match the automaton start state to that field - the tryToMatch
165+
// routine will, in the case that there's a match, call itself to see if subsequent fields after the
166+
// first matched will transition through the machine and eventually achieve a match
167+
s := m.start()
168+
for i := 0; i < len(fields); i++ {
169+
tryToMatch(fields, i, s.state, matches)
170+
}
171+
return matches.matches(), nil
147172
}
148173

149-
// matchesForSortedFields runs the provided list of name/value pairs against the automaton and returns
150-
// a possibly-empty list of the patterns that match
151-
func (m *coreMatcher) matchesForSortedFields(fields []Field) *matchSet {
152-
failedExistsFalseMatches := newMatchSet()
153-
matches := newMatchSet()
174+
// tryToMatch tries to match the field at fields[index] to the provided state. If it does match and generate
175+
// 1 or more transitions to other states, it calls itself recursively to see if any of the remaining fields
176+
// can continue the process by matching that state.
177+
func tryToMatch(fields []Field, index int, state *fieldMatcher, matches *matchSet) {
178+
stateFields := state.fields()
154179

155-
// The idea is that we add potential field transitions to the proposals list; any time such a transition
156-
// succeeds, i.e. matches a particular field and moves to a new state, we propose transitions from that
157-
// state on all the following fields in the event
158-
// Start by giving each field a chance to match against the start state. Doing it by pre-allocating the
159-
// proposals and filling in their values is observably faster than the more idiomatic append()
160-
proposals := make([]proposedTransition, len(fields))
161-
for i := range fields {
162-
proposals[i].fieldIndex = i
163-
proposals[i].matcher = m.start().state
180+
// transition on exists:true?
181+
existsTrans, ok := stateFields.existsTrue[string(fields[index].Path)]
182+
if ok {
183+
matches = matches.addXSingleThreaded(existsTrans.fields().matches...)
184+
for nextIndex := index + 1; nextIndex < len(fields); nextIndex++ {
185+
if noArrayTrailConflict(fields[index].ArrayTrail, fields[nextIndex].ArrayTrail) {
186+
tryToMatch(fields, nextIndex, existsTrans, matches)
187+
}
188+
}
164189
}
165190

166-
// as long as there are still potential transitions
167-
for len(proposals) > 0 {
168-
// go slices could usefully have a "pop" primitive
169-
lastIndex := len(proposals) - 1
170-
proposal := proposals[lastIndex]
171-
proposals = proposals[0:lastIndex]
191+
// an exists:false transition is possible if there is no matching field in the event
192+
// checkExistsFalse looks for exists:false paths absent from the event and follows their transitions
193+
checkExistsFalse(stateFields, fields, index, matches)
172194

173-
// generate the possibly-empty list of transitions from state on the name/value pair
174-
nextStates := proposal.matcher.transitionOn(&fields[proposal.fieldIndex])
195+
// try to transition through the machine
196+
nextStates := state.transitionOn(&fields[index])
175197

176-
// for each state in the set of transitions from the proposed state
177-
for _, nextState := range nextStates {
178-
// if arriving at this state means we've matched one or more patterns, record that fact
179-
matches = matches.addXSingleThreaded(nextState.fields().matches...)
198+
// for each state in the possibly-empty list of transitions from this state on fields[index]
199+
for _, nextState := range nextStates {
200+
nextStateFields := nextState.fields()
201+
matches = matches.addXSingleThreaded(nextStateFields.matches...)
180202

181-
// have we invalidated a presumed exists:false pattern?
182-
for existsMatch := range nextState.fields().existsFalseFailures.set {
183-
failedExistsFalseMatches = failedExistsFalseMatches.addXSingleThreaded(existsMatch)
203+
// for each state we've transitioned to, give each subsequent field a chance to
204+
// transition on it, assuming it's not in an object that's in a different element
205+
// of the same array
206+
for nextIndex := index + 1; nextIndex < len(fields); nextIndex++ {
207+
if noArrayTrailConflict(fields[index].ArrayTrail, fields[nextIndex].ArrayTrail) {
208+
tryToMatch(fields, nextIndex, nextState, matches)
184209
}
210+
}
211+
// now we've run out of fields to match this nextState against. But suppose it has an exists:false
212+
// transition, and it so happens that the exists:false pattern field is lexically larger than the other
213+
// fields and that in fact such a field does not exist. That state would be left hanging. So…
214+
checkExistsFalse(nextStateFields, fields, index, matches)
215+
}
216+
}
185217

186-
// for each state we've transitioned to, give each subsequent field a chance to
187-
// transition on it, assuming it's not in an object that's in a different element
188-
// of the same array
189-
for nextIndex := proposal.fieldIndex + 1; nextIndex < len(fields); nextIndex++ {
190-
if noArrayTrailConflict(fields[proposal.fieldIndex].ArrayTrail, fields[nextIndex].ArrayTrail) {
191-
proposals = append(proposals, proposedTransition{fieldIndex: nextIndex, matcher: nextState})
218+
func checkExistsFalse(stateFields *fmFields, fields []Field, index int, matches *matchSet) {
219+
for existsFalsePath, existsFalseTrans := range stateFields.existsFalse {
220+
// it seems like there ought to be a more state-machine-idiomatic way to do this but
221+
// I thought of a few and none of them worked. Quite likely someone will figure it out eventually.
222+
var i int
223+
var thisFieldIsAnExistsFalse bool
224+
for i = 0; i < len(fields); i++ {
225+
if string(fields[i].Path) == existsFalsePath {
226+
if i == index {
227+
thisFieldIsAnExistsFalse = true
192228
}
229+
break
193230
}
194231
}
195-
}
196-
for presumedExistsFalseMatch := range m.start().presumedExistFalseMatches.set {
197-
if !failedExistsFalseMatches.contains(presumedExistsFalseMatch) {
198-
matches = matches.addXSingleThreaded(presumedExistsFalseMatch)
232+
if i == len(fields) {
233+
matches = matches.addXSingleThreaded(existsFalseTrans.fields().matches...)
234+
if thisFieldIsAnExistsFalse {
235+
tryToMatch(fields, index+1, existsFalseTrans, matches)
236+
} else {
237+
tryToMatch(fields, index, existsFalseTrans, matches)
238+
}
199239
}
200240
}
201-
return matches
202241
}
203242

204243
func noArrayTrailConflict(from []ArrayPos, to []ArrayPos) bool {

0 commit comments

Comments
 (0)