@@ -19,34 +19,48 @@ import (
19
19
)
20
20
21
21
// coreMatcher uses a finite automaton to implement the matchesForJSONEvent and MatchesForFields functions.
22
- // state is the start of the automaton
23
- // namesUsed is a map of field names that are used in any of the patterns that this automaton encodes. Typically,
24
- // patterns only consider a subset of the fields in an incoming data object, and there is no reason to consider
25
- // fields that do not appear in patterns when using the automaton for matching
26
- // the updateable fields are grouped into the coreStart member so they can be updated atomically using atomic.Load()
22
+ // The updateable fields are grouped into the coreFields member so they can be updated atomically using atomic.Load()
27
23
// and atomic.Store(). This is necessary for coreMatcher to be thread-safe.
28
24
type coreMatcher struct {
29
- updateable atomic.Value // always holds a *coreStart
25
+ updateable atomic.Value // always holds a *coreFields
30
26
lock sync.Mutex
31
27
}
32
- type coreStart struct {
33
- state * fieldMatcher
34
- namesUsed map [string ]bool
35
- presumedExistFalseMatches * matchSet
28
+
29
+ // coreFields groups the updateable fields in coreMatcher.
30
+ // state is the start of the automaton.
31
+ // namesUsed is a map of field names that are used in any of the patterns that this automaton encodes. Typically,
32
+ // patterns only consider a subset of the fields in an incoming data object, and there is no reason to consider
33
+ // fields that do not appear in patterns when using the automaton for matching.
34
+ // fakeField is used when the flattener for an event returns no fields, because it could still match if
35
+ // there were patterns with "exists":false. So in this case we run one fake field through the matcher
36
+ // which will cause it to notice that any "exists":false patterns should match.
37
+ type coreFields struct {
38
+ state * fieldMatcher
39
+ namesUsed map [string ]bool
40
+ fakeField []Field
36
41
}
37
42
38
43
func newCoreMatcher () * coreMatcher {
44
+ // because of the way the matcher works, to serve its purpose of ensuring that "exists":false matches
45
+ // will be detected, the Path has to be lexically greater than any field path that appears in
46
+ // "exists":false. The value with byteCeiling works because that byte can't actually appear in any
47
+ // user-supplied path-name because it's not valid in UTF-8
48
+ fake := Field {
49
+ Path : []byte {byte (byteCeiling )},
50
+ Val : []byte ("" ),
51
+ ArrayTrail : []ArrayPos {{0 , 0 }},
52
+ }
39
53
m := coreMatcher {}
40
- m .updateable .Store (& coreStart {
41
- state : newFieldMatcher (),
42
- namesUsed : make (map [string ]bool ),
43
- presumedExistFalseMatches : newMatchSet () ,
54
+ m .updateable .Store (& coreFields {
55
+ state : newFieldMatcher (),
56
+ namesUsed : make (map [string ]bool ),
57
+ fakeField : [] Field { fake } ,
44
58
})
45
59
return & m
46
60
}
47
61
48
- func (m * coreMatcher ) start () * coreStart {
49
- return m .updateable .Load ().(* coreStart )
62
+ func (m * coreMatcher ) start () * coreFields {
63
+ return m .updateable .Load ().(* coreFields )
50
64
}
51
65
52
66
// addPattern - the patternJSON is a JSON object. The X is what the matcher returns to indicate that the
@@ -67,36 +81,39 @@ func (m *coreMatcher) addPattern(x X, patternJSON string) error {
67
81
defer m .lock .Unlock ()
68
82
69
83
// we build up the new coreMatcher state in freshStart so we can atomically switch it in once complete
70
- freshStart := & coreStart {}
84
+ freshStart := & coreFields {}
71
85
freshStart .namesUsed = make (map [string ]bool )
72
86
current := m .start ()
73
87
freshStart .state = current .state
88
+ freshStart .fakeField = current .fakeField
74
89
75
90
for k := range current .namesUsed {
76
91
freshStart .namesUsed [k ] = true
77
92
}
78
93
for used := range patternNamesUsed {
79
94
freshStart .namesUsed [used ] = true
80
95
}
81
- freshStart .presumedExistFalseMatches = newMatchSet ()
82
- for presumedExistsFalseMatch := range current .presumedExistFalseMatches .set {
83
- freshStart .presumedExistFalseMatches = freshStart .presumedExistFalseMatches .addX (presumedExistsFalseMatch )
84
- }
85
96
86
97
// now we add each of the name/value pairs in fields slice to the automaton, starting with the start state -
87
98
// the addTransition for a field returns a list of the fieldMatchers transitioned to for that name/val
88
99
// combo.
89
100
states := []* fieldMatcher {current .state }
90
101
for _ , field := range patternFields {
91
102
var nextStates []* fieldMatcher
92
- for _ , state := range states {
93
- ns := state .addTransition (field )
94
103
95
- // special handling for exists:false, in which case there can be only one val and one next state
96
- if field .vals [0 ].vType == existsFalseType {
97
- ns [0 ].addExistsFalseFailure (x )
98
- freshStart .presumedExistFalseMatches = freshStart .presumedExistFalseMatches .addX (x )
104
+ // separate handling for field exists:true/false and regular field name/val matches. Since the exists
105
+ // true/false are only allowed one value, we can test vals[0] to figure out which type
106
+ for _ , state := range states {
107
+ var ns []* fieldMatcher
108
+ switch field .vals [0 ].vType {
109
+ case existsTrueType :
110
+ ns = state .addExists (true , field )
111
+ case existsFalseType :
112
+ ns = state .addExists (false , field )
113
+ default :
114
+ ns = state .addTransition (field )
99
115
}
116
+
100
117
nextStates = append (nextStates , ns ... )
101
118
}
102
119
states = nextStates
@@ -106,9 +123,7 @@ func (m *coreMatcher) addPattern(x X, patternJSON string) error {
106
123
// by matching each field in the pattern so update the matches value to indicate this (skipping those that
107
124
// are only there to serve exists:false processing)
108
125
for _ , endState := range states {
109
- if ! endState .fields ().existsFalseFailures .contains (x ) {
110
- endState .addMatch (x )
111
- }
126
+ endState .addMatch (x )
112
127
}
113
128
m .updateable .Store (freshStart )
114
129
@@ -129,76 +144,100 @@ func (m *coreMatcher) matchesForJSONEvent(event []byte) ([]X, error) {
129
144
if err != nil {
130
145
return nil , err
131
146
}
147
+
148
+ // see the commentary on coreFields (the fakeField member) for an explanation of this.
149
+ // tl;dr: If the flattener returns no fields because there's nothing in the event that's mentioned in
150
+ // any patterns, the event could still match if there are only "exists":false patterns.
151
+ if len (fields ) == 0 {
152
+ fields = m .start ().fakeField
153
+ }
154
+
132
155
return m .matchesForFields (fields )
133
156
}
134
157
135
- // matchesForFields takes a list of Field structures and sorts them by pathname; the fields in a pattern to
136
- // matched are similarly sorted; thus running an automaton over them works
158
+ // matchesForFields takes a list of Field structures, sorts them by pathname, and launches the field-matching
159
+ // process. The fields in a pattern to match are similarly sorted; thus running an automaton over them works
137
160
func (m * coreMatcher ) matchesForFields (fields []Field ) ([]X , error ) {
138
161
sort .Slice (fields , func (i , j int ) bool { return string (fields [i ].Path ) < string (fields [j ].Path ) })
139
- return m .matchesForSortedFields (fields ).matches (), nil
140
- }
162
+ matches := newMatchSet ()
141
163
142
- // proposedTransition represents a suggestion that the name/value pair at fields[fieldIndex] might allow a transition
143
- // in the indicated state
144
- type proposedTransition struct {
145
- matcher * fieldMatcher
146
- fieldIndex int
164
+ // for each of the fields, we'll try to match the automaton start state to that field - the tryToMatch
165
+ // routine will, in the case that there's a match, call itself to see if subsequent fields after the
166
+ // first matched will transition through the machine and eventually achieve a match
167
+ s := m .start ()
168
+ for i := 0 ; i < len (fields ); i ++ {
169
+ tryToMatch (fields , i , s .state , matches )
170
+ }
171
+ return matches .matches (), nil
147
172
}
148
173
149
- // matchesForSortedFields runs the provided list of name/value pairs against the automaton and returns
150
- // a possibly-empty list of the patterns that match
151
- func ( m * coreMatcher ) matchesForSortedFields ( fields [] Field ) * matchSet {
152
- failedExistsFalseMatches := newMatchSet ()
153
- matches := newMatchSet ()
174
+ // tryToMatch tries to match the field at fields[index] to the provided state. If it does match and generate
175
+ // 1 or more transitions to other states, it calls itself recursively to see if any of the remaining fields
176
+ // can continue the process by matching that state.
177
+ func tryToMatch ( fields [] Field , index int , state * fieldMatcher , matches * matchSet ) {
178
+ stateFields := state . fields ()
154
179
155
- // The idea is that we add potential field transitions to the proposals list; any time such a transition
156
- // succeeds, i.e. matches a particular field and moves to a new state, we propose transitions from that
157
- // state on all the following fields in the event
158
- // Start by giving each field a chance to match against the start state. Doing it by pre-allocating the
159
- // proposals and filling in their values is observably faster than the more idiomatic append()
160
- proposals := make ([] proposedTransition , len ( fields ))
161
- for i := range fields {
162
- proposals [ i ]. fieldIndex = i
163
- proposals [ i ]. matcher = m . start (). state
180
+ // transition on exists:true?
181
+ existsTrans , ok := stateFields . existsTrue [ string ( fields [ index ]. Path )]
182
+ if ok {
183
+ matches = matches . addXSingleThreaded ( existsTrans . fields (). matches ... )
184
+ for nextIndex := index + 1 ; nextIndex < len ( fields ); nextIndex ++ {
185
+ if noArrayTrailConflict ( fields [ index ]. ArrayTrail , fields [ nextIndex ]. ArrayTrail ) {
186
+ tryToMatch ( fields , nextIndex , existsTrans , matches )
187
+ }
188
+ }
164
189
}
165
190
166
- // as long as there are still potential transitions
167
- for len (proposals ) > 0 {
168
- // go slices could usefully have a "pop" primitive
169
- lastIndex := len (proposals ) - 1
170
- proposal := proposals [lastIndex ]
171
- proposals = proposals [0 :lastIndex ]
191
+ // an exists:false transition is possible if there is no matching field in the event
192
+ // checkExistsFalse follows any such transitions from this state and records the resulting matches
193
+ checkExistsFalse (stateFields , fields , index , matches )
172
194
173
- // generate the possibly-empty list of transitions from state on the name/value pair
174
- nextStates := proposal . matcher . transitionOn (& fields [proposal . fieldIndex ])
195
+ // try to transition through the machine
196
+ nextStates := state . transitionOn (& fields [index ])
175
197
176
- // for each state in the set of transitions from the proposed state
177
- for _ , nextState := range nextStates {
178
- // if arriving at this state means we've matched one or more patterns, record that fact
179
- matches = matches .addXSingleThreaded (nextState . fields () .matches ... )
198
+ // for each state in the possibly-empty list of transitions from this state on fields[index]
199
+ for _ , nextState := range nextStates {
200
+ nextStateFields := nextState . fields ()
201
+ matches = matches .addXSingleThreaded (nextStateFields .matches ... )
180
202
181
- // have we invalidated a presumed exists:false pattern?
182
- for existsMatch := range nextState .fields ().existsFalseFailures .set {
183
- failedExistsFalseMatches = failedExistsFalseMatches .addXSingleThreaded (existsMatch )
203
+ // for each state we've transitioned to, give each subsequent field a chance to
204
+ // transition on it, assuming it's not in an object that's in a different element
205
+ // of the same array
206
+ for nextIndex := index + 1 ; nextIndex < len (fields ); nextIndex ++ {
207
+ if noArrayTrailConflict (fields [index ].ArrayTrail , fields [nextIndex ].ArrayTrail ) {
208
+ tryToMatch (fields , nextIndex , nextState , matches )
184
209
}
210
+ }
211
+ // now we've run out of fields to match this nextState against. But suppose it has an exists:false
212
+ // transition, and it so happens that the exists:false pattern field is lexically larger than the other
213
+ // fields and that in fact such a field does not exist. That state would be left hanging. So…
214
+ checkExistsFalse (nextStateFields , fields , index , matches )
215
+ }
216
+ }
185
217
186
- // for each state we've transitioned to, give each subsequent field a chance to
187
- // transition on it, assuming it's not in an object that's in a different element
188
- // of the same array
189
- for nextIndex := proposal .fieldIndex + 1 ; nextIndex < len (fields ); nextIndex ++ {
190
- if noArrayTrailConflict (fields [proposal .fieldIndex ].ArrayTrail , fields [nextIndex ].ArrayTrail ) {
191
- proposals = append (proposals , proposedTransition {fieldIndex : nextIndex , matcher : nextState })
218
+ func checkExistsFalse (stateFields * fmFields , fields []Field , index int , matches * matchSet ) {
219
+ for existsFalsePath , existsFalseTrans := range stateFields .existsFalse {
220
+ // it seems like there ought to be a more state-machine-idiomatic way to do this but
221
+ // I thought of a few and none of them worked. Quite likely someone will figure it out eventually.
222
+ var i int
223
+ var thisFieldIsAnExistsFalse bool
224
+ for i = 0 ; i < len (fields ); i ++ {
225
+ if string (fields [i ].Path ) == existsFalsePath {
226
+ if i == index {
227
+ thisFieldIsAnExistsFalse = true
192
228
}
229
+ break
193
230
}
194
231
}
195
- }
196
- for presumedExistsFalseMatch := range m .start ().presumedExistFalseMatches .set {
197
- if ! failedExistsFalseMatches .contains (presumedExistsFalseMatch ) {
198
- matches = matches .addXSingleThreaded (presumedExistsFalseMatch )
232
+ if i == len (fields ) {
233
+ matches = matches .addXSingleThreaded (existsFalseTrans .fields ().matches ... )
234
+ if thisFieldIsAnExistsFalse {
235
+ tryToMatch (fields , index + 1 , existsFalseTrans , matches )
236
+ } else {
237
+ tryToMatch (fields , index , existsFalseTrans , matches )
238
+ }
199
239
}
200
240
}
201
- return matches
202
241
}
203
242
204
243
func noArrayTrailConflict (from []ArrayPos , to []ArrayPos ) bool {
0 commit comments