From 7366c95431c3010f6cb4ee8f02cfb0ec79367c06 Mon Sep 17 00:00:00 2001 From: Tim Bray Date: Mon, 9 Sep 2024 14:36:09 -0700 Subject: [PATCH 1/3] pat: add wildcard pattern addresses: #322 Signed-off-by: Tim Bray --- PATTERNS.md | 40 +++++++-- README.md | 10 +-- pattern.go | 3 + pattern_test.go | 40 ++++++++- value_matcher.go | 12 +++ value_matcher_test.go | 72 ++++++++++++++++ wildcard.go | 131 +++++++++++++++++++++++++++++ wildcard_test.go | 191 ++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 485 insertions(+), 14 deletions(-) create mode 100644 wildcard.go create mode 100644 wildcard_test.go diff --git a/PATTERNS.md b/PATTERNS.md index efa5e8b..fe27f94 100644 --- a/PATTERNS.md +++ b/PATTERNS.md @@ -149,9 +149,9 @@ is not equal to any of the strings in the array. If a Field in a Pattern contains an Anything-But Pattern, it **MUST NOT** contain any other values. -### Shellstyle Pattern +### Wildcard Pattern -The Pattern Type of a Shellstyle Pattern is `shellstyle` +The Pattern Type of a Wildcard Pattern is `wildcard` and its value **MUST** be a string which **MAY** contain `*` (“star”) characters. The star character functions exactly as the same character does in @@ -164,13 +164,37 @@ Consider the following Event: ```json {"img": "https://example.com/9943.jpg"} ``` -The following Shellstyle Patterns would match it: +The following Wildcard Patterns would match it: +```json +{"img": [ {"wildcard": "*.jpg"} ] } +{"img": [ {"wildcard": "https://example.com/*"} ] } +{"img": [ {"wildcard": "https://example.com/*.jpg"} ] } +{"img": [ {"wildcard": "https://example.*/*.jpg"} ] } +``` + +If it is desired to match the actual character "*", it may be “escaped” +with backslash, "\". 
For example, consider the following Event: + ```json -{"img": [ {"shellstyle": "*.jpg"} ] } -{"img": [ {"shellstyle": "https://example.com/*"} ] } -{"img": [ {"shellstyle": "https://example.com/*.jpg"} ] } -{"img": [ {"shellstyle": "https://example.*/*.jpg"} ] } +{"example-regex": "a**\\.b"} ``` + +The following Wildcard pattern would match it. + +```json +{"example-regex": [ {"wildcard": "a\\*\\*\\\\.b"}]} +``` + +Note that the "\" backslashes must be doubled to deal with the +fact that they are escape characters for JSON as well as for Quamina. + +After a "\", the appearance of any character other than "*" or "\" is an error. + +### Shellstyle Pattern + +This is an earlier version of the Wildcard pattern, differing only in that +\-escaping the "*" and "\" characters is not supported. + ### Equals-Ignore-Case Pattern The Pattern Type of an Equals-Ignore-Case pattern is `equals-ignore-case` @@ -192,6 +216,6 @@ the AWS EventBridge service, as documented in As of release 1.0, Quamina supports Exists and Anything-But Patterns, but does not yet support any other EventBridge patterns. Note that a -Shellstyle Pattern with a trailing `*` is equivalent +Wildcard Pattern with a trailing `*` is equivalent to a `prefix` pattern. 
diff --git a/README.md b/README.md index 88c8fc6..bd097e5 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,7 @@ The following Patterns would match it: ```json { "Image": { - "Thumbnail": { "Url": [ { "shellstyle": "*9943" } ] } + "Thumbnail": { "Url": [ { "wildcard": "*9943" } ] } } } ``` @@ -97,7 +97,7 @@ The following Patterns would match it: { "Image": { "Thumbnail": { "Url": - [ { "shellstyle": "http://www.example.com/*" } ] } + [ { "wildcard": "http://www.example.com/*" } ] } } } ``` @@ -105,7 +105,7 @@ The following Patterns would match it: { "Image": { "Thumbnail": { "Url": - [ { "shellstyle": "http://www.example.*/*9943" } ] } + [ { "wildcard": "http://www.example.*/*9943" } ] } } } ``` @@ -298,7 +298,7 @@ I used to say that the performance of `MatchesForEvent` was O(1) in the number of Patterns. That’s probably a reasonable way to think about it, because it’s *almost* right, except in the -case where a very large number of `shellstyle` patterns +case where a very large number of `wildcard` patterns have been added; this is discussed in the next section. To be correct, the performance is a little worse than @@ -342,7 +342,7 @@ So, adding a new Pattern that only mentions fields which are already mentioned in previous Patterns is effectively free, i.e. O(1) in terms of run-time performance. -### Quamina instances with large numbers of `shellstyle` Patterns +### Quamina instances with large numbers of `wildcard` Patterns A study of the theory of finite automata reveals that processing regular-expression constructs such as `*` increases the complexity of diff --git a/pattern.go b/pattern.go index c9a51b2..2339d31 100644 --- a/pattern.go +++ b/pattern.go @@ -21,6 +21,7 @@ const ( anythingButType prefixType monocaseType + wildcardType ) // typedVal represents the value of a field in a pattern, giving the value and the type of pattern. 
@@ -198,6 +199,8 @@ func readSpecialPattern(pb *patternBuild, valsIn []typedVal) (pathVals []typedVa pathVals, err = readExistsSpecial(pb, pathVals) case "shellstyle": pathVals, err = readShellStyleSpecial(pb, pathVals) + case "wildcard": + pathVals, err = readWildcardSpecial(pb, pathVals) case "prefix": pathVals, err = readPrefixSpecial(pb, pathVals) case "equals-ignore-case": diff --git a/pattern_test.go b/pattern_test.go index b27b554..04b6e7f 100644 --- a/pattern_test.go +++ b/pattern_test.go @@ -69,6 +69,10 @@ func TestPatternFromJSON(t *testing.T) { `{"abc": [ {"prefix": - "a" }, "foo" ] }`, `{"abc": [ {"prefix": "a" {, "foo" ] }`, `{"abc": [ {"equals-ignore-case":23}, "foo" ] }`, + `{"abc": [ {"wildcard":"15", "x", 1} ] }`, + `{"abc": [ {"wildcard":"a**b"}, "foo" ] }`, + `{"abc": [ {"wildcard":"a\\b"}, "foo" ] }`, // after JSON parsing, code sees `a\b` + `{"abc": [ {"wildcard":"a\\"}, "foo" ] }`, // after JSON parsing, code sees `a\` "{\"a\": [ { \"anything-but\": { \"equals-ignore-case\": [\"1\", \"2\" \"3\"] } } ] }", // missing , "{\"a\": [ { \"anything-but\": { \"equals-ignore-case\": [1, 2, 3] } } ] }", // no numbers "{\"a\": [ { \"anything-but\": { \"equals-ignore-case\": [\"1\", \"2\" } } ] }", // missing ] @@ -93,6 +97,10 @@ func TestPatternFromJSON(t *testing.T) { `{"abc": [ {"shellstyle":"a*b"}, "foo" ] }`, `{"abc": [ {"shellstyle":"a*b*c"} ] }`, `{"x": [ {"equals-ignore-case":"a*b*c"} ] }`, + `{"abc": [ 3, {"wildcard":"a*b"} ] }`, + `{"abc": [ {"wildcard":"a*b"}, "foo" ] }`, + `{"abc": [ {"wildcard":"a*b*c"} ] }`, + `{"abc": [ {"wildcard":"a*b\\*c"} ] }`, } w1 := []*patternField{{path: "x", vals: []typedVal{{vType: numberType, val: "2"}}}} w2 := []*patternField{{path: "x", vals: []typedVal{ @@ -156,7 +164,37 @@ func TestPatternFromJSON(t *testing.T) { }, }, } - wanted := [][]*patternField{w1, w2, w3, w4, w5, w6, w7, w8, w9} + w10 := []*patternField{ + { + path: "abc", vals: []typedVal{ + {vType: stringType, val: "3"}, + {vType: wildcardType, val: 
`"a*b"`}, + }, + }, + } + w11 := []*patternField{ + { + path: "abc", vals: []typedVal{ + {vType: wildcardType, val: `"a*b"`}, + {vType: stringType, val: `"foo"`}, + }, + }, + } + w12 := []*patternField{ + { + path: "abc", vals: []typedVal{ + {vType: wildcardType, val: `"a*b*c"`}, + }, + }, + } + w13 := []*patternField{ + { + path: "abc", vals: []typedVal{ + {vType: wildcardType, val: `"a*b\*c"`}, + }, + }, + } + wanted := [][]*patternField{w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13} for i, good := range goods { fields, err := patternFromJSON([]byte(good)) diff --git a/value_matcher.go b/value_matcher.go index 52d5541..118b41e 100644 --- a/value_matcher.go +++ b/value_matcher.go @@ -116,6 +116,9 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer) *fieldMatche case shellStyleType: newFA, nextField = makeShellStyleFA(valBytes, printer) fields.isNondeterministic = true + case wildcardType: + newFA, nextField = makeWildcardFA(valBytes, printer) + fields.isNondeterministic = true case prefixType: newFA, nextField = makePrefixFA(valBytes) case monocaseType: @@ -156,6 +159,12 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer) *fieldMatche fields.isNondeterministic = true m.update(fields) return nextField + case wildcardType: + newAutomaton, nextField := makeWildcardFA(valBytes, printer) + fields.startTable = newAutomaton + fields.isNondeterministic = true + m.update(fields) + return nextField case prefixType: newFA, nextField := makePrefixFA(valBytes) fields.startTable = newFA @@ -194,6 +203,9 @@ func (m *valueMatcher) addTransition(val typedVal, printer printer) *fieldMatche case shellStyleType: newFA, nextField = makeShellStyleFA(valBytes, printer) fields.isNondeterministic = true + case wildcardType: + newFA, nextField = makeWildcardFA(valBytes, printer) + fields.isNondeterministic = true case prefixType: newFA, nextField = makePrefixFA(valBytes) case monocaseType: diff --git a/value_matcher_test.go 
b/value_matcher_test.go index 1f86b4d..9661de3 100644 --- a/value_matcher_test.go +++ b/value_matcher_test.go @@ -347,3 +347,75 @@ func TestMakeFAFragment(t *testing.T) { } } } +func TestExerciseSingletonReplacement(t *testing.T) { + cm := newCoreMatcher() + err := cm.addPattern("x", `{"x": [ "a"]}`) + if err != nil { + t.Error("AP: " + err.Error()) + } + err = cm.addPattern("x", `{"x": [1]}`) + if err != nil { + t.Error("AP: " + err.Error()) + } + events := []string{`{"x": 1}`, `{"x": "a"}`} + for _, e := range events { + matches, err := cm.matchesForJSONEvent([]byte(e)) + if err != nil { + t.Error("m4: " + err.Error()) + } + if len(matches) != 1 || matches[0] != "x" { + t.Error("match failed on: " + e) + } + } + events = []string{`{"x": 1}`, `{"x": "a"}`} + for _, e := range events { + matches, err := cm.matchesForJSONEvent([]byte(e)) + if err != nil { + t.Error("m4: " + err.Error()) + } + if len(matches) != 1 || matches[0] != "x" { + t.Error("match failed on: " + e) + } + } + cm = newCoreMatcher() + err = cm.addPattern("x", `{"x": ["x"]}`) + if err != nil { + t.Error("AP: " + err.Error()) + } + err = cm.addPattern("x", `{"x": [ {"wildcard": "x*y"}]}`) + if err != nil { + t.Error("AP: " + err.Error()) + } + events = []string{`{"x": "x"}`, `{"x": "x..y"}`} + for _, e := range events { + matches, err := cm.matchesForJSONEvent([]byte(e)) + if err != nil { + t.Error("m4: " + err.Error()) + } + if len(matches) != 1 || matches[0] != "x" { + t.Error("match failed on: " + e) + } + } +} + +func TestMergeNfaAndNumeric(t *testing.T) { + cm := newCoreMatcher() + err := cm.addPattern("x", `{"x": [{"wildcard":"x*y"}]}`) + if err != nil { + t.Error("AP: " + err.Error()) + } + err = cm.addPattern("x", `{"x": [3]}`) + if err != nil { + t.Error("AP: " + err.Error()) + } + events := []string{`{"x": 3}`, `{"x": "xasdfy"}`} + for _, e := range events { + matches, err := cm.matchesForJSONEvent([]byte(e)) + if err != nil { + t.Error("M4: " + err.Error()) + } + if len(matches) != 1 || 
matches[0] != "x" { + t.Error("Match failed on " + e) + } + } +} diff --git a/wildcard.go b/wildcard.go new file mode 100644 index 0000000..cba6a2e --- /dev/null +++ b/wildcard.go @@ -0,0 +1,131 @@ +package quamina + +import ( + "encoding/json" + "errors" + "fmt" +) + +type wcState int + +const ( + wcChilling wcState = iota + wcAfterBS + wcAfterGlob +) + +func readWildcardSpecial(pb *patternBuild, valsIn []typedVal) ([]typedVal, error) { + t, err := pb.jd.Token() + if err != nil { + return nil, err + } + pathVals := valsIn + wcInput, ok := t.(string) + if !ok { + return nil, errors.New("value for `wildcard` must be a string") + } + inBytes := []byte(wcInput) + state := wcChilling + for i, b := range inBytes { + switch state { + case wcChilling: + switch b { + case '\\': + if i == len(inBytes)-1 { + return nil, errors.New("'\\' at end of string not allowed") + } + state = wcAfterBS + case '*': + state = wcAfterGlob + } + case wcAfterBS: + switch b { + case '\\', '*': + state = wcChilling + default: + return nil, errors.New("`\\` can only be followed by '\\' or '*'") + } + case wcAfterGlob: + switch b { + case '*': + return nil, fmt.Errorf("adjacent '*' characters not allowed") + case '\\': + state = wcAfterBS + default: + state = wcChilling + } + } + } + pathVals = append(pathVals, typedVal{vType: wildcardType, val: `"` + wcInput + `"`}) + + t, err = pb.jd.Token() + if err != nil { + return nil, err + } + switch t.(type) { + case json.Delim: + // } is all that will be returned + default: + return nil, errors.New("trailing garbage in wildcard pattern") + } + + return pathVals, nil +} + +// makeWildcardFA is a replacement for shellstyle patterns, the only difference being that escaping is +// provided for * and \. +func makeWildcardFA(val []byte, printer printer) (start *smallTable, nextField *fieldMatcher) { + table := newSmallTable() + start = table + nextField = newFieldMatcher() + + // for each byte in the pattern. 
\-escape processing is simplified because illegal constructs such as \a and \ + // at the end of the value have been rejected by readWildcardSpecial. + valIndex := 0 + for valIndex < len(val) { + ch := val[valIndex] + escaped := ch == '\\' + if escaped { + valIndex++ + ch = val[valIndex] + } + if ch == '*' && !escaped { + // special-case handling for string ending in '*"' - transition to field match on any character. + // we know the trailing '"' will be there because of JSON syntax. We could use an epsilon state + // but then the matcher will process through all the rest of the bytes, when it doesn't need to + if valIndex == len(val)-2 { + step := &faState{ + table: newSmallTable(), + fieldTransitions: []*fieldMatcher{nextField}, + } + table.epsilon = []*faState{step} + printer.labelTable(table, fmt.Sprintf("prefix escape at %d", valIndex)) + return + } + globStep := &faState{table: table} + printer.labelTable(table, fmt.Sprintf("gS at %d", valIndex)) + table.epsilon = []*faState{globStep} + + valIndex++ + // ** is forbidden, if we're seeing *\* then the second * is non-magic, if we're seeing *\\, it + // just means \, so either way, all we need to do is hop over this \ + if val[valIndex] == '\\' { + valIndex++ + } + globNext := &faState{table: newSmallTable()} + printer.labelTable(globNext.table, fmt.Sprintf("gX on %c at %d", val[valIndex], valIndex)) + table.addByteStep(val[valIndex], &faNext{states: []*faState{globNext}}) + table = globNext.table + } else { + nextStep := &faState{table: newSmallTable()} + printer.labelTable(nextStep.table, fmt.Sprintf("on %c at %d", val[valIndex], valIndex)) + table.addByteStep(ch, &faNext{states: []*faState{nextStep}}) + table = nextStep.table + } + valIndex++ + } + lastStep := &faState{table: newSmallTable(), fieldTransitions: []*fieldMatcher{nextField}} + printer.labelTable(lastStep.table, fmt.Sprintf("last step at %d", valIndex)) + table.addByteStep(valueTerminator, &faNext{states: []*faState{lastStep}}) + return +} diff 
--git a/wildcard_test.go b/wildcard_test.go new file mode 100644 index 0000000..45a4b11 --- /dev/null +++ b/wildcard_test.go @@ -0,0 +1,191 @@ +package quamina + +import ( + "fmt" + "testing" +) + +func TestWildcardSyntax(t *testing.T) { + cm := newCoreMatcher() + busted1 := `{"x": [{"wildcard": . }]}` + err := cm.addPattern("x", busted1) + if err == nil { + t.Error("accepted " + busted1) + } + busted2 := `{"x": [{"wildcard": 3}]}` + err = cm.addPattern("x", busted2) + if err == nil { + t.Error("accepted " + busted2) + } + busted3 := `{"x": [{"wildcard": "x" ]}` + err = cm.addPattern("x", busted3) + if err == nil { + t.Error("accepted " + busted3) + } +} + +// these tests copied with thanks from aws/event-ruler - didn't grab them all, just too many for my poor fingers +func TestWildcardMatching(t *testing.T) { + exercisePattern(t, "*", []string{"", "*", "h", "hello"}, []string{}) + exercisePattern(t, "*hello", []string{"hello", "hhello", "xxxhello", "*hello", "23Őzhello"}, []string{"", "ello", "hellx", "xhellx", "hell5rHGGHo"}) + exercisePattern(t, "h*llo", []string{"hllo", "hello", "hxxxllo", "hel23Őzlllo"}, []string{"", "hlo", "hll", "hellol", "hel5rHGGHlo"}) + exercisePattern(t, "hel*o", []string{"helo", "hello", "helxxxo", "hel23Őzlllo"}, []string{"", "hell", "helox", "hellox", "hel5rHGGHe"}) + exercisePattern(t, "hello*", []string{"hello", "hellox", "hellooo", "hello*", "hello23Őzlllo"}, []string{"", "hell", "hellx", "hellxo", "hol5rHGGHo"}) + exercisePattern(t, "h*l*o", []string{"hlo", "helo", "hllo", "hloo", "hello", "hxxxlxxxo", "h*l*o", "hel*o", "h*llo", "hel23Őzlllo"}, []string{"", "ho", "heeo", "helx", "llo", "hex5rHGGHo"}) + exercisePattern(t, "he*l*", []string{"hel", "hexl", "helx", "helxx", "helxl", "helxlx", "helxxl", "helxxlxx", "helxxlxxl"}, []string{"", "he", "hex", "hexxx"}) + exercisePattern(t, "*l*", []string{"l", "xl", "lx", "xlx", "xxl", "lxx", "xxlxx", "xlxlxlxlxl", "lxlxlxlxlx"}, []string{"", "x", "xx", "xtx"}) + exercisePattern(t, 
`hel\\*o`, []string{"hel*o"}, []string{"helo", "hello"}) + exercisePattern(t, `he\\**o`, []string{"he*o", "he*llo", "he*hello"}, []string{"heo", "helo", "hello", "he*l"}) + exercisePattern(t, `he\\\\llo`, []string{"he\\\\llo"}, []string{"hello", "he\\llo"}) + exercisePattern(t, `he\\\\\\*llo`, []string{`he\\*llo`}, []string{`hello`, `he\\\\llo`, `he\\llo`, `he\\xxllo`}) + exercisePattern(t, `he\\\\*llo`, []string{`he\\llo`, `he\\*llo`, `he\\\\llo`, `he\\\\\\llo`, `he\\xxllo`}, []string{`hello`, `he\\ll`}) + exerciseMultiPatterns(t, nil, []pwanted{ + {`{"x":[{"wildcard": "*"}]}`, []string{"", "*", "h", "ho", "hello"}}, + {`{"x":[{"wildcard": "h*o"}]}`, []string{"ho", "hello"}}, + {`{"x":["hello"]}`, []string{"hello"}}}) + exerciseMultiPatterns(t, []string{"", "hellox", "blahabc"}, []pwanted{ + {`{"x":[{"wildcard": "*hello"}]}`, []string{"hello", "xhello", "hehello"}}, + {`{"x":["abc"]}`, []string{"abc"}}}) + exerciseMultiPatterns(t, []string{"", "h", "ello", "hel", "hlo", "hell"}, []pwanted{ + {`{"x":[{"wildcard": "*hello"}]}`, []string{"hello", "xhello", "hehello"}}, + {`{"x":[{"wildcard": "h*llo"}]}`, []string{"hllo", "hello", "hehello"}}}) + exerciseMultiPatterns(t, []string{"", "h", "ello", "hel", "heo", "hell"}, []pwanted{ + {`{"x":[{"wildcard": "*hello"}]}`, []string{"hello", "xhello", "hehello"}}, + {`{"x":[{"wildcard": "he*lo"}]}`, []string{"helo", "hello", "hehello"}}}) + exerciseMultiPatterns(t, []string{"", "e", "l", "lo", "hel"}, []pwanted{ + {`{"x":[{"wildcard": "*elo"}]}`, []string{"elo", "helo", "xhelo"}}, + {`{"x":[{"wildcard": "e*l*"}]}`, []string{"el", "elo", "exl", "elx", "exlx", "exxl", "elxx", "exxlxx"}}}) + exerciseMultiPatterns(t, []string{"", "he", "hexxo", "ello"}, []pwanted{ + {`{"x":[{"wildcard": "*hello"}]}`, []string{"hello", "xhello", "xxhello"}}, + {`{"x":[{"wildcard": "he*l*"}]}`, []string{"hel", "hello", "helo", "hexl", "hexlx", "hexxl", "helxx", "hexxlxx"}}}) + exerciseMultiPatterns(t, []string{"", "hlo", "heo", "hllol", "helol"}, 
[]pwanted{ + {`{"x":[{"wildcard": "h*llo"}]}`, []string{"hllo", "hello", "hxxxllo", "hexxxllo"}}, + {`{"x":[{"wildcard": "he*lo"}]}`, []string{"helo", "hello", "hexxxlo", "hexxxllo"}}}) + exerciseMultiPatterns(t, []string{"", "hlox", "hllo", "helo", "heox", "helx", "hellx", "helloxx", "heloxx"}, []pwanted{ + {`{"x":[{"wildcard": "h*llox"}]}`, []string{"hllox", "hellox", "hxxxllox", "helhllox", "hheloxllox"}}, + {`{"x":[{"wildcard": "hel*ox"}]}`, []string{"helox", "hellox", "helxxxox", "helhllox", "helhlloxox"}}}) + exerciseMultiPatterns(t, []string{"", "h", "he", "hl", "el", "hlo", "llo", "hllol", "hxll", "hexxx"}, []pwanted{ + {`{"x":[{"wildcard": "h*llo"}]}`, []string{"hllo", "hello", "hxxxllo", "hexxxllo", "hexxxlllo"}}, + {`{"x":[{"wildcard": "he*l*"}]}`, []string{"hel", "helo", "hexl", "hello", "helol", "hexxxlo", "hexxxllo", "hexxxlllo"}}}) + exerciseMultiPatterns(t, []string{"", "h", "hex", "hl", "exl", "hxlo", "xllo", "hxllol", "hxxll", "hexxx"}, []pwanted{ + {`{"x":[{"wildcard": "h*xllo"}]}`, []string{"hxllo", "hexllo", "hxxxllo", "hexxxllo"}}, + {`{"x":[{"wildcard": "hex*l*"}]}`, []string{"hexl", "hexlo", "hexxl", "hexllo", "hexlol", "hexxxlo", "hexxxllo", "hexxxlllo"}}}) + exerciseMultiPatterns(t, []string{"", "hel", "heo", "hlo", "hellxox"}, []pwanted{ + {`{"x":[{"wildcard": "he*lo"}]}`, []string{"helo", "hello", "hexxxlo", "helxxxlo"}}, + {`{"x":[{"wildcard": "hel*o"}]}`, []string{"helo", "hello", "hellxo", "helxxxo", "helxxxlo"}}}) + exerciseMultiPatterns(t, []string{"", "hlo", "hll", "hel", "helox"}, []pwanted{ + {`{"x":[{"wildcard": "h*llo"}]}`, []string{"hllo", "hello", "hxxxllo", "helllo"}}, + {`{"x":[{"wildcard": "hel*o"}]}`, []string{"helo", "hello", "helxo", "helllo"}}}) + exerciseMultiPatterns(t, []string{"", "he", "hel", "helox", "helx", "hxlo"}, []pwanted{ + {`{"x":[{"wildcard": "he*lo"}]}`, []string{"helo", "hello", "helllo", "helxlo"}}, + {`{"x":[{"wildcard": "hell*"}]}`, []string{"hell", "hello", "helllo", "hellx", "hellxxx"}}}) + 
exerciseMultiPatterns(t, []string{"", "hel", "helox", "helxox", "hexo"}, []pwanted{ + {`{"x":[{"wildcard": "hel*o"}]}`, []string{"helo", "hello", "helllo", "hellloo", "helloo", "heloo"}}, + {`{"x":[{"wildcard": "hell*"}]}`, []string{"hell", "hello", "helllo", "hellloo", "helloo", "hellox"}}}) + exerciseMultiPatterns(t, []string{"", "he", "hex", "hexlo"}, []pwanted{ + {`{"x":[{"wildcard": "hel*"}]}`, []string{"hel", "helx", "hello", "hellox"}}, + {`{"x":[{"wildcard": "hello*"}]}`, []string{"hello", "hellox"}}}) + exerciseMultiPatterns(t, []string{"", "he", "hex", "hexlo"}, []pwanted{ + {`{"x":[{"wildcard": "*hello"}]}`, []string{"hello", "hhello", "hhhello"}}, + {`{"x":["hello"]}`, []string{"hello"}}}) + exerciseMultiPatterns(t, []string{"", "he", "hel", "heo", "heloz", "hellox", "heloxo"}, []pwanted{ + {`{"x":[{"wildcard": "he*lo"}]}`, []string{"helo", "hello", "helllo"}}, + {`{"x":["helox"]}`, []string{"helox"}}}) + exerciseMultiPatterns(t, []string{"", "he", "helx", "helo", "hexlx", "hellox", "heloxx"}, []pwanted{ + {`{"x":[{"wildcard": "he*l"}]}`, []string{"hel", "hexl", "hexxxl"}}, + {`{"x":["helox"]}`, []string{"helox"}}}) + exerciseMultiPatterns(t, []string{"", "h", "hxlox", "hxelox"}, []pwanted{ + {`{"x":[{"wildcard": "he*"}]}`, []string{"he", "helo", "helox", "heloxx"}}, + {`{"x":["helox"]}`, []string{"helox"}}}) + exerciseMultiPatterns(t, []string{"", "h", "he", "hel", "hexxo", "hexxohexxo"}, []pwanted{ + {`{"x":[{"wildcard": "h*l*o"}]}`, []string{"hlo", "helo", "hllo", "hello", "hexloo", "hellohello", "hellohellxo"}}, + {`{"x":["hellohello"]}`, []string{"hellohello"}}}) + exerciseMultiPatterns(t, []string{"", "h", "he", "hlo", "hexxo", "hexxohexxo"}, []pwanted{ + {`{"x":[{"wildcard": "he*l*"}]}`, []string{"hel", "helo", "hexl", "hello", "hexloo", "hellohellx", "hellohello"}}, + {`{"x":["hellohello"]}`, []string{"hellohello"}}}) +} + +func TestWildcardInvalidEscape(t *testing.T) { + cm := newCoreMatcher() + goods := []string{ + `he*\\**`, + } + bads := 
[]string{ + `he\\llo`, `foo**bar`, `**f`, `x**`, `x\\`, + } + for _, good := range goods { + pattern := fmt.Sprintf(`{"x": [{"wildcard": "%s"}]}`, good) + err := cm.addPattern("x", pattern) + if err != nil { + t.Error("rejected \\:", good) + } + } + for _, bad := range bads { + pattern := fmt.Sprintf(`{"x": [{"wildcard": "%s"}]}`, bad) + err := cm.addPattern("x", pattern) + if err == nil { + t.Error("Allowed bad \\:", bad) + } + } +} + +type pwanted struct { + pattern string + wanted []string +} + +func exerciseMultiPatterns(t *testing.T, nos []string, pws []pwanted) { + t.Helper() + cm := newCoreMatcher() + for _, pw := range pws { + err := cm.addPattern(pw.pattern, pw.pattern) + if err != nil { + t.Errorf("Addpattern %s: %s", pw.pattern, err.Error()) + } + } + for _, pw := range pws { + for _, want := range pw.wanted { + event := fmt.Sprintf(`{"x":"%s"}`, want) + matches, _ := cm.matchesForJSONEvent([]byte(event)) + var i int + for i = 0; i < len(matches); i++ { + if matches[i] == pw.pattern { + break + } + } + if i == len(matches) { + t.Errorf("event [%s] didn't match pattern[%s]", event, pw.pattern) + } + } + } + for _, n := range nos { + event := fmt.Sprintf(`{"x": "%s"}`, n) + matches, _ := cm.matchesForJSONEvent([]byte(event)) + if len(matches) != 0 { + t.Errorf("%s did match", n) + } + } +} + +func exercisePattern(t *testing.T, pattern string, yes []string, no []string) { + t.Helper() + cm := newCoreMatcher() + err := cm.addPattern(pattern, fmt.Sprintf(`{"x": [ {"wildcard": "%s"}]}`, pattern)) + if err != nil { + t.Errorf("Addpattern %s: %s", pattern, err.Error()) + } + for _, y := range yes { + event := fmt.Sprintf(`{"x": "%s"}`, y) + matches, _ := cm.matchesForJSONEvent([]byte(event)) + if len(matches) != 1 || matches[0] != pattern { + t.Errorf("[%s] doesn't match %s", y, pattern) + } + } + for _, n := range no { + event := fmt.Sprintf(`{"x": "%s"}`, n) + matches, _ := cm.matchesForJSONEvent([]byte(event)) + if len(matches) != 0 { + t.Errorf("%s did match 
%s", n, pattern) + } + } +} From c9c0223389be8abb39e473a09c72f6a728896064 Mon Sep 17 00:00:00 2001 From: Tim Bray Date: Mon, 9 Sep 2024 14:45:24 -0700 Subject: [PATCH 2/3] address lint complaints Signed-off-by: Tim Bray --- prettyprinter.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/prettyprinter.go b/prettyprinter.go index be208f9..af179e8 100644 --- a/prettyprinter.go +++ b/prettyprinter.go @@ -38,18 +38,18 @@ var sharedNullPrinter = &nullPrinter{} type prettyPrinter struct { randInts rand.Source tableLabels map[*smallTable]string - tableSerials map[*smallTable]uint + tableSerials map[*smallTable]uint64 } func newPrettyPrinter(seed int) *prettyPrinter { return &prettyPrinter{ randInts: rand.NewSource(int64(seed)), tableLabels: make(map[*smallTable]string), - tableSerials: make(map[*smallTable]uint), + tableSerials: make(map[*smallTable]uint64), } } -func (pp *prettyPrinter) tableSerial(t *smallTable) uint { +func (pp *prettyPrinter) tableSerial(t *smallTable) uint64 { return pp.tableSerials[t] } func (pp *prettyPrinter) tableLabel(t *smallTable) string { @@ -58,7 +58,7 @@ func (pp *prettyPrinter) tableLabel(t *smallTable) string { func (pp *prettyPrinter) labelTable(table *smallTable, label string) { pp.tableLabels[table] = label - pp.tableSerials[table] = uint(pp.randInts.Int63()%500 + 500) + pp.tableSerials[table] = uint64(pp.randInts.Int63()%500 + 500) } func (pp *prettyPrinter) printNFA(t *smallTable) string { From e95536c9c14704e78866629a3679ab4401e6006b Mon Sep 17 00:00:00 2001 From: Tim Bray Date: Mon, 9 Sep 2024 14:54:28 -0700 Subject: [PATCH 3/3] address lint complaints, again Signed-off-by: Tim Bray --- prettyprinter.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/prettyprinter.go b/prettyprinter.go index af179e8..722f21d 100644 --- a/prettyprinter.go +++ b/prettyprinter.go @@ -38,18 +38,18 @@ var sharedNullPrinter = &nullPrinter{} type prettyPrinter struct { randInts rand.Source tableLabels 
map[*smallTable]string - tableSerials map[*smallTable]uint64 + tableSerials map[*smallTable]uint } func newPrettyPrinter(seed int) *prettyPrinter { return &prettyPrinter{ randInts: rand.NewSource(int64(seed)), tableLabels: make(map[*smallTable]string), - tableSerials: make(map[*smallTable]uint64), + tableSerials: make(map[*smallTable]uint), } } -func (pp *prettyPrinter) tableSerial(t *smallTable) uint64 { +func (pp *prettyPrinter) tableSerial(t *smallTable) uint { return pp.tableSerials[t] } func (pp *prettyPrinter) tableLabel(t *smallTable) string { @@ -58,7 +58,9 @@ func (pp *prettyPrinter) tableLabel(t *smallTable) string { func (pp *prettyPrinter) labelTable(table *smallTable, label string) { pp.tableLabels[table] = label - pp.tableSerials[table] = uint64(pp.randInts.Int63()%500 + 500) + newSerial := pp.randInts.Int63()%500 + 500 + //nolint:gosec + pp.tableSerials[table] = uint(newSerial) } func (pp *prettyPrinter) printNFA(t *smallTable) string {