Skip to content
This repository was archived by the owner on May 27, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
ef10942
add HTTP header well known type and C++ UUID
asraa Dec 16, 2019
3f0ef08
update validate.pb.go
asraa Dec 16, 2019
0ada956
simplify conditions
asraa Dec 17, 2019
b538b97
fix java build
asraa Dec 17, 2019
183e106
bad test
asraa Dec 17, 2019
cfaec72
java
asraa Dec 17, 2019
c9f0f24
check for slashes in hdr name
asraa Dec 17, 2019
a8bdbfb
add more testcases
asraa Dec 17, 2019
084d9ab
empty
asraa Dec 17, 2019
61dad58
define regex for http header name/value
asraa Dec 18, 2019
52baf9e
make patterns in to well known regex
asraa Dec 23, 2019
cb4812e
fix bazel
asraa Dec 23, 2019
9bc951e
fix bazel maven_jar defn
asraa Dec 23, 2019
70b715f
Merge remote-tracking branch 'upstream/master' into header-valid
asraa Dec 26, 2019
d311924
Merge remote-tracking branch 'upstream/master' into header-valid
asraa Dec 26, 2019
a7e22e9
remove fixes for build
asraa Dec 26, 2019
5dba63d
fix merge mistake
asraa Dec 26, 2019
4675686
encode in utf-8 when writing out
asraa Jan 6, 2020
b2d81f7
Merge remote-tracking branch 'upstream/master' into header-valid
asraa Jan 6, 2020
94d5d8d
cleanup
asraa Jan 6, 2020
a41290c
remove unused regex definitions
asraa Jan 7, 2020
b43d32e
add backtick
asraa Jan 24, 2020
87e9ca4
simplify header value regex as blacklist
asraa Jan 24, 2020
a7debef
fixup to match entire string + also fixup dependency
asraa Jan 24, 2020
5a47460
python regex
asraa Jan 27, 2020
bbc4c85
remove pyc
asraa Feb 3, 2020
7995cbb
Merge remote-tracking branch 'upstream/master' into header-valid
asraa Feb 3, 2020
1093391
there's gotta be another way
asraa Feb 4, 2020
6bd356b
Merge remote-tracking branch 'upstream/master' into header-valid
asraa Feb 11, 2020
0b11e92
remove autosaved pgs
asraa Feb 11, 2020
78ca604
Merge branch 'master' into header-valid
akonradi Feb 14, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,12 @@ Check the [constraint rule comparison matrix](rule_comparison.md) for language-s

// x must be a valid UUID (via RFC 4122)
string x = 1 [(validate.rules).string.uuid = true];

// x must conform to a well known regex for HTTP header names (via RFC 7230)
string x = 1 [(validate.rules).string.well_known_regex = HTTP_HEADER_NAME]

// x must conform to a well known regex for HTTP header values (via RFC 7230)
string x = 1 [(validate.rules).string.well_known_regex = HTTP_HEADER_VALUE];
```

### Bytes
Expand Down
14 changes: 7 additions & 7 deletions bazel/repositories.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ def pgv_dependencies():
if not native.existing_rule("io_bazel_rules_go"):
http_archive(
name = "io_bazel_rules_go",
urls = ["https://github.com/bazelbuild/rules_go/releases/download/0.18.5/rules_go-0.18.5.tar.gz"],
sha256 = "a82a352bffae6bee4e95f68a8d80a70e87f42c4741e6a448bec11998fcc82329",
urls = ["https://github.com/bazelbuild/rules_go/releases/download/v0.21.0/rules_go-v0.21.0.tar.gz"],
sha256 = "b27e55d2dcc9e6020e17614ae6e0374818a3e3ce6f2024036e688ada24110444",
)

if not native.existing_rule("bazel_gazelle"):
Expand Down Expand Up @@ -63,7 +63,7 @@ def pgv_dependencies():
name = "com_google_re2j",
artifact = "com.google.re2j:re2j:1.2",
artifact_sha256 = "e9dc705fd4c570344b54a7146b2e3a819cdc271a29793f4acc1a93b56a388e59",
server_urls = ["http://central.maven.org/maven2"],
server_urls = ["https://repo.maven.apache.org/maven2"],
)

if not native.existing_rule("com_googlesource_code_re2"):
Expand All @@ -79,7 +79,7 @@ def pgv_dependencies():
name = "com_google_guava",
artifact = "com.google.guava:guava:27.0-jre",
artifact_sha256 = "63b09db6861011e7fb2481be7790c7fd4b03f0bb884b3de2ecba8823ad19bf3f",
server_urls = ["http://central.maven.org/maven2"],
server_urls = ["https://repo.maven.apache.org/maven2"],
)

if not native.existing_rule("guava"):
Expand All @@ -93,7 +93,7 @@ def pgv_dependencies():
name = "com_google_gson",
artifact = "com.google.code.gson:gson:2.8.5",
artifact_sha256 = "233a0149fc365c9f6edbd683cfe266b19bdc773be98eabdaf6b3c924b48e7d81",
server_urls = ["http://central.maven.org/maven2"],
server_urls = ["https://repo.maven.apache.org/maven2"],
)

if not native.existing_rule("gson"):
Expand All @@ -107,7 +107,7 @@ def pgv_dependencies():
name = "error_prone_annotations_maven",
artifact = "com.google.errorprone:error_prone_annotations:2.3.2",
artifact_sha256 = "357cd6cfb067c969226c442451502aee13800a24e950fdfde77bcdb4565a668d",
server_urls = ["http://central.maven.org/maven2"],
server_urls = ["https://repo.maven.apache.org/maven2"],
)

if not native.existing_rule("error_prone_annotations"):
Expand All @@ -121,7 +121,7 @@ def pgv_dependencies():
name = "org_apache_commons_validator",
artifact = "commons-validator:commons-validator:1.6",
artifact_sha256 = "bd62795d7068a69cbea333f6dbf9c9c1a6ad7521443fb57202a44874f240ba25",
server_urls = ["http://central.maven.org/maven2"],
server_urls = ["https://repo.maven.apache.org/maven2"],
)

if not native.existing_rule("io_bazel_rules_python"):
Expand Down
22 changes: 20 additions & 2 deletions module/checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,20 @@ import (
"github.com/golang/protobuf/ptypes"
"github.com/golang/protobuf/ptypes/duration"
"github.com/golang/protobuf/ptypes/timestamp"
"github.com/lyft/protoc-gen-star"
pgs "github.com/lyft/protoc-gen-star"
)

var unknown = ""
var httpHeaderName = "^:?[0-9a-zA-Z!#$%&'*+-.^_|~\x60]+$"
var httpHeaderValue = "^[^\u0000-\u0008\u000A-\u001F\u007F]*$"
Comment on lines +17 to +19

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like these regular expressions are duplicated here and in the Python code. Is it possible to unify those so we can have a single source of truth?

@asraa asraa Feb 4, 2020

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The python code doesn't go through the go code checkers, but possibly I could have a const file with these regexes that python parses? Or is there a way that i can define constants in a protobuf file (validate.proto)? I think default values aren't in proto3 and that's probably an issue

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think defining these in a separate file is heading towards the right solution, but instead of a .proto file it can be a textproto or YAML file that we check in. The contents would be of a new message type that is basically just map<KnownRegex, string> and contains the single canonical definition of the regex for each well-known regex. Then we can refer to that during Go/C++/Java code generation and include it in the Python code (not completely clear on how, but it should be possible).

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might be too much for this PR, though. Up to you whether you want to do that here; if not, please open another issue to track cleaning up the tech debt.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, interesting. I like the textproto. It would make sense to include uuid patterns / other regexes that are duped (although the uuid dupe might be able to removed in code).

I filed an issue #316 and I'm happy to work on next week (after I use this PR in envoy!)


// Map from well known regex to regex pattern.
var regex_map = map[string]*string{
"UNKNOWN": &unknown,
"HTTP_HEADER_NAME": &httpHeaderName,
"HTTP_HEADER_VALUE": &httpHeaderValue,
}

type FieldType interface {
ProtoType() pgs.ProtoType
Embed() pgs.Message
Expand Down Expand Up @@ -196,6 +207,7 @@ func (m *Module) CheckString(r *validate.StringRules) {
m.checkMinMax(r.MinLen, r.MaxLen)
m.checkMinMax(r.MinBytes, r.MaxBytes)
m.checkIns(len(r.In), len(r.NotIn))
m.checkWellKnownRegex(r.GetWellKnownRegex(), r)
m.checkPattern(r.Pattern, len(r.In))

if r.MaxLen != nil {
Expand Down Expand Up @@ -452,6 +464,13 @@ func (m *Module) checkLen(len, min, max *uint64) {
"cannot have both `len` and `max_len` rules on the same field")
}

func (m *Module) checkWellKnownRegex(wk validate.KnownRegex, r *validate.StringRules) {
if wk != 0 {
m.Assert(r.Pattern == nil, "regex `well_known_regex` and regex `pattern` are incompatible")
r.Pattern = regex_map[wk.String()]
}
}

func (m *Module) checkPattern(p *string, in int) {
if p != nil {
m.Assert(in == 0, "regex `pattern` and `in` rules are incompatible")
Expand Down Expand Up @@ -479,4 +498,3 @@ func (m *Module) checkTS(ts *timestamp.Timestamp) *int64 {
m.CheckErr(err, "could not resolve timestamp")
return proto.Int64(t.UnixNano())
}

7 changes: 4 additions & 3 deletions rule_comparison.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,10 @@
| ip |✅|✅|✅|✅|✅|
| ipv4 |✅|✅|✅|✅|✅|
| ipv6 |✅|✅|✅|✅|✅|
| uri |✅|✅|✅|✅|✅|
| uri_ref |✅|✅|✅|✅|✅|
| uuid |✅|✅|❌|✅|✅|
| uri |✅|✅|❌|✅|✅|
| uri_ref |✅|✅|❌|✅|✅|
| uuid |✅|✅|✅|✅|✅|
| well_known_regex |✅|✅|✅|✅|✅|

## Bytes
| Constraint Rule | Go | GoGo | C++ | Java | Python |
Expand Down
3 changes: 3 additions & 0 deletions templates/cc/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ namespace protobuf_wkt = google::protobuf;
namespace validate {
using std::string;

// define the regex for a UUID once up-front
const re2::RE2 _uuidPattern("^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$");

{{ range .AllMessages }}
{{- if not (disabled .) -}}

Expand Down
7 changes: 2 additions & 5 deletions templates/cc/string.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,11 +176,8 @@ const strTpl = `
}
*/}}
{{ else if $r.GetUuid }}
{{ unimplemented "C++ UUID validation is not implemented" }}
{{/* TODO(akonradi) implement UUID constraints
if err := m._validateUuid({{ accessor . }}); err != nil {
return {{ errCause . "err" "value must be a valid UUID" }}
if (!RE2::FullMatch(re2::StringPiece({{ accessor . }}), pgv::validate::_uuidPattern)) {
{{ err . "value must be a valid UUID" }}
}
*/}}
{{ end }}
`
2 changes: 2 additions & 0 deletions tests/harness/cases/strings.proto
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,5 @@ message StringIPv6 { string val = 1 [(validate.rules).string.ipv6 = tr
message StringURI { string val = 1 [(validate.rules).string.uri = true]; }
message StringURIRef { string val = 1 [(validate.rules).string.uri_ref = true]; }
message StringUUID { string val = 1 [(validate.rules).string.uuid = true]; }
message StringHttpHeaderName { string val = 1 [(validate.rules).string.well_known_regex = HTTP_HEADER_NAME]; }
message StringHttpHeaderValue { string val = 1 [(validate.rules).string.well_known_regex = HTTP_HEADER_VALUE]; }
21 changes: 21 additions & 0 deletions tests/harness/executor/cases.go
Original file line number Diff line number Diff line change
Expand Up @@ -864,6 +864,27 @@ var stringCases = []TestCase{
{"string - UUID - valid (v5 - case-insensitive)", &cases.StringUUID{Val: "A6EDC906-2F9F-5FB2-A373-EFAC406F0EF2"}, true},
{"string - UUID - invalid", &cases.StringUUID{Val: "foobar"}, false},
{"string - UUID - invalid (bad UUID)", &cases.StringUUID{Val: "ffffffff-ffff-ffff-ffff-fffffffffffff"}, false},

{"string - http header name - valid", &cases.StringHttpHeaderName{Val: "clustername"}, true},
{"string - http header name - valid", &cases.StringHttpHeaderName{Val: ":path"}, true},
{"string - http header name - valid (nums)", &cases.StringHttpHeaderName{Val: "cluster-123"}, true},
{"string - http header name - valid (special token)", &cases.StringHttpHeaderName{Val: "!+#&.%"}, true},
{"string - http header name - valid (period)", &cases.StringHttpHeaderName{Val: "CLUSTER.NAME"}, true},
{"string - http header name - invalid", &cases.StringHttpHeaderName{Val: ":"}, false},
{"string - http header name - invalid", &cases.StringHttpHeaderName{Val: ":path:"}, false},
{"string - http header name - invalid (space)", &cases.StringHttpHeaderName{Val: "cluster name"}, false},
{"string - http header name - invalid (return)", &cases.StringHttpHeaderName{Val: "example\r"}, false},
{"string - http header name - invalid (tab)", &cases.StringHttpHeaderName{Val: "example\t"}, false},
{"string - http header name - invalid (slash)", &cases.StringHttpHeaderName{Val: "/test/long/url"}, false},

{"string - http header value - valid", &cases.StringHttpHeaderValue{Val: "cluster.name.123"}, true},
{"string - http header value - valid (uppercase)", &cases.StringHttpHeaderValue{Val: "/TEST/LONG/URL"}, true},
{"string - http header value - valid (spaces)", &cases.StringHttpHeaderValue{Val: "cluster name"}, true},
{"string - http header value - valid (tab)", &cases.StringHttpHeaderValue{Val: "example\t"}, true},
{"string - http header value - valid (special token)", &cases.StringHttpHeaderValue{Val: "!#%&./+"}, true},
{"string - http header value - invalid (NUL)", &cases.StringHttpHeaderValue{Val: "foo\u0000bar"}, false},
{"string - http header value - invalid (DEL)", &cases.StringHttpHeaderValue{Val: "\u007f"}, false},
{"string - http header value - invalid", &cases.StringHttpHeaderValue{Val: "example\r"}, false},
}

var bytesCases = []TestCase{
Expand Down
5 changes: 3 additions & 2 deletions tests/harness/python/harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ def unpack(message):
result.Error = False
result.AllowFailure = True
result.Reason = repr(e)
sys.stdout = open(sys.stdout.fileno(), mode='w', encoding='utf8')
Comment thread
asraa marked this conversation as resolved.
try:
sys.stdout.write(result.SerializeToString())
sys.stdout.write(result.SerializeToString().decode("utf-8"))
except TypeError:
sys.stdout.write(result.SerializeToString().decode(errors='surrogateescape'))
sys.stdout.write(result.SerializeToString().decode("utf-8", errors='surrogateescape'))
Loading