Skip to content

Commit c44de18

Browse files
committed
tools: add jsonschema linter
This commit initializes a Go module for the repo, adding a single command under `tools/vectorlint`. The new `vectorlint` tool can be provided a directory of schemas, and one or more directories of test vectors. It will process all vectors to check that:
* the vector JSON is valid
* the vector JSON references a schema JSON file that exists
* the schema JSON compiles
* the vector JSON is valid according to its referenced schema

This initial work has some important limitations:
1. There are 7 schemas referenced by vector files in `testvectors_v1/` that don't exist. For now these are tracked in a `missingSchemas` map and the tool ignores vectors that reference these. We should add the missing schemas and remove the `missingSchemas` entries.
2. The schemas use a number of custom formats. To get started I've implemented the format validation as a no-op. We should add validation logic for each custom format and remove the no-op validation.
3. The schemas don't include "additionalProperties":false, and there are few required fields, so the validation is quite lax. (e.g. v1 files that differ substantially from the v0 schema appear to verify anyway).
1 parent ced6d85 commit c44de18

File tree

3 files changed

+207
-0
lines changed

3 files changed

+207
-0
lines changed

go.mod

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
module github.com/c2sp/wycheproof
2+
3+
go 1.23.6
4+
5+
require github.com/santhosh-tekuri/jsonschema/v6 v6.0.1
6+
7+
require golang.org/x/text v0.14.0 // indirect

go.sum

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxKI=
2+
github.com/dlclark/regexp2 v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
3+
github.com/santhosh-tekuri/jsonschema/v6 v6.0.1 h1:PKK9DyHxif4LZo+uQSgXNqs0jj5+xZwwfKHgph2lxBw=
4+
github.com/santhosh-tekuri/jsonschema/v6 v6.0.1/go.mod h1:JXeL+ps8p7/KNMjDQk3TCwPpBy0wYklyWTfbkIzdIFU=
5+
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
6+
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=

tools/vectorlint/main.go

+194
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
// vectorlint analyzes vector files to flag potential issues
2+
package main
3+
4+
import (
5+
"encoding/json"
6+
"flag"
7+
"fmt"
8+
"github.com/santhosh-tekuri/jsonschema/v6"
9+
"io/fs"
10+
"log"
11+
"os"
12+
"path/filepath"
13+
"regexp"
14+
"strings"
15+
)
16+
17+
func main() {
18+
schemaDirectory := flag.String("schemas-dir", "schemas", "directory containing schema files")
19+
vectorsDirectories := flag.String("vectors-dir", "testvectors_v1,testvectors", "comma separated directories containing vector files")
20+
vectorFilter := flag.String("vector-filter", "", "only validate vector files matching the provided pattern")
21+
22+
flag.Parse()
23+
24+
vectorDirectoryParts := strings.Split(*vectorsDirectories, ",")
25+
26+
log.Printf("reading schemas from %q\n", *schemaDirectory)
27+
log.Printf("reading vectors from %q\n", vectorDirectoryParts)
28+
29+
var vectorRegex *regexp.Regexp
30+
if *vectorFilter != "" {
31+
vectorRegex = regexp.MustCompile(*vectorFilter)
32+
log.Printf("filtering vectors with %q\n", *vectorFilter)
33+
}
34+
35+
schemaCompiler := jsonschema.NewCompiler()
36+
37+
for _, f := range customFormats {
38+
schemaCompiler.RegisterFormat(&f)
39+
}
40+
41+
var total, valid, invalid, noSchema, ignored int
42+
for _, vectorDir := range vectorDirectoryParts {
43+
err := filepath.WalkDir(vectorDir, func(path string, d fs.DirEntry, err error) error {
44+
if err != nil {
45+
return err
46+
}
47+
48+
if d.IsDir() || !strings.HasSuffix(d.Name(), ".json") {
49+
return nil
50+
}
51+
52+
if vectorRegex != nil && !vectorRegex.MatchString(d.Name()) {
53+
return nil
54+
}
55+
56+
vectorData, err := os.ReadFile(path)
57+
if err != nil {
58+
return fmt.Errorf("failed to read %s: %w", path, err)
59+
}
60+
61+
total++
62+
63+
var vector struct {
64+
Schema string `json:"schema"`
65+
}
66+
67+
if err := json.Unmarshal(vectorData, &vector); err != nil {
68+
log.Printf("❌ %q: invalid vector JSON data: %s\n", path, err)
69+
invalid++
70+
return nil
71+
}
72+
73+
if vector.Schema == "" {
74+
log.Printf("❌ %q: no schema specified\n", path)
75+
noSchema++
76+
return nil
77+
}
78+
79+
if missingSchemas[vector.Schema] {
80+
log.Printf("⚠️ %q: ignoring missing schema %q\n", path, vector.Schema)
81+
ignored++
82+
return nil
83+
}
84+
85+
schemaPath := filepath.Join(*schemaDirectory, vector.Schema)
86+
if _, err := os.Stat(schemaPath); os.IsNotExist(err) {
87+
log.Printf("❌ %q: referenced schema %q not found\n", path, vector.Schema)
88+
invalid++
89+
return nil
90+
}
91+
92+
schema, err := schemaCompiler.Compile(schemaPath)
93+
if err != nil {
94+
log.Printf("❌ %q: invalid schema %q: %s\n", path, vector.Schema, err)
95+
invalid++
96+
return nil
97+
}
98+
99+
var instance any
100+
if err := json.Unmarshal(vectorData, &instance); err != nil {
101+
log.Printf("❌ %q: invalid vector JSON data: %s\n", path, err)
102+
invalid++
103+
return nil
104+
}
105+
106+
if err := schema.Validate(instance); err != nil {
107+
log.Printf("❌ %q: vector doesn't validate with schema: %s\n", path, err)
108+
invalid++
109+
return nil
110+
}
111+
112+
log.Printf("✅ %q: validates with %q\n", path, vector.Schema)
113+
valid++
114+
return nil
115+
})
116+
if err != nil {
117+
fmt.Printf("Error walking directory: %v\n", err)
118+
os.Exit(1)
119+
}
120+
}
121+
122+
log.Printf("linted %d vector files\n", total)
123+
log.Printf("valid: %d\n", valid)
124+
log.Printf("invalid: %d\n", invalid)
125+
log.Printf("no schema: %d\n", noSchema)
126+
log.Printf("ignored: %d\n", ignored)
127+
128+
os.Exit(invalid)
129+
}
130+
131+
var (
132+
// TODO(XXX): some _v1 vectors reference schema files that don't exist. Until fixed, ignore these schemas.
133+
missingSchemas = map[string]bool{
134+
// testvectors_v1/aes_ff1_base*_test.json:
135+
"fpe_str_test_schema.json": true,
136+
137+
// testvectors_v1/aes_ff1_radix*_test.json:
138+
"fpe_list_test_schema.json": true,
139+
140+
// testvectors_v1/ec_prime_order_curves_test.json:
141+
"ec_curve_test_schema.json": true,
142+
143+
// testvectors_v1/ecdsa_secp256k1_sha256_bitcoin_test.json
144+
"ecdsa_bitcoin_verify_schema.json": true,
145+
146+
// testvectors_v1/pbes2_hmacsha*_aes_*_test.json:
147+
"pbe_test_schema.json": true,
148+
149+
// testvectors_v1/pbkdf2_hmacsha*_test.json:
150+
"pbkdf_test_schema.json": true,
151+
152+
// testvectors_v1/rsa_pss_*_sha*_mgf*_params_test.json
153+
// testvectors_v1/rsa_pss_misc_params_test.json:
154+
"rsassa_pss_with_parameters_verify_schema.json": true,
155+
}
156+
157+
customFormats = []jsonschema.Format{
158+
{
159+
Name: "Asn",
160+
// TODO(XXX): validate "Asn" format.
161+
Validate: noValidateFormat,
162+
},
163+
{
164+
Name: "Der",
165+
// TODO(XXX): validate "Der" format.
166+
Validate: noValidateFormat,
167+
},
168+
{
169+
Name: "EcCurve",
170+
// TODO(XXX): validate "EcCurve" format.
171+
Validate: noValidateFormat,
172+
},
173+
{
174+
Name: "HexBytes",
175+
// TODO(XXX): validate "HexBytes" format.
176+
Validate: noValidateFormat,
177+
},
178+
{
179+
Name: "BigInt",
180+
// TODO(XXX): validate "BigInt" format.
181+
Validate: noValidateFormat,
182+
},
183+
{
184+
Name: "Pem",
185+
// TODO(XXX): validate "Pem" format.
186+
Validate: noValidateFormat,
187+
},
188+
}
189+
)
190+
191+
// noValidateFormat is a stand-in Format.Validate callback: it ignores its
// argument and always reports success, so any value is accepted.
func noValidateFormat(any) error {
	return nil
}

0 commit comments

Comments
 (0)