-
Notifications
You must be signed in to change notification settings - Fork 564
/
Copy pathbench_json_schema.py
77 lines (67 loc) · 2.07 KB
/
bench_json_schema.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from outlines_core.fsm.json_schema import build_regex_from_schema
from outlines.caching import cache_disabled
from outlines.fsm.guide import RegexGuide
from .common import setup_tokenizer # noqa: E402
# JSON Schema (kept as raw JSON text) for a small "Character" object.
# It declares four required properties: "name" (string, maxLength 10),
# "age" (integer), "armor" (a "$ref" to the "Armor" string enum under
# "$defs") and "strength" (integer).
# NOTE: the backslash after the "strength" entry is a line continuation
# inside the triple-quoted literal, so no newline is stored at that point;
# the text is still valid JSON.
simple_schema = """{
"$defs": {
"Armor": {
"enum": ["leather", "chainmail", "plate"],
"title": "Armor",
"type": "string"
}
},
"properties": {
"name": {"maxLength": 10, "title": "Name", "type": "string"},
"age": {"title": "Age", "type": "integer"},
"armor": {"$ref": "#/$defs/Armor"},
"strength": {"title": "Strength", "type": "integer"}\
},
"required": ["name", "age", "armor", "strength"],
"title": "Character",
"type": "object"
}"""
# JSON Schema (kept as raw JSON text, declared against draft-04) titled
# "Schema for a recording".
# The root object requires "id" (number), "work" (a nested object whose
# "composer" is a "$ref") and "recording_artists" (an array of "$ref" items).
# Both references point at the shared "artist" entry under "definitions",
# which itself requires "id", "name" and a "functions" array of strings.
complex_schema = """{
"$schema": "http://json-schema.org/draft-04/schema#",
"title": "Schema for a recording",
"type": "object",
"definitions": {
"artist": {
"type": "object",
"properties": {
"id": {"type": "number"},
"name": {"type": "string"},
"functions": {
"type": "array",
"items": {"type": "string"}
}
},
"required": ["id", "name", "functions"]
}
},
"properties": {
"id": {"type": "number"},
"work": {
"type": "object",
"properties": {
"id": {"type": "number"},
"name": {"type": "string"},
"composer": {"$ref": "#/definitions/artist"}
}
},
"recording_artists": {
"type": "array",
"items": {"$ref": "#/definitions/artist"}
}
},
"required": ["id", "work", "recording_artists"]
}"""
# Registry of benchmark inputs: schema name -> raw JSON Schema text.
# Insertion order (simple first, then complex) is preserved.
schemas = {
    "simple_schema": simple_schema,
    "complex_schema": complex_schema,
}
class JsonSchemaBenchmark:
    """Benchmark converting a JSON schema string into a ``RegexGuide``.

    NOTE(review): the ``params`` / ``setup`` / ``time_*`` layout looks like
    the airspeed-velocity (asv) convention, where each entry of ``params``
    is handed to ``setup`` and to the timed method -- confirm against the
    benchmark runner configuration.
    """

    # One benchmark case per schema name registered in ``schemas``.
    params = schemas.keys()

    def setup(self, schema_name):
        """Create the tokenizer and select the schema text for this case.

        Args:
            schema_name: Key into the module-level ``schemas`` mapping.
        """
        self.tokenizer = setup_tokenizer()
        self.schema = schemas[schema_name]

    @cache_disabled()
    def time_json_schema_to_fsm(self, schema_name):
        """Build a regex from the selected schema and compile it into a guide.

        The work runs under ``cache_disabled()`` -- presumably so that no
        cached result short-circuits the measured work; verify in
        ``outlines.caching``. The resulting guide is discarded.

        Args:
            schema_name: Unused here; the schema was resolved in ``setup``.
        """
        RegexGuide.from_regex(
            build_regex_from_schema(self.schema),
            self.tokenizer,
        )