Skip to content

Commit 6e10afe

Browse files
authored
[SchemaRegistry] add lru cache to avro serializer (#20813)
Fixes #20712. Performance comparison results: see #20712 (comment).
1 parent 1bc3354 commit 6e10afe

24 files changed

+174
-255
lines changed

sdk/schemaregistry/azure-schemaregistry-avroserializer/azure/schemaregistry/serializer/avroserializer/_schema_registry_avro_serializer.py

Lines changed: 17 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@
2323
# IN THE SOFTWARE.
2424
#
2525
# --------------------------------------------------------------------------
26+
try:
27+
from functools import lru_cache
28+
except ImportError:
29+
from backports.functools_lru_cache import lru_cache
2630
from io import BytesIO
2731
from typing import Any, Dict, Mapping
2832
import avro
@@ -59,8 +63,6 @@ def __init__(self, **kwargs):
5963
if self._auto_register_schemas
6064
else self._schema_registry_client.get_schema_id
6165
)
62-
self._id_to_schema = {}
63-
self._schema_to_id = {}
6466
self._user_input_schema_cache = {}
6567

6668
def __enter__(self):
@@ -79,8 +81,9 @@ def close(self):
7981
"""
8082
self._schema_registry_client.close()
8183

82-
def _get_schema_id(self, schema_name, schema, **kwargs):
83-
# type: (str, avro.schema.Schema, Any) -> str
84+
@lru_cache(maxsize=128)
85+
def _get_schema_id(self, schema_name, schema_str, **kwargs):
86+
# type: (str, str, Any) -> str
8487
"""
8588
Get schema id from local cache with the given schema.
8689
If there is no item in the local cache, get schema id from the service and cache it.
@@ -92,17 +95,12 @@ def _get_schema_id(self, schema_name, schema, **kwargs):
9295
:return: Schema Id
9396
:rtype: str
9497
"""
95-
schema_str = str(schema)
96-
try:
97-
return self._schema_to_id[schema_str]
98-
except KeyError:
99-
schema_id = self._auto_register_schema_func(
100-
self._schema_group, schema_name, "Avro", schema_str, **kwargs
101-
).schema_id
102-
self._schema_to_id[schema_str] = schema_id
103-
self._id_to_schema[schema_id] = schema_str
104-
return schema_id
98+
schema_id = self._auto_register_schema_func(
99+
self._schema_group, schema_name, "Avro", schema_str, **kwargs
100+
).schema_id
101+
return schema_id
105102

103+
@lru_cache(maxsize=128)
106104
def _get_schema(self, schema_id, **kwargs):
107105
# type: (str, Any) -> str
108106
"""
@@ -112,15 +110,10 @@ def _get_schema(self, schema_id, **kwargs):
112110
:param str schema_id: Schema id
113111
:return: Schema content
114112
"""
115-
try:
116-
return self._id_to_schema[schema_id]
117-
except KeyError:
118-
schema_str = self._schema_registry_client.get_schema(
119-
schema_id, **kwargs
120-
).schema_content
121-
self._id_to_schema[schema_id] = schema_str
122-
self._schema_to_id[schema_str] = schema_id
123-
return schema_str
113+
schema_str = self._schema_registry_client.get_schema(
114+
schema_id, **kwargs
115+
).schema_content
116+
return schema_str
124117

125118
def serialize(self, value, **kwargs):
126119
# type: (Mapping[str, Any], Any) -> bytes
@@ -147,7 +140,7 @@ def serialize(self, value, **kwargs):
147140
cached_schema = parsed_schema
148141

149142
record_format_identifier = b"\0\0\0\0"
150-
schema_id = self._get_schema_id(cached_schema.fullname, cached_schema, **kwargs)
143+
schema_id = self._get_schema_id(cached_schema.fullname, str(cached_schema), **kwargs)
151144
data_bytes = self._avro_serializer.serialize(value, cached_schema)
152145

153146
stream = BytesIO()

sdk/schemaregistry/azure-schemaregistry-avroserializer/samples/avro_serializer.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,12 @@
2929
from azure.schemaregistry import SchemaRegistryClient
3030
from azure.schemaregistry.serializer.avroserializer import SchemaRegistryAvroSerializer
3131

32-
TENANT_ID=os.environ['SCHEMA_REGISTRY_AZURE_TENANT_ID']
33-
CLIENT_ID=os.environ['SCHEMA_REGISTRY_AZURE_CLIENT_ID']
34-
CLIENT_SECRET=os.environ['SCHEMA_REGISTRY_AZURE_CLIENT_SECRET']
32+
TENANT_ID=os.environ['AZURE_TENANT_ID']
33+
CLIENT_ID=os.environ['AZURE_CLIENT_ID']
34+
CLIENT_SECRET=os.environ['AZURE_CLIENT_SECRET']
3535

36-
SCHEMA_REGISTRY_FULLY_QUALIFIED_NAMESPACE=os.environ['SCHEMA_REGISTRY_FULLY_QUALIFIED_NAMESPACE']
37-
GROUP_NAME=os.environ['SCHEMA_REGISTRY_GROUP']
36+
SCHEMAREGISTRY_FULLY_QUALIFIED_NAMESPACE=os.environ['SCHEMAREGISTRY_FULLY_QUALIFIED_NAMESPACE']
37+
GROUP_NAME=os.environ['SCHEMAREGISTRY_GROUP']
3838
SCHEMA_STRING = """
3939
{"namespace": "example.avro",
4040
"type": "record",
@@ -79,7 +79,7 @@ def deserialize(serializer, bytes_payload):
7979

8080

8181
if __name__ == '__main__':
82-
schema_registry = SchemaRegistryClient(endpoint=SCHEMA_REGISTRY_FULLY_QUALIFIED_NAMESPACE, credential=token_credential)
82+
schema_registry = SchemaRegistryClient(endpoint=SCHEMAREGISTRY_FULLY_QUALIFIED_NAMESPACE, credential=token_credential)
8383
serializer = SchemaRegistryAvroSerializer(client=schema_registry, group_name=GROUP_NAME, auto_register_schemas=True)
8484
bytes_data_ben, bytes_data_alice = serialize(serializer)
8585
dict_data_ben = deserialize(serializer, bytes_data_ben)

sdk/schemaregistry/azure-schemaregistry-avroserializer/samples/eventhub_receive_integration.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
EVENTHUB_CONNECTION_STR = os.environ['EVENT_HUB_CONN_STR']
2020
EVENTHUB_NAME = os.environ['EVENT_HUB_NAME']
2121

22-
SCHEMA_REGISTRY_FULLY_QUALIFIED_NAMESPACE = os.environ['SCHEMA_REGISTRY_FULLY_QUALIFIED_NAMESPACE']
23-
GROUP_NAME = os.environ['SCHEMA_REGISTRY_GROUP']
22+
SCHEMAREGISTRY_FULLY_QUALIFIED_NAMESPACE = os.environ['SCHEMAREGISTRY_FULLY_QUALIFIED_NAMESPACE']
23+
GROUP_NAME = os.environ['SCHEMAREGISTRY_GROUP']
2424

2525

2626
def on_event(partition_context, event):
@@ -48,7 +48,7 @@ def on_event(partition_context, event):
4848
# TODO: after 'azure-schemaregistry==1.0.0b3' is released, update 'endpoint' to 'fully_qualified_namespace'
4949
avro_serializer = SchemaRegistryAvroSerializer(
5050
client=SchemaRegistryClient(
51-
endpoint=SCHEMA_REGISTRY_FULLY_QUALIFIED_NAMESPACE,
51+
endpoint=SCHEMAREGISTRY_FULLY_QUALIFIED_NAMESPACE,
5252
credential=DefaultAzureCredential()
5353
),
5454
group_name=GROUP_NAME,

sdk/schemaregistry/azure-schemaregistry-avroserializer/samples/eventhub_send_integration.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
EVENTHUB_CONNECTION_STR = os.environ['EVENT_HUB_CONN_STR']
2121
EVENTHUB_NAME = os.environ['EVENT_HUB_NAME']
2222

23-
SCHEMA_REGISTRY_FULLY_QUALIFIED_NAMESPACE = os.environ['SCHEMA_REGISTRY_FULLY_QUALIFIED_NAMESPACE']
24-
GROUP_NAME = os.environ['SCHEMA_REGISTRY_GROUP']
23+
SCHEMAREGISTRY_FULLY_QUALIFIED_NAMESPACE = os.environ['SCHEMAREGISTRY_FULLY_QUALIFIED_NAMESPACE']
24+
GROUP_NAME = os.environ['SCHEMAREGISTRY_GROUP']
2525

2626
SCHEMA_STRING = """
2727
{"namespace": "example.avro",
@@ -62,7 +62,7 @@ def send_event_data_batch(producer, serializer):
6262
# TODO: after 'azure-schemaregistry==1.0.0b3' is released, update 'endpoint' to 'fully_qualified_namespace'
6363
avro_serializer = SchemaRegistryAvroSerializer(
6464
client=SchemaRegistryClient(
65-
endpoint=SCHEMA_REGISTRY_FULLY_QUALIFIED_NAMESPACE,
65+
endpoint=SCHEMAREGISTRY_FULLY_QUALIFIED_NAMESPACE,
6666
credential=DefaultAzureCredential()
6767
),
6868
group_name=GROUP_NAME,

sdk/schemaregistry/azure-schemaregistry-avroserializer/setup.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
# -------------------------------------------------------------------------
88

99
import re
10+
import sys
1011
import os.path
1112
from io import open
1213
from setuptools import find_packages, setup
@@ -39,6 +40,12 @@
3940
'azure.schemaregistry',
4041
'azure.schemaregistry.serializer'
4142
]
43+
install_packages = [
44+
'azure-schemaregistry==1.0.0b2',
45+
'avro<2.0.0,>=1.10.0'
46+
]
47+
if sys.version_info < (3,0):
48+
install_packages.append('backports.functools-lru-cache>=1.6.4')
4249

4350
setup(
4451
name=PACKAGE_NAME,
@@ -64,8 +71,5 @@
6471
],
6572
zip_safe=False,
6673
packages=find_packages(exclude=exclude_packages),
67-
install_requires=[
68-
'azure-schemaregistry==1.0.0b2',
69-
'avro<2.0.0,>=1.10.0'
70-
]
74+
install_requires=install_packages
7175
)

sdk/schemaregistry/azure-schemaregistry-avroserializer/tests/recordings/test_avro_serializer.test_basic_sr_avro_serializer_with_auto_register_schemas.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,12 @@ interactions:
2323
uri: https://fake_resource.servicebus.windows.net/$schemagroups/fakegroup/schemas/example.avro.User?api-version=2017-04
2424
response:
2525
body:
26-
string: '{"id":"fc61e4d3e31b46f6a758fa1b67f35cc5"}'
26+
string: '{"id":"f666e373299048fabaa4296f5dbfed46"}'
2727
headers:
2828
content-type:
2929
- application/json
3030
date:
31-
- Fri, 24 Sep 2021 19:54:45 GMT
31+
- Tue, 28 Sep 2021 22:27:25 GMT
3232
location:
3333
- https://swathip-test-eventhubs.servicebus.windows.net:443/$schemagroups/fakegroup/schemas/example.avro.User/versions/1?api-version=2017-04
3434
server:
@@ -38,9 +38,9 @@ interactions:
3838
transfer-encoding:
3939
- chunked
4040
x-schema-id:
41-
- fc61e4d3e31b46f6a758fa1b67f35cc5
41+
- f666e373299048fabaa4296f5dbfed46
4242
x-schema-id-location:
43-
- https://swathip-test-eventhubs.servicebus.windows.net:443/$schemagroups/getschemabyid/fc61e4d3e31b46f6a758fa1b67f35cc5?api-version=2017-04
43+
- https://swathip-test-eventhubs.servicebus.windows.net:443/$schemagroups/getschemabyid/f666e373299048fabaa4296f5dbfed46?api-version=2017-04
4444
x-schema-type:
4545
- Avro
4646
x-schema-version:

sdk/schemaregistry/azure-schemaregistry-avroserializer/tests/recordings/test_avro_serializer.test_basic_sr_avro_serializer_without_auto_register_schemas.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,12 @@ interactions:
2323
uri: https://fake_resource.servicebus.windows.net/$schemagroups/fakegroup/schemas/example.avro.User?api-version=2017-04
2424
response:
2525
body:
26-
string: '{"id":"fc61e4d3e31b46f6a758fa1b67f35cc5"}'
26+
string: '{"id":"f666e373299048fabaa4296f5dbfed46"}'
2727
headers:
2828
content-type:
2929
- application/json
3030
date:
31-
- Fri, 24 Sep 2021 19:54:47 GMT
31+
- Tue, 28 Sep 2021 22:27:26 GMT
3232
location:
3333
- https://swathip-test-eventhubs.servicebus.windows.net:443/$schemagroups/fakegroup/schemas/example.avro.User/versions/1?api-version=2017-04
3434
server:
@@ -38,9 +38,9 @@ interactions:
3838
transfer-encoding:
3939
- chunked
4040
x-schema-id:
41-
- fc61e4d3e31b46f6a758fa1b67f35cc5
41+
- f666e373299048fabaa4296f5dbfed46
4242
x-schema-id-location:
43-
- https://swathip-test-eventhubs.servicebus.windows.net:443/$schemagroups/getschemabyid/fc61e4d3e31b46f6a758fa1b67f35cc5?api-version=2017-04
43+
- https://swathip-test-eventhubs.servicebus.windows.net:443/$schemagroups/getschemabyid/f666e373299048fabaa4296f5dbfed46?api-version=2017-04
4444
x-schema-type:
4545
- Avro
4646
x-schema-version:

sdk/schemaregistry/azure-schemaregistry-avroserializer/tests/schemaregistry_preparer.py

Lines changed: 0 additions & 73 deletions
This file was deleted.

sdk/schemaregistry/azure-schemaregistry-avroserializer/tests/test_avro_serializer.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -87,14 +87,11 @@ def test_basic_sr_avro_serializer_with_auto_register_schemas(self, schemaregistr
8787
encoded_data = sr_avro_serializer.serialize(dict_data, schema=schema_str)
8888

8989
assert schema_str in sr_avro_serializer._user_input_schema_cache
90-
assert str(avro.schema.parse(schema_str)) in sr_avro_serializer._schema_to_id
9190

9291
assert encoded_data[0:4] == b'\0\0\0\0'
9392
schema_id = sr_client.get_schema_id(schemaregistry_group, schema.fullname, "Avro", str(schema)).schema_id
9493
assert encoded_data[4:36] == schema_id.encode("utf-8")
9594

96-
assert schema_id in sr_avro_serializer._id_to_schema
97-
9895
decoded_data = sr_avro_serializer.deserialize(encoded_data)
9996
assert decoded_data["name"] == u"Ben"
10097
assert decoded_data["favorite_number"] == 7
@@ -115,14 +112,11 @@ def test_basic_sr_avro_serializer_without_auto_register_schemas(self, schemaregi
115112
encoded_data = sr_avro_serializer.serialize(dict_data, schema=schema_str)
116113

117114
assert schema_str in sr_avro_serializer._user_input_schema_cache
118-
assert str(avro.schema.parse(schema_str)) in sr_avro_serializer._schema_to_id
119115

120116
assert encoded_data[0:4] == b'\0\0\0\0'
121117
schema_id = sr_client.get_schema_id(schemaregistry_group, schema.fullname, "Avro", str(schema)).schema_id
122118
assert encoded_data[4:36] == schema_id.encode("utf-8")
123119

124-
assert schema_id in sr_avro_serializer._id_to_schema
125-
126120
decoded_data = sr_avro_serializer.deserialize(encoded_data)
127121
assert decoded_data["name"] == u"Ben"
128122
assert decoded_data["favorite_number"] == 7

sdk/schemaregistry/azure-schemaregistry/samples/async_samples/sample_code_schemaregistry_async.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,21 +33,21 @@
3333

3434
def create_client():
3535
# [START create_sr_client_async]
36-
SCHEMA_REGISTRY_FQN = os.environ['SCHEMA_REGISTRY_FULLY_QUALIFIED_NAMESPACE']
36+
SCHEMAREGISTRY_FQN = os.environ['SCHEMAREGISTRY_FULLY_QUALIFIED_NAMESPACE']
3737
token_credential = DefaultAzureCredential()
38-
schema_registry_client = SchemaRegistryClient(fully_qualified_namespace=SCHEMA_REGISTRY_FQN, credential=token_credential)
38+
schema_registry_client = SchemaRegistryClient(fully_qualified_namespace=SCHEMAREGISTRY_FQN, credential=token_credential)
3939
# [END create_sr_client_async]
40-
TENANT_ID = os.environ['SCHEMA_REGISTRY_AZURE_TENANT_ID']
41-
CLIENT_ID = os.environ['SCHEMA_REGISTRY_AZURE_CLIENT_ID']
42-
CLIENT_SECRET = os.environ['SCHEMA_REGISTRY_AZURE_CLIENT_SECRET']
40+
TENANT_ID = os.environ['AZURE_TENANT_ID']
41+
CLIENT_ID = os.environ['AZURE_CLIENT_ID']
42+
CLIENT_SECRET = os.environ['AZURE_CLIENT_SECRET']
4343
token_credential = ClientSecretCredential(TENANT_ID, CLIENT_ID, CLIENT_SECRET)
44-
schema_registry_client = SchemaRegistryClient(fully_qualified_namespace=SCHEMA_REGISTRY_FQN, credential=token_credential)
44+
schema_registry_client = SchemaRegistryClient(fully_qualified_namespace=SCHEMAREGISTRY_FQN, credential=token_credential)
4545
return schema_registry_client, token_credential
4646

4747

4848
async def register_schema(schema_registry_client):
4949
# [START register_schema_async]
50-
GROUP_NAME = os.environ['SCHEMA_REGISTRY_GROUP']
50+
GROUP_NAME = os.environ['SCHEMAREGISTRY_GROUP']
5151
NAME = 'your-schema-name'
5252
FORMAT = SchemaFormat.AVRO
5353
SCHEMA_DEFINITION = """{"namespace":"example.avro","type":"record","name":"User","fields":[{"name":"name","type":"string"},{"name":"favorite_number","type":["int","null"]},{"name":"favorite_color","type":["string","null"]}]}"""
@@ -66,7 +66,7 @@ async def get_schema(schema_registry_client, id):
6666

6767

6868
async def get_schema_id(schema_registry_client):
69-
group_name = os.environ['SCHEMA_REGISTRY_GROUP']
69+
group_name = os.environ['SCHEMAREGISTRY_GROUP']
7070
name = 'your-schema-name'
7171
format = SchemaFormat.AVRO
7272
schema_definition = """{"namespace":"example.avro","type":"record","name":"User","fields":[{"name":"name","type":"string"},{"name":"favorite_number","type":["int","null"]},{"name":"favorite_color","type":["string","null"]}]}"""

0 commit comments

Comments
 (0)