-
Notifications
You must be signed in to change notification settings - Fork 3.3k
[SchemaRegistry] add lru cache to avro serializer #20813
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7c48c74
3f47b00
a37f82e
eb5f749
8bfc931
62c1335
4bd0e5d
0bcf433
1d4daa1
00e602d
7ad7dc8
582f2b8
a76497f
2dd27be
942cddf
d8be661
5a9db19
18ab332
7453723
e3abf91
2b9baeb
0bf25b6
461ea57
34f4e38
52ec26b
52d87cf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,10 @@ | |
| # IN THE SOFTWARE. | ||
| # | ||
| # -------------------------------------------------------------------------- | ||
| try: | ||
| from functools import lru_cache | ||
| except ImportError: | ||
| from backports.functools_lru_cache import lru_cache | ||
| from io import BytesIO | ||
| from typing import Any, Dict, Mapping | ||
| import avro | ||
|
|
@@ -59,8 +63,6 @@ def __init__(self, **kwargs): | |
| if self._auto_register_schemas | ||
| else self._schema_registry_client.get_schema_id | ||
| ) | ||
| self._id_to_schema = {} | ||
| self._schema_to_id = {} | ||
| self._user_input_schema_cache = {} | ||
|
|
||
| def __enter__(self): | ||
|
|
@@ -79,8 +81,9 @@ def close(self): | |
| """ | ||
| self._schema_registry_client.close() | ||
|
|
||
| def _get_schema_id(self, schema_name, schema, **kwargs): | ||
| # type: (str, avro.schema.Schema, Any) -> str | ||
| @lru_cache(maxsize=128) | ||
| def _get_schema_id(self, schema_name, schema_str, **kwargs): | ||
| # type: (str, str, Any) -> str | ||
| """ | ||
| Get schema id from local cache with the given schema. | ||
| If there is no item in the local cache, get schema id from the service and cache it. | ||
|
|
@@ -92,17 +95,12 @@ def _get_schema_id(self, schema_name, schema, **kwargs): | |
| :return: Schema Id | ||
| :rtype: str | ||
| """ | ||
| schema_str = str(schema) | ||
| try: | ||
| return self._schema_to_id[schema_str] | ||
| except KeyError: | ||
| schema_id = self._auto_register_schema_func( | ||
| self._schema_group, schema_name, "Avro", schema_str, **kwargs | ||
| ).schema_id | ||
| self._schema_to_id[schema_str] = schema_id | ||
| self._id_to_schema[schema_id] = schema_str | ||
| return schema_id | ||
| schema_id = self._auto_register_schema_func( | ||
| self._schema_group, schema_name, "Avro", schema_str, **kwargs | ||
| ).schema_id | ||
| return schema_id | ||
|
|
||
| @lru_cache(maxsize=128) | ||
| def _get_schema(self, schema_id, **kwargs): | ||
| # type: (str, Any) -> str | ||
| """ | ||
|
|
@@ -112,15 +110,10 @@ def _get_schema(self, schema_id, **kwargs): | |
| :param str schema_id: Schema id | ||
| :return: Schema content | ||
| """ | ||
| try: | ||
| return self._id_to_schema[schema_id] | ||
| except KeyError: | ||
| schema_str = self._schema_registry_client.get_schema( | ||
| schema_id, **kwargs | ||
| ).schema_content | ||
| self._id_to_schema[schema_id] = schema_str | ||
| self._schema_to_id[schema_str] = schema_id | ||
| return schema_str | ||
| schema_str = self._schema_registry_client.get_schema( | ||
| schema_id, **kwargs | ||
| ).schema_content | ||
| return schema_str | ||
|
|
||
| def serialize(self, value, **kwargs): | ||
| # type: (Mapping[str, Any], Any) -> bytes | ||
|
|
@@ -147,7 +140,7 @@ def serialize(self, value, **kwargs): | |
| cached_schema = parsed_schema | ||
|
|
||
| record_format_identifier = b"\0\0\0\0" | ||
| schema_id = self._get_schema_id(cached_schema.fullname, cached_schema, **kwargs) | ||
| schema_id = self._get_schema_id(cached_schema.fullname, str(cached_schema), **kwargs) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why cast I'm thinking of the accepted type of input maybe we could start by just supporting "str" first, because accepting bytes brings the problem of decoding bytes into string. do we know what type fastavro is expecting for the schema, string, bytes or dict?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I think removing
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. only support
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. keeping everything, except for I can deal with removing the |
||
| data_bytes = self._avro_serializer.serialize(value, cached_schema) | ||
|
|
||
| stream = BytesIO() | ||
|
|
||
This file was deleted.
Uh oh!
There was an error while loading. Please reload this page.