From a9f9a2e2cc7632b024c16299f9c38ff4d09c4d6d Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Thu, 21 Aug 2025 23:10:17 -0700 Subject: [PATCH 1/2] improve performance with validator instances --- mapillary_tools/authenticate.py | 2 +- mapillary_tools/config.py | 5 +++++ mapillary_tools/geotag/options.py | 5 ++++- mapillary_tools/process_geotag_properties.py | 15 ++++----------- mapillary_tools/serializer/description.py | 18 +++++++++++++----- mapillary_tools/upload.py | 3 +-- 6 files changed, 28 insertions(+), 20 deletions(-) diff --git a/mapillary_tools/authenticate.py b/mapillary_tools/authenticate.py index e89a00990..f7ae4bfa5 100644 --- a/mapillary_tools/authenticate.py +++ b/mapillary_tools/authenticate.py @@ -165,7 +165,7 @@ def _prompt(message: str) -> str: def _validate_profile(user_items: config.UserItem) -> config.UserItem: try: - jsonschema.validate(user_items, config.UserItemSchema) + config.UserItemSchemaValidator.validate(user_items) except jsonschema.ValidationError as ex: raise exceptions.MapillaryBadParameterError( f"Invalid profile format: {ex.message}" diff --git a/mapillary_tools/config.py b/mapillary_tools/config.py index 206e03771..d08b8a7fe 100644 --- a/mapillary_tools/config.py +++ b/mapillary_tools/config.py @@ -6,6 +6,8 @@ import typing as T from typing import TypedDict +import jsonschema + if sys.version_info >= (3, 11): from typing import Required else: @@ -50,6 +52,9 @@ class UserItem(TypedDict, total=False): } +UserItemSchemaValidator = jsonschema.Draft202012Validator(UserItemSchema) + + def _load_config(config_path: str) -> configparser.ConfigParser: config = configparser.ConfigParser() # Override to not change option names (by default it will lower them) diff --git a/mapillary_tools/geotag/options.py b/mapillary_tools/geotag/options.py index 81e1cf4d3..19081f239 100644 --- a/mapillary_tools/geotag/options.py +++ b/mapillary_tools/geotag/options.py @@ -173,8 +173,11 @@ class InterpolationOption: } +SourceOptionSchemaValidator = jsonschema.Draft202012Validator(SourceOptionSchema) + + def validate_option(instance): - jsonschema.validate(instance=instance, schema=SourceOptionSchema) + SourceOptionSchemaValidator.validate(instance=instance) if __name__ == "__main__": diff --git a/mapillary_tools/process_geotag_properties.py b/mapillary_tools/process_geotag_properties.py index 2d5215055..c46879efa 100644 --- a/mapillary_tools/process_geotag_properties.py +++ b/mapillary_tools/process_geotag_properties.py @@ -304,19 +304,12 @@ def _validate_metadatas( # TypeError: __init__() missing 3 required positional arguments: 'image_time', 'gpx_start_time', and 'gpx_end_time' # See https://stackoverflow.com/a/61432070 good_metadatas, error_metadatas = types.separate_errors(metadatas) - map_results = utils.mp_map_maybe( - validate_and_fail_metadata, - T.cast(T.Iterable[types.Metadata], good_metadatas), - num_processes=num_processes, - ) validated_metadatas = list( - tqdm( - map_results, - desc="Validating metadatas", - unit="metadata", - disable=LOG.getEffectiveLevel() <= logging.DEBUG, - total=len(good_metadatas), + utils.mp_map_maybe( + validate_and_fail_metadata, + T.cast(T.Iterable[types.Metadata], good_metadatas), + num_processes=num_processes, ) ) diff --git a/mapillary_tools/serializer/description.py b/mapillary_tools/serializer/description.py index 4c0c56d72..8f4be4412 100644 --- a/mapillary_tools/serializer/description.py +++ b/mapillary_tools/serializer/description.py @@ -259,6 +259,11 @@ def _merge_schema(*schemas: dict) -> dict: ) +ImageDescriptionFileSchemaValidator = jsonschema.Draft202012Validator( + ImageDescriptionFileSchema +) + + VideoDescriptionFileSchema = _merge_schema( VideoDescriptionSchema, { @@ -295,9 +300,9 @@ def _merge_schema(*schemas: dict) -> dict: ) -ImageVideoDescriptionFileSchema = { - "oneOf": [VideoDescriptionFileSchema, ImageDescriptionFileSchema] -} +VideoDescriptionFileSchemaValidator = jsonschema.Draft202012Validator( + VideoDescriptionFileSchema +) class DescriptionJSONSerializer(BaseSerializer): @@ -520,7 +525,7 @@ def parse_capture_time(time: str) -> datetime.datetime: def validate_image_desc(desc: T.Any) -> None: try: - jsonschema.validate(instance=desc, schema=ImageDescriptionFileSchema) + ImageDescriptionFileSchemaValidator.validate(desc) except jsonschema.ValidationError as ex: # do not use str(ex) which is more verbose raise exceptions.MapillaryMetadataValidationError(ex.message) from ex @@ -533,7 +538,7 @@ def validate_image_desc(desc: T.Any) -> None: def validate_video_desc(desc: T.Any) -> None: try: - jsonschema.validate(instance=desc, schema=VideoDescriptionFileSchema) + VideoDescriptionFileSchemaValidator.validate(desc) except jsonschema.ValidationError as ex: # do not use str(ex) which is more verbose raise exceptions.MapillaryMetadataValidationError(ex.message) from ex @@ -584,4 +589,7 @@ def desc_file_to_exif(desc: ImageDescription) -> ImageDescription: if __name__ == "__main__": + ImageVideoDescriptionFileSchema = { + "oneOf": [VideoDescriptionFileSchema, ImageDescriptionFileSchema] + } print(json.dumps(ImageVideoDescriptionFileSchema, indent=4)) diff --git a/mapillary_tools/upload.py b/mapillary_tools/upload.py index 4a31ad84c..f0f09b58c 100644 --- a/mapillary_tools/upload.py +++ b/mapillary_tools/upload.py @@ -10,7 +10,6 @@ from pathlib import Path import humanize -import jsonschema import requests from tqdm import tqdm @@ -57,7 +56,7 @@ def upload( metadatas = _load_descs(_metadatas_from_process, import_paths, desc_path) - jsonschema.validate(instance=user_items, schema=config.UserItemSchema) + config.UserItemSchemaValidator.validate(user_items) # Setup the emitter -- the order matters here From 3cad0da81e7f00b9c4424992263183ab2c4ac079 Mon Sep 17 00:00:00 2001 From: Tao Peng Date: Mon, 25 Aug 2025 11:02:35 -0700 Subject: [PATCH 2/2] fix --- mapillary_tools/serializer/description.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mapillary_tools/serializer/description.py b/mapillary_tools/serializer/description.py index 8f4be4412..5206f1f4a 100644 --- a/mapillary_tools/serializer/description.py +++ b/mapillary_tools/serializer/description.py @@ -305,6 +305,11 @@ def _merge_schema(*schemas: dict) -> dict: ) +ImageVideoDescriptionFileSchema = { + "oneOf": [VideoDescriptionFileSchema, ImageDescriptionFileSchema] +} + + class DescriptionJSONSerializer(BaseSerializer): @override @classmethod @@ -589,7 +594,4 @@ def desc_file_to_exif(desc: ImageDescription) -> ImageDescription: if __name__ == "__main__": - ImageVideoDescriptionFileSchema = { - "oneOf": [VideoDescriptionFileSchema, ImageDescriptionFileSchema] - } print(json.dumps(ImageVideoDescriptionFileSchema, indent=4))