From 1e42b98707144c3ea07caccd8533543654358b3a Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Fri, 11 Apr 2025 11:37:16 +0200 Subject: [PATCH 1/2] Optimize `ArchiveField` reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This consists of two optimizations: - It prefers the `archive_field` over the `db_field`, and can thus avoid some queries for the `db_field`, as some queries in API are explicitly `defer`-ing that load. - It also avoids initializing the `ArchiveService.storage_hash`, which we don’t need for reads (only writes), and which would require us to load the related repository and owner. --- shared/django_apps/utils/model_utils.py | 43 +++++++++++++------------ 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/shared/django_apps/utils/model_utils.py b/shared/django_apps/utils/model_utils.py index 03d30a871..b9b1bd15c 100644 --- a/shared/django_apps/utils/model_utils.py +++ b/shared/django_apps/utils/model_utils.py @@ -1,7 +1,8 @@ -import json import logging from typing import Any, Callable, Optional +import orjson + from shared.api_archive.archive import ArchiveService from shared.storage.exceptions import FileNotInStorageError from shared.utils.ReportEncoder import ReportEncoder @@ -83,14 +84,19 @@ def __set_name__(self, owner, name): self.archive_field_name = "_" + name + "_storage_path" self.cached_value_property_name = f"__{self.public_name}_cached_value" - def _get_value_from_archive(self, obj): - repository = obj.get_repository() - archive_service = ArchiveService(repository=repository) + def __get__(self, obj, objtype=None): + # check cached value first + value = getattr(obj, self.cached_value_property_name, None) + if value is not None: + return value + + # then the archive field archive_field = getattr(obj, self.archive_field_name) - if archive_field: + if archive_field is not None: + archive_service = ArchiveService(repository=None) try: file_str = archive_service.read_file(archive_field) - return self.rehydrate_fn(obj, json.loads(file_str)) + value = self.rehydrate_fn(obj, orjson.loads(file_str)) except FileNotInStorageError: log.error( "Archive enabled field not in storage", @@ -100,25 +106,20 @@ def _get_value_from_archive(self, obj): commit=obj.get_commitid(), ), ) - else: + + # then the DB field, possibly loaded on-demand + elif (db_field := getattr(obj, self.db_field_name)) is not None: + value = self.rehydrate_fn(obj, db_field) + + # and then last, resort to the default value + if value is None: log.debug( "Both db_field and archive_field are None", - extra=dict( - object_id=obj.id, - commit=obj.get_commitid(), - ), + extra=dict(object_id=obj.id, commit=obj.get_commitid()), ) - return self.default_value_class() + value = self.default_value_class() - def __get__(self, obj, objtype=None): - cached_value = getattr(obj, self.cached_value_property_name, None) - if cached_value: - return cached_value - db_field = getattr(obj, self.db_field_name) - if db_field is not None: - value = self.rehydrate_fn(obj, db_field) - else: - value = self._get_value_from_archive(obj) + # lastly, we want to cache re retrieved value for future use setattr(obj, self.cached_value_property_name, value) return value From f8baa459666b0e6a125e38c400dd3ba1c9b68c1e Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Fri, 11 Apr 2025 13:00:15 +0200 Subject: [PATCH 2/2] trigger CI