From 0944ff7aaa5e9849927a5a8e4d6b4c00a1ab0fa8 Mon Sep 17 00:00:00 2001 From: Martin Dobias Date: Mon, 17 Oct 2022 23:56:05 +0200 Subject: [PATCH] Fix memory leak related to MerginProject object (fixes #78) It turns out that we are creating MerginProject objects way too much, and the cleanup of it may not be handled properly because of a cyclic reference between MerginProject and its internal GeoDiff context object. It is going to be both safer and faster to create MerginProject for a particular working directory once and then keep it cached. Fun fact: the leak is not showing up in "top" commend, only in docker's stats about memory... --- dbsync.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/dbsync.py b/dbsync.py index 86e7857..be3c311 100644 --- a/dbsync.py +++ b/dbsync.py @@ -173,9 +173,28 @@ def _print_mergin_changes(diff_dict): print(" removed: " + item['path']) +# Dictionary used by _get_mergin_project() function below. +# key = path to a local dir with Mergin project, value = cached MerginProject object +cached_mergin_project_objects = {} + + +def _get_mergin_project(work_path): + """ + Returns a cached MerginProject object or creates one if it does not exist yet. + This is to avoid creating many of these objects (e.g. every pull/push) because it does + initialization of geodiff as well, so things should be 1. a bit faster, and 2. safer. + (Safer because we are having a cycle of refs between GeoDiff and MerginProject objects + related to logging - and untangling those would need some extra calls when we are done + with MerginProject. But since we use the object all the time, it's better to cache it anyway.) + """ + if work_path not in cached_mergin_project_objects: + cached_mergin_project_objects[work_path] = MerginProject(work_path) + return cached_mergin_project_objects[work_path] + + def _get_project_version(work_path): """ Returns the current version of the project """ - mp = MerginProject(work_path) + mp = _get_mergin_project(work_path) return mp.metadata["version"] @@ -234,7 +253,7 @@ def pull(conn_cfg, mc): _check_has_working_dir(work_dir) _check_has_sync_file(gpkg_full_path) - mp = MerginProject(work_dir) + mp = _get_mergin_project(work_dir) mp.set_tables_to_skip(ignored_tables) if mp.geodiff is None: raise DbSyncError("Mergin Maps client installation problem: geodiff not available") @@ -322,7 +341,7 @@ def status(conn_cfg, mc): _check_has_sync_file(gpkg_full_path) # get basic information - mp = MerginProject(work_dir) + mp = _get_mergin_project(work_dir) mp.set_tables_to_skip(ignored_tables) if mp.geodiff is None: raise DbSyncError("Mergin Maps client installation problem: geodiff not available") @@ -394,7 +413,7 @@ def push(conn_cfg, mc): _check_has_working_dir(work_dir) _check_has_sync_file(gpkg_full_path) - mp = MerginProject(work_dir) + mp = _get_mergin_project(work_dir) mp.set_tables_to_skip(ignored_tables) if mp.geodiff is None: raise DbSyncError("Mergin Maps client installation problem: geodiff not available")