diff --git a/dbsync.py b/dbsync.py
index fe90a10..20a6a1d 100644
--- a/dbsync.py
+++ b/dbsync.py
@@ -63,6 +63,12 @@ def _check_has_sync_file(file_path):
         raise DbSyncError("The output GPKG file does not exist: " + file_path)
 
 
+def _drop_schema(conn, schema_name: str) -> None:
+    cur = conn.cursor()
+    cur.execute(sql.SQL("DROP SCHEMA IF EXISTS {} CASCADE").format(sql.Identifier(schema_name)))
+    conn.commit()
+
+
 def _check_schema_exists(conn, schema_name):
     cur = conn.cursor()
     cur.execute("SELECT EXISTS(SELECT 1 FROM pg_namespace WHERE nspname = %s)", (schema_name,))
@@ -595,7 +601,8 @@ def init(conn_cfg, mc, from_gpkg=True):
             # this is not a first run of db-sync init
             db_proj_info = _get_db_project_comment(conn, conn_cfg.base)
             if not db_proj_info:
-                raise DbSyncError("Base schema exists but missing which project it belongs to")
+                raise DbSyncError("Base schema exists but missing which project it belongs to. "
+                                  f"This may be a result of a previously failed attempt to initialize DB sync. You can delete both schemas `{conn_cfg.base}` and `{conn_cfg.modified}` to fix this error and restart DB sync.")
             if "error" in db_proj_info:
                 changes_gpkg_base = _compare_datasets("sqlite", "", gpkg_full_path, conn_cfg.driver,
                                                       conn_cfg.conn_info, conn_cfg.base, ignored_tables,
@@ -671,9 +678,11 @@ def init(conn_cfg, mc, from_gpkg=True):
                 print("The GPKG file, base and modified schemas are already initialized and in sync")
                 return  # nothing to do
         elif modified_schema_exists:
-            raise DbSyncError(f"The 'modified' schema exists but the base schema is missing: {conn_cfg.base}")
+            raise DbSyncError(f"The 'modified' schema exists but the base schema is missing: {conn_cfg.base}. "
+                              f"This may be a result of a previously failed attempt to initialize DB sync. You can delete schema `{conn_cfg.modified}` in the database to fix this error and restart DB sync.")
         elif base_schema_exists:
-            raise DbSyncError(f"The base schema exists but the modified schema is missing: {conn_cfg.modified}")
+            raise DbSyncError(f"The base schema exists but the modified schema is missing: {conn_cfg.modified}. "
+                              f"This may be a result of a previously failed attempt to initialize DB sync. You can delete schema `{conn_cfg.base}` in the database to fix this error and restart DB sync.")
 
         # initialize: we have an existing GeoPackage in our Mergin Maps project and we want to initialize database
         print("The base and modified schemas do not exist yet, going to initialize them ...")
@@ -699,22 +708,23 @@ def init(conn_cfg, mc, from_gpkg=True):
                 raise DbSyncError('Initialization of db-sync failed due to a bug in geodiff.\n '
                                   'Please report this problem to mergin-db-sync developers')
         except DbSyncError:
-            # add comment to base schema before throwing exception
-            _set_db_project_comment(conn, conn_cfg.base, conn_cfg.mergin_project, local_version,
-                                    error='Initialization of db-sync failed due to a bug in geodiff')
+            print(f"Cleaning up after a failed DB sync init - dropping schemas {conn_cfg.base} and {conn_cfg.modified}.")
+            _drop_schema(conn, conn_cfg.base)
+            _drop_schema(conn, conn_cfg.modified)
             raise
 
         _set_db_project_comment(conn, conn_cfg.base, conn_cfg.mergin_project, local_version)
     else:
         if not modified_schema_exists:
-            raise DbSyncError("The 'modified' schema does not exist: " + conn_cfg.modified)
+            raise DbSyncError(f"The 'modified' schema does not exist: {conn_cfg.modified}. "
+                              "This schema is necessary if initialization should be done from database (parameter `init-from-db`).")
 
         if os.path.exists(gpkg_full_path) and base_schema_exists:
             # make sure output gpkg is in sync with db or fail
             summary_modified = _compare_datasets(conn_cfg.driver, conn_cfg.conn_info, conn_cfg.modified,
-                                              "sqlite", "", gpkg_full_path, ignored_tables)
+                                                 "sqlite", "", gpkg_full_path, ignored_tables)
             summary_base = _compare_datasets(conn_cfg.driver, conn_cfg.conn_info, conn_cfg.base,
-                                          "sqlite", "", gpkg_full_path, ignored_tables)
+                                             "sqlite", "", gpkg_full_path, ignored_tables)
             if len(summary_base):
                 print(f"Local project version at {_get_project_version(work_dir)} and base schema at {db_proj_info['version']}")
                 _print_changes_summary(summary_base, "Base schema changes:")
@@ -755,8 +765,8 @@ def init(conn_cfg, mc, from_gpkg=True):
                 raise DbSyncError('Initialization of db-sync failed due to a bug in geodiff.\n '
                                   'Please report this problem to mergin-db-sync developers')
         except DbSyncError:
-            _set_db_project_comment(conn, conn_cfg.base, conn_cfg.mergin_project, local_version,
-                                    error='Initialization of db-sync failed due to a bug in geodiff')
+            print(f"Cleaning up after a failed DB sync init - dropping schema {conn_cfg.base}.")
+            _drop_schema(conn, conn_cfg.base)
             raise
 
         # upload gpkg to Mergin Maps (client takes care of storing metadata)
diff --git a/test/test_basic.py b/test/test_basic.py
index 7bd92f3..0cf5555 100644
--- a/test/test_basic.py
+++ b/test/test_basic.py
@@ -121,16 +121,6 @@ def test_init_from_gpkg(mc: MerginClient):
     assert db_proj_info["name"] == config.connections[0].mergin_project
     assert db_proj_info["version"] == 'v1'
 
-    # rename base schema to mimic some mismatch
-    cur.execute(sql.SQL("ALTER SCHEMA {} RENAME TO schema_tmp").format(sql.Identifier(db_schema_base)).as_string(conn))
-    conn.commit()
-    with pytest.raises(DbSyncError) as err:
-        dbsync_init(mc)
-    assert "The 'modified' schema exists but the base schema is missing" in str(err.value)
-    # and revert back
-    cur.execute(sql.SQL("ALTER SCHEMA schema_tmp RENAME TO {}").format(sql.Identifier(db_schema_base)).as_string(conn))
-    conn.commit()
-
     # make change in GPKG and push to server to create pending changes, it should pass but not sync
     shutil.copy(os.path.join(TEST_DATA_DIR, 'inserted_1_A.gpkg'), os.path.join(project_dir, 'test_sync.gpkg'))
     mc.push_project(project_dir)
@@ -440,6 +430,7 @@ def test_recreated_project_ids(mc: MerginClient):
     with pytest.raises(DbSyncError):
         dbsync_status(mc)
 
+
 @pytest.mark.parametrize("project_name", ['test_init_1', 'Test_Init_2', "Test 3", "Test-4"])
 def test_project_names(mc: MerginClient, project_name: str):
     source_gpkg_path = os.path.join(TEST_DATA_DIR, 'base.gpkg')
@@ -487,3 +478,74 @@ def test_project_names(mc: MerginClient, project_name: str):
     assert gpkg_cur.fetchone()[3] == 100
     db_proj_info = _get_db_project_comment(conn, db_schema_base)
     assert db_proj_info["version"] == 'v3'
+
+
+def test_init_from_gpkg_missing_schema(mc: MerginClient):
+    source_gpkg_path = os.path.join(TEST_DATA_DIR, 'base.gpkg')
+    project_name = "test_init_missing_schema"
+    db_schema_base = project_name + "_base"
+    db_schema_main = project_name + "_main"
+
+    init_sync_from_geopackage(mc, project_name, source_gpkg_path)
+
+    conn = psycopg2.connect(DB_CONNINFO)
+    cur = conn.cursor()
+
+    # drop base schema to mimic some mismatch
+    cur.execute(sql.SQL("DROP SCHEMA {} CASCADE").format(sql.Identifier(db_schema_base)))
+    conn.commit()
+
+    # check that the removed schema no longer exists
+    cur.execute(f"SELECT schema_name FROM information_schema.schemata WHERE schema_name = '{db_schema_base}'")
+    assert cur.fetchone() is None
+
+    with pytest.raises(DbSyncError) as err:
+        dbsync_init(mc)
+    assert "The 'modified' schema exists but the base schema is missing" in str(err.value)
+    assert "This may be a result of a previously failed attempt to initialize DB sync" in str(err.value)
+
+    init_sync_from_geopackage(mc, project_name, source_gpkg_path)
+
+    # drop main schema to mimic some mismatch
+    cur.execute(sql.SQL("DROP SCHEMA {} CASCADE").format(sql.Identifier(db_schema_main)))
+    conn.commit()
+
+    # check that the removed schema no longer exists
+    cur.execute(f"SELECT schema_name FROM information_schema.schemata WHERE schema_name = '{db_schema_main}'")
+    assert cur.fetchone() is None
+
+    with pytest.raises(DbSyncError) as err:
+        dbsync_init(mc)
+    assert "The base schema exists but the modified schema is missing" in str(err.value)
+    assert "This may be a result of a previously failed attempt to initialize DB sync" in str(err.value)
+
+
+def test_init_from_gpkg_missing_comment(mc: MerginClient):
+    project_name = "test_init_missing_comment"
+    source_gpkg_path = os.path.join(TEST_DATA_DIR, 'base.gpkg')
+    schema_name = project_name + "_base"
+
+    init_sync_from_geopackage(mc, project_name, source_gpkg_path)
+
+    conn = psycopg2.connect(DB_CONNINFO)
+    cur = conn.cursor()
+
+    # sql query for schema
+    sql_cmd = f"SELECT schema_name FROM information_schema.schemata WHERE schema_name = '{schema_name}'"
+
+    # check that schema exists
+    cur.execute(sql_cmd)
+    assert cur.fetchone()[0] == schema_name
+
+    # clear the comment on the base schema to mimic a missing project reference
+    query = sql.SQL("COMMENT ON SCHEMA {} IS %s").format(sql.Identifier(schema_name))
+    cur.execute(query.as_string(conn), ("",))
+    conn.commit()
+
+    with pytest.raises(DbSyncError) as err:
+        dbsync_init(mc)
+    assert "Base schema exists but missing which project it belongs to" in str(err.value)
+
+    # the failed init leaves the schema in place - the error tells the user to delete it manually
+    cur.execute(sql_cmd)
+    assert cur.fetchone()[0] == schema_name
diff --git a/test/test_config.py b/test/test_config.py
index 6b9ac24..6c34a69 100644
--- a/test/test_config.py
+++ b/test/test_config.py
@@ -57,10 +57,10 @@ def test_config():
 
     _reset_config()
     with pytest.raises(ConfigError, match="Config error: Only 'postgres' driver is currently supported."):
-        config.update({'CONNECTIONS': [{"driver": "oracle", "conn_info": "", "modified": "mergin_main", "base": "mergin_base", "mergin_project": "john/dbsync", "sync_file": "sync.gpkg", "init_from": "gpkg"}]})
+        config.update({'CONNECTIONS': [{"driver": "oracle", "conn_info": "", "modified": "mergin_main", "base": "mergin_base", "mergin_project": "john/dbsync", "sync_file": "sync.gpkg"}]})
         validate_config(config)
 
     _reset_config()
     with pytest.raises(ConfigError, match="Config error: Name of the Mergin Maps project should be provided in the namespace/name format."):
-        config.update({'CONNECTIONS': [{"driver": "postgres", "conn_info": "", "modified": "mergin_main", "base": "mergin_base", "mergin_project": "dbsync", "sync_file": "sync.gpkg", "init_from": "gpkg"}]})
+        config.update({'CONNECTIONS': [{"driver": "postgres", "conn_info": "", "modified": "mergin_main", "base": "mergin_base", "mergin_project": "dbsync", "sync_file": "sync.gpkg"}]})
         validate_config(config)