From 6350da26973e53fa82fde93423237c57983f9ef1 Mon Sep 17 00:00:00 2001 From: zy-kkk Date: Tue, 11 Feb 2025 09:40:32 +0800 Subject: [PATCH] [fix](external catalog) Fix missing fields when rebuilding metadata from image (#47603) Issue Number: close #xxx Related PR: #41510 Problem Summary: In PR #41510, we added several fields to External Catalog. However, we only handled the upgrade scenario for EditLog but not for Image. This causes Catalogs rebuilt from Image to miss these fields, resulting in NullPointerException during queries. This PR fixes this issue. Specifically: 1. Added null check and initialization for fields in gsonPostProcess 2. Ensured consistent behavior between EditLog replay and Image deserialization 3. Added proper logging for better troubleshooting --- .../doris/datasource/ExternalCatalog.java | 23 ++++++++++-- .../doris/datasource/ExternalDatabase.java | 36 +++++++++++++++++-- .../doris/datasource/InitCatalogLog.java | 7 +++- .../doris/datasource/InitDatabaseLog.java | 7 +++- 4 files changed, 66 insertions(+), 7 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java index 778c110df0bfac..90e64f8b377bf8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java @@ -388,7 +388,7 @@ private void init() { db.setRemoteName(remoteDbName); } tmpIdToDb.put(dbId, db); - initCatalogLog.addRefreshDb(dbId); + initCatalogLog.addRefreshDb(dbId, remoteDbName); } else { dbId = Env.getCurrentEnv().getNextId(); tmpDbNameToId.put(localDbName, dbId); @@ -726,8 +726,12 @@ public void replayInitCatalog(InitCatalogLog log) { if (LOG.isDebugEnabled()) { LOG.debug("replay init external catalog[{}]: {}", name, log); } - // If the remote name is missing during upgrade, all databases in the Map will be reinitialized. - if (log.getCreateCount() > 0 && (log.getRemoteDbNames() == null || log.getRemoteDbNames().isEmpty())) { + // If the remote name is missing during upgrade, or + // the refresh db's remote name is empty, + // all databases in the Map will be reinitialized. + if ((log.getCreateCount() > 0 && (log.getRemoteDbNames() == null || log.getRemoteDbNames().isEmpty())) + || (log.getRefreshCount() > 0 + && (log.getRefreshRemoteDbNames() == null || log.getRefreshRemoteDbNames().isEmpty()))) { dbNameToId = Maps.newConcurrentMap(); idToDb = Maps.newConcurrentMap(); lastUpdateTime = log.getLastUpdateTime(); @@ -747,6 +751,7 @@ public void replayInitCatalog(InitCatalogLog log) { log.getRefreshDbIds().get(i), name); continue; } + db.get().setRemoteName(log.getRefreshRemoteDbNames().get(i)); Preconditions.checkNotNull(db.get()); tmpDbNameToId.put(db.get().getFullName(), db.get().getId()); tmpIdToDb.put(db.get().getId(), db.get()); @@ -763,6 +768,18 @@ public void replayInitCatalog(InitCatalogLog log) { db.getFullName(), db.getId(), log.getRemoteDbNames().get(i)); } } + // Check whether the remoteName of db in tmpIdToDb is empty + for (ExternalDatabase db : tmpIdToDb.values()) { + if (Strings.isNullOrEmpty(db.getRemoteName())) { + LOG.info("Database [{}] remoteName is empty in catalog [{}], mark as uninitialized", + db.getFullName(), name); + dbNameToId = Maps.newConcurrentMap(); + idToDb = Maps.newConcurrentMap(); + lastUpdateTime = log.getLastUpdateTime(); + initialized = false; + return; + } + } dbNameToId = tmpDbNameToId; idToDb = tmpIdToDb; lastUpdateTime = log.getLastUpdateTime(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalDatabase.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalDatabase.java index 9041f4d39ad44d..a8e527bbc2ab15 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalDatabase.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalDatabase.java @@ -194,7 +194,9 @@ public void replayInitDb(InitDatabaseLog log, ExternalCatalog catalog) { LOG.debug("replay init external db[{}.{}]: {}", name, catalog.getName(), log); } // If the remote name is missing during upgrade, all tables in the Map will be reinitialized. - if (log.getCreateCount() > 0 && (log.getRemoteTableNames() == null || log.getRemoteTableNames().isEmpty())) { + if ((log.getCreateCount() > 0 && (log.getRemoteTableNames() == null || log.getRemoteTableNames().isEmpty())) + || (log.getRefreshCount() > 0 + && (log.getRefreshRemoteTableNames() == null || log.getRefreshRemoteTableNames().isEmpty()))) { tableNameToId = Maps.newConcurrentMap(); idToTbl = Maps.newConcurrentMap(); lastUpdateTime = log.getLastUpdateTime(); @@ -212,6 +214,7 @@ public void replayInitDb(InitDatabaseLog log, ExternalCatalog catalog) { // So we need add a validation here to avoid table(s) not found, this is just a temporary solution // because later we will remove all the logics about InitCatalogLog/InitDatabaseLog. if (table.isPresent()) { + table.get().setRemoteName(log.getRefreshRemoteTableNames().get(i)); tmpTableNameToId.put(table.get().getName(), table.get().getId()); tmpIdToTbl.put(table.get().getId(), table.get()); @@ -237,6 +240,19 @@ public void replayInitDb(InitDatabaseLog log, ExternalCatalog catalog) { LOG.info("Synchronized table (create): [Name: {}, ID: {}, Remote Name: {}]", table.getName(), table.getId(), log.getRemoteTableNames().get(i)); } + // Check whether the remoteName and db Tbl db in idToTbl is empty + for (T table : idToTbl.values()) { + if (Strings.isNullOrEmpty(table.getRemoteName()) + || table.getDb() == null) { + LOG.info("Table [{}] remoteName or database is empty, mark as uninitialized", + table.getName()); + tableNameToId = Maps.newConcurrentMap(); + idToTbl = Maps.newConcurrentMap(); + lastUpdateTime = log.getLastUpdateTime(); + initialized = false; + return; + } + } tableNameToId = tmpTableNameToId; idToTbl = tmpIdToTbl; lastUpdateTime = log.getLastUpdateTime(); @@ -272,7 +288,7 @@ private void init() { table.setDb(this); } tmpIdToTbl.put(tblId, table); - initDatabaseLog.addRefreshTable(tblId); + initDatabaseLog.addRefreshTable(tblId, remoteTableName); } else { tblId = Env.getCurrentEnv().getNextId(); tmpTableNameToId.put(localTableName, tblId); @@ -629,14 +645,22 @@ public void gsonPostProcess() throws IOException { case "ExternalInfoSchemaTable": ExternalInfoSchemaTable infoSchemaTable = GsonUtils.GSON.fromJson(GsonUtils.GSON.toJson(obj), ExternalInfoSchemaTable.class); + if (infoSchemaTable.getDb() == null) { + infoSchemaTable.setDb(this); + } tmpIdToTbl.put(infoSchemaTable.getId(), (T) infoSchemaTable); tableNameToId.put(infoSchemaTable.getName(), infoSchemaTable.getId()); + lowerCaseToTableName.put(infoSchemaTable.getName().toLowerCase(), infoSchemaTable.getName()); break; case "ExternalMysqlTable": ExternalMysqlTable mysqlTable = GsonUtils.GSON.fromJson(GsonUtils.GSON.toJson(obj), ExternalMysqlTable.class); + if (mysqlTable.getDb() == null) { + mysqlTable.setDb(this); + } tmpIdToTbl.put(mysqlTable.getId(), (T) mysqlTable); tableNameToId.put(mysqlTable.getName(), mysqlTable.getId()); + lowerCaseToTableName.put(mysqlTable.getName().toLowerCase(), mysqlTable.getName()); break; default: break; @@ -649,6 +673,14 @@ public void gsonPostProcess() throws IOException { ((ExternalTable) obj).getName()); } } + // Check whether the remoteName and db Tbl db in idToTbl is empty + for (T table : idToTbl.values()) { + if (Strings.isNullOrEmpty(table.getRemoteName()) + || table.getDb() == null) { + initialized = false; + break; + } + } idToTbl = tmpIdToTbl; rwLock = new MonitoredReentrantReadWriteLock(true); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/InitCatalogLog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/InitCatalogLog.java index 50f713ce407fc0..f5c797574cc52f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InitCatalogLog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InitCatalogLog.java @@ -56,6 +56,9 @@ public enum Type { @SerializedName(value = "refreshDbIds") private List refreshDbIds; + @SerializedName(value = "refreshRemoteDbNames") + private List refreshRemoteDbNames; + @SerializedName(value = "createDbIds") private List createDbIds; @@ -76,15 +79,17 @@ public InitCatalogLog() { createCount = 0; catalogId = 0; refreshDbIds = Lists.newArrayList(); + refreshRemoteDbNames = Lists.newArrayList(); createDbIds = Lists.newArrayList(); createDbNames = Lists.newArrayList(); remoteDbNames = Lists.newArrayList(); type = Type.UNKNOWN; } - public void addRefreshDb(long id) { + public void addRefreshDb(long id, String remoteName) { refreshCount += 1; refreshDbIds.add(id); + refreshRemoteDbNames.add(remoteName); } public void addCreateDb(long id, String name, String remoteName) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/InitDatabaseLog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/InitDatabaseLog.java index 6284637d05d225..3fabca92052c63 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InitDatabaseLog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InitDatabaseLog.java @@ -60,6 +60,9 @@ public enum Type { @SerializedName(value = "refreshTableIds") private List refreshTableIds; + @SerializedName(value = "refreshRemoteTableNames") + private List refreshRemoteTableNames; + @SerializedName(value = "createTableIds") private List createTableIds; @@ -81,15 +84,17 @@ public InitDatabaseLog() { catalogId = 0; dbId = 0; refreshTableIds = Lists.newArrayList(); + refreshRemoteTableNames = Lists.newArrayList(); createTableIds = Lists.newArrayList(); createTableNames = Lists.newArrayList(); remoteTableNames = Lists.newArrayList(); type = Type.UNKNOWN; } - public void addRefreshTable(long id) { + public void addRefreshTable(long id, String remoteName) { refreshCount += 1; refreshTableIds.add(id); + refreshRemoteTableNames.add(remoteName); } public void addCreateTable(long id, String name, String remoteName) {