diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java index 3178f05e348d6c..36e2ee88bea429 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java @@ -1950,7 +1950,8 @@ public long loadSqlBlockRule(DataInputStream in, long checksum) throws IOExcepti } // Only called by checkpoint thread - public void saveImage() throws IOException { + // return the latest image file's absolute path + public String saveImage() throws IOException { // Write image.ckpt Storage storage = new Storage(this.imageDir); File curFile = storage.getImageFile(replayedJournalId.get()); @@ -1963,6 +1964,7 @@ public void saveImage() throws IOException { curFile.delete(); throw new IOException(); } + return curFile.getAbsolutePath(); } public void saveImage(File curFile, long replayedJournalId) throws IOException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java b/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java index 69efd618f85f99..be433fe4c786a0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/Checkpoint.java @@ -35,6 +35,8 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import com.google.common.base.Strings; + import java.io.IOException; import java.io.OutputStream; import java.net.HttpURLConnection; @@ -111,6 +113,8 @@ public synchronized void doCheckpoint() { catalog = Catalog.getCurrentCatalog(); catalog.setEditLog(editLog); createStaticFieldForCkpt(); + boolean exceptionCaught = false; + String latestImageFilePath = null; try { catalog.loadImage(imageDir); catalog.replayJournal(checkPointVersion); @@ -119,13 +123,25 @@ public synchronized void doCheckpoint() { checkPointVersion, catalog.getReplayedJournalId())); } catalog.fixBugAfterMetadataReplayed(false); - catalog.saveImage(); + latestImageFilePath = catalog.saveImage(); replayedJournalId = catalog.getReplayedJournalId(); + + // destroy checkpoint catalog, reclaim memory + catalog = null; + Catalog.destroyCheckpoint(); + destroyStaticFieldForCkpt(); + + // Load image to verify if the newly generated image file is valid + // If success, do all the following jobs + // If failed, just return + catalog = Catalog.getCurrentCatalog(); + catalog.loadImage(imageDir); if (MetricRepo.isInit) { MetricRepo.COUNTER_IMAGE_WRITE_SUCCESS.increase(1L); } LOG.info("checkpoint finished save image.{}", replayedJournalId); } catch (Throwable e) { + exceptionCaught = true; e.printStackTrace(); LOG.error("Exception when generate new image file", e); if (MetricRepo.isInit) { @@ -137,6 +153,22 @@ public synchronized void doCheckpoint() { catalog = null; Catalog.destroyCheckpoint(); destroyStaticFieldForCkpt(); + // if new image generated && exception caught, delete the latest image here + // delete the newest image file, cuz it is invalid + if ((!Strings.isNullOrEmpty(latestImageFilePath)) && exceptionCaught) { + MetaCleaner cleaner = new MetaCleaner(Config.meta_dir + "/image"); + try { + cleaner.cleanTheLatestInvalidImageFile(latestImageFilePath); + if (MetricRepo.isInit) { + MetricRepo.COUNTER_IMAGE_CLEAN_SUCCESS.increase(1L); + } + } catch (Throwable ex) { + LOG.error("Master delete latest invalid image file failed.", ex); + if (MetricRepo.isInit) { + MetricRepo.COUNTER_IMAGE_CLEAN_FAILED.increase(1L); + } + } + } } // push image file to all the other non master nodes diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/MetaCleaner.java b/fe/fe-core/src/main/java/org/apache/doris/persist/MetaCleaner.java index 2c2b17a9410c13..6c69ebe995c34e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/MetaCleaner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/MetaCleaner.java @@ -67,6 +67,17 @@ public void clean() throws IOException { } } } + + public void cleanTheLatestInvalidImageFile(String path) throws IOException { + File latestInvalidImage = new File(path); + if (latestInvalidImage.exists()) { + if (latestInvalidImage.delete()) { + LOG.info(latestInvalidImage.getAbsoluteFile() + " deleted."); + } else { + LOG.warn(latestInvalidImage.getAbsoluteFile() + " delete failed."); + } + } + } private String fileType(File file) throws IOException { String type = null;