Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 27 additions & 53 deletions fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@
import org.apache.doris.catalog.View;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.MetaNotFoundException;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.DynamicPartitionUtil;
Expand Down Expand Up @@ -99,32 +97,25 @@ public void processCreateMaterializedView(CreateMaterializedViewStmt stmt)
String tableName = stmt.getBaseIndexName();
// check db
String dbName = stmt.getDBName();
Database db = Catalog.getCurrentCatalog().getDb(dbName);
if (db == null) {
ErrorReport.reportDdlException(ErrorCode.ERR_BAD_DB_ERROR, dbName);
}
Database db = Catalog.getCurrentCatalog().getDbOrDdlException(dbName);
// check cluster capacity
Catalog.getCurrentSystemInfo().checkClusterCapacity(stmt.getClusterName());
// check db quota
db.checkQuota();

OlapTable olapTable = (OlapTable) db.getTableOrThrowException(tableName, TableType.OLAP);
((MaterializedViewHandler)materializedViewHandler).processCreateMaterializedView(stmt, db,
olapTable);
OlapTable olapTable = db.getTableOrMetaException(tableName, TableType.OLAP);
((MaterializedViewHandler) materializedViewHandler).processCreateMaterializedView(stmt, db, olapTable);
}

public void processDropMaterializedView(DropMaterializedViewStmt stmt) throws DdlException, MetaNotFoundException {
// check db
String dbName = stmt.getTableName().getDb();
Database db = Catalog.getCurrentCatalog().getDb(dbName);
if (db == null) {
ErrorReport.reportDdlException(ErrorCode.ERR_BAD_DB_ERROR, dbName);
}
Database db = Catalog.getCurrentCatalog().getDbOrDdlException(dbName);

String tableName = stmt.getTableName().getTbl();
OlapTable olapTable = (OlapTable) db.getTableOrThrowException(tableName, TableType.OLAP);
OlapTable olapTable = db.getTableOrMetaException(tableName, TableType.OLAP);
// drop materialized view
((MaterializedViewHandler)materializedViewHandler).processDropMaterializedView(stmt, db, olapTable);
((MaterializedViewHandler) materializedViewHandler).processDropMaterializedView(stmt, db, olapTable);
}

private boolean processAlterOlapTable(AlterTableStmt stmt, OlapTable olapTable, List<AlterClause> alterClauses,
Expand Down Expand Up @@ -260,17 +251,11 @@ private void processModifyColumnComment(Database db, OlapTable tbl, List<AlterCl
}
}

public void replayModifyComment(ModifyCommentOperationLog operation) {
public void replayModifyComment(ModifyCommentOperationLog operation) throws MetaNotFoundException {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need a special Exception to handle those "replay" logic.
I am worried that MetaNotFoundException will be thrown elsewhere in the replay logic, but it may be unexpected.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is considerable. But "replay" throwing any exception is an extremely big risk, which will cause all FE crush and cannot recover. These MetaNotFoundException are mostly thrown by getDb and getTable, due to the lock inconsistence that makes editlogs out of order. Semantically, throwing this exception means some metadata the editlog want to edit on is missing during replay, which makes this replay unable to continue. Just like getDb/Table, This kind of inconsistence cannot recover anyway, and these editlogs are theoretically only affect lost metadata.
I prefer to use MetaNotFoundException that indicate metadata has lost rather than other exception which may cause confuse. The inconsistence can be checked from warning log.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ccoffline how about check db or table state after getting db or table lock , so that the edit log will always keep in order? we don't need some much check code?

Copy link
Contributor Author

@ccoffline ccoffline Aug 31, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@caiconghui we don't have any promise that the edit logs are in order. These check code is to prevent the worst from happening.
Edit logs that out of order may cause meta inconsistent, which has to be fixed sooner or later. We are exploring ways to ensure consistency and minimize the cost. Until then, we have to check all NPE or let all the FE to crash.

Copy link
Contributor

@caiconghui caiconghui Aug 31, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ccoffline so we only need to simply check null for getdb and get table in replay operation and just simply skip the following operation? now, the only way that fe may crash is with that replay operation when fe restart?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it better to split this PR into two PRs ? one for code refactor and another for db and table null check, I think it is easy to review and not-error-prone when PR is not so big? @ccoffline

long dbId = operation.getDbId();
long tblId = operation.getTblId();
Database db = Catalog.getCurrentCatalog().getDb(dbId);
if (db == null) {
return;
}
Table tbl = db.getTable(tblId);
if (tbl == null) {
return;
}
Database db = Catalog.getCurrentCatalog().getDbOrMetaException(dbId);
Table tbl = db.getTableOrMetaException(tblId);
tbl.writeLock();
try {
ModifyCommentOperationLog.Type type = operation.getType();
Expand Down Expand Up @@ -311,14 +296,8 @@ public void processAlterTable(AlterTableStmt stmt) throws UserException {
String tableName = dbTableName.getTbl();
final String clusterName = stmt.getClusterName();

Database db = Catalog.getCurrentCatalog().getDb(dbName);
if (db == null) {
ErrorReport.reportDdlException(ErrorCode.ERR_BAD_DB_ERROR, dbName);
}
Table table = db.getTable(tableName);
if (table == null) {
ErrorReport.reportDdlException(ErrorCode.ERR_BAD_TABLE_ERROR, tableName);
}
Database db = Catalog.getCurrentCatalog().getDbOrDdlException(dbName);
Table table = db.getTableOrDdlException(tableName);
List<AlterClause> alterClauses = Lists.newArrayList();
// some operations will take long time to process, need to be done outside the table lock
boolean needProcessOutsideTableLock = false;
Expand All @@ -342,7 +321,7 @@ public void processAlterTable(AlterTableStmt stmt) throws UserException {
AlterClause alterClause = alterClauses.get(0);
if (alterClause instanceof AddPartitionClause) {
if (!((AddPartitionClause) alterClause).isTempPartition()) {
DynamicPartitionUtil.checkAlterAllowed((OlapTable) db.getTable(tableName));
DynamicPartitionUtil.checkAlterAllowed((OlapTable) db.getTableOrMetaException(tableName, TableType.OLAP));
}
Catalog.getCurrentCatalog().addPartition(db, tableName, (AddPartitionClause) alterClause);
} else if (alterClause instanceof ModifyPartitionClause) {
Expand Down Expand Up @@ -376,7 +355,7 @@ private void processReplaceTable(Database db, OlapTable origTable, List<AlterCla
ReplaceTableClause clause = (ReplaceTableClause) alterClauses.get(0);
String newTblName = clause.getTblName();
boolean swapTable = clause.isSwapTable();
Table newTbl = db.getTableOrThrowException(newTblName, TableType.OLAP);
Table newTbl = db.getTableOrMetaException(newTblName, TableType.OLAP);
OlapTable olapNewTbl = (OlapTable) newTbl;
db.writeLock();
origTable.writeLock();
Expand All @@ -399,13 +378,14 @@ private void processReplaceTable(Database db, OlapTable origTable, List<AlterCla

}

public void replayReplaceTable(ReplaceTableOperationLog log) {
public void replayReplaceTable(ReplaceTableOperationLog log) throws MetaNotFoundException {
long dbId = log.getDbId();
long origTblId = log.getOrigTblId();
long newTblId = log.getNewTblId();
Database db = Catalog.getCurrentCatalog().getDb(dbId);
OlapTable origTable = (OlapTable) db.getTable(origTblId);
OlapTable newTbl = (OlapTable) db.getTable(newTblId);

Database db = Catalog.getCurrentCatalog().getDbOrMetaException(dbId);
OlapTable origTable = db.getTableOrMetaException(origTblId, TableType.OLAP);
OlapTable newTbl = db.getTableOrMetaException(newTblId, TableType.OLAP);

try {
replaceTableInternal(db, origTable, newTbl, log.isSwapTable(), true);
Expand Down Expand Up @@ -457,13 +437,10 @@ public void processAlterView(AlterViewStmt stmt, ConnectContext ctx) throws User
TableName dbTableName = stmt.getTbl();
String dbName = dbTableName.getDb();

Database db = Catalog.getCurrentCatalog().getDb(dbName);
if (db == null) {
ErrorReport.reportDdlException(ErrorCode.ERR_BAD_DB_ERROR, dbName);
}
Database db = Catalog.getCurrentCatalog().getDbOrDdlException(dbName);

String tableName = dbTableName.getTbl();
View view = (View) db.getTableOrThrowException(tableName, TableType.VIEW);
View view = db.getTableOrMetaException(tableName, TableType.VIEW);
modifyViewDef(db, view, stmt.getInlineViewDef(), ctx.getSessionVariable().getSqlMode(), stmt.getColumns());
}

Expand Down Expand Up @@ -491,14 +468,15 @@ private void modifyViewDef(Database db, View view, String inlineViewDef, long sq
}
}

public void replayModifyViewDef(AlterViewInfo alterViewInfo) throws DdlException {
public void replayModifyViewDef(AlterViewInfo alterViewInfo) throws MetaNotFoundException, DdlException {
long dbId = alterViewInfo.getDbId();
long tableId = alterViewInfo.getTableId();
String inlineViewDef = alterViewInfo.getInlineViewDef();
List<Column> newFullSchema = alterViewInfo.getNewFullSchema();

Database db = Catalog.getCurrentCatalog().getDb(dbId);
View view = (View) db.getTable(tableId);
Database db = Catalog.getCurrentCatalog().getDbOrMetaException(dbId);
View view = db.getTableOrMetaException(tableId, TableType.VIEW);

db.writeLock();
view.writeLock();
try {
Expand Down Expand Up @@ -631,13 +609,9 @@ public void modifyPartitionsProperty(Database db,
Catalog.getCurrentCatalog().getEditLog().logBatchModifyPartition(info);
}

public void replayModifyPartition(ModifyPartitionInfo info) {
Database db = Catalog.getCurrentCatalog().getDb(info.getDbId());
OlapTable olapTable = (OlapTable) db.getTable(info.getTableId());
if (olapTable == null) {
LOG.warn("table {} does not eixst when replaying modify partition. db: {}", info.getTableId(), info.getDbId());
return;
}
public void replayModifyPartition(ModifyPartitionInfo info) throws MetaNotFoundException {
Database db = Catalog.getCurrentCatalog().getDbOrMetaException(info.getDbId());
OlapTable olapTable = db.getTableOrMetaException(info.getTableId(), TableType.OLAP);
olapTable.writeLock();
try {
PartitionInfo partitionInfo = olapTable.getPartitionInfo();
Expand Down
61 changes: 33 additions & 28 deletions fe/fe-core/src/main/java/org/apache/doris/alter/AlterHandler.java
Original file line number Diff line number Diff line change
Expand Up @@ -306,41 +306,49 @@ protected void jobDone(AlterJob alterJob) {
}
}

public void replayInitJob(AlterJob alterJob, Catalog catalog) {
Database db = catalog.getDb(alterJob.getDbId());
alterJob.replayInitJob(db);
// add rollup job
addAlterJob(alterJob);
public void replayInitJob(AlterJob alterJob, Catalog catalog) throws MetaNotFoundException {
try {
Database db = catalog.getDbOrMetaException(alterJob.getDbId());
alterJob.replayInitJob(db);
} finally {
// add rollup job
addAlterJob(alterJob);
}
}

public void replayFinishing(AlterJob alterJob, Catalog catalog) {
Database db = catalog.getDb(alterJob.getDbId());
alterJob.replayFinishing(db);
alterJob.setState(JobState.FINISHING);
// !!! the alter job should add to the cache again, because the alter job is deserialized from journal
// it is a different object compared to the cache
addAlterJob(alterJob);
public void replayFinishing(AlterJob alterJob, Catalog catalog) throws MetaNotFoundException {
try {
Database db = catalog.getDbOrMetaException(alterJob.getDbId());
alterJob.replayFinishing(db);
} finally {
alterJob.setState(JobState.FINISHING);
// !!! the alter job should add to the cache again, because the alter job is deserialized from journal
// it is a different object compared to the cache
addAlterJob(alterJob);
}
}

public void replayFinish(AlterJob alterJob, Catalog catalog) {
Database db = catalog.getDb(alterJob.getDbId());
alterJob.replayFinish(db);
alterJob.setState(JobState.FINISHED);

jobDone(alterJob);
public void replayFinish(AlterJob alterJob, Catalog catalog) throws MetaNotFoundException {
try {
Database db = catalog.getDbOrMetaException(alterJob.getDbId());
alterJob.replayFinish(db);
} finally {
alterJob.setState(JobState.FINISHED);
jobDone(alterJob);
}
}

public void replayCancel(AlterJob alterJob, Catalog catalog) {
public void replayCancel(AlterJob alterJob, Catalog catalog) throws MetaNotFoundException {
removeAlterJob(alterJob.getTableId());
alterJob.setState(JobState.CANCELLED);
Database db = catalog.getDb(alterJob.getDbId());
if (db != null) {
try {
// we log rollup job cancelled even if db is dropped.
// so check db != null here
Database db = catalog.getDbOrMetaException(alterJob.getDbId());
alterJob.replayCancel(db);
} finally {
addFinishedOrCancelledAlterJob(alterJob);
}

addFinishedOrCancelledAlterJob(alterJob);
}

@Override
Expand Down Expand Up @@ -409,12 +417,9 @@ public Integer getAlterJobNumByState(JobState state) {
* In summary, we only need to update replica's version when replica's version is smaller than X
*/
public void handleFinishAlterTask(AlterReplicaTask task) throws MetaNotFoundException {
Database db = Catalog.getCurrentCatalog().getDb(task.getDbId());
if (db == null) {
throw new MetaNotFoundException("database " + task.getDbId() + " does not exist");
}
Database db = Catalog.getCurrentCatalog().getDbOrMetaException(task.getDbId());

OlapTable tbl = (OlapTable) db.getTableOrThrowException(task.getTableId(), Table.TableType.OLAP);
OlapTable tbl = db.getTableOrMetaException(task.getTableId(), Table.TableType.OLAP);
tbl.writeLock();
try {
Partition partition = tbl.getPartition(task.getPartitionId());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,9 @@ public synchronized final boolean cancel(String errMsg) {
* return false if table is not stable.
*/
protected boolean checkTableStable(Database db) throws AlterCancelException {
OlapTable tbl = null;
OlapTable tbl;
try {
tbl = (OlapTable) db.getTableOrThrowException(tableId, Table.TableType.OLAP);
tbl = db.getTableOrMetaException(tableId, Table.TableType.OLAP);
} catch (MetaNotFoundException e) {
throw new AlterCancelException(e.getMessage());
}
Expand All @@ -206,7 +206,7 @@ protected boolean checkTableStable(Database db) throws AlterCancelException {
return false;
} else {
// table is stable, set is to ROLLUP and begin altering.
LOG.info("table {} is stable, start {} job {}", tableId, type);
LOG.info("table {} is stable, start {} job {}", tableId, type, jobId);
tbl.setState(type == JobType.ROLLUP ? OlapTableState.ROLLUP : OlapTableState.SCHEMA_CHANGE);
errMsg = "";
return true;
Expand Down
Loading