diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 78318418de4b56..53a0d34ca89494 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -1023,13 +1023,13 @@ opt_enable_feature_properties ::= ; alter_system_clause ::= - KW_ADD KW_BACKEND string_list:hostPorts + KW_ADD KW_BACKEND string_list:hostPorts opt_properties:properties {: - RESULT = new AddBackendClause(hostPorts, false); + RESULT = new AddBackendClause(hostPorts, false, properties); :} | KW_ADD KW_FREE KW_BACKEND string_list:hostPorts {: - RESULT = new AddBackendClause(hostPorts, true); + RESULT = new AddBackendClause(hostPorts, true, Maps.newHashMap()); :} | KW_ADD KW_BACKEND KW_TO ident:clusterName string_list:hostPorts {: @@ -1080,7 +1080,11 @@ alter_system_clause ::= | KW_SET KW_LOAD KW_ERRORS KW_HUB opt_properties:properties {: RESULT = new AlterLoadErrorUrlClause(properties); - :} + :} + | KW_MODIFY KW_BACKEND string_list:hostPorts KW_SET LPAREN key_value_map:properties RPAREN + {: + RESULT = new ModifyBackendClause(hostPorts, properties); + :} ; // Sync Stmt diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java b/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java index bfc6df1c3ca5ab..bc5a6af5c93bdc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java @@ -42,6 +42,7 @@ import org.apache.doris.catalog.OlapTable.OlapTableState; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.PartitionInfo; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.Table; import org.apache.doris.catalog.Table.TableType; import org.apache.doris.catalog.View; @@ -495,11 +496,10 @@ public void modifyPartitionsProperty(Database db, // get value from properties here // 1. 
data property - DataProperty newDataProperty = - PropertyAnalyzer.analyzeDataProperty(properties, null); - // 2. replication num - short newReplicationNum = - PropertyAnalyzer.analyzeReplicationNum(properties, (short) -1); + DataProperty newDataProperty = PropertyAnalyzer.analyzeDataProperty(properties, null); + // 2. replica allocation + ReplicaAllocation replicaAlloc = PropertyAnalyzer.analyzeReplicaAllocation(properties, ""); + Catalog.getCurrentSystemInfo().checkReplicaAllocation(db.getClusterName(), replicaAlloc); // 3. in memory boolean newInMemory = PropertyAnalyzer.analyzeBooleanProp(properties, PropertyAnalyzer.PROPERTIES_INMEMORY, false); @@ -515,9 +515,9 @@ public void modifyPartitionsProperty(Database db, if (newDataProperty != null) { partitionInfo.setDataProperty(partition.getId(), newDataProperty); } - // 2. replication num - if (newReplicationNum != (short) -1) { - partitionInfo.setReplicationNum(partition.getId(), newReplicationNum); + // 2. replica allocation + if (!replicaAlloc.isNotSet()) { + partitionInfo.setReplicaAllocation(partition.getId(), replicaAlloc); } // 3. in memory boolean oldInMemory = partitionInfo.getIsInMemory(partition.getId()); @@ -529,7 +529,7 @@ public void modifyPartitionsProperty(Database db, partitionInfo.setTabletType(partition.getId(), tTabletType); } ModifyPartitionInfo info = new ModifyPartitionInfo(db.getId(), olapTable.getId(), partition.getId(), - newDataProperty, newReplicationNum, hasInMemory ? newInMemory : oldInMemory); + newDataProperty, replicaAlloc, hasInMemory ? 
newInMemory : oldInMemory); modifyPartitionInfos.add(info); } @@ -551,8 +551,8 @@ public void replayModifyPartition(ModifyPartitionInfo info) { if (info.getDataProperty() != null) { partitionInfo.setDataProperty(info.getPartitionId(), info.getDataProperty()); } - if (info.getReplicationNum() != (short) -1) { - partitionInfo.setReplicationNum(info.getPartitionId(), info.getReplicationNum()); + if (!info.getReplicaAlloc().isNotSet()) { + partitionInfo.setReplicaAllocation(info.getPartitionId(), info.getReplicaAlloc()); } partitionInfo.setIsInMemory(info.getPartitionId(), info.isInMemory()); } finally { diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java index 7fab577bb680d9..eeca9b6d60929b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java @@ -368,7 +368,7 @@ private RollupJobV2 createMaterializedViewJob(String mvName, String baseIndexNam MaterializedIndex mvIndex = new MaterializedIndex(mvIndexId, IndexState.SHADOW); MaterializedIndex baseIndex = partition.getIndex(baseIndexId); TabletMeta mvTabletMeta = new TabletMeta(dbId, tableId, partitionId, mvIndexId, mvSchemaHash, medium); - short replicationNum = olapTable.getPartitionInfo().getReplicationNum(partitionId); + short replicationNum = olapTable.getPartitionInfo().getReplicaAllocation(partitionId).getTotalReplicaNum(); for (Tablet baseTablet : baseIndex.getTablets()) { long baseTabletId = baseTablet.getId(); long mvTabletId = catalog.getNextId(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJob.java b/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJob.java index 8a7830a2e4fffb..b661c5bf67cd4d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJob.java @@ -655,7 
+655,7 @@ public int tryFinishJob() { continue; } - short expectReplicationNum = olapTable.getPartitionInfo().getReplicationNum(partition.getId()); + short expectReplicationNum = olapTable.getPartitionInfo().getReplicaAllocation(partition.getId()).getTotalReplicaNum(); MaterializedIndex rollupIndex = entry.getValue(); for (Tablet rollupTablet : rollupIndex.getTablets()) { // yiguolei: the rollup tablet only contains the replica that is healthy at rollup time diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java index 470d4c526919b0..09cd47b5205ef2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java @@ -449,7 +449,8 @@ protected void runRunningJob() throws AlterCancelException { long visiableVersion = partition.getVisibleVersion(); long visiableVersionHash = partition.getVisibleVersionHash(); - short expectReplicationNum = tbl.getPartitionInfo().getReplicationNum(partition.getId()); + short expectReplicationNum = tbl.getPartitionInfo().getReplicaAllocation(partitionId).getTotalReplicaNum(); + MaterializedIndex rollupIndex = entry.getValue(); for (Tablet rollupTablet : rollupIndex.getTablets()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java index a1091929ad68f3..302df9fbb319b8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java @@ -51,6 +51,7 @@ import org.apache.doris.catalog.PartitionType; import org.apache.doris.catalog.Replica; import org.apache.doris.catalog.Replica.ReplicaState; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.Table; import org.apache.doris.catalog.Tablet; import org.apache.doris.catalog.TabletMeta; 
@@ -1318,7 +1319,8 @@ private void createJob(long dbId, OlapTable olapTable, Map alterClauses, String clusterName, Database } Catalog.getCurrentCatalog().modifyTableDynamicPartition(db, olapTable, properties); return; - } else if (properties.containsKey("default." + PropertyAnalyzer.PROPERTIES_REPLICATION_NUM)) { - Preconditions.checkNotNull(properties.get(PropertyAnalyzer.PROPERTIES_REPLICATION_NUM)); - Catalog.getCurrentCatalog().modifyTableDefaultReplicationNum(db, olapTable, properties); + } else if (properties.containsKey("default." + PropertyAnalyzer.PROPERTIES_REPLICATION_ALLOCATION)) { + Preconditions.checkNotNull(properties.get("default." + PropertyAnalyzer.PROPERTIES_REPLICATION_ALLOCATION)); + Catalog.getCurrentCatalog().modifyTableDefaultReplicaAllocation(db, olapTable, properties); return; - } else if (properties.containsKey(PropertyAnalyzer.PROPERTIES_REPLICATION_NUM)) { - Catalog.getCurrentCatalog().modifyTableReplicationNum(db, olapTable, properties); + } else if (properties.containsKey(PropertyAnalyzer.PROPERTIES_REPLICATION_ALLOCATION)) { + Catalog.getCurrentCatalog().modifyTableReplicaAllocation(db, olapTable, properties); return; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJob.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJob.java index feb442b042a3e2..e7ab6f4611237f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJob.java @@ -402,7 +402,7 @@ public boolean sendTasks() { List tasks = new LinkedList(); for (Partition partition : olapTable.getPartitions()) { long partitionId = partition.getId(); - short replicationNum = olapTable.getPartitionInfo().getReplicationNum(partitionId); + short replicationNum = olapTable.getPartitionInfo().getReplicaAllocation(partitionId).getTotalReplicaNum(); for (Long indexId : this.changedIndexIdToSchema.keySet()) { MaterializedIndex alterIndex = 
partition.getIndex(indexId); if (alterIndex == null) { @@ -683,7 +683,7 @@ public int tryFinishJob() { boolean hasUnfinishedPartition = false; for (Partition partition : olapTable.getPartitions()) { long partitionId = partition.getId(); - short expectReplicationNum = olapTable.getPartitionInfo().getReplicationNum(partition.getId()); + short expectReplicationNum = olapTable.getPartitionInfo().getReplicaAllocation(partition.getId()).getTotalReplicaNum(); boolean hasUnfinishedIndex = false; for (long indexId : this.changedIndexIdToSchema.keySet()) { MaterializedIndex materializedIndex = partition.getIndex(indexId); diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java index bd22ba2a2060e4..68093235f8a23a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java @@ -484,7 +484,7 @@ protected void runRunningJob() throws AlterCancelException { long visiableVersion = partition.getVisibleVersion(); long visiableVersionHash = partition.getVisibleVersionHash(); - short expectReplicationNum = tbl.getPartitionInfo().getReplicationNum(partition.getId()); + short expectReplicationNum = tbl.getPartitionInfo().getReplicaAllocation(partition.getId()).getTotalReplicaNum(); Map shadowIndexMap = partitionIndexMap.row(partitionId); for (Map.Entry entry : shadowIndexMap.entrySet()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SystemHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SystemHandler.java index 33ec4117e59738..f8a59bcfb723c8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SystemHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SystemHandler.java @@ -29,6 +29,7 @@ import org.apache.doris.analysis.DropBackendClause; import org.apache.doris.analysis.DropFollowerClause; import 
org.apache.doris.analysis.DropObserverClause; +import org.apache.doris.analysis.ModifyBackendClause; import org.apache.doris.analysis.ModifyBrokerClause; import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Database; @@ -143,8 +144,8 @@ public synchronized void process(List alterClauses, String clusterN && Catalog.getCurrentCatalog().getCluster(destClusterName) == null) { throw new DdlException("Cluster: " + destClusterName + " does not exist."); } - Catalog.getCurrentSystemInfo().addBackends(addBackendClause.getHostPortPairs(), - addBackendClause.isFree(), addBackendClause.getDestCluster()); + Catalog.getCurrentSystemInfo().addBackends(addBackendClause.getHostPortPairs(), + addBackendClause.isFree(), addBackendClause.getDestCluster(), addBackendClause.getTag()); } else if (alterClause instanceof DropBackendClause) { // drop backend DropBackendClause dropBackendClause = (DropBackendClause) alterClause; @@ -188,6 +189,8 @@ public synchronized void process(List alterClauses, String clusterN } else if (alterClause instanceof AlterLoadErrorUrlClause) { AlterLoadErrorUrlClause clause = (AlterLoadErrorUrlClause) alterClause; Catalog.getCurrentCatalog().getLoadInstance().setLoadErrorHubInfo(clause.getProperties()); + } else if (alterClause instanceof ModifyBackendClause) { + Catalog.getCurrentSystemInfo().modifyBackends(((ModifyBackendClause) alterClause)); } else { Preconditions.checkState(false, alterClause.getClass()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AddBackendClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AddBackendClause.java index 642732fbac2006..302bc998cd76ec 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AddBackendClause.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AddBackendClause.java @@ -17,27 +17,38 @@ package org.apache.doris.analysis; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.util.PropertyAnalyzer; +import 
org.apache.doris.resource.Tag; + import com.google.common.base.Strings; +import com.google.common.collect.Maps; import java.util.List; +import java.util.Map; public class AddBackendClause extends BackendClause { - // be in free state is not owned by any cluster protected boolean isFree; // cluster that backend will be added to protected String destCluster; + protected Map properties = Maps.newHashMap(); + private Tag tag; public AddBackendClause(List hostPorts) { super(hostPorts); this.isFree = true; this.destCluster = ""; } - - public AddBackendClause(List hostPorts, boolean isFree) { + + public AddBackendClause(List hostPorts, boolean isFree, Map properties) { super(hostPorts); this.isFree = isFree; this.destCluster = ""; + this.properties = properties; + if (this.properties == null) { + this.properties = Maps.newHashMap(); + } } public AddBackendClause(List hostPorts, String destCluster) { @@ -46,6 +57,21 @@ public AddBackendClause(List hostPorts, String destCluster) { this.destCluster = destCluster; } + public Tag getTag() { + return tag; + } + + @Override + public void analyze(Analyzer analyzer) throws AnalysisException { + super.analyze(analyzer); + tag = PropertyAnalyzer.analyzeBackendTagProperties(properties); + } + + @Override + public Map getProperties() { + return properties; + } + @Override public String toSql() { StringBuilder sb = new StringBuilder(); @@ -77,3 +103,4 @@ public String getDestCluster() { } } + diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterSystemStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterSystemStmt.java index 8e2175b8a66b8b..cf16640270ac6d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterSystemStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterSystemStmt.java @@ -54,7 +54,8 @@ public void analyze(Analyzer analyzer) throws UserException { || (alterClause instanceof AddFollowerClause) || (alterClause instanceof DropFollowerClause) || (alterClause 
instanceof ModifyBrokerClause) - || (alterClause instanceof AlterLoadErrorUrlClause)); + || (alterClause instanceof AlterLoadErrorUrlClause) + || (alterClause instanceof ModifyBackendClause)); alterClause.analyze(analyzer); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/BackendClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/BackendClause.java index c09ef49c7693b7..342b154aa7e385 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/BackendClause.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/BackendClause.java @@ -32,7 +32,6 @@ public class BackendClause extends AlterClause { protected List hostPorts; - protected List> hostPortPairs; protected BackendClause(List hostPorts) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableAsSelectStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableAsSelectStmt.java index 90e563b7164c11..e58439076e3077 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableAsSelectStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableAsSelectStmt.java @@ -19,7 +19,6 @@ import org.apache.doris.catalog.Table; import org.apache.doris.common.AnalysisException; -import org.apache.doris.common.DdlException; import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; import org.apache.doris.common.UserException; @@ -93,7 +92,7 @@ public void createTable(Analyzer analyzer) throws AnalysisException { // Create table try { analyzer.getCatalog().createTable(createTableStmt); - } catch (DdlException e) { + } catch (UserException e) { throw new AnalysisException(e.getMessage()); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyBackendClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyBackendClause.java new file mode 100644 index 00000000000000..f6d6262a8c3f53 --- /dev/null +++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyBackendClause.java @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.analysis; + +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.util.PropertyAnalyzer; +import org.apache.doris.resource.Tag; + +import com.google.common.collect.Maps; + +import java.util.List; +import java.util.Map; + +public class ModifyBackendClause extends BackendClause { + protected Map properties = Maps.newHashMap(); + private Tag tag; + + public ModifyBackendClause(List hostPorts, Map properties) { + super(hostPorts); + this.properties = properties; + } + + @Override + public void analyze(Analyzer analyzer) throws AnalysisException { + super.analyze(analyzer); + tag = PropertyAnalyzer.analyzeBackendTagProperties(properties); + } + + public Tag getTag() { + return tag; + } + + @Override + public String toSql() { + StringBuilder sb = new StringBuilder(); + sb.append("MODIFY BACKEND "); + for (int i = 0; i < hostPorts.size(); i++) { + sb.append("\"").append(hostPorts.get(i)).append("\""); + if (i != hostPorts.size() - 1) { + sb.append(", "); + } + } + return sb.toString(); + } +} diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyPartitionClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyPartitionClause.java index cba71ae391596a..5dd3c31c993232 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyPartitionClause.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyPartitionClause.java @@ -20,9 +20,9 @@ import org.apache.doris.alter.AlterOpType; import org.apache.doris.catalog.DataProperty; import org.apache.doris.common.AnalysisException; -import org.apache.doris.common.FeConstants; import org.apache.doris.common.util.PrintableMap; import org.apache.doris.common.util.PropertyAnalyzer; + import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.base.Strings; @@ -93,7 +93,7 @@ public void analyze(Analyzer analyzer) throws AnalysisException { } // Check the following properties' legality before modifying partition. - // 1. replication_num + // 1. replication_num or replica_allocation // 2. storage_medium && storage_cooldown_time // 3. in_memory // 4. tablet type @@ -103,10 +103,8 @@ private void checkProperties(Map properties) throws AnalysisExce newDataProperty = PropertyAnalyzer.analyzeDataProperty(properties, DataProperty.DEFAULT_DATA_PROPERTY); Preconditions.checkNotNull(newDataProperty); - // 2. replication num - short newReplicationNum = (short) -1; - newReplicationNum = PropertyAnalyzer.analyzeReplicationNum(properties, FeConstants.default_replication_num); - Preconditions.checkState(newReplicationNum != (short) -1); + // 2. replica allocation + PropertyAnalyzer.analyzeReplicaAllocation(properties, ""); // 3. 
in memory PropertyAnalyzer.analyzeBooleanProp(properties, PropertyAnalyzer.PROPERTIES_INMEMORY, false); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyTablePropertiesClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyTablePropertiesClause.java index 4ad057ef5eb6fe..782ad3df15cb3f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyTablePropertiesClause.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyTablePropertiesClause.java @@ -18,6 +18,7 @@ package org.apache.doris.analysis; import org.apache.doris.alter.AlterOpType; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.TableProperty; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.util.DynamicPartitionUtil; @@ -74,11 +75,14 @@ public void analyze(Analyzer analyzer) throws AnalysisException { } } else if (DynamicPartitionUtil.checkDynamicPartitionPropertiesExist(properties)) { // do nothing, dynamic properties will be analyzed in SchemaChangeHandler.process - } else if (properties.containsKey(PropertyAnalyzer.PROPERTIES_REPLICATION_NUM)) { - PropertyAnalyzer.analyzeReplicationNum(properties, false); - } else if (properties.containsKey("default." + PropertyAnalyzer.PROPERTIES_REPLICATION_NUM)) { - short defaultReplicationNum = PropertyAnalyzer.analyzeReplicationNum(properties, true); - properties.put(PropertyAnalyzer.PROPERTIES_REPLICATION_NUM, Short.toString(defaultReplicationNum)); + } else if (properties.containsKey(PropertyAnalyzer.PROPERTIES_REPLICATION_NUM) + || properties.containsKey(PropertyAnalyzer.PROPERTIES_REPLICATION_ALLOCATION)) { + ReplicaAllocation replicaAlloc = PropertyAnalyzer.analyzeReplicaAllocation(properties, ""); + properties.put(PropertyAnalyzer.PROPERTIES_REPLICATION_ALLOCATION, replicaAlloc.toCreateStmt()); + } else if (properties.containsKey("default." + PropertyAnalyzer.PROPERTIES_REPLICATION_NUM) + || properties.containsKey("default." 
+ PropertyAnalyzer.PROPERTIES_REPLICATION_ALLOCATION)) { + ReplicaAllocation replicaAlloc = PropertyAnalyzer.analyzeReplicaAllocation(properties, "default"); + properties.put("default." + PropertyAnalyzer.PROPERTIES_REPLICATION_ALLOCATION, replicaAlloc.toCreateStmt()); } else if (properties.containsKey(PropertyAnalyzer.PROPERTIES_INMEMORY)) { this.needTableStable = false; this.opType = AlterOpType.MODIFY_TABLE_PROPERTY_SYNC; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/RestoreStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/RestoreStmt.java index a397d44764e57c..6a0bfe6f53f7b2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/RestoreStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/RestoreStmt.java @@ -17,12 +17,13 @@ package org.apache.doris.analysis; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; -import org.apache.doris.common.FeConstants; import org.apache.doris.common.UserException; import org.apache.doris.common.util.PrintableMap; +import org.apache.doris.common.util.PropertyAnalyzer; import com.google.common.collect.Maps; import com.google.common.collect.Sets; @@ -37,7 +38,7 @@ public class RestoreStmt extends AbstractBackupStmt { private final static String PROP_META_VERSION = "meta_version"; private boolean allowLoad = false; - private int replicationNum = FeConstants.default_replication_num; + private ReplicaAllocation replicaAlloc = ReplicaAllocation.DEFAULT_ALLOCATION; private String backupTimestamp = null; private int metaVersion = -1; @@ -50,8 +51,8 @@ public boolean allowLoad() { return allowLoad; } - public int getReplicationNum() { - return replicationNum; + public ReplicaAllocation getReplicaAlloc() { + return replicaAlloc; } public String getBackupTimestamp() { @@ -101,16 +102,10 @@ public void analyzeProperties() throws AnalysisException { } // 
replication num - if (copiedProperties.containsKey(PROP_REPLICATION_NUM)) { - try { - replicationNum = Integer.valueOf(copiedProperties.get(PROP_REPLICATION_NUM)); - } catch (NumberFormatException e) { - ErrorReport.reportAnalysisException(ErrorCode.ERR_COMMON_ERROR, - "Invalid replication num format: " + copiedProperties.get(PROP_REPLICATION_NUM)); - } - copiedProperties.remove(PROP_REPLICATION_NUM); + this.replicaAlloc = PropertyAnalyzer.analyzeReplicaAllocation(copiedProperties, ""); + if (this.replicaAlloc.isNotSet()) { + this.replicaAlloc = ReplicaAllocation.DEFAULT_ALLOCATION; } - // backup timestamp if (copiedProperties.containsKey(PROP_BACKUP_TIMESTAMP)) { backupTimestamp = copiedProperties.get(PROP_BACKUP_TIMESTAMP); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SinglePartitionDesc.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SinglePartitionDesc.java index 102d4b875665d0..b33271853560e5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SinglePartitionDesc.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SinglePartitionDesc.java @@ -19,17 +19,17 @@ import org.apache.doris.analysis.PartitionKeyDesc.PartitionKeyValueType; import org.apache.doris.catalog.DataProperty; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.common.AnalysisException; -import org.apache.doris.common.FeConstants; import org.apache.doris.common.FeNameFormat; import org.apache.doris.common.Pair; import org.apache.doris.common.util.PrintableMap; import org.apache.doris.common.util.PropertyAnalyzer; +import org.apache.doris.thrift.TTabletType; import com.google.common.base.Joiner; import com.google.common.base.Joiner.MapJoiner; import com.google.common.base.Preconditions; -import org.apache.doris.thrift.TTabletType; import java.util.Map; @@ -43,7 +43,7 @@ public class SinglePartitionDesc { private Map properties; private DataProperty partitionDataProperty; - private Short replicationNum; + private 
ReplicaAllocation replicaAlloc; private boolean isInMemory = false; private TTabletType tabletType = TTabletType.TABLET_TYPE_DISK; private Pair versionInfo; @@ -59,7 +59,7 @@ public SinglePartitionDesc(boolean ifNotExists, String partName, PartitionKeyDes this.properties = properties; this.partitionDataProperty = DataProperty.DEFAULT_DATA_PROPERTY; - this.replicationNum = FeConstants.default_replication_num; + this.replicaAlloc = ReplicaAllocation.DEFAULT_ALLOCATION; } public boolean isSetIfNotExists() { @@ -78,15 +78,17 @@ public DataProperty getPartitionDataProperty() { return partitionDataProperty; } - public short getReplicationNum() { - return replicationNum; + public ReplicaAllocation getReplicaAlloc() { + return replicaAlloc; } public boolean isInMemory() { return isInMemory; } - public TTabletType getTabletType() { return tabletType; } + public TTabletType getTabletType() { + return tabletType; + } public Pair getVersionInfo() { return versionInfo; @@ -111,13 +113,13 @@ public void analyze(int partColNum, Map otherProperties) throws // analyze data property partitionDataProperty = PropertyAnalyzer.analyzeDataProperty(properties, - DataProperty.DEFAULT_DATA_PROPERTY); + DataProperty.DEFAULT_DATA_PROPERTY); Preconditions.checkNotNull(partitionDataProperty); // analyze replication num - replicationNum = PropertyAnalyzer.analyzeReplicationNum(properties, FeConstants.default_replication_num); - if (replicationNum == null) { - throw new AnalysisException("Invalid replication number: " + replicationNum); + replicaAlloc = PropertyAnalyzer.analyzeReplicaAllocation(properties, ""); + if (replicaAlloc.isNotSet()) { + replicaAlloc = ReplicaAllocation.DEFAULT_ALLOCATION; } // analyze version info diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java index 5b18f2fee31389..49dd9f695afe98 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/backup/BackupHandler.java @@ -405,7 +405,7 @@ private void restore(Repository repository, Database db, RestoreStmt stmt) throw // Create a restore job RestoreJob restoreJob = new RestoreJob(stmt.getLabel(), stmt.getBackupTimestamp(), - db.getId(), db.getFullName(), jobInfo, stmt.allowLoad(), stmt.getReplicationNum(), + db.getId(), db.getFullName(), jobInfo, stmt.allowLoad(), stmt.getReplicaAlloc(), stmt.getTimeoutMs(), stmt.getMetaVersion(), catalog, repository.getId()); catalog.getEditLog().logRestoreJob(restoreJob); diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java index 7f96fe71587490..aaf2eb16e84bff 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java @@ -41,6 +41,7 @@ import org.apache.doris.catalog.PartitionType; import org.apache.doris.catalog.Replica; import org.apache.doris.catalog.Replica.ReplicaState; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.Resource; import org.apache.doris.catalog.ResourceMgr; import org.apache.doris.catalog.Table; @@ -55,6 +56,7 @@ import org.apache.doris.common.Pair; import org.apache.doris.common.io.Text; import org.apache.doris.common.util.TimeUtils; +import org.apache.doris.resource.Tag; import org.apache.doris.task.AgentBatchTask; import org.apache.doris.task.AgentTask; import org.apache.doris.task.AgentTaskExecutor; @@ -70,9 +72,6 @@ import org.apache.doris.thrift.TStorageType; import org.apache.doris.thrift.TTaskType; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.ArrayListMultimap; @@ -83,6 +82,9 @@ import com.google.common.collect.Multimap; import com.google.common.collect.Table.Cell; +import 
org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; @@ -125,7 +127,7 @@ public enum RestoreJobState { private long snapshotFinishedTime = -1; private long downloadFinishedTime = -1; - private int restoreReplicationNum; + private ReplicaAllocation replicaAlloc; // this 2 members is to save all newly restored objs // tbl name -> part @@ -157,13 +159,13 @@ public RestoreJob() { } public RestoreJob(String label, String backupTs, long dbId, String dbName, BackupJobInfo jobInfo, - boolean allowLoad, int restoreReplicationNum, long timeoutMs, int metaVersion, - Catalog catalog, long repoId) { + boolean allowLoad, ReplicaAllocation replicaAlloc, long timeoutMs, int metaVersion, + Catalog catalog, long repoId) { super(JobType.RESTORE, label, dbId, dbName, timeoutMs, catalog, repoId); this.backupTimestamp = backupTs; this.jobInfo = jobInfo; this.allowLoad = allowLoad; - this.restoreReplicationNum = restoreReplicationNum; + this.replicaAlloc = replicaAlloc; this.state = RestoreJobState.PENDING; this.metaVersion = metaVersion; } @@ -507,7 +509,6 @@ private void checkAndPrepareMeta() { } } - // Check and prepare meta objects. 
AgentBatchTask batchTask = new AgentBatchTask(); db.readLock(); @@ -590,7 +591,7 @@ private void checkAndPrepareMeta() { Partition restorePart = resetPartitionForRestore(localOlapTbl, remoteOlapTbl, partitionName, db.getClusterName(), - restoreReplicationNum); + replicaAlloc); if (restorePart == null) { return; } @@ -619,7 +620,7 @@ private void checkAndPrepareMeta() { } // reset all ids in this table - Status st = remoteOlapTbl.resetIdsForRestore(catalog, db, restoreReplicationNum); + Status st = remoteOlapTbl.resetIdsForRestore(catalog, db, replicaAlloc); if (!st.ok()) { status = st; return; @@ -769,7 +770,7 @@ private void checkAndPrepareMeta() { PartitionItem remoteItem = remoteTbl.getPartitionInfo().getItem(remotePartId); DataProperty remoteDataProperty = remotePartitionInfo.getDataProperty(remotePartId); localPartitionInfo.addPartition(restoredPart.getId(), false, remoteItem, - remoteDataProperty, (short) restoreReplicationNum, + remoteDataProperty, replicaAlloc, remotePartitionInfo.getIsInMemory(remotePartId)); } localTbl.addPartition(restoredPart); @@ -899,12 +900,14 @@ private void checkAndRestoreResources() { private boolean genFileMappingWhenBackupReplicasEqual(PartitionInfo localPartInfo, Partition localPartition, Table localTbl, BackupPartitionInfo backupPartInfo, String partitionName, BackupOlapTableInfo tblInfo) { - if (localPartInfo.getReplicationNum(localPartition.getId()) != restoreReplicationNum) { + short restoreReplicaNum = replicaAlloc.getTotalReplicaNum(); + short localReplicaNum = localPartInfo.getReplicaAllocation(localPartition.getId()).getTotalReplicaNum(); + if (localReplicaNum != restoreReplicaNum) { status = new Status(ErrCode.COMMON_ERROR, "Partition " + partitionName + " in table " + localTbl.getName() + " has different replication num '" - + localPartInfo.getReplicationNum(localPartition.getId()) - + "' with partition in repository, which is " + restoreReplicationNum); + + localReplicaNum + + "' with partition in repository, which 
is " + restoreReplicaNum); return true; } @@ -949,13 +952,13 @@ private void createReplicas(Database db, AgentBatchTask batchTask, OlapTable loc // reset remote partition. // reset all id in remote partition, but DO NOT modify any exist catalog objects. private Partition resetPartitionForRestore(OlapTable localTbl, OlapTable remoteTbl, String partName, - String clusterName, int restoreReplicationNum) { + String clusterName, ReplicaAllocation replicaAlloc) { Preconditions.checkState(localTbl.getPartition(partName) == null); Partition remotePart = remoteTbl.getPartition(partName); Preconditions.checkNotNull(remotePart); PartitionInfo localPartitionInfo = localTbl.getPartitionInfo(); Preconditions.checkState(localPartitionInfo.getType() == PartitionType.RANGE - || localPartitionInfo.getType() == PartitionType.LIST); + || localPartitionInfo.getType() == PartitionType.LIST); // generate new partition id long newPartId = catalog.getNextId(); @@ -993,20 +996,20 @@ private Partition resetPartitionForRestore(OlapTable localTbl, OlapTable remoteT remoteIdx.addTablet(newTablet, null /* tablet meta */, true /* is restore */); // replicas - List beIds = Catalog.getCurrentSystemInfo().seqChooseBackendIds(restoreReplicationNum, true, - true, clusterName); - if (beIds == null) { - status = new Status(ErrCode.COMMON_ERROR, - "failed to get enough backends for creating replica of tablet " - + newTabletId + ". 
need: " + restoreReplicationNum); + try { + Map> beIds = Catalog.getCurrentSystemInfo().chooseBackendIdByFilters(replicaAlloc, clusterName, null); + for (Map.Entry> entry : beIds.entrySet()) { + for (Long beId : entry.getValue()) { + long newReplicaId = catalog.getNextId(); + Replica newReplica = new Replica(newReplicaId, beId, ReplicaState.NORMAL, + visibleVersion, visibleVersionHash, schemaHash); + newTablet.addReplica(newReplica, true /* is restore */); + } + } + } catch (DdlException e) { + status = new Status(ErrCode.COMMON_ERROR, e.getMessage()); return null; } - for (Long beId : beIds) { - long newReplicaId = catalog.getNextId(); - Replica newReplica = new Replica(newReplicaId, beId, ReplicaState.NORMAL, - visibleVersion, visibleVersionHash, schemaHash); - newTablet.addReplica(newReplica, true /* is restore */); - } } } return remotePart; @@ -1077,7 +1080,7 @@ private void replayCheckAndPrepareMeta() { long remotePartId = backupPartitionInfo.id; DataProperty remoteDataProperty = remotePartitionInfo.getDataProperty(remotePartId); localPartitionInfo.addPartition(restorePart.getId(), false, remotePartitionInfo.getItem(remotePartId), - remoteDataProperty, (short) restoreReplicationNum, + remoteDataProperty, replicaAlloc, remotePartitionInfo.getIsInMemory(remotePartId)); localTbl.addPartition(restorePart); @@ -1455,7 +1458,7 @@ public List getInfo() { info.add(dbName); info.add(state.name()); info.add(String.valueOf(allowLoad)); - info.add(String.valueOf(restoreReplicationNum)); + info.add(replicaAlloc.toCreateStmt()); info.add(getRestoreObjs()); info.add(TimeUtils.longToTimeString(createTime)); info.add(TimeUtils.longToTimeString(metaPreparedTime)); @@ -1652,7 +1655,7 @@ public void write(DataOutput out) throws IOException { out.writeLong(snapshotFinishedTime); out.writeLong(downloadFinishedTime); - out.writeInt(restoreReplicationNum); + replicaAlloc.write(out); out.writeInt(restoredPartitions.size()); for (Pair entry : restoredPartitions) { @@ -1720,7 +1723,12 
@@ public void readFields(DataInput in) throws IOException { snapshotFinishedTime = in.readLong(); downloadFinishedTime = in.readLong(); - restoreReplicationNum = in.readInt(); + if (Catalog.getCurrentCatalogJournalVersion() < FeMetaVersion.VERSION_100) { + int restoreReplicationNum = in.readInt(); + replicaAlloc = new ReplicaAllocation((short) restoreReplicationNum); + } else { + replicaAlloc = ReplicaAllocation.read(in); + } int size = in.readInt(); for (int i = 0; i < size; i++) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java index c730f8e3658fc9..6d5477130f0e20 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java @@ -195,6 +195,7 @@ import org.apache.doris.qe.JournalObservable; import org.apache.doris.qe.SessionVariable; import org.apache.doris.qe.VariableMgr; +import org.apache.doris.resource.Tag; import org.apache.doris.service.FrontendOptions; import org.apache.doris.system.Backend; import org.apache.doris.system.Backend.BackendState; @@ -2976,7 +2977,7 @@ public void replayRenameDatabase(String dbName, String newDbName) { * 10. add this table to FE's meta * 11. 
add this table to ColocateGroup if necessary */ - public void createTable(CreateTableStmt stmt) throws DdlException { + public void createTable(CreateTableStmt stmt) throws UserException { String engineName = stmt.getEngineName(); String dbName = stmt.getDbName(); String tableName = stmt.getTableName(); @@ -3110,9 +3111,10 @@ public void addPartition(Database db, String tableName, AddPartitionClause addPa Map properties = singlePartitionDesc.getProperties(); // partition properties should inherit table properties - Short replicationNum = olapTable.getDefaultReplicationNum(); - if (!properties.containsKey(PropertyAnalyzer.PROPERTIES_REPLICATION_NUM)) { - properties.put(PropertyAnalyzer.PROPERTIES_REPLICATION_NUM, replicationNum.toString()); + ReplicaAllocation replicaAlloc = olapTable.getDefaultReplicaAllocation(); + if (!properties.containsKey(PropertyAnalyzer.PROPERTIES_REPLICATION_NUM) + && !properties.containsKey(PropertyAnalyzer.PROPERTIES_REPLICATION_ALLOCATION)) { + properties.put(PropertyAnalyzer.PROPERTIES_REPLICATION_ALLOCATION, replicaAlloc.toCreateStmt()); } if (!properties.containsKey(PropertyAnalyzer.PROPERTIES_INMEMORY)) { properties.put(PropertyAnalyzer.PROPERTIES_INMEMORY, olapTable.isInMemory().toString()); @@ -3155,7 +3157,7 @@ public void addPartition(Database db, String tableName, AddPartitionClause addPa ColocateGroupSchema groupSchema = colocateTableIndex.getGroupSchema(fullGroupName); Preconditions.checkNotNull(groupSchema); groupSchema.checkDistribution(distributionInfo); - groupSchema.checkReplicationNum(singlePartitionDesc.getReplicationNum()); + groupSchema.checkReplicaAllocation(singlePartitionDesc.getReplicaAlloc()); } indexIdToMeta = olapTable.getCopiedIndexIdToMeta(); @@ -3184,14 +3186,14 @@ public void addPartition(Database db, String tableName, AddPartitionClause addPa indexIdToMeta, distributionInfo, dataProperty.getStorageMedium(), - singlePartitionDesc.getReplicationNum(), + singlePartitionDesc.getReplicaAlloc(), 
singlePartitionDesc.getVersionInfo(), bfColumns, olapTable.getBfFpp(), tabletIdSet, olapTable.getCopiedIndexes(), singlePartitionDesc.isInMemory(), olapTable.getStorageFormat(), singlePartitionDesc.getTabletType() - ); + ); // check again table = db.getTable(tableName); @@ -3267,14 +3269,14 @@ public void addPartition(Database db, String tableName, AddPartitionClause addPa partitionInfo.getItem(partitionId).getItems(), ListPartitionItem.DUMMY_ITEM, dataProperty, - partitionInfo.getReplicationNum(partitionId), + partitionInfo.getReplicaAllocation(partitionId), partitionInfo.getIsInMemory(partitionId), isTempPartition); } else if (partitionInfo.getType() == PartitionType.LIST) { info = new PartitionPersistInfo(db.getId(), olapTable.getId(), partition, RangePartitionItem.DUMMY_ITEM, partitionInfo.getItem(partitionId), dataProperty, - partitionInfo.getReplicationNum(partitionId), + partitionInfo.getReplicaAllocation(partitionId), partitionInfo.getIsInMemory(partitionId), isTempPartition); } @@ -3313,7 +3315,7 @@ public void replayAddPartition(PartitionPersistInfo info) throws UserException { } partitionInfo.unprotectHandleNewSinglePartitionDesc(partition.getId(), info.isTempPartition(), - partitionItem, info.getDataProperty(), info.getReplicationNum(), info.isInMemory()); + partitionItem, info.getDataProperty(), info.getReplicaAlloc(), info.isInMemory()); if (!isCheckpointThread()) { // add to inverted index @@ -3429,7 +3431,7 @@ private Partition createPartitionWithIndices(String clusterName, long dbId, long Map indexIdToMeta, DistributionInfo distributionInfo, TStorageMedium storageMedium, - short replicationNum, + ReplicaAllocation replicaAlloc, Pair versionInfo, Set bfColumns, double bfFpp, @@ -3466,6 +3468,7 @@ private Partition createPartitionWithIndices(String clusterName, long dbId, long long version = partition.getVisibleVersion(); long versionHash = partition.getVisibleVersionHash(); + short totalReplicaNum = replicaAlloc.getTotalReplicaNum(); for (Map.Entry 
entry : indexMap.entrySet()) { long indexId = entry.getKey(); MaterializedIndex index = entry.getValue(); @@ -3475,7 +3478,7 @@ private Partition createPartitionWithIndices(String clusterName, long dbId, long int schemaHash = indexMeta.getSchemaHash(); TabletMeta tabletMeta = new TabletMeta(dbId, tableId, partitionId, indexId, schemaHash, storageMedium); createTablets(clusterName, index, ReplicaState.NORMAL, distributionInfo, version, versionHash, - replicationNum, tabletMeta, tabletIdSet); + replicaAlloc, tabletMeta, tabletIdSet); boolean ok = false; String errMsg = null; @@ -3485,7 +3488,7 @@ private Partition createPartitionWithIndices(String clusterName, long dbId, long TStorageType storageType = indexMeta.getStorageType(); List schema = indexMeta.getSchema(); KeysType keysType = indexMeta.getKeysType(); - int totalTaskNum = index.getTablets().size() * replicationNum; + int totalTaskNum = index.getTablets().size() * totalReplicaNum; MarkedCountDownLatch countDownLatch = new MarkedCountDownLatch(totalTaskNum); AgentBatchTask batchTask = new AgentBatchTask(); for (Tablet tablet : index.getTablets()) { @@ -3551,7 +3554,7 @@ private Partition createPartitionWithIndices(String clusterName, long dbId, long } // Create olap table and related base index synchronously. 
- private void createOlapTable(Database db, CreateTableStmt stmt) throws DdlException { + private void createOlapTable(Database db, CreateTableStmt stmt) throws UserException { String tableName = stmt.getTableName(); LOG.debug("begin create olap table: {}", tableName); @@ -3633,23 +3636,18 @@ private void createOlapTable(Database db, CreateTableStmt stmt) throws DdlExcept throw new DdlException(e.getMessage()); } - // analyze replication_num - short replicationNum = FeConstants.default_replication_num; - try { - boolean isReplicationNumSet = properties != null && properties.containsKey(PropertyAnalyzer.PROPERTIES_REPLICATION_NUM); - replicationNum = PropertyAnalyzer.analyzeReplicationNum(properties, replicationNum); - if (isReplicationNumSet) { - olapTable.setReplicationNum(replicationNum); - } - } catch (AnalysisException e) { - throw new DdlException(e.getMessage()); + // analyze replica allocation + ReplicaAllocation replicaAlloc = PropertyAnalyzer.analyzeReplicaAllocation(properties, ""); + if (replicaAlloc.isNotSet()) { + replicaAlloc = ReplicaAllocation.DEFAULT_ALLOCATION; } + olapTable.setReplicationAllocation(replicaAlloc); // set in memory boolean isInMemory = PropertyAnalyzer.analyzeBooleanProp(properties, PropertyAnalyzer.PROPERTIES_INMEMORY, false); olapTable.setIsInMemory(isInMemory); - TTabletType tabletType = TTabletType.TABLET_TYPE_DISK; + TTabletType tabletType; try { tabletType = PropertyAnalyzer.analyzeTabletType(properties); } catch (AnalysisException e) { @@ -3671,7 +3669,7 @@ private void createOlapTable(Database db, CreateTableStmt stmt) throws DdlExcept } Preconditions.checkNotNull(dataProperty); partitionInfo.setDataProperty(partitionId, dataProperty); - partitionInfo.setReplicationNum(partitionId, replicationNum); + partitionInfo.setReplicaAllocation(partitionId, replicaAlloc); partitionInfo.setIsInMemory(partitionId, isInMemory); partitionInfo.setTabletType(partitionId, tabletType); } @@ -3784,7 +3782,7 @@ private void 
createOlapTable(Database db, CreateTableStmt stmt) throws DdlExcept olapTable.getIndexIdToMeta(), distributionInfo, partitionInfo.getDataProperty(partitionId).getStorageMedium(), - partitionInfo.getReplicationNum(partitionId), + partitionInfo.getReplicaAllocation(partitionId), versionInfo, bfColumns, bfFpp, tabletIdSet, olapTable.getCopiedIndexes(), isInMemory, storageFormat, tabletType); @@ -3819,7 +3817,7 @@ private void createOlapTable(Database db, CreateTableStmt stmt) throws DdlExcept olapTable.getBaseIndexId(), entry.getValue(), entry.getKey(), olapTable.getIndexIdToMeta(), distributionInfo, dataProperty.getStorageMedium(), - partitionInfo.getReplicationNum(entry.getValue()), + partitionInfo.getReplicaAllocation(entry.getValue()), versionInfo, bfColumns, bfFpp, tabletIdSet, olapTable.getCopiedIndexes(), isInMemory, storageFormat, @@ -3849,7 +3847,7 @@ private void createOlapTable(Database db, CreateTableStmt stmt) throws DdlExcept // we have added these index to memory, only need to persist here if (getColocateTableIndex().isColocateTable(tableId)) { GroupId groupId = getColocateTableIndex().getGroup(tableId); - List> backendsPerBucketSeq = getColocateTableIndex().getBackendsPerBucketSeq(groupId); + Map>> backendsPerBucketSeq = getColocateTableIndex().getBackendsPerBucketSeq(groupId); ColocatePersistInfo info = ColocatePersistInfo.createForAddTable(groupId, tableId, backendsPerBucketSeq); editLog.logColocateAddTable(info); } @@ -3890,7 +3888,6 @@ private void createMysqlTable(Database db, CreateTableStmt stmt) throws DdlExcep private void createOdbcTable(Database db, CreateTableStmt stmt) throws DdlException { String tableName = stmt.getTableName(); - List columns = stmt.getColumns(); long tableId = Catalog.getCurrentCatalog().getNextId(); @@ -4054,9 +4051,9 @@ public static void getDdlStmt(String dbName, Table table, List createTab sb.append("\nPROPERTIES (\n"); // replicationNum - Short replicationNum = olapTable.getDefaultReplicationNum(); - 
sb.append("\"").append(PropertyAnalyzer.PROPERTIES_REPLICATION_NUM).append("\" = \""); - sb.append(replicationNum).append("\""); + ReplicaAllocation replicaAlloc = olapTable.getDefaultReplicaAllocation(); + sb.append("\"").append(PropertyAnalyzer.PROPERTIES_REPLICATION_ALLOCATION).append("\" = \""); + sb.append(replicaAlloc.toCreateStmt()).append("\""); // bloom filter Set bfColumnNames = olapTable.getCopiedBfColumns(); @@ -4088,7 +4085,7 @@ public static void getDdlStmt(String dbName, Table table, List createTab // dynamic partition if (olapTable.dynamicPartitionExists()) { - sb.append(olapTable.getTableProperty().getDynamicPartitionProperty().getProperties(replicationNum)); + sb.append(olapTable.getTableProperty().getDynamicPartitionProperty().getProperties(replicaAlloc)); } // in memory @@ -4316,18 +4313,15 @@ public void replayAlterExternalTableSchema(String dbName, String tableName, List } private void createTablets(String clusterName, MaterializedIndex index, ReplicaState replicaState, - DistributionInfo distributionInfo, long version, long versionHash, short replicationNum, + DistributionInfo distributionInfo, long version, long versionHash, ReplicaAllocation replicaAlloc, TabletMeta tabletMeta, Set tabletIdSet) throws DdlException { - Preconditions.checkArgument(replicationNum > 0); - DistributionInfoType distributionInfoType = distributionInfo.getType(); if (distributionInfoType == DistributionInfoType.HASH) { ColocateTableIndex colocateIndex = Catalog.getCurrentColocateIndex(); - List> backendsPerBucketSeq = null; + Map>> backendsPerBucketSeq = null; GroupId groupId = null; if (colocateIndex.isColocateTable(tabletMeta.getTableId())) { // if this is a colocate table, try to get backend seqs from colocation index. 
- Database db = Catalog.getCurrentCatalog().getDb(tabletMeta.getDbId()); groupId = colocateIndex.getGroup(tabletMeta.getTableId()); backendsPerBucketSeq = colocateIndex.getBackendsPerBucketSeq(groupId); } @@ -4337,7 +4331,7 @@ private void createTablets(String clusterName, MaterializedIndex index, ReplicaS // otherwise, backends should be chosen from backendsPerBucketSeq; boolean chooseBackendsArbitrary = backendsPerBucketSeq == null || backendsPerBucketSeq.isEmpty(); if (chooseBackendsArbitrary) { - backendsPerBucketSeq = Lists.newArrayList(); + backendsPerBucketSeq = Maps.newHashMap(); } for (int i = 0; i < distributionInfo.getBucketNum(); ++i) { // create a new tablet with random chosen backends @@ -4348,29 +4342,42 @@ private void createTablets(String clusterName, MaterializedIndex index, ReplicaS tabletIdSet.add(tablet.getId()); // get BackendIds - List chosenBackendIds; + Map> chosenBackendIds; if (chooseBackendsArbitrary) { // This is the first colocate table in the group, or just a normal table, // randomly choose backends if (Config.enable_strict_storage_medium_check) { - chosenBackendIds = chosenBackendIdBySeq(replicationNum, clusterName, tabletMeta.getStorageMedium()); + chosenBackendIds = getCurrentSystemInfo().chooseBackendIdByFilters(replicaAlloc, clusterName, + tabletMeta.getStorageMedium()); } else { - chosenBackendIds = chosenBackendIdBySeq(replicationNum, clusterName); + chosenBackendIds = getCurrentSystemInfo().chooseBackendIdByFilters(replicaAlloc, clusterName, null); + } + + for (Map.Entry> entry : chosenBackendIds.entrySet()) { + backendsPerBucketSeq.putIfAbsent(entry.getKey(), Lists.newArrayList()); + backendsPerBucketSeq.get(entry.getKey()).add(entry.getValue()); } - backendsPerBucketSeq.add(chosenBackendIds); } else { // get backends from existing backend sequence - chosenBackendIds = backendsPerBucketSeq.get(i); + chosenBackendIds = Maps.newHashMap(); + for (Map.Entry>> entry : backendsPerBucketSeq.entrySet()) { + 
chosenBackendIds.put(entry.getKey(), entry.getValue().get(i)); + } } - + // create replicas - for (long backendId : chosenBackendIds) { - long replicaId = getNextId(); - Replica replica = new Replica(replicaId, backendId, replicaState, version, versionHash, - tabletMeta.getOldSchemaHash()); - tablet.addReplica(replica); + short totalReplicaNum = (short) 0; + for (List backendIds : chosenBackendIds.values()) { + for (long backendId : backendIds) { + long replicaId = getNextId(); + Replica replica = new Replica(replicaId, backendId, replicaState, version, versionHash, - tabletMeta.getOldSchemaHash()); + tablet.addReplica(replica); + totalReplicaNum++; + } } - Preconditions.checkState(chosenBackendIds.size() == replicationNum, chosenBackendIds.size() + " vs. "+ replicationNum); + Preconditions.checkState(totalReplicaNum == replicaAlloc.getTotalReplicaNum(), + totalReplicaNum + " vs. " + replicaAlloc.getTotalReplicaNum()); } if (groupId != null && chooseBackendsArbitrary) { @@ -4384,24 +4391,6 @@ private void createTablets(String clusterName, MaterializedIndex index, ReplicaS } } - // create replicas for tablet with random chosen backends - private List chosenBackendIdBySeq(int replicationNum, String clusterName, TStorageMedium storageMedium) throws DdlException { - List chosenBackendIds = Catalog.getCurrentSystemInfo().seqChooseBackendIdsByStorageMedium(replicationNum, - true, true, clusterName, storageMedium); - if (chosenBackendIds == null) { - throw new DdlException("Failed to find enough host with storage medium is " + storageMedium + " in all backends. need: " + replicationNum); - } - return chosenBackendIds; - } - - private List chosenBackendIdBySeq(int replicationNum, String clusterName) throws DdlException { - List chosenBackendIds = Catalog.getCurrentSystemInfo().seqChooseBackendIds(replicationNum, true, true, clusterName); - if (chosenBackendIds == null) { - throw new DdlException("Failed to find enough host in all backends.
need: " + replicationNum); - } - return chosenBackendIds; - } - // Drop table public void dropTable(DropTableStmt stmt) throws DdlException { String dbName = stmt.getDbName(); @@ -4829,7 +4818,7 @@ public HashMap getPartitionIdToStorageMediumMap() { new ModifyPartitionInfo(db.getId(), olapTable.getId(), partition.getId(), DataProperty.DEFAULT_DATA_PROPERTY, - (short) -1, + ReplicaAllocation.NOT_SET, partitionInfo.getIsInMemory(partition.getId())); editLog.logModifyPartition(info); } @@ -5224,18 +5213,18 @@ public void modifyTableColocate(Database db, OlapTable table, String colocateGro PartitionInfo partitionInfo = table.getPartitionInfo(); if (partitionInfo.getType() == PartitionType.RANGE || partitionInfo.getType() == PartitionType.LIST) { int bucketsNum = -1; - short replicationNum = -1; + ReplicaAllocation replicaAlloc = null; for (Partition partition : table.getPartitions()) { if (bucketsNum == -1) { bucketsNum = partition.getDistributionInfo().getBucketNum(); } else if (bucketsNum != partition.getDistributionInfo().getBucketNum()) { throw new DdlException("Partitions in table " + table.getName() + " have different buckets number"); } - - if (replicationNum == -1) { - replicationNum = partitionInfo.getReplicationNum(partition.getId()); - } else if (replicationNum != partitionInfo.getReplicationNum(partition.getId())) { - throw new DdlException("Partitions in table " + table.getName() + " have different replication number"); + + if (replicaAlloc == null) { + replicaAlloc = partitionInfo.getReplicaAllocation(partition.getId()); + } else if (!replicaAlloc.equals(partitionInfo.getReplicaAllocation(partition.getId()))) { + throw new DdlException("Partitions in table " + table.getName() + " have different replica allocation."); } } } @@ -5243,8 +5232,8 @@ public void modifyTableColocate(Database db, OlapTable table, String colocateGro // set to an already exist colocate group, check if this table can be added to this group. 
groupSchema.checkColocateSchema(table); } - - List> backendsPerBucketSeq = null; + + Map>> backendsPerBucketSeq = null; if (groupSchema == null) { // assign to a newly created group, set backends sequence. // we arbitrarily choose a tablet backends sequence from this table, @@ -5446,7 +5435,7 @@ public void replayRenameColumn(TableInfo tableInfo) throws DdlException { } public void modifyTableDynamicPartition(Database db, OlapTable table, Map properties) - throws DdlException { + throws UserException { Map logProperties = new HashMap<>(properties); TableProperty tableProperty = table.getTableProperty(); if (tableProperty == null) { @@ -5472,10 +5461,10 @@ public void modifyTableDynamicPartition(Database db, OlapTable table, Map properties) throws DdlException { + // The caller need to hold the table write lock + public void modifyTableReplicaAllocation(Database db, OlapTable table, Map properties) throws UserException { Preconditions.checkArgument(table.isWriteLockHeldByCurrentThread()); - String defaultReplicationNumName = "default."+ PropertyAnalyzer.PROPERTIES_REPLICATION_NUM; + String defaultReplicationNumName = "default." + PropertyAnalyzer.PROPERTIES_REPLICATION_NUM; PartitionInfo partitionInfo = table.getPartitionInfo(); if (partitionInfo.getType() == PartitionType.RANGE || partitionInfo.getType() == PartitionType.LIST) { throw new DdlException("This is a partitioned table, you should specify partitions with MODIFY PARTITION clause." 
+ @@ -5488,35 +5477,40 @@ public void modifyTableReplicationNum(Database db, OlapTable table, Map properties) { + // The caller need to hold the table write lock + public void modifyTableDefaultReplicaAllocation(Database db, OlapTable table, Map properties) { Preconditions.checkArgument(table.isWriteLockHeldByCurrentThread()); + TableProperty tableProperty = table.getTableProperty(); if (tableProperty == null) { tableProperty = new TableProperty(properties); + table.setTableProperty(tableProperty); } else { tableProperty.modifyTableProperties(properties); } - tableProperty.buildReplicationNum(); + tableProperty.buildReplicaAllocation(); + // log ModifyTablePropertyOperationLog info = new ModifyTablePropertyOperationLog(db.getId(), table.getId(), properties); editLog.logModifyReplicationNum(info); @@ -6195,15 +6189,16 @@ public Set getMigrations() { olapTable.readLock(); try { for (Partition partition : olapTable.getPartitions()) { - final short replicationNum = olapTable.getPartitionInfo() - .getReplicationNum(partition.getId()); + ReplicaAllocation replicaAlloc = olapTable.getPartitionInfo() + .getReplicaAllocation(partition.getId()); + short totalReplicaNum = replicaAlloc.getTotalReplicaNum(); for (MaterializedIndex materializedIndex : partition.getMaterializedIndices(IndexExtState.ALL)) { if (materializedIndex.getState() != IndexState.NORMAL) { continue; } for (Tablet tablet : materializedIndex.getTablets()) { int replicaNum = 0; - int quorum = replicationNum / 2 + 1; + int quorum = totalReplicaNum / 2 + 1; for (Replica replica : tablet.getReplicas()) { if (replica.getState() != ReplicaState.CLONE && beIds.contains(replica.getBackendId())) { @@ -6525,7 +6520,7 @@ public void truncateTable(TruncateTableStmt truncateTableStmt) throws DdlExcepti copiedTbl.getIndexIdToMeta(), copiedTbl.getDefaultDistributionInfo(), copiedTbl.getPartitionInfo().getDataProperty(oldPartitionId).getStorageMedium(), - copiedTbl.getPartitionInfo().getReplicationNum(oldPartitionId), + 
copiedTbl.getPartitionInfo().getReplicaAllocation(oldPartitionId), null /* version info */, copiedTbl.getCopiedBfColumns(), copiedTbl.getBfFpp(), diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java index bdee43118b9d21..9c35c1f6938d38 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java @@ -108,7 +108,7 @@ public synchronized boolean recycleTable(long dbId, Table table) { public synchronized boolean recyclePartition(long dbId, long tableId, Partition partition, Range range, PartitionItem listPartitionItem, DataProperty dataProperty, - short replicationNum, + ReplicaAllocation replicaAlloc, boolean isInMemory) { if (idToPartition.containsKey(partition.getId())) { LOG.error("partition[{}] already in recycle bin.", partition.getId()); @@ -120,8 +120,7 @@ public synchronized boolean recyclePartition(long dbId, long tableId, Partition // recycle partition RecyclePartitionInfo partitionInfo = new RecyclePartitionInfo(dbId, tableId, partition, - range, listPartitionItem, dataProperty, replicationNum, - isInMemory); + range, listPartitionItem, dataProperty, replicaAlloc, isInMemory); idToRecycleTime.put(partition.getId(), System.currentTimeMillis()); idToPartition.put(partition.getId(), partitionInfo); LOG.info("recycle partition[{}-{}]", partition.getId(), partition.getName()); @@ -450,7 +449,7 @@ public synchronized void recoverPartition(long dbId, OlapTable table, String par long partitionId = recoverPartition.getId(); partitionInfo.setItem(partitionId, false, recoverItem); partitionInfo.setDataProperty(partitionId, recoverPartitionInfo.getDataProperty()); - partitionInfo.setReplicationNum(partitionId, recoverPartitionInfo.getReplicationNum()); + partitionInfo.setReplicaAllocation(partitionId, recoverPartitionInfo.getReplicaAlloc()); 
partitionInfo.setIsInMemory(partitionId, recoverPartitionInfo.isInMemory()); // remove from recycle bin @@ -485,7 +484,7 @@ public synchronized void replayRecoverPartition(OlapTable table, long partitionI } partitionInfo.setItem(partitionId, false, recoverItem); partitionInfo.setDataProperty(partitionId, recyclePartitionInfo.getDataProperty()); - partitionInfo.setReplicationNum(partitionId, recyclePartitionInfo.getReplicationNum()); + partitionInfo.setReplicaAllocation(partitionId, recyclePartitionInfo.getReplicaAlloc()); partitionInfo.setIsInMemory(partitionId, recyclePartitionInfo.isInMemory()); iterator.remove(); @@ -740,7 +739,7 @@ public class RecyclePartitionInfo implements Writable { private Range range; private PartitionItem listPartitionItem; private DataProperty dataProperty; - private short replicationNum; + private ReplicaAllocation replicaAlloc; private boolean isInMemory; public RecyclePartitionInfo() { @@ -749,7 +748,7 @@ public RecyclePartitionInfo() { public RecyclePartitionInfo(long dbId, long tableId, Partition partition, Range range, PartitionItem listPartitionItem, - DataProperty dataProperty, short replicationNum, + DataProperty dataProperty, ReplicaAllocation replicaAlloc, boolean isInMemory) { this.dbId = dbId; this.tableId = tableId; @@ -757,7 +756,7 @@ public RecyclePartitionInfo(long dbId, long tableId, Partition partition, this.range = range; this.listPartitionItem = listPartitionItem; this.dataProperty = dataProperty; - this.replicationNum = replicationNum; + this.replicaAlloc = replicaAlloc; this.isInMemory = isInMemory; } @@ -785,8 +784,8 @@ public DataProperty getDataProperty() { return dataProperty; } - public short getReplicationNum() { - return replicationNum; + public ReplicaAllocation getReplicaAlloc() { + return replicaAlloc; } public boolean isInMemory() { @@ -801,7 +800,7 @@ public void write(DataOutput out) throws IOException { RangeUtils.writeRange(out, range); listPartitionItem.write(out); dataProperty.write(out); - 
out.writeShort(replicationNum); + replicaAlloc.write(out); out.writeBoolean(isInMemory); } @@ -817,7 +816,12 @@ public void readFields(DataInput in) throws IOException { } dataProperty = DataProperty.read(in); - replicationNum = in.readShort(); + if (Catalog.getCurrentCatalogJournalVersion() < FeMetaVersion.VERSION_100) { + short replicationNum = in.readShort(); + replicaAlloc = new ReplicaAllocation(replicationNum); + } else { + replicaAlloc = ReplicaAllocation.read(in); + } if (Catalog.getCurrentCatalogJournalVersion() >= FeMetaVersion.VERSION_72) { isInMemory = in.readBoolean(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateGroupSchema.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateGroupSchema.java index 0ea7b15eab43af..d6455e450c6ca6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateGroupSchema.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateGroupSchema.java @@ -21,6 +21,7 @@ import org.apache.doris.common.DdlException; import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; +import org.apache.doris.common.FeMetaVersion; import org.apache.doris.common.io.Writable; import com.google.common.collect.Lists; @@ -38,17 +39,17 @@ public class ColocateGroupSchema implements Writable { private GroupId groupId; private List distributionColTypes = Lists.newArrayList(); private int bucketsNum; - private short replicationNum; + private ReplicaAllocation replicaAlloc; private ColocateGroupSchema() { } - public ColocateGroupSchema(GroupId groupId, List distributionCols, int bucketsNum, short replicationNum) { + public ColocateGroupSchema(GroupId groupId, List distributionCols, int bucketsNum, ReplicaAllocation replicaAlloc) { this.groupId = groupId; this.distributionColTypes = distributionCols.stream().map(c -> c.getType()).collect(Collectors.toList()); this.bucketsNum = bucketsNum; - this.replicationNum = replicationNum; + this.replicaAlloc = replicaAlloc; } 
public GroupId getGroupId() { @@ -59,8 +60,8 @@ public int getBucketsNum() { return bucketsNum; } - public short getReplicationNum() { - return replicationNum; + public ReplicaAllocation getReplicaAlloc() { + return replicaAlloc; } public List getDistributionColTypes() { @@ -69,7 +70,7 @@ public List getDistributionColTypes() { public void checkColocateSchema(OlapTable tbl) throws DdlException { checkDistribution(tbl.getDefaultDistributionInfo()); - checkReplicationNum(tbl.getPartitionInfo()); + checkReplicaAllocation(tbl.getPartitionInfo()); } public void checkDistribution(DistributionInfo distributionInfo) throws DdlException { @@ -95,17 +96,19 @@ public void checkDistribution(DistributionInfo distributionInfo) throws DdlExcep } } - public void checkReplicationNum(PartitionInfo partitionInfo) throws DdlException { - for (Short repNum : partitionInfo.idToReplicationNum.values()) { - if (repNum != replicationNum) { - ErrorReport.reportDdlException(ErrorCode.ERR_COLOCATE_TABLE_MUST_HAS_SAME_REPLICATION_NUM, replicationNum); + public void checkReplicaAllocation(PartitionInfo partitionInfo) throws DdlException { + for (ReplicaAllocation replicaAlloc : partitionInfo.idToReplicaAllocation.values()) { + if (!replicaAlloc.equals(this.replicaAlloc)) { + ErrorReport.reportDdlException(ErrorCode.ERR_COLOCATE_TABLE_MUST_HAS_SAME_REPLICATION_ALLOCATION, + this.replicaAlloc); } } } - public void checkReplicationNum(short repNum) throws DdlException { - if (repNum != replicationNum) { - ErrorReport.reportDdlException(ErrorCode.ERR_COLOCATE_TABLE_MUST_HAS_SAME_REPLICATION_NUM, replicationNum); + public void checkReplicaAllocation(ReplicaAllocation replicaAlloc) throws DdlException { + if (!replicaAlloc.equals(this.replicaAlloc)) { + ErrorReport.reportDdlException(ErrorCode.ERR_COLOCATE_TABLE_MUST_HAS_SAME_REPLICATION_ALLOCATION, + this.replicaAlloc); } } @@ -123,7 +126,7 @@ public void write(DataOutput out) throws IOException { ColumnType.write(out, type); } 
out.writeInt(bucketsNum); - out.writeShort(replicationNum); + this.replicaAlloc.write(out); } public void readFields(DataInput in) throws IOException { @@ -133,6 +136,11 @@ public void readFields(DataInput in) throws IOException { distributionColTypes.add(ColumnType.read(in)); } bucketsNum = in.readInt(); - replicationNum = in.readShort(); + if (Catalog.getCurrentCatalogJournalVersion() < FeMetaVersion.VERSION_100) { + short replicationNum = in.readShort(); + this.replicaAlloc = new ReplicaAllocation(replicationNum); + } else { + this.replicaAlloc = ReplicaAllocation.read(in); + } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateTableIndex.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateTableIndex.java index 756d268cf368bd..c2c95489f67b24 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateTableIndex.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateTableIndex.java @@ -21,15 +21,20 @@ import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; import org.apache.doris.persist.ColocatePersistInfo; +import org.apache.doris.persist.gson.GsonUtils; +import org.apache.doris.resource.Tag; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.HashBasedTable; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Multimap; import com.google.common.collect.Sets; +import com.google.common.collect.Table; +import com.google.gson.annotations.SerializedName; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -37,7 +42,6 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; -import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.List; @@ -54,7 +58,9 @@ public 
class ColocateTableIndex implements Writable { private static final Logger LOG = LogManager.getLogger(ColocateTableIndex.class); public static class GroupId implements Writable { + @SerializedName(value = "dbId") public Long dbId; + @SerializedName(value = "grpId") public Long grpId; private GroupId() { @@ -66,18 +72,23 @@ public GroupId(long dbId, long grpId) { } public static GroupId read(DataInput in) throws IOException { - GroupId groupId = new GroupId(); - groupId.readFields(in); - return groupId; + if (Catalog.getCurrentCatalogJournalVersion() < FeMetaVersion.VERSION_100) { + GroupId groupId = new GroupId(); + groupId.readFields(in); + return groupId; + } else { + String json = Text.readString(in); + return GsonUtils.GSON.fromJson(json, GroupId.class); + } } @Override public void write(DataOutput out) throws IOException { - out.writeLong(dbId); - out.writeLong(grpId); + Text.writeString(out, GsonUtils.GSON.toJson(this)); } - public void readFields(DataInput in) throws IOException { + @Deprecated + private void readFields(DataInput in) throws IOException { dbId = in.readLong(); grpId = in.readLong(); } @@ -114,7 +125,7 @@ public String toString() { // group id -> group schema private Map group2Schema = Maps.newHashMap(); // group_id -> bucketSeq -> backend ids - private Map>> group2BackendsPerBucketSeq = Maps.newHashMap(); + private Table>> group2BackendsPerBucketSeq = HashBasedTable.create(); // the colocate group is unstable private Set unstableGroups = Sets.newHashSet(); @@ -160,7 +171,7 @@ public GroupId addTableToGroup(long dbId, OlapTable tbl, String groupName, Group HashDistributionInfo distributionInfo = (HashDistributionInfo) tbl.getDefaultDistributionInfo(); ColocateGroupSchema groupSchema = new ColocateGroupSchema(groupId, distributionInfo.getDistributionColumns(), distributionInfo.getBucketNum(), - tbl.getDefaultReplicationNum()); + tbl.getDefaultReplicaAllocation()); groupName2Id.put(fullGroupName, groupId); group2Schema.put(groupId, groupSchema); 
} @@ -172,10 +183,12 @@ public GroupId addTableToGroup(long dbId, OlapTable tbl, String groupName, Group } } - public void addBackendsPerBucketSeq(GroupId groupId, List> backendsPerBucketSeq) { + public void addBackendsPerBucketSeq(GroupId groupId, Map>> backendsPerBucketSeq) { writeLock(); try { - group2BackendsPerBucketSeq.put(groupId, backendsPerBucketSeq); + for (Map.Entry>> entry : backendsPerBucketSeq.entrySet()) { + group2BackendsPerBucketSeq.put(groupId, entry.getKey(), entry.getValue()); + } } finally { writeUnlock(); } @@ -228,7 +241,7 @@ public boolean removeTable(long tableId) { group2Tables.remove(groupId, tableId); if (!group2Tables.containsKey(groupId)) { // all tables of this group are removed, remove the group - group2BackendsPerBucketSeq.remove(groupId); + group2BackendsPerBucketSeq.rowMap().remove(groupId); group2Schema.remove(groupId); unstableGroups.remove(groupId); String fullGroupName = null; @@ -320,7 +333,7 @@ public Set getBackendsByGroup(GroupId groupId) { readLock(); try { Set allBackends = new HashSet<>(); - List> backendsPerBucketSeq = group2BackendsPerBucketSeq.get(groupId); + List> backendsPerBucketSeq = group2BackendsPerBucketSeq.get(groupId, Tag.DEFAULT_BACKEND_TAG); // if create colocate table with empty partition or create colocate table // with dynamic_partition will cause backendsPerBucketSeq == null if (backendsPerBucketSeq != null) { @@ -346,12 +359,12 @@ public List getAllTableIds(GroupId groupId) { } } - public List> getBackendsPerBucketSeq(GroupId groupId) { + public Map>> getBackendsPerBucketSeq(GroupId groupId) { readLock(); try { - List> backendsPerBucketSeq = group2BackendsPerBucketSeq.get(groupId); + Map>> backendsPerBucketSeq = group2BackendsPerBucketSeq.row(groupId); if (backendsPerBucketSeq == null) { - return Lists.newArrayList(); + return Maps.newHashMap(); } return backendsPerBucketSeq; } finally { @@ -362,7 +375,7 @@ public List> getBackendsPerBucketSeq(GroupId groupId) { public List> 
getBackendsPerBucketSeqSet(GroupId groupId) { readLock(); try { - List> backendsPerBucketSeq = group2BackendsPerBucketSeq.get(groupId); + List> backendsPerBucketSeq = group2BackendsPerBucketSeq.get(groupId, Tag.DEFAULT_BACKEND_TAG); if (backendsPerBucketSeq == null) { return Lists.newArrayList(); } @@ -379,7 +392,7 @@ public List> getBackendsPerBucketSeqSet(GroupId groupId) { public Set getTabletBackendsByGroup(GroupId groupId, int tabletOrderIdx) { readLock(); try { - List> backendsPerBucketSeq = group2BackendsPerBucketSeq.get(groupId); + List> backendsPerBucketSeq = group2BackendsPerBucketSeq.get(groupId, Tag.DEFAULT_BACKEND_TAG); if (backendsPerBucketSeq == null) { return Sets.newHashSet(); } @@ -453,10 +466,10 @@ public void replayAddTableToGroup(ColocatePersistInfo info) { writeLock(); try { - if (!group2BackendsPerBucketSeq.containsKey(info.getGroupId())) { - group2BackendsPerBucketSeq.put(info.getGroupId(), info.getBackendsPerBucketSeq()); + Map>> map = info.getBackendsPerBucketSeq(); + for (Map.Entry>> entry : map.entrySet()) { + group2BackendsPerBucketSeq.put(info.getGroupId(), entry.getKey(), entry.getValue()); } - addTableToGroup(info.getGroupId().dbId, tbl, tbl.getColocateGroup(), info.getGroupId()); } finally { writeUnlock(); @@ -505,7 +518,7 @@ public List> getInfos() { info.add(Joiner.on(", ").join(group2Tables.get(groupId))); ColocateGroupSchema groupSchema = group2Schema.get(groupId); info.add(String.valueOf(groupSchema.getBucketsNum())); - info.add(String.valueOf(groupSchema.getReplicationNum())); + info.add(String.valueOf(groupSchema.getReplicaAlloc().toCreateStmt())); List cols = groupSchema.getDistributionColTypes().stream().map( e -> e.toSql()).collect(Collectors.toList()); info.add(Joiner.on(", ").join(cols)); @@ -534,12 +547,16 @@ public void write(DataOutput out) throws IOException { groupSchema.write(out); // group schema // backend seq - List> backendsPerBucketSeq = group2BackendsPerBucketSeq.get(entry.getValue()); + Map>> 
backendsPerBucketSeq = group2BackendsPerBucketSeq.row(entry.getValue()); out.writeInt(backendsPerBucketSeq.size()); - for (List bucket2BEs : backendsPerBucketSeq) { - out.writeInt(bucket2BEs.size()); - for (Long be : bucket2BEs) { - out.writeLong(be); + for (Map.Entry>> tag2Bucket2BEs : backendsPerBucketSeq.entrySet()) { + tag2Bucket2BEs.getKey().write(out); + out.writeInt(tag2Bucket2BEs.getValue().size()); + for (List beIds : tag2Bucket2BEs.getValue()) { + out.writeInt(beIds.size()); + for (Long be : beIds) { + out.writeLong(be); + } } } } @@ -554,60 +571,8 @@ public void write(DataOutput out) throws IOException { public void readFields(DataInput in) throws IOException { int size = in.readInt(); if (Catalog.getCurrentCatalogJournalVersion() < FeMetaVersion.VERSION_55) { - Multimap tmpGroup2Tables = ArrayListMultimap.create(); - Map tmpTable2Group = Maps.newHashMap(); - Map tmpGroup2Db = Maps.newHashMap(); - Map>> tmpGroup2BackendsPerBucketSeq = Maps.newHashMap(); - Set tmpBalancingGroups = Sets.newHashSet(); - - for (int i = 0; i < size; i++) { - long group = in.readLong(); - int tableSize = in.readInt(); - List tables = new ArrayList<>(); - for (int j = 0; j < tableSize; j++) { - tables.add(in.readLong()); - } - tmpGroup2Tables.putAll(group, tables); - } - - size = in.readInt(); - for (int i = 0; i < size; i++) { - long table = in.readLong(); - long group = in.readLong(); - tmpTable2Group.put(table, group); - } - - size = in.readInt(); - for (int i = 0; i < size; i++) { - long group = in.readLong(); - long db = in.readLong(); - tmpGroup2Db.put(group, db); - } - - size = in.readInt(); - for (int i = 0; i < size; i++) { - long group = in.readLong(); - List> bucketBeLists = new ArrayList<>(); - int bucketBeListsSize = in.readInt(); - for (int j = 0; j < bucketBeListsSize; j++) { - int beListSize = in.readInt(); - List beLists = new ArrayList<>(); - for (int k = 0; k < beListSize; k++) { - beLists.add(in.readLong()); - } - bucketBeLists.add(beLists); - } - 
tmpGroup2BackendsPerBucketSeq.put(group, bucketBeLists); - } - - size = in.readInt(); - for (int i = 0; i < size; i++) { - long group = in.readLong(); - tmpBalancingGroups.add(group); - } - - convertedToNewMembers(tmpGroup2Tables, tmpTable2Group, tmpGroup2Db, tmpGroup2BackendsPerBucketSeq, - tmpBalancingGroups); + throw new IOException("This is a very old metadata with version: " + + Catalog.getCurrentCatalogJournalVersion() + ", can not be read"); } else { for (int i = 0; i < size; i++) { String fullGrpName = Text.readString(in); @@ -622,18 +587,37 @@ public void readFields(DataInput in) throws IOException { ColocateGroupSchema groupSchema = ColocateGroupSchema.read(in); group2Schema.put(grpId, groupSchema); - List> backendsPerBucketSeq = Lists.newArrayList(); - int beSize = in.readInt(); - for (int j = 0; j < beSize; j++) { - int seqSize = in.readInt(); - List seq = Lists.newArrayList(); - for (int k = 0; k < seqSize; k++) { - long beId = in.readLong(); - seq.add(beId); + // backends seqs + if (Catalog.getCurrentCatalogJournalVersion() < FeMetaVersion.VERSION_100) { + List> bucketsSeq = Lists.newArrayList(); + int beSize = in.readInt(); + for (int j = 0; j < beSize; j++) { + int seqSize = in.readInt(); + List seq = Lists.newArrayList(); + for (int k = 0; k < seqSize; k++) { + long beId = in.readLong(); + seq.add(beId); + } + bucketsSeq.add(seq); + } + group2BackendsPerBucketSeq.put(grpId, Tag.DEFAULT_BACKEND_TAG, bucketsSeq); + } else { + int tagSize = in.readInt(); + for (int j = 0; j < tagSize; j++) { + Tag tag = Tag.read(in); + int bucketSize = in.readInt(); + List> bucketsSeq = Lists.newArrayList(); + for (int k = 0; k < bucketSize; k++) { + List beIds = Lists.newArrayList(); + int beSize = in.readInt(); + for (int l = 0; l < bucketSize; l++) { + beIds.add(in.readLong()); + } + bucketsSeq.add(beIds); + } + group2BackendsPerBucketSeq.put(grpId, tag, bucketsSeq); } - backendsPerBucketSeq.add(seq); } - group2BackendsPerBucketSeq.put(grpId, backendsPerBucketSeq); 
} size = in.readInt(); @@ -642,69 +626,7 @@ public void readFields(DataInput in) throws IOException { } } } - - private void convertedToNewMembers(Multimap tmpGroup2Tables, Map tmpTable2Group, - Map tmpGroup2Db, Map>> tmpGroup2BackendsPerBucketSeq, - Set tmpBalancingGroups) { - - LOG.debug("debug: tmpGroup2Tables {}", tmpGroup2Tables); - LOG.debug("debug: tmpTable2Group {}", tmpTable2Group); - LOG.debug("debug: tmpGroup2Db {}", tmpGroup2Db); - LOG.debug("debug: tmpGroup2BackendsPerBucketSeq {}", tmpGroup2BackendsPerBucketSeq); - LOG.debug("debug: tmpBalancingGroups {}", tmpBalancingGroups); - - for (Map.Entry entry : tmpGroup2Db.entrySet()) { - GroupId groupId = new GroupId(entry.getValue(), entry.getKey()); - Database db = Catalog.getCurrentCatalog().getDb(groupId.dbId); - if (db == null) { - continue; - } - Collection tableIds = tmpGroup2Tables.get(groupId.grpId); - - for (Long tblId : tableIds) { - OlapTable tbl = (OlapTable) db.getTable(tblId); - if (tbl == null) { - continue; - } - tbl.readLock(); - try { - if (tblId.equals(groupId.grpId)) { - // this is a parent table, use its name as group name - groupName2Id.put(groupId.dbId + "_" + tbl.getName(), groupId); - - ColocateGroupSchema groupSchema = new ColocateGroupSchema(groupId, - ((HashDistributionInfo)tbl.getDefaultDistributionInfo()).getDistributionColumns(), - tbl.getDefaultDistributionInfo().getBucketNum(), - tbl.getPartitionInfo().idToReplicationNum.values().stream().findFirst().get()); - group2Schema.put(groupId, groupSchema); - group2BackendsPerBucketSeq.put(groupId, tmpGroup2BackendsPerBucketSeq.get(groupId.grpId)); - } - } finally { - tbl.readUnlock(); - } - - group2Tables.put(groupId, tblId); - table2Group.put(tblId, groupId); - } - } - } - - public void setBackendsSetByIdxForGroup(GroupId groupId, int tabletOrderIdx, Set newBackends) { - writeLock(); - try { - List> backends = group2BackendsPerBucketSeq.get(groupId); - if (backends == null) { - return; - } - Preconditions.checkState(tabletOrderIdx 
< backends.size(), tabletOrderIdx + " vs. " + backends.size()); - backends.set(tabletOrderIdx, Lists.newArrayList(newBackends)); - ColocatePersistInfo info = ColocatePersistInfo.createForBackendsPerBucketSeq(groupId, backends); - Catalog.getCurrentCatalog().getEditLog().logColocateBackendsPerBucketSeq(info); - } finally { - writeUnlock(); - } - } - + // just for ut public Map getTable2Group() { return table2Group; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Database.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Database.java index f1b7e829723436..ae5d8f473ee2b7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Database.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Database.java @@ -281,6 +281,7 @@ public void checkQuota() throws DdlException { checkReplicaQuota(); } + // return pair public Pair createTableWithLock(Table table, boolean isReplay, boolean setIfNotExist) { boolean result = true; // if a table is already exists, then edit log won't be executed @@ -454,7 +455,7 @@ public int getMaxReplicationNum() { table.readLock(); try { for (Partition partition : olapTable.getAllPartitions()) { - short replicationNum = olapTable.getPartitionInfo().getReplicationNum(partition.getId()); + short replicationNum = olapTable.getPartitionInfo().getReplicaAllocation(partition.getId()).getTotalReplicaNum(); if (ret < replicationNum) { ret = replicationNum; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/DynamicPartitionProperty.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/DynamicPartitionProperty.java index 50892a0d040fd0..9b4ed77d224f2d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/DynamicPartitionProperty.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/DynamicPartitionProperty.java @@ -18,8 +18,10 @@ package org.apache.doris.catalog; import org.apache.doris.analysis.TimestampArithmeticExpr.TimeUnit; +import org.apache.doris.common.AnalysisException; import 
org.apache.doris.common.FeConstants; import org.apache.doris.common.util.DynamicPartitionUtil.StartOfDate; +import org.apache.doris.common.util.PropertyAnalyzer; import org.apache.doris.common.util.TimeUtils; import java.util.Map; @@ -36,6 +38,7 @@ public class DynamicPartitionProperty { public static final String START_DAY_OF_MONTH = "dynamic_partition.start_day_of_month"; public static final String TIME_ZONE = "dynamic_partition.time_zone"; public static final String REPLICATION_NUM = "dynamic_partition.replication_num"; + public static final String REPLICATION_ALLOCATION = "dynamic_partition.replication_allocation"; public static final String CREATE_HISTORY_PARTITION = "dynamic_partition.create_history_partition"; public static final String HOT_PARTITION_NUM = "dynamic_partition.hot_partition_num"; @@ -54,7 +57,8 @@ public class DynamicPartitionProperty { private StartOfDate startOfWeek; private StartOfDate startOfMonth; private TimeZone tz = TimeUtils.getSystemTimeZone(); - private int replicationNum; + // if NOT_SET, it will use table's default replica allocation + private ReplicaAllocation replicaAlloc; private boolean createHistoryPartition = false; // This property are used to describe the number of partitions that need to be reserved on the high-speed storage. 
// If not set, default is 0 @@ -71,7 +75,7 @@ public DynamicPartitionProperty(Map properties) { this.end = Integer.parseInt(properties.get(END)); this.prefix = properties.get(PREFIX); this.buckets = Integer.parseInt(properties.get(BUCKETS)); - this.replicationNum = Integer.parseInt(properties.getOrDefault(REPLICATION_NUM, String.valueOf(NOT_SET_REPLICATION_NUM))); + this.replicaAlloc = analyzeReplicaAllocation(properties); this.createHistoryPartition = Boolean.parseBoolean(properties.get(CREATE_HISTORY_PARTITION)); this.hotPartitionNum = Integer.parseInt(properties.getOrDefault(HOT_PARTITION_NUM, "0")); createStartOfs(properties); @@ -80,6 +84,15 @@ public DynamicPartitionProperty(Map properties) { } } + private ReplicaAllocation analyzeReplicaAllocation(Map properties) { + try { + return PropertyAnalyzer.analyzeReplicaAllocation(properties, "dynamic_partition"); + } catch (AnalysisException e) { + // should not happen + return ReplicaAllocation.NOT_SET; + } + } + private void createStartOfs(Map properties) { if (properties.containsKey(START_DAY_OF_WEEK)) { startOfWeek = new StartOfDate(-1, -1, Integer.valueOf(properties.get(START_DAY_OF_WEEK))); @@ -154,25 +167,22 @@ public TimeZone getTimeZone() { return tz; } - public int getReplicationNum() { - return replicationNum; + public ReplicaAllocation getReplicaAllocation() { + return replicaAlloc; } /** * use table replication_num as dynamic_partition.replication_num default value */ - public String getProperties(int tableReplicationNum) { - int useReplicationNum = replicationNum; - if (useReplicationNum == NOT_SET_REPLICATION_NUM) { - useReplicationNum = tableReplicationNum; - } + public String getProperties(ReplicaAllocation tableReplicaAlloc) { + ReplicaAllocation tmpAlloc = this.replicaAlloc.isNotSet() ? 
tableReplicaAlloc : this.replicaAlloc; String res = ",\n\"" + ENABLE + "\" = \"" + enable + "\"" + ",\n\"" + TIME_UNIT + "\" = \"" + timeUnit + "\"" + ",\n\"" + TIME_ZONE + "\" = \"" + tz.getID() + "\"" + ",\n\"" + START + "\" = \"" + start + "\"" + ",\n\"" + END + "\" = \"" + end + "\"" + ",\n\"" + PREFIX + "\" = \"" + prefix + "\"" + - ",\n\"" + REPLICATION_NUM + "\" = \"" + useReplicationNum + "\"" + + ",\n\"" + REPLICATION_ALLOCATION + "\" = \"" + tmpAlloc.toCreateStmt() + "\"" + ",\n\"" + BUCKETS + "\" = \"" + buckets + "\"" + ",\n\"" + CREATE_HISTORY_PARTITION + "\" = \"" + createHistoryPartition + "\"" + ",\n\"" + HOT_PARTITION_NUM + "\" = \"" + hotPartitionNum + "\""; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/MetadataViewer.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/MetadataViewer.java index b52a3285304190..496c8223ee92f3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/MetadataViewer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/MetadataViewer.java @@ -80,7 +80,7 @@ private static List> getTabletStatus(String dbName, String tblName, for (String partName : partitions) { Partition partition = olapTable.getPartition(partName); long visibleVersion = partition.getVisibleVersion(); - short replicationNum = olapTable.getPartitionInfo().getReplicationNum(partition.getId()); + short replicationNum = olapTable.getPartitionInfo().getReplicaAllocation(partition.getId()).getTotalReplicaNum(); for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { int schemaHash = olapTable.getSchemaHashByIndexId(index.getId()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 72b0d9cb0ae8fe..bf5293e97a1190 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -44,6 +44,8 @@ import 
org.apache.doris.common.util.PropertyAnalyzer; import org.apache.doris.common.util.Util; import org.apache.doris.qe.OriginStatement; +import org.apache.doris.resource.Tag; +import org.apache.doris.system.Backend; import org.apache.doris.system.SystemInfoService; import org.apache.doris.thrift.TOlapTable; import org.apache.doris.thrift.TStorageFormat; @@ -400,7 +402,7 @@ public void renameColumnNamePrefix(long idxId) { } } - public Status resetIdsForRestore(Catalog catalog, Database db, int restoreReplicationNum) { + public Status resetIdsForRestore(Catalog catalog, Database db, ReplicaAllocation restoreReplicaAlloc) { // table id id = catalog.getNextId(); @@ -432,9 +434,9 @@ public Status resetIdsForRestore(Catalog catalog, Database db, int restoreReplic for (Map.Entry entry : origPartNameToId.entrySet()) { long newPartId = catalog.getNextId(); partitionInfo.idToDataProperty.put(newPartId, - partitionInfo.idToDataProperty.remove(entry.getValue())); - partitionInfo.idToReplicationNum.remove(entry.getValue()); - partitionInfo.idToReplicationNum.put(newPartId, (short) restoreReplicationNum); + partitionInfo.idToDataProperty.remove(entry.getValue())); + partitionInfo.idToReplicaAllocation.remove(entry.getValue()); + partitionInfo.idToReplicaAllocation.put(newPartId, restoreReplicaAlloc); partitionInfo.getIdToItem(false).put(newPartId, partitionInfo.getIdToItem(false).remove(entry.getValue())); partitionInfo.idToInMemory.put(newPartId, partitionInfo.idToInMemory.remove(entry.getValue())); @@ -445,8 +447,8 @@ public Status resetIdsForRestore(Catalog catalog, Database db, int restoreReplic long newPartId = catalog.getNextId(); for (Map.Entry entry : origPartNameToId.entrySet()) { partitionInfo.idToDataProperty.put(newPartId, partitionInfo.idToDataProperty.remove(entry.getValue())); - partitionInfo.idToReplicationNum.remove(entry.getValue()); - partitionInfo.idToReplicationNum.put(newPartId, (short) restoreReplicationNum); + 
partitionInfo.idToReplicaAllocation.remove(entry.getValue()); + partitionInfo.idToReplicaAllocation.put(newPartId, restoreReplicaAlloc); partitionInfo.idToInMemory.put(newPartId, partitionInfo.idToInMemory.remove(entry.getValue())); idToPartition.put(newPartId, idToPartition.remove(entry.getValue())); } @@ -455,6 +457,7 @@ public Status resetIdsForRestore(Catalog catalog, Database db, int restoreReplic // for each partition, reset rollup index map for (Map.Entry entry : idToPartition.entrySet()) { Partition partition = entry.getValue(); + ReplicaAllocation replicaAlloc = partitionInfo.getReplicaAllocation(partition.getId()); for (Map.Entry entry2 : origIdxIdToName.entrySet()) { MaterializedIndex idx = partition.getIndex(entry2.getKey()); long newIdxId = indexNameToId.get(entry2.getValue()); @@ -475,19 +478,19 @@ public Status resetIdsForRestore(Catalog catalog, Database db, int restoreReplic idx.addTablet(newTablet, null /* tablet meta */, true /* is restore */); // replicas - List beIds = Catalog.getCurrentSystemInfo().seqChooseBackendIds(partitionInfo.getReplicationNum(entry.getKey()), - true, true, - db.getClusterName()); - if (beIds == null) { - return new Status(ErrCode.COMMON_ERROR, "failed to find " - + partitionInfo.getReplicationNum(entry.getKey()) - + " different hosts to create table: " + name); - } - for (Long beId : beIds) { - long newReplicaId = catalog.getNextId(); - Replica replica = new Replica(newReplicaId, beId, ReplicaState.NORMAL, - partition.getVisibleVersion(), partition.getVisibleVersionHash(), schemaHash); - newTablet.addReplica(replica, true /* is restore */); + try { + Map> tag2beIds = Catalog.getCurrentSystemInfo().chooseBackendIdByFilters( + replicaAlloc, db.getClusterName(), null); + for (Map.Entry> entry3 : tag2beIds.entrySet()) { + for (Long beId : entry3.getValue()) { + long newReplicaId = catalog.getNextId(); + Replica replica = new Replica(newReplicaId, beId, ReplicaState.NORMAL, + partition.getVisibleVersion(), 
partition.getVisibleVersionHash(), schemaHash); + newTablet.addReplica(replica, true /* is restore */); + } + } + } catch (DdlException e) { + return new Status(ErrCode.COMMON_ERROR, e.getMessage()); } } } @@ -685,11 +688,11 @@ private Partition dropPartition(long dbId, String partitionName, boolean isForce // recycle partition if (partitionInfo.getType() == PartitionType.RANGE) { Catalog.getCurrentRecycleBin().recyclePartition(dbId, id, partition, - partitionInfo.getItem(partition.getId()).getItems(), - new ListPartitionItem(Lists.newArrayList(new PartitionKey())), - partitionInfo.getDataProperty(partition.getId()), - partitionInfo.getReplicationNum(partition.getId()), - partitionInfo.getIsInMemory(partition.getId())); + partitionInfo.getItem(partition.getId()).getItems(), + new ListPartitionItem(Lists.newArrayList(new PartitionKey())), + partitionInfo.getDataProperty(partition.getId()), + partitionInfo.getReplicaAllocation(partition.getId()), + partitionInfo.getIsInMemory(partition.getId())); } else if (partitionInfo.getType() == PartitionType.LIST) { // construct a dummy range @@ -707,7 +710,7 @@ private Partition dropPartition(long dbId, String partitionName, boolean isForce dummyRange, partitionInfo.getItem(partition.getId()), partitionInfo.getDataProperty(partition.getId()), - partitionInfo.getReplicationNum(partition.getId()), + partitionInfo.getReplicaAllocation(partition.getId()), partitionInfo.getIsInMemory(partition.getId())); } } else if (!reserveTablets) { @@ -1221,7 +1224,7 @@ public void readFields(DataInput in) throws IOException { for (long partitionId : tempRangeInfo.getIdToItem(false).keySet()) { this.partitionInfo.addPartition(partitionId, true, tempRangeInfo.getItem(partitionId), tempRangeInfo.getDataProperty(partitionId), - tempRangeInfo.getReplicationNum(partitionId), tempRangeInfo.getIsInMemory(partitionId)); + tempRangeInfo.getReplicaAllocation(partitionId), tempRangeInfo.getIsInMemory(partitionId)); } } tempPartitions.unsetPartitionInfo(); 
@@ -1306,7 +1309,7 @@ public Partition replacePartition(Partition newPartition) { nameToPartition.put(newPartition.getName(), newPartition); DataProperty dataProperty = partitionInfo.getDataProperty(oldPartition.getId()); - short replicationNum = partitionInfo.getReplicationNum(oldPartition.getId()); + ReplicaAllocation replicaAlloc = partitionInfo.getReplicaAllocation(oldPartition.getId()); boolean isInMemory = partitionInfo.getIsInMemory(oldPartition.getId()); if (partitionInfo.getType() == PartitionType.RANGE @@ -1314,10 +1317,10 @@ public Partition replacePartition(Partition newPartition) { PartitionItem item = partitionInfo.getItem(oldPartition.getId()); partitionInfo.dropPartition(oldPartition.getId()); partitionInfo.addPartition(newPartition.getId(), false, item, dataProperty, - replicationNum, isInMemory); + replicaAlloc, isInMemory); } else { partitionInfo.dropPartition(oldPartition.getId()); - partitionInfo.addPartition(newPartition.getId(), dataProperty, replicationNum, isInMemory); + partitionInfo.addPartition(newPartition.getId(), dataProperty, replicaAlloc, isInMemory); } return oldPartition; @@ -1362,7 +1365,7 @@ public boolean isStable(SystemInfoService infoService, TabletScheduler tabletSch for (Partition partition : idToPartition.values()) { long visibleVersion = partition.getVisibleVersion(); long visibleVersionHash = partition.getVisibleVersionHash(); - short replicationNum = partitionInfo.getReplicationNum(partition.getId()); + ReplicaAllocation replicaAlloc = partitionInfo.getReplicaAllocation(partition.getId()); for (MaterializedIndex mIndex : partition.getMaterializedIndices(IndexExtState.ALL)) { for (Tablet tablet : mIndex.getTablets()) { if (tabletScheduler.containsTablet(tablet.getId())) { @@ -1372,7 +1375,7 @@ public boolean isStable(SystemInfoService infoService, TabletScheduler tabletSch } Pair statusPair = tablet.getHealthStatusWithPriority( - infoService, clusterName, visibleVersion, visibleVersionHash, replicationNum, + infoService, 
clusterName, visibleVersion, visibleVersionHash, replicaAlloc, aliveBeIdsInCluster); if (statusPair.first != TabletStatus.HEALTHY) { LOG.info("table {} is not stable because tablet {} status is {}. replicas: {}", @@ -1386,20 +1389,45 @@ public boolean isStable(SystemInfoService infoService, TabletScheduler tabletSch } // arbitrarily choose a partition, and get the buckets backends sequence from base index. - public List> getArbitraryTabletBucketsSeq() throws DdlException { - List> backendsPerBucketSeq = Lists.newArrayList(); + public Map>> getArbitraryTabletBucketsSeq() throws DdlException { + SystemInfoService infoService = Catalog.getCurrentSystemInfo(); + Map>> backendsPerBucketSeq = Maps.newHashMap(); for (Partition partition : idToPartition.values()) { - short replicationNum = partitionInfo.getReplicationNum(partition.getId()); + ReplicaAllocation replicaAlloc = partitionInfo.getReplicaAllocation(partition.getId()); + short totalReplicaNum = replicaAlloc.getTotalReplicaNum(); MaterializedIndex baseIdx = partition.getBaseIndex(); for (Long tabletId : baseIdx.getTabletIdsInOrder()) { Tablet tablet = baseIdx.getTablet(tabletId); List replicaBackendIds = tablet.getNormalReplicaBackendIds(); - if (replicaBackendIds.size() < replicationNum) { + if (replicaBackendIds.size() != totalReplicaNum) { // this should not happen, but in case, throw an exception to terminate this process throw new DdlException("Normal replica number of tablet " + tabletId + " is: " - + replicaBackendIds.size() + ", which is less than expected: " + replicationNum); + + replicaBackendIds.size() + ", but expected: " + totalReplicaNum); + } + + // check tag + Map currentReplicaAlloc = Maps.newHashMap(); + Map> tag2beIds = Maps.newHashMap(); + for (long beId : replicaBackendIds) { + Backend be = infoService.getBackend(beId); + if (be == null) { + continue; + } + short num = currentReplicaAlloc.getOrDefault(be.getTag(), (short) 0); + currentReplicaAlloc.putIfAbsent(be.getTag(), (short) (num + 1)); 
+ List beIds = tag2beIds.getOrDefault(be.getTag(), Lists.newArrayList()); + beIds.add(beId); + tag2beIds.putIfAbsent(be.getTag(), beIds); + } + if (!currentReplicaAlloc.equals(replicaAlloc.getAllocMap())) { + throw new DdlException("The relica allocation is " + currentReplicaAlloc.toString() + + ", but expected: " + replicaAlloc.toCreateStmt()); + } + + for (Map.Entry> entry : tag2beIds.entrySet()) { + backendsPerBucketSeq.putIfAbsent(entry.getKey(), Lists.newArrayList()); + backendsPerBucketSeq.get(entry.getKey()).add(entry.getValue()); } - backendsPerBucketSeq.add(replicaBackendIds.subList(0, replicationNum)); } break; } @@ -1476,19 +1504,18 @@ public boolean convertRandomDistributionToHashDistribution() { return hasChanged; } - public void setReplicationNum(Short replicationNum) { + public void setReplicationAllocation(ReplicaAllocation replicaAlloc) { if (tableProperty == null) { tableProperty = new TableProperty(new HashMap<>()); } - tableProperty.modifyTableProperties(PropertyAnalyzer.PROPERTIES_REPLICATION_NUM, replicationNum.toString()); - tableProperty.buildReplicationNum(); + tableProperty.setReplicaAlloc(replicaAlloc); } - public Short getDefaultReplicationNum() { + public ReplicaAllocation getDefaultReplicaAllocation() { if (tableProperty != null) { - return tableProperty.getReplicationNum(); + return tableProperty.getReplicaAllocation(); } - return FeConstants.default_replication_num; + return ReplicaAllocation.DEFAULT_ALLOCATION; } public Boolean isInMemory() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionInfo.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionInfo.java index b64b341cb75cd8..027858a855e97f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionInfo.java @@ -56,8 +56,8 @@ public class PartitionInfo implements Writable { protected Map idToTempItem = Maps.newHashMap(); // partition id -> data property 
protected Map idToDataProperty; - // partition id -> replication num - protected Map idToReplicationNum; + // partition id -> replication allocation + protected Map idToReplicaAllocation; // true if the partition has multi partition columns protected boolean isMultiColumnPartition = false; @@ -70,7 +70,7 @@ public class PartitionInfo implements Writable { public PartitionInfo() { this.idToDataProperty = new HashMap<>(); - this.idToReplicationNum = new HashMap<>(); + this.idToReplicaAllocation = new HashMap<>(); this.idToInMemory = new HashMap<>(); this.idToTabletType = new HashMap<>(); } @@ -78,7 +78,7 @@ public PartitionInfo() { public PartitionInfo(PartitionType type) { this.type = type; this.idToDataProperty = new HashMap<>(); - this.idToReplicationNum = new HashMap<>(); + this.idToReplicaAllocation = new HashMap<>(); this.idToInMemory = new HashMap<>(); this.idToTabletType = new HashMap<>(); } @@ -132,7 +132,7 @@ public PartitionItem handleNewSinglePartitionDesc(SinglePartitionDesc desc, setItemInternal(partitionId, isTemp, partitionItem); idToDataProperty.put(partitionId, desc.getPartitionDataProperty()); - idToReplicationNum.put(partitionId, desc.getReplicationNum()); + idToReplicaAllocation.put(partitionId, desc.getReplicaAlloc()); idToInMemory.put(partitionId, desc.isInMemory()); return partitionItem; @@ -143,11 +143,11 @@ public PartitionItem createAndCheckPartitionItem(SinglePartitionDesc desc, boole } public void unprotectHandleNewSinglePartitionDesc(long partitionId, boolean isTemp, PartitionItem partitionItem, - DataProperty dataProperty, short replicationNum, + DataProperty dataProperty, ReplicaAllocation replicaAlloc, boolean isInMemory) { setItemInternal(partitionId, isTemp, partitionItem); idToDataProperty.put(partitionId, dataProperty); - idToReplicationNum.put(partitionId, replicationNum); + idToReplicaAllocation.put(partitionId, replicaAlloc); idToInMemory.put(partitionId, isInMemory); } @@ -207,15 +207,15 @@ public void setDataProperty(long 
partitionId, DataProperty newDataProperty) { idToDataProperty.put(partitionId, newDataProperty); } - public short getReplicationNum(long partitionId) { - if (!idToReplicationNum.containsKey(partitionId)) { - LOG.debug("failed to get replica num for partition: {}", partitionId); + public ReplicaAllocation getReplicaAllocation(long partitionId) { + if (!idToReplicaAllocation.containsKey(partitionId)) { + LOG.debug("failed to get replica allocation for partition: {}", partitionId); } - return idToReplicationNum.get(partitionId); + return idToReplicaAllocation.get(partitionId); } - public void setReplicationNum(long partitionId, short replicationNum) { - idToReplicationNum.put(partitionId, replicationNum); + public void setReplicaAllocation(long partitionId, ReplicaAllocation replicaAlloc) { + this.idToReplicaAllocation.put(partitionId, replicaAlloc); } public boolean getIsInMemory(long partitionId) { @@ -239,23 +239,23 @@ public void setTabletType(long partitionId, TTabletType tabletType) { public void dropPartition(long partitionId) { idToDataProperty.remove(partitionId); - idToReplicationNum.remove(partitionId); + idToReplicaAllocation.remove(partitionId); idToInMemory.remove(partitionId); idToItem.remove(partitionId); idToTempItem.remove(partitionId); } public void addPartition(long partitionId, boolean isTemp, PartitionItem item, DataProperty dataProperty, - short replicationNum, boolean isInMemory){ - addPartition(partitionId, dataProperty, replicationNum, isInMemory); + ReplicaAllocation replicaAlloc, boolean isInMemory) { + addPartition(partitionId, dataProperty, replicaAlloc, isInMemory); setItemInternal(partitionId, isTemp, item); } public void addPartition(long partitionId, DataProperty dataProperty, - short replicationNum, + ReplicaAllocation replicaAlloc, boolean isInMemory) { idToDataProperty.put(partitionId, dataProperty); - idToReplicationNum.put(partitionId, replicationNum); + idToReplicaAllocation.put(partitionId, replicaAlloc); 
idToInMemory.put(partitionId, isInMemory); } @@ -284,8 +284,8 @@ public void moveFromTempToFormal(long tempPartitionId) { public void write(DataOutput out) throws IOException { Text.writeString(out, type.name()); - Preconditions.checkState(idToDataProperty.size() == idToReplicationNum.size()); - Preconditions.checkState(idToInMemory.keySet().equals(idToReplicationNum.keySet())); + Preconditions.checkState(idToDataProperty.size() == idToReplicaAllocation.size()); + Preconditions.checkState(idToInMemory.keySet().equals(idToReplicaAllocation.keySet())); out.writeInt(idToDataProperty.size()); for (Map.Entry entry : idToDataProperty.entrySet()) { out.writeLong(entry.getKey()); @@ -296,7 +296,7 @@ public void write(DataOutput out) throws IOException { entry.getValue().write(out); } - out.writeShort(idToReplicationNum.get(entry.getKey())); + idToReplicaAllocation.get(entry.getKey()).write(out); out.writeBoolean(idToInMemory.get(entry.getKey())); } } @@ -314,8 +314,15 @@ public void readFields(DataInput in) throws IOException { idToDataProperty.put(partitionId, DataProperty.read(in)); } - short replicationNum = in.readShort(); - idToReplicationNum.put(partitionId, replicationNum); + if (Catalog.getCurrentCatalogJournalVersion() < FeMetaVersion.VERSION_100) { + short replicationNum = in.readShort(); + ReplicaAllocation replicaAlloc = new ReplicaAllocation(replicationNum); + idToReplicaAllocation.put(partitionId, replicaAlloc); + } else { + ReplicaAllocation replicaAlloc = ReplicaAllocation.read(in); + idToReplicaAllocation.put(partitionId, replicaAlloc); + } + if (Catalog.getCurrentCatalogJournalVersion() >= FeMetaVersion.VERSION_72) { idToInMemory.put(partitionId, in.readBoolean()); } else { @@ -338,11 +345,12 @@ public String toString() { buff.append(false); } buff.append("; "); - buff.append("data_property: ").append(entry.getValue().toString()).append("; ");; - buff.append("replica number: ").append(idToReplicationNum.get(entry.getKey())).append("; ");; + 
buff.append("data_property: ").append(entry.getValue().toString()).append("; "); + buff.append("replica number: ").append(idToReplicaAllocation.get(entry.getKey())).append("; "); buff.append("in memory: ").append(idToInMemory.get(entry.getKey())); } return buff.toString(); } + } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ReplicaAllocation.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ReplicaAllocation.java new file mode 100644 index 00000000000000..0cadee26ec709d --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ReplicaAllocation.java @@ -0,0 +1,128 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.catalog; + +import org.apache.doris.common.io.Text; +import org.apache.doris.common.io.Writable; +import org.apache.doris.common.util.PropertyAnalyzer; +import org.apache.doris.persist.gson.GsonUtils; +import org.apache.doris.resource.Tag; + +import com.clearspring.analytics.util.Lists; +import com.google.common.base.Joiner; +import com.google.common.collect.Maps; +import com.google.gson.annotations.SerializedName; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +// ReplicaAllocation is used to describe the distribution of replicas of a tablet. +// By default, 3 replicas of a tablet are distributed on 3 BE nodes with Tag "default". +public class ReplicaAllocation implements Writable { + + public static final ReplicaAllocation DEFAULT_ALLOCATION; + // represent that replica allocation is not set. + public static final ReplicaAllocation NOT_SET; + + static { + DEFAULT_ALLOCATION = new ReplicaAllocation((short) 3); + NOT_SET = new ReplicaAllocation(); + } + + @SerializedName(value = "allocMap") + private Map allocMap = Maps.newHashMap(); + + public ReplicaAllocation() { + + } + + // For convert the old replica number to replica allocation + public ReplicaAllocation(short replicaNum) { + allocMap.put(Tag.DEFAULT_BACKEND_TAG, replicaNum); + } + + public ReplicaAllocation (Map allocMap) { + this.allocMap = allocMap; + } + + public void put(Tag tag, Short num) { + this.allocMap.put(tag, num); + } + + public Map getAllocMap() { + return allocMap; + } + + public short getTotalReplicaNum() { + short num = 0; + for (Short s : allocMap.values()) { + num += s; + } + return num; + } + + public boolean isEmpty() { + return allocMap.isEmpty(); + } + + public boolean isNotSet() { + return this.equals(NOT_SET); + } + + public static ReplicaAllocation read(DataInput in) throws IOException { + String json = Text.readString(in); + return 
GsonUtils.GSON.fromJson(json, ReplicaAllocation.class); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ReplicaAllocation that = (ReplicaAllocation) o; + return that.allocMap.equals(this.allocMap); + } + + @Override + public int hashCode() { + return Objects.hash(allocMap); + } + + @Override + public void write(DataOutput out) throws IOException { + Text.writeString(out, GsonUtils.GSON.toJson(this)); + } + + @Override + public String toString() { + return toCreateStmt(); + } + + // For show create table stmt. like: + // "tag.location.zone1: 2, tag.location.zone2: 1" + public String toCreateStmt() { + List tags = Lists.newArrayList(); + for (Map.Entry entry : allocMap.entrySet()) { + tags.add(PropertyAnalyzer.TAG_LOCATION + "." + entry.getKey().value + ": " + entry.getValue()); + } + return Joiner.on(", ").join(tags); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java index bf94346dcb81d4..070b6c79cb039c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableProperty.java @@ -17,9 +17,10 @@ package org.apache.doris.catalog; +import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; -import org.apache.doris.common.FeConstants; +import org.apache.doris.common.FeMetaVersion; import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; import org.apache.doris.common.util.PropertyAnalyzer; @@ -27,9 +28,13 @@ import org.apache.doris.persist.gson.GsonUtils; import org.apache.doris.thrift.TStorageFormat; +import com.google.common.base.Strings; import com.google.common.collect.Maps; import com.google.gson.annotations.SerializedName; +import org.apache.logging.log4j.LogManager; +import 
org.apache.logging.log4j.Logger; + import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; @@ -39,18 +44,19 @@ /** TableProperty contains additional information about OlapTable * TableProperty includes properties to persistent the additional information * Different properties is recognized by prefix such as dynamic_partition - * If there is different type properties is added.Write a method such as buildDynamicProperty to build it. + * If there is different type properties is added, write a method such as buildDynamicProperty to build it. */ public class TableProperty implements Writable { + private static final Logger LOG = LogManager.getLogger(TableProperty.class); + public static final String DYNAMIC_PARTITION_PROPERTY_PREFIX = "dynamic_partition"; @SerializedName(value = "properties") private Map properties; + // the follower variables are built from "properties" private DynamicPartitionProperty dynamicPartitionProperty = new DynamicPartitionProperty(Maps.newHashMap()); - // table's default replication num - private Short replicationNum = FeConstants.default_replication_num; - + private ReplicaAllocation replicaAlloc = ReplicaAllocation.DEFAULT_ALLOCATION; private boolean isInMemory = false; /* @@ -82,7 +88,7 @@ public TableProperty buildProperty(short opCode) { executeBuildDynamicProperty(); break; case OperationType.OP_MODIFY_REPLICATION_NUM: - buildReplicationNum(); + buildReplicaAllocation(); break; case OperationType.OP_MODIFY_IN_MEMORY: buildInMemory(); @@ -116,12 +122,6 @@ private TableProperty executeBuildDynamicProperty() { return this; } - public TableProperty buildReplicationNum() { - replicationNum = Short.parseShort(properties.getOrDefault(PropertyAnalyzer.PROPERTIES_REPLICATION_NUM, - String.valueOf(FeConstants.default_replication_num))); - return this; - } - public TableProperty buildInMemory() { isInMemory = Boolean.parseBoolean(properties.getOrDefault(PropertyAnalyzer.PROPERTIES_INMEMORY, "false")); return this; @@ 
-137,6 +137,14 @@ public void modifyTableProperties(Map modifyProperties) { properties.putAll(modifyProperties); } + public void setReplicaAlloc(ReplicaAllocation replicaAlloc) { + this.replicaAlloc = replicaAlloc; + } + + public ReplicaAllocation getReplicaAllocation() { + return replicaAlloc; + } + public void modifyTableProperties(String key, String value) { properties.put(key, value); } @@ -149,10 +157,6 @@ public DynamicPartitionProperty getDynamicPartitionProperty() { return dynamicPartitionProperty; } - public Short getReplicationNum() { - return replicationNum; - } - public boolean IsInMemory() { return isInMemory; } @@ -161,16 +165,42 @@ public TStorageFormat getStorageFormat() { return storageFormat; } + public void buildReplicaAllocation() { + try { + // Must copy the properties because "analyzeReplicaAllocation" will remove the property + // from the properties. + Map copiedProperties = Maps.newHashMap(properties); + this.replicaAlloc = PropertyAnalyzer.analyzeReplicaAllocation(copiedProperties, "default"); + } catch (AnalysisException e) { + // should not happen + LOG.error("should not happen when build replica allocation", e); + this.replicaAlloc = ReplicaAllocation.DEFAULT_ALLOCATION; + } + } + @Override + public void write(DataOutput out) throws IOException { Text.writeString(out, GsonUtils.GSON.toJson(this)); } public static TableProperty read(DataInput in) throws IOException { - return GsonUtils.GSON.fromJson(Text.readString(in), TableProperty.class) + TableProperty tableProperty = GsonUtils.GSON.fromJson(Text.readString(in), TableProperty.class) .executeBuildDynamicProperty() - .buildReplicationNum() .buildInMemory() .buildStorageFormat(); + if (Catalog.getCurrentCatalogJournalVersion() < FeMetaVersion.VERSION_100) { + // get replica num from property map and create replica allocation + String repNum = tableProperty.properties.remove(PropertyAnalyzer.PROPERTIES_REPLICATION_NUM); + if (!Strings.isNullOrEmpty(repNum)) { + ReplicaAllocation 
replicaAlloc = new ReplicaAllocation(Short.valueOf(repNum)); + tableProperty.properties.put("default." + PropertyAnalyzer.PROPERTIES_REPLICATION_ALLOCATION, + replicaAlloc.toCreateStmt()); + } else { + tableProperty.properties.put("default." + PropertyAnalyzer.PROPERTIES_REPLICATION_ALLOCATION, + ReplicaAllocation.DEFAULT_ALLOCATION.toCreateStmt()); + } + } + tableProperty.buildReplicaAllocation(); + return tableProperty; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java index b7288dbe244f96..c11adad05cf5f2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java @@ -408,14 +408,16 @@ public long getDataSize(boolean singleReplica) { */ public Pair getHealthStatusWithPriority( SystemInfoService systemInfoService, String clusterName, - long visibleVersion, long visibleVersionHash, int replicationNum, + long visibleVersion, long visibleVersionHash, ReplicaAllocation replicaAlloc, List aliveBeIdsInCluster) { + // FIXME(cmy): should be aware of tag info + short replicationNum = replicaAlloc.getTotalReplicaNum(); int alive = 0; int aliveAndVersionComplete = 0; int stable = 0; int availableInCluster = 0; - + Replica needFurtherRepairReplica = null; Set hosts = Sets.newHashSet(); for (Replica replica : replicas) { @@ -536,19 +538,20 @@ public Pair getHealthStatusWithPriority( * * backends set: 1,2,3 * tablet replicas: 1,2,4,5 - * + * * 2. Version incomplete: * backend matched, but some replica(in backends set)'s version is incomplete - * + * * 3. Redundant: * backends set: 1,2,3 * tablet replicas: 1,2,3,4 - * + * * No need to check if backend is available. We consider all backends in 'backendsSet' are available, * If not, unavailable backends will be relocated by CalocateTableBalancer first. 
*/ - public TabletStatus getColocateHealthStatus(long visibleVersion, int replicationNum, Set backendsSet) { - + public TabletStatus getColocateHealthStatus(long visibleVersion, ReplicaAllocation replicaAlloc, Set backendsSet) { + // FIXME(cmy): need to be aware of the tag info + Short totalReplicaNum = replicaAlloc.getTotalReplicaNum(); // 1. check if replicas' backends are mismatch Set replicaBackendIds = getBackendIds(); if (!replicaBackendIds.containsAll(backendsSet)) { @@ -569,7 +572,7 @@ public TabletStatus getColocateHealthStatus(long visibleVersion, int replication } // 3. check redundant - if (replicas.size() > replicationNum) { + if (replicas.size() > totalReplicaNum) { return TabletStatus.COLOCATE_REDUNDANT; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java index d566eb0d7e1aad..39154f055f58d4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java @@ -26,6 +26,7 @@ import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.Tablet; import org.apache.doris.catalog.Tablet.TabletStatus; import org.apache.doris.clone.TabletSchedCtx.Priority; @@ -33,11 +34,13 @@ import org.apache.doris.common.Config; import org.apache.doris.common.util.MasterDaemon; import org.apache.doris.persist.ColocatePersistInfo; +import org.apache.doris.resource.Tag; import org.apache.doris.system.Backend; import org.apache.doris.system.SystemInfoService; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import com.google.common.collect.Sets; import 
org.apache.logging.log4j.LogManager; @@ -153,7 +156,10 @@ private void relocateAndBalanceGroup() { if (statistic == null) { continue; } - List> backendsPerBucketSeq = colocateIndex.getBackendsPerBucketSeq(groupId); + + // FIXME(cmy): consider tag + Map>> backendsPerBucketSeqMap = colocateIndex.getBackendsPerBucketSeq(groupId); + List> backendsPerBucketSeq = backendsPerBucketSeqMap.get(Tag.DEFAULT_BACKEND_TAG); if (backendsPerBucketSeq.isEmpty()) { continue; } @@ -162,8 +168,11 @@ private void relocateAndBalanceGroup() { List availableBeIds = getAvailableBeIds(db.getClusterName(), infoService); List> balancedBackendsPerBucketSeq = Lists.newArrayList(); if (relocateAndBalance(groupId, unavailableBeIdsInGroup, availableBeIds, colocateIndex, infoService, statistic, balancedBackendsPerBucketSeq)) { - colocateIndex.addBackendsPerBucketSeq(groupId, balancedBackendsPerBucketSeq); - ColocatePersistInfo info = ColocatePersistInfo.createForBackendsPerBucketSeq(groupId, balancedBackendsPerBucketSeq); + // FIXME(cmy): consider tag + Map>> balancedBackendsPerBucketSeqMap = Maps.newHashMap(); + balancedBackendsPerBucketSeqMap.put(Tag.DEFAULT_BACKEND_TAG, balancedBackendsPerBucketSeq); + colocateIndex.addBackendsPerBucketSeq(groupId, balancedBackendsPerBucketSeqMap); + ColocatePersistInfo info = ColocatePersistInfo.createForBackendsPerBucketSeq(groupId, balancedBackendsPerBucketSeqMap); catalog.getEditLog().logColocateBackendsPerBucketSeq(info); LOG.info("balance group {}. 
now backends per bucket sequence is: {}", groupId, balancedBackendsPerBucketSeq); } @@ -203,7 +212,8 @@ private void matchGroup() { olapTable.readLock(); try { for (Partition partition : olapTable.getPartitions()) { - short replicationNum = olapTable.getPartitionInfo().getReplicationNum(partition.getId()); + ReplicaAllocation replicaAlloc = olapTable.getPartitionInfo().getReplicaAllocation(partition.getId()); + short replicationNum = replicaAlloc.getTotalReplicaNum(); long visibleVersion = partition.getVisibleVersion(); long visibleVersionHash = partition.getVisibleVersionHash(); // Here we only get VISIBLE indexes. All other indexes are not queryable. @@ -216,7 +226,7 @@ private void matchGroup() { Set bucketsSeq = backendBucketsSeq.get(idx); Preconditions.checkState(bucketsSeq.size() == replicationNum, bucketsSeq.size() + " vs. " + replicationNum); Tablet tablet = index.getTablet(tabletId); - TabletStatus st = tablet.getColocateHealthStatus(visibleVersion, replicationNum, bucketsSeq); + TabletStatus st = tablet.getColocateHealthStatus(visibleVersion, replicaAlloc, bucketsSeq); if (st != TabletStatus.HEALTHY) { isGroupStable = false; LOG.debug("get unhealthy tablet {} in colocate table. 
status: {}", tablet.getId(), st); @@ -306,13 +316,15 @@ private boolean relocateAndBalance(GroupId groupId, Set unavailableBeIds, ColocateTableIndex colocateIndex, SystemInfoService infoService, ClusterLoadStatistic statistic, List> balancedBackendsPerBucketSeq) { ColocateGroupSchema groupSchema = colocateIndex.getGroupSchema(groupId); - int replicationNum = groupSchema.getReplicationNum(); - List> backendsPerBucketSeq = Lists.newArrayList(colocateIndex.getBackendsPerBucketSeq(groupId)); + // FIXME(cmy): should be aware of tag info + int replicationNum = groupSchema.getReplicaAlloc().getTotalReplicaNum(); + List> backendsPerBucketSeq = Lists.newArrayList(colocateIndex.getBackendsPerBucketSeq(groupId).get(Tag.DEFAULT_BACKEND_TAG)); // [[A,B,C],[B,C,D]] -> [A,B,C,B,C,D] List flatBackendsPerBucketSeq = backendsPerBucketSeq.stream().flatMap(List::stream).collect(Collectors.toList()); boolean isChanged = false; - OUT: while (true) { + OUT: + while (true) { // update backends and hosts at each round backendsPerBucketSeq = Lists.partition(flatBackendsPerBucketSeq, replicationNum); List> hostsPerBucketSeq = getHostsPerBucketSeq(backendsPerBucketSeq, infoService); diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/DynamicPartitionScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/DynamicPartitionScheduler.java index abca24b99d4681..2992566cebb473 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/DynamicPartitionScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/DynamicPartitionScheduler.java @@ -46,12 +46,15 @@ import org.apache.doris.common.util.RangeUtils; import org.apache.doris.common.util.TimeUtils; import org.apache.doris.thrift.TStorageMedium; + import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Range; import com.google.common.collect.Sets; + import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; + import 
java.time.ZonedDateTime; import java.util.ArrayList; import java.util.Collection; @@ -186,11 +189,13 @@ private ArrayList getAddPartitionClause(Database db, OlapTab // construct partition desc PartitionKeyDesc partitionKeyDesc = PartitionKeyDesc.createFixed(Collections.singletonList(lowerValue), Collections.singletonList(upperValue)); - HashMap partitionProperties = Maps.newHashMap(); - if (dynamicPartitionProperty.getReplicationNum() == DynamicPartitionProperty.NOT_SET_REPLICATION_NUM) { - partitionProperties.put("replication_num", String.valueOf(olapTable.getDefaultReplicationNum())); + HashMap partitionProperties = new HashMap<>(1); + if (dynamicPartitionProperty.getReplicaAllocation().isNotSet()) { + partitionProperties.put(PropertyAnalyzer.PROPERTIES_REPLICATION_ALLOCATION, + olapTable.getDefaultReplicaAllocation().toCreateStmt()); } else { - partitionProperties.put("replication_num", String.valueOf(dynamicPartitionProperty.getReplicationNum())); + partitionProperties.put(PropertyAnalyzer.PROPERTIES_REPLICATION_ALLOCATION, + dynamicPartitionProperty.getReplicaAllocation().toCreateStmt()); } if (hotPartitionNum > 0) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletChecker.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletChecker.java index 77238c43b50fa1..fd9b4bc318bab3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletChecker.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletChecker.java @@ -359,7 +359,7 @@ private LoopControlStatus handlePartitionTablet(Database db, OlapTable tbl, Part db.getClusterName(), partition.getVisibleVersion(), partition.getVisibleVersionHash(), - tbl.getPartitionInfo().getReplicationNum(partition.getId()), + tbl.getPartitionInfo().getReplicaAllocation(partition.getId()), aliveBeIdsInCluster); if (statusWithPrio.first == TabletStatus.HEALTHY) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java 
b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java index fd3668938cc3b2..eb8944b51e2ea7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java @@ -24,6 +24,7 @@ import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.Replica; import org.apache.doris.catalog.Replica.ReplicaState; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.Table; import org.apache.doris.catalog.Tablet; import org.apache.doris.catalog.Tablet.TabletStatus; @@ -808,11 +809,11 @@ public void finishCloneTask(CloneTask cloneTask, TFinishTaskRequest request) if (tablet == null) { throw new SchedException(Status.UNRECOVERABLE, "tablet does not exist"); } - + List aliveBeIdsInCluster = infoService.getClusterBackendIds(db.getClusterName(), true); - short replicationNum = olapTable.getPartitionInfo().getReplicationNum(partitionId); + ReplicaAllocation replicaAlloc = olapTable.getPartitionInfo().getReplicaAllocation(partitionId); Pair pair = tablet.getHealthStatusWithPriority( - infoService, db.getClusterName(), visibleVersion, visibleVersionHash, replicationNum, + infoService, db.getClusterName(), visibleVersion, visibleVersionHash, replicaAlloc, aliveBeIdsInCluster); if (pair.first == TabletStatus.HEALTHY) { throw new SchedException(Status.FINISHED, "tablet is healthy"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java index 9c4b2b4c5b822d..2414e683412c73 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java @@ -496,7 +496,7 @@ private void scheduleTablet(TabletSchedCtx tabletCtx, AgentBatchTask batchTask) Set backendsSet = colocateTableIndex.getTabletBackendsByGroup(groupId, tabletOrderIdx); TabletStatus st = 
tablet.getColocateHealthStatus( partition.getVisibleVersion(), - tbl.getPartitionInfo().getReplicationNum(partition.getId()), + tbl.getPartitionInfo().getReplicaAllocation(partition.getId()), backendsSet); statusPair = Pair.create(st, Priority.HIGH); tabletCtx.setColocateGroupBackendIds(backendsSet); @@ -506,7 +506,7 @@ private void scheduleTablet(TabletSchedCtx tabletCtx, AgentBatchTask batchTask) infoService, tabletCtx.getCluster(), partition.getVisibleVersion(), partition.getVisibleVersionHash(), - tbl.getPartitionInfo().getReplicationNum(partition.getId()), + tbl.getPartitionInfo().getReplicaAllocation(partition.getId()), aliveBeIdsInCluster); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/ErrorCode.java b/fe/fe-core/src/main/java/org/apache/doris/common/ErrorCode.java index 71447c6db7ca92..e9557a88db707f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/ErrorCode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/ErrorCode.java @@ -189,38 +189,38 @@ public enum ErrorCode { "Cluster '%s' has backends in decommission"), ERR_WRONG_CLUSTER_NAME(5062, new byte[] { '4', '2', '0', '0', '0' }, "Incorrect cluster name '%s'(name 'default_cluster' is a reserved name)"), - ERR_WRONG_NAME_FORMAT(5063, new byte[] { '4', '2', '0', '0', '0' }, + ERR_WRONG_NAME_FORMAT(5063, new byte[]{'4', '2', '0', '0', '0'}, "Incorrect %s name '%s'"), - ERR_COMMON_ERROR(5064, new byte[] { '4', '2', '0', '0', '0' }, + ERR_COMMON_ERROR(5064, new byte[]{'4', '2', '0', '0', '0'}, "%s"), - ERR_COLOCATE_FEATURE_DISABLED(5063, new byte[] { '4', '2', '0', '0', '0' }, + ERR_COLOCATE_FEATURE_DISABLED(5063, new byte[]{'4', '2', '0', '0', '0'}, "Colocate feature is disabled by Admin"), - ERR_COLOCATE_TABLE_NOT_EXIST(5063, new byte[] { '4', '2', '0', '0', '0' }, + ERR_COLOCATE_TABLE_NOT_EXIST(5063, new byte[]{'4', '2', '0', '0', '0'}, "Colocate table '%s' does not exist"), - ERR_COLOCATE_TABLE_MUST_BE_OLAP_TABLE(5063, new byte[] { '4', '2', '0', '0', '0' }, + 
ERR_COLOCATE_TABLE_MUST_BE_OLAP_TABLE(5063, new byte[]{'4', '2', '0', '0', '0'}, "Colocate table '%s' must be OLAP table"), - ERR_COLOCATE_TABLE_MUST_HAS_SAME_REPLICATION_NUM(5063, new byte[] { '4', '2', '0', '0', '0' }, - "Colocate tables must have same replication num: %s"), - ERR_COLOCATE_TABLE_MUST_HAS_SAME_BUCKET_NUM(5063, new byte[] { '4', '2', '0', '0', '0' }, + ERR_COLOCATE_TABLE_MUST_HAS_SAME_REPLICATION_ALLOCATION(5063, new byte[]{'4', '2', '0', '0', '0'}, + "Colocate tables must have same replication allocation: %s"), + ERR_COLOCATE_TABLE_MUST_HAS_SAME_BUCKET_NUM(5063, new byte[]{'4', '2', '0', '0', '0'}, "Colocate tables must have same bucket num: %s"), - ERR_COLOCATE_TABLE_MUST_HAS_SAME_DISTRIBUTION_COLUMN_SIZE(5063, new byte[] { '4', '2', '0', '0', '0' }, + ERR_COLOCATE_TABLE_MUST_HAS_SAME_DISTRIBUTION_COLUMN_SIZE(5063, new byte[]{'4', '2', '0', '0', '0'}, "Colocate tables distribution columns size must be same : %s"), - ERR_COLOCATE_TABLE_MUST_HAS_SAME_DISTRIBUTION_COLUMN_TYPE(5063, new byte[] { '4', '2', '0', '0', '0' }, - "Colocate tables distribution columns must have the same data type: %s should be %s"), - ERR_COLOCATE_NOT_COLOCATE_TABLE(5064, new byte[] { '4', '2', '0', '0', '0' }, + ERR_COLOCATE_TABLE_MUST_HAS_SAME_DISTRIBUTION_COLUMN_TYPE(5063, new byte[]{'4', '2', '0', '0', '0'}, + "Colocate tables distribution columns must have the same data type: %s should be %s"), + ERR_COLOCATE_NOT_COLOCATE_TABLE(5064, new byte[]{'4', '2', '0', '0', '0'}, "Table %s is not a colocated table"), - ERR_INVALID_OPERATION(5065, new byte[] { '4', '2', '0', '0', '0' }, "Operation %s is invalid"), - ERROR_DYNAMIC_PARTITION_TIME_UNIT(5065, new byte[] {'4', '2', '0', '0', '0'}, + ERR_INVALID_OPERATION(5065, new byte[]{'4', '2', '0', '0', '0'}, "Operation %s is invalid"), + ERROR_DYNAMIC_PARTITION_TIME_UNIT(5065, new byte[]{'4', '2', '0', '0', '0'}, "Unsupported time unit %s. 
Expect HOUR/DAY/WEEK/MONTH."), - ERROR_DYNAMIC_PARTITION_START_ZERO(5066, new byte[] {'4', '2', '0', '0', '0'}, + ERROR_DYNAMIC_PARTITION_START_ZERO(5066, new byte[]{'4', '2', '0', '0', '0'}, "Dynamic partition start must less than 0"), - ERROR_DYNAMIC_PARTITION_START_FORMAT(5066, new byte[] {'4', '2', '0', '0', '0'}, + ERROR_DYNAMIC_PARTITION_START_FORMAT(5066, new byte[]{'4', '2', '0', '0', '0'}, "Invalid dynamic partition start %s"), - ERROR_DYNAMIC_PARTITION_END_ZERO(5066, new byte[] {'4', '2', '0', '0', '0'}, + ERROR_DYNAMIC_PARTITION_END_ZERO(5066, new byte[]{'4', '2', '0', '0', '0'}, "Dynamic partition end must greater than 0"), - ERROR_DYNAMIC_PARTITION_END_FORMAT(5066, new byte[] {'4', '2', '0', '0', '0'}, + ERROR_DYNAMIC_PARTITION_END_FORMAT(5066, new byte[]{'4', '2', '0', '0', '0'}, "Invalid dynamic partition end %s"), - ERROR_DYNAMIC_PARTITION_END_EMPTY(5066, new byte[] {'4', '2', '0', '0', '0'}, + ERROR_DYNAMIC_PARTITION_END_EMPTY(5066, new byte[]{'4', '2', '0', '0', '0'}, "Dynamic partition end is empty"), ERROR_DYNAMIC_PARTITION_BUCKETS_ZERO(5067, new byte[] {'4', '2', '0', '0', '0'}, "Dynamic partition buckets must greater than 0"), diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/FeMetaVersion.java b/fe/fe-core/src/main/java/org/apache/doris/common/FeMetaVersion.java index b26f372cfb8a59..75cce11ebacc30 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/FeMetaVersion.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/FeMetaVersion.java @@ -210,6 +210,8 @@ public final class FeMetaVersion { public static final int VERSION_98 = 98; // add audit steam load and change the serialization backend method to json public static final int VERSION_99 = 99; + // change replica to replica allocation + public static final int VERSION_100 = 100; // note: when increment meta version, should assign the latest version to VERSION_CURRENT - public static final int VERSION_CURRENT = VERSION_99; + public static final int VERSION_CURRENT = 
VERSION_100; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/proc/BackendsProcDir.java b/fe/fe-core/src/main/java/org/apache/doris/common/proc/BackendsProcDir.java index 4dff354546d5e0..c97ae13fdae0bf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/proc/BackendsProcDir.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/proc/BackendsProcDir.java @@ -17,7 +17,6 @@ package org.apache.doris.common.proc; -import com.google.gson.Gson; import org.apache.doris.alter.DecommissionBackendJob.DecommissionType; import org.apache.doris.catalog.Catalog; import org.apache.doris.cluster.Cluster; @@ -35,6 +34,7 @@ import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import com.google.gson.Gson; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -164,6 +164,8 @@ public static List> getClusterBackendInfos(String clusterName) { } backendInfo.add(String.format("%.2f", used) + " %"); backendInfo.add(String.format("%.2f", backend.getMaxDiskUsedPct() * 100) + " %"); + // tag + backendInfo.add(backend.getTag().toString()); backendInfo.add(backend.getHeartbeatErrMsg()); backendInfo.add(backend.getVersion()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/proc/ColocationGroupBackendSeqsProcNode.java b/fe/fe-core/src/main/java/org/apache/doris/common/proc/ColocationGroupBackendSeqsProcNode.java index 3e3d5e42a2bcf2..2a164cba6de2a6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/proc/ColocationGroupBackendSeqsProcNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/proc/ColocationGroupBackendSeqsProcNode.java @@ -18,36 +18,46 @@ package org.apache.doris.common.proc; import org.apache.doris.common.AnalysisException; +import org.apache.doris.resource.Tag; import com.google.common.base.Joiner; -import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import java.util.List; 
+import java.util.Map; /* * show proc "/colocation_group/group_name"; */ public class ColocationGroupBackendSeqsProcNode implements ProcNodeInterface { - public static final ImmutableList TITLE_NAMES = new ImmutableList.Builder() - .add("BucketIndex").add("BackendIds").build(); + private Map>> backendsSeq; - private List> backendsSeq; - - public ColocationGroupBackendSeqsProcNode(List> backendsSeq) { + public ColocationGroupBackendSeqsProcNode(Map>> backendsSeq) { this.backendsSeq = backendsSeq; } @Override public ProcResult fetchResult() throws AnalysisException { BaseProcResult result = new BaseProcResult(); - result.setNames(TITLE_NAMES); - - int index = 0; - for (List seqs : backendsSeq) { + List titleNames = Lists.newArrayList(); + titleNames.add("BucketIndex"); + int bucketNum = 0; + for (Tag tag : backendsSeq.keySet()) { + titleNames.add(tag.toString()); + if (bucketNum == 0) { + bucketNum = backendsSeq.get(tag).size(); + } else if (bucketNum != backendsSeq.get(tag).size()) { + throw new AnalysisException("Invalid bucket number: " + bucketNum + " vs. 
" + backendsSeq.get(tag).size()); + } + } + result.setNames(titleNames); + for (int i = 0; i < bucketNum; i++) { List info = Lists.newArrayList(); - info.add(String.valueOf(index++)); - info.add(Joiner.on(", ").join(seqs)); + info.add(String.valueOf(i)); // bucket index + for (Tag tag : backendsSeq.keySet()) { + List> bucketBackends = backendsSeq.get(tag); + info.add(Joiner.on(", ").join(bucketBackends.get(i))); + } result.addRow(info); } return result; diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/proc/ColocationGroupProcDir.java b/fe/fe-core/src/main/java/org/apache/doris/common/proc/ColocationGroupProcDir.java index 58b54bda0d06f7..67b9c5bc5b219d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/proc/ColocationGroupProcDir.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/proc/ColocationGroupProcDir.java @@ -21,10 +21,12 @@ import org.apache.doris.catalog.ColocateTableIndex; import org.apache.doris.catalog.ColocateTableIndex.GroupId; import org.apache.doris.common.AnalysisException; +import org.apache.doris.resource.Tag; import com.google.common.collect.ImmutableList; import java.util.List; +import java.util.Map; /* * show proc "/colocation_group"; @@ -32,7 +34,7 @@ public class ColocationGroupProcDir implements ProcDirInterface { public static final ImmutableList TITLE_NAMES = new ImmutableList.Builder() .add("GroupId").add("GroupName").add("TableIds") - .add("BucketsNum").add("ReplicationNum").add("DistCols").add("IsStable").build(); + .add("BucketsNum").add("ReplicaAllocation").add("DistCols").add("IsStable").build(); @Override public boolean register(String name, ProcNodeInterface node) { @@ -57,7 +59,7 @@ public ProcNodeInterface lookup(String groupIdStr) throws AnalysisException { GroupId groupId = new GroupId(dbId, grpId); ColocateTableIndex index = Catalog.getCurrentColocateIndex(); - List> beSeqs = index.getBackendsPerBucketSeq(groupId); + Map>> beSeqs = index.getBackendsPerBucketSeq(groupId); return new 
ColocationGroupBackendSeqsProcNode(beSeqs); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/proc/PartitionsProcDir.java b/fe/fe-core/src/main/java/org/apache/doris/common/proc/PartitionsProcDir.java index f56d7c18244ee3..8eee3269984bd6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/proc/PartitionsProcDir.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/proc/PartitionsProcDir.java @@ -267,8 +267,7 @@ private List> getPartitionInfos() { partitionInfo.add(distributionInfo.getBucketNum()); - short replicationNum = tblPartitionInfo.getReplicationNum(partitionId); - partitionInfo.add(String.valueOf(replicationNum)); + partitionInfo.add(tblPartitionInfo.getReplicaAllocation(partitionId).toCreateStmt()); DataProperty dataProperty = tblPartitionInfo.getDataProperty(partitionId); partitionInfo.add(dataProperty.getStorageMedium().name()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/proc/StatisticProcDir.java b/fe/fe-core/src/main/java/org/apache/doris/common/proc/StatisticProcDir.java index 596267cb4228ea..c57f9217c0c8ea 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/proc/StatisticProcDir.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/proc/StatisticProcDir.java @@ -23,6 +23,7 @@ import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.Table; import org.apache.doris.catalog.Table.TableType; import org.apache.doris.catalog.Tablet; @@ -130,7 +131,7 @@ public ProcResult fetchResult() throws AnalysisException { table.readLock(); try { for (Partition partition : olapTable.getAllPartitions()) { - short replicationNum = olapTable.getPartitionInfo().getReplicationNum(partition.getId()); + ReplicaAllocation replicaAlloc = olapTable.getPartitionInfo().getReplicaAllocation(partition.getId()); ++dbPartitionNum; for 
(MaterializedIndex materializedIndex : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { ++dbIndexNum; @@ -141,7 +142,7 @@ public ProcResult fetchResult() throws AnalysisException { Pair res = tablet.getHealthStatusWithPriority( infoService, db.getClusterName(), partition.getVisibleVersion(), partition.getVisibleVersionHash(), - replicationNum, aliveBeIdsInCluster); + replicaAlloc, aliveBeIdsInCluster); // here we treat REDUNDANT as HEALTHY, for user friendly. if (res.first != TabletStatus.HEALTHY && res.first != TabletStatus.REDUNDANT diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java index 4454087514da23..a5f24f00ac7628 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java @@ -27,6 +27,7 @@ import org.apache.doris.catalog.PartitionType; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.RangePartitionInfo; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.Table; import org.apache.doris.catalog.TableProperty; import org.apache.doris.common.AnalysisException; @@ -36,6 +37,7 @@ import org.apache.doris.common.ErrorReport; import org.apache.doris.common.FeConstants; import org.apache.doris.common.FeNameFormat; +import org.apache.doris.common.UserException; import com.google.common.base.Preconditions; import com.google.common.base.Strings; @@ -292,7 +294,8 @@ public static void registerOrRemoveDynamicPartitionTable(long dbId, OlapTable ol } } - public static Map analyzeDynamicPartition(Map properties, PartitionInfo partitionInfo) throws DdlException { + public static Map analyzeDynamicPartition(Map properties, PartitionInfo partitionInfo) + throws UserException { // properties should not be empty, check properties before call this function Map 
analyzedProperties = new HashMap<>(); if (properties.containsKey(DynamicPartitionProperty.TIME_UNIT)) { @@ -377,11 +380,19 @@ public static Map analyzeDynamicPartition(Map pr properties.remove(DynamicPartitionProperty.TIME_ZONE); analyzedProperties.put(DynamicPartitionProperty.TIME_ZONE, val); } + if (properties.containsKey(DynamicPartitionProperty.REPLICATION_NUM)) { String val = properties.get(DynamicPartitionProperty.REPLICATION_NUM); checkReplicationNum(val); properties.remove(DynamicPartitionProperty.REPLICATION_NUM); - analyzedProperties.put(DynamicPartitionProperty.REPLICATION_NUM, val); + analyzedProperties.put(DynamicPartitionProperty.REPLICATION_ALLOCATION, + new ReplicaAllocation(Short.valueOf(val)).toCreateStmt()); + } + + if (properties.containsKey(DynamicPartitionProperty.REPLICATION_ALLOCATION)) { + ReplicaAllocation replicaAlloc = PropertyAnalyzer.analyzeReplicaAllocation(properties, "dynamic_partition"); + properties.remove(DynamicPartitionProperty.REPLICATION_ALLOCATION); + analyzedProperties.put(DynamicPartitionProperty.REPLICATION_ALLOCATION, replicaAlloc.toCreateStmt()); } if (properties.containsKey(DynamicPartitionProperty.HOT_PARTITION_NUM)) { @@ -422,7 +433,7 @@ public static boolean isDynamicPartitionTable(Table table) { * properties should be checked before call this method */ public static void checkAndSetDynamicPartitionProperty(OlapTable olapTable, Map properties) - throws DdlException { + throws UserException { if (DynamicPartitionUtil.checkInputDynamicPartitionProperties(properties, olapTable.getPartitionInfo())) { Map dynamicPartitionProperties = DynamicPartitionUtil.analyzeDynamicPartition(properties, olapTable.getPartitionInfo()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java index 5812eb75921317..4673b16e012e1f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/PropertyAnalyzer.java @@ -23,11 +23,13 @@ import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; import org.apache.doris.common.Pair; +import org.apache.doris.resource.Tag; import org.apache.doris.thrift.TStorageFormat; import org.apache.doris.thrift.TStorageMedium; import org.apache.doris.thrift.TStorageType; @@ -50,6 +52,7 @@ public class PropertyAnalyzer { public static final String PROPERTIES_SHORT_KEY = "short_key"; public static final String PROPERTIES_REPLICATION_NUM = "replication_num"; + public static final String PROPERTIES_REPLICATION_ALLOCATION = "replication_allocation"; public static final String PROPERTIES_STORAGE_TYPE = "storage_type"; public static final String PROPERTIES_STORAGE_MEDIUM = "storage_medium"; public static final String PROPERTIES_STORAGE_COLDOWN_TIME = "storage_cooldown_time"; @@ -93,6 +96,8 @@ public class PropertyAnalyzer { public static final String PROPERTIES_SWAP_TABLE = "swap"; + public static final String TAG_LOCATION = "tag.location"; + public static DataProperty analyzeDataProperty(Map properties, DataProperty oldDataProperty) throws AnalysisException { if (properties == null) { @@ -173,13 +178,14 @@ public static short analyzeShortKeyColumnCount(Map properties) t return shortKeyColumnCount; } - - public static Short analyzeReplicationNum(Map properties, short oldReplicationNum) + + private static Short analyzeReplicationNum(Map properties, String prefix, short oldReplicationNum) throws AnalysisException { Short replicationNum = oldReplicationNum; - if (properties != null && properties.containsKey(PROPERTIES_REPLICATION_NUM)) { + String propKey = Strings.isNullOrEmpty(prefix) 
? PROPERTIES_REPLICATION_NUM : prefix + "." + PROPERTIES_REPLICATION_NUM; + if (properties != null && properties.containsKey(propKey)) { try { - replicationNum = Short.valueOf(properties.get(PROPERTIES_REPLICATION_NUM)); + replicationNum = Short.valueOf(properties.get(propKey)); } catch (Exception e) { throw new AnalysisException(e.getMessage()); } @@ -188,21 +194,7 @@ public static Short analyzeReplicationNum(Map properties, short throw new AnalysisException("Replication num should larger than 0. (suggested 3)"); } - properties.remove(PROPERTIES_REPLICATION_NUM); - } - return replicationNum; - } - - public static Short analyzeReplicationNum(Map properties, boolean isDefault) throws AnalysisException { - String key = "default."; - if (isDefault) { - key += PropertyAnalyzer.PROPERTIES_REPLICATION_NUM; - } else { - key = PropertyAnalyzer.PROPERTIES_REPLICATION_NUM; - } - short replicationNum = Short.valueOf(properties.get(key)); - if (replicationNum <= 0) { - throw new AnalysisException("Replication num should larger than 0. (suggested 3)"); + properties.remove(propKey); } return replicationNum; } @@ -456,9 +448,71 @@ public static Type analyzeSequenceType(Map properties, KeysType throw new AnalysisException("sequence column only support UNIQUE_KEYS"); } PrimitiveType type = PrimitiveType.valueOf(typeStr.toUpperCase()); - if (!type.isFixedPointType() && !type.isDateType()) { + if (!type.isFixedPointType() && !type.isDateType()) { throw new AnalysisException("sequence type only support integer types and date types"); } return ScalarType.createType(type); } + + public static Tag analyzeBackendTagProperties(Map properties) throws AnalysisException { + if (properties.containsKey(TAG_LOCATION)) { + String tagVal = properties.remove(TAG_LOCATION); + return Tag.create(Tag.TYPE_LOCATION, tagVal); + } + return Tag.DEFAULT_BACKEND_TAG; + } + + // There are 2 kinds of replication property: + // 1. "replication_num" = "3" + // 2. 
"replication_allocation" = "tag.location.zone1: 2, tag.location.zone2: 1" + // These 2 kinds of property will all be converted to a ReplicaAllocation and return. + // Return ReplicaAllocation.NOT_SET if no replica property is set. + // + // prefix is for property key such as "dynamic_partition.replication_num", which prefix is "dynamic_partition" + public static ReplicaAllocation analyzeReplicaAllocation(Map properties, String prefix) + throws AnalysisException { + if (properties == null || properties.isEmpty()) { + return ReplicaAllocation.NOT_SET; + } + // if give "replication_num" property, return with default backend tag + Short replicaNum = analyzeReplicationNum(properties, prefix, (short) 0); + if (replicaNum > 0) { + return new ReplicaAllocation(replicaNum); + } + + String propKey = Strings.isNullOrEmpty(prefix) ? PROPERTIES_REPLICATION_ALLOCATION + : prefix + "." + PROPERTIES_REPLICATION_ALLOCATION; + // if not set, return default replication allocation + if (!properties.containsKey(propKey)) { + return ReplicaAllocation.NOT_SET; + } + + // analyze user specified replication allocation + // format is as: "tag.location.zone1: 2, tag.location.zone2: 1" + ReplicaAllocation replicaAlloc = new ReplicaAllocation(); + String allocationVal = properties.remove(propKey); + allocationVal = allocationVal.replaceAll(" ", ""); + String[] locations = allocationVal.split(","); + for (String location : locations) { + String[] parts = location.split(":"); + if (parts.length != 2) { + throw new AnalysisException("Invalid replication allocation property: " + location); + } + if (!parts[0].startsWith(TAG_LOCATION)) { + throw new AnalysisException("Invalid replication allocation tag property: " + location); + } + String locationVal = parts[0].substring(TAG_LOCATION.length() + 1); // +1 to skip dot. 
+ if (Strings.isNullOrEmpty(locationVal)) { + throw new AnalysisException("Invalid replication allocation location tag property: " + location); + } + + replicaAlloc.put(Tag.create(Tag.TYPE_LOCATION, locationVal), Short.valueOf(parts[1])); + } + + if (replicaAlloc.isEmpty()) { + throw new AnalysisException("Not specified replica allocation property"); + } + return replicaAlloc; + } + } diff --git a/fe/fe-core/src/main/java/org/apache/doris/consistency/CheckConsistencyJob.java b/fe/fe-core/src/main/java/org/apache/doris/consistency/CheckConsistencyJob.java index aa183b18cc327c..e0e54c70d65981 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/consistency/CheckConsistencyJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/consistency/CheckConsistencyJob.java @@ -147,8 +147,8 @@ public boolean sendTasks() { } // check partition's replication num. if 1 replication. skip - short replicationNum = olapTable.getPartitionInfo().getReplicationNum(partition.getId()); - if (replicationNum == (short) 1) { + short replicaNum = olapTable.getPartitionInfo().getReplicaAllocation(partition.getId()).getTotalReplicaNum(); + if (replicaNum == (short) 1) { LOG.debug("partition[{}]'s replication num is 1. 
skip consistency check", partition.getId()); return false; } @@ -199,7 +199,7 @@ public boolean sendTasks() { ++sentTaskReplicaNum; } - if (sentTaskReplicaNum < replicationNum / 2 + 1) { + if (sentTaskReplicaNum < replicaNum / 2 + 1) { LOG.info("tablet[{}] does not have enough replica to check.", tabletId); } else { if (maxDataSize > 0) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/consistency/ConsistencyChecker.java b/fe/fe-core/src/main/java/org/apache/doris/consistency/ConsistencyChecker.java index 052847b4e6382e..fe14ab8077382a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/consistency/ConsistencyChecker.java +++ b/fe/fe-core/src/main/java/org/apache/doris/consistency/ConsistencyChecker.java @@ -279,7 +279,7 @@ private List chooseTablets() { new PriorityQueue<>(Math.max(table.getAllPartitions().size(), 1), COMPARATOR); for (Partition partition : table.getPartitions()) { // check partition's replication num. if 1 replication. skip - if (table.getPartitionInfo().getReplicationNum(partition.getId()) == (short) 1) { + if (table.getPartitionInfo().getReplicaAllocation(partition.getId()).getTotalReplicaNum() == (short) 1) { LOG.debug("partition[{}]'s replication num is 1. 
ignore", partition.getId()); continue; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/deploy/DeployManager.java b/fe/fe-core/src/main/java/org/apache/doris/deploy/DeployManager.java index 4f2df31896c9b7..8927484bd316bc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/deploy/DeployManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/deploy/DeployManager.java @@ -22,6 +22,7 @@ import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; import org.apache.doris.common.Pair; +import org.apache.doris.common.UserException; import org.apache.doris.common.util.MasterDaemon; import org.apache.doris.ha.FrontendNodeType; import org.apache.doris.system.Backend; @@ -591,7 +592,7 @@ private boolean inspectNodeChange(List> remoteHosts, default: break; } - } catch (DdlException e) { + } catch (UserException e) { LOG.error("Failed to add {} node: {}:{}", nodeType, remoteIp, remotePort, e); return true; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/http/meta/ColocateMetaService.java b/fe/fe-core/src/main/java/org/apache/doris/http/meta/ColocateMetaService.java index 63c54ee9ea2924..777586e3ec18b6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/http/meta/ColocateMetaService.java +++ b/fe/fe-core/src/main/java/org/apache/doris/http/meta/ColocateMetaService.java @@ -30,7 +30,6 @@ import org.apache.doris.http.rest.RestBaseResult; import org.apache.doris.http.rest.RestResult; import org.apache.doris.mysql.privilege.PrivPredicate; -import org.apache.doris.persist.ColocatePersistInfo; import org.apache.doris.qe.ConnectContext; import com.google.common.base.Preconditions; @@ -196,9 +195,9 @@ public void executeInMasterWithAdmin(BaseRequest request, BaseResponse response) List clusterBackendIds = Catalog.getCurrentSystemInfo().getClusterBackendIds(clusterName, true); //check the Backend id for (List backendIds : backendsPerBucketSeq) { - if (backendIds.size() != groupSchema.getReplicationNum()) { + if (backendIds.size() != 
groupSchema.getReplicaAlloc().getTotalReplicaNum()) { throw new DdlException("Invalid backend num per bucket. expected: " - + groupSchema.getReplicationNum() + ", actual: " + backendIds.size()); + + groupSchema.getReplicaAlloc().getTotalReplicaNum() + ", actual: " + backendIds.size()); } for (Long beId : backendIds) { if (!clusterBackendIds.contains(beId)) { @@ -216,10 +215,14 @@ public void executeInMasterWithAdmin(BaseRequest request, BaseResponse response) sendResult(request, response); } - private void updateBackendPerBucketSeq(GroupId groupId, List> backendsPerBucketSeq) { + private void updateBackendPerBucketSeq(GroupId groupId, List> backendsPerBucketSeq) + throws DdlException { + throw new DdlException("Currently not support"); + /* colocateIndex.addBackendsPerBucketSeq(groupId, backendsPerBucketSeq); ColocatePersistInfo info2 = ColocatePersistInfo.createForBackendsPerBucketSeq(groupId, backendsPerBucketSeq); Catalog.getCurrentCatalog().getEditLog().logColocateBackendsPerBucketSeq(info2); + */ } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/http/rest/LoadAction.java b/fe/fe-core/src/main/java/org/apache/doris/http/rest/LoadAction.java index 33a3910df63546..99810070010d74 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/http/rest/LoadAction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/http/rest/LoadAction.java @@ -116,7 +116,8 @@ public void executeWithoutPassword(BaseRequest request, BaseResponse response) t redirectAddr = execEnv.getMultiLoadMgr().redirectAddr(fullDbName, label); } else { // Choose a backend sequentially. 
- List backendIds = Catalog.getCurrentSystemInfo().seqChooseBackendIds(1, true, false, clusterName); + List backendIds = Catalog.getCurrentSystemInfo().seqChooseBackendIdsByStorageMediumAndTag( + 1, true, false, clusterName, null, null); if (backendIds == null) { throw new DdlException("No backend alive."); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/httpv2/meta/ColocateMetaService.java b/fe/fe-core/src/main/java/org/apache/doris/httpv2/meta/ColocateMetaService.java index c467215dc39259..b9c75ffcbe202b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/httpv2/meta/ColocateMetaService.java +++ b/fe/fe-core/src/main/java/org/apache/doris/httpv2/meta/ColocateMetaService.java @@ -25,9 +25,12 @@ import org.apache.doris.httpv2.entity.ResponseEntityBuilder; import org.apache.doris.httpv2.rest.RestBaseController; import org.apache.doris.mysql.privilege.PrivPredicate; -import org.apache.doris.persist.ColocatePersistInfo; import org.apache.doris.qe.ConnectContext; +import com.google.common.base.Preconditions; +import com.google.gson.Gson; +import com.google.gson.reflect.TypeToken; + import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.springframework.web.bind.annotation.RequestBody; @@ -36,14 +39,11 @@ import org.springframework.web.bind.annotation.RestController; import org.springframework.web.servlet.view.RedirectView; -import com.google.common.base.Preconditions; -import com.google.gson.Gson; -import com.google.gson.reflect.TypeToken; +import java.lang.reflect.Type; +import java.util.List; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; -import java.lang.reflect.Type; -import java.util.List; /* * the colocate meta define in {@link ColocateTableIndex} @@ -138,9 +138,9 @@ public Object bucketseq(HttpServletRequest request, HttpServletResponse response List clusterBackendIds = Catalog.getCurrentSystemInfo().getClusterBackendIds(clusterName, true); //check the Backend id 
for (List backendIds : backendsPerBucketSeq) { - if (backendIds.size() != groupSchema.getReplicationNum()) { + if (backendIds.size() != groupSchema.getReplicaAlloc().getTotalReplicaNum()) { return ResponseEntityBuilder.okWithCommonError("Invalid backend num per bucket. expected: " - + groupSchema.getReplicationNum() + ", actual: " + backendIds.size()); + + groupSchema.getReplicaAlloc().getTotalReplicaNum() + ", actual: " + backendIds.size()); } for (Long beId : backendIds) { if (!clusterBackendIds.contains(beId)) { @@ -158,11 +158,13 @@ public Object bucketseq(HttpServletRequest request, HttpServletResponse response return ResponseEntityBuilder.ok(); } - private void updateBackendPerBucketSeq(GroupId groupId, List> backendsPerBucketSeq) { + private void updateBackendPerBucketSeq(GroupId groupId, List> backendsPerBucketSeq) + throws DdlException { + throw new DdlException("Currently not support"); + /* colocateIndex.addBackendsPerBucketSeq(groupId, backendsPerBucketSeq); ColocatePersistInfo info2 = ColocatePersistInfo.createForBackendsPerBucketSeq(groupId, backendsPerBucketSeq); Catalog.getCurrentCatalog().getEditLog().logColocateBackendsPerBucketSeq(info2); + */ } - - } diff --git a/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/LoadAction.java b/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/LoadAction.java index 6a618bd5fd86c0..194e244e6e6512 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/LoadAction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/LoadAction.java @@ -17,8 +17,6 @@ package org.apache.doris.httpv2.rest; -import io.netty.handler.codec.http.HttpHeaderNames; - import org.apache.doris.catalog.Catalog; import org.apache.doris.cluster.ClusterNamespace; import org.apache.doris.common.DdlException; @@ -44,6 +42,8 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import io.netty.handler.codec.http.HttpHeaderNames; + @RestController public class LoadAction 
extends RestBaseController { @@ -128,7 +128,8 @@ private Object executeWithoutPassword(HttpServletRequest request, } } else { // Choose a backend sequentially. - List backendIds = Catalog.getCurrentSystemInfo().seqChooseBackendIds(1, true, false, clusterName); + List backendIds = Catalog.getCurrentSystemInfo().seqChooseBackendIdsByStorageMediumAndTag( + 1, true, false, clusterName, null, null); if (backendIds == null) { return new RestBaseResult("No backend alive."); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/httpv2/util/LoadSubmitter.java b/fe/fe-core/src/main/java/org/apache/doris/httpv2/util/LoadSubmitter.java index 1cac7e3d02272a..62338d09609002 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/httpv2/util/LoadSubmitter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/httpv2/util/LoadSubmitter.java @@ -25,13 +25,13 @@ import org.apache.doris.system.Backend; import org.apache.doris.system.SystemInfoService; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - import com.google.common.base.Strings; import com.google.gson.Gson; import com.google.gson.reflect.TypeToken; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.BufferedReader; @@ -137,8 +137,8 @@ private File checkAndGetFile(TmpFileMgr.TmpFile tmpFile) { } private Backend selectOneBackend() throws DdlException { - List backendIds = Catalog.getCurrentSystemInfo().seqChooseBackendIds( - 1, true, false, SystemInfoService.DEFAULT_CLUSTER); + List backendIds = Catalog.getCurrentSystemInfo().seqChooseBackendIdsByStorageMediumAndTag( + 1, true, false, SystemInfoService.DEFAULT_CLUSTER, null, null); if (backendIds == null) { throw new DdlException("No alive backend"); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/JournalEntity.java b/fe/fe-core/src/main/java/org/apache/doris/journal/JournalEntity.java index 
9f65a66686928a..3e9994baf9e489 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/journal/JournalEntity.java +++ b/fe/fe-core/src/main/java/org/apache/doris/journal/JournalEntity.java @@ -195,8 +195,7 @@ public void readFields(DataInput in) throws IOException { break; } case OperationType.OP_MODIFY_PARTITION: { - data = new ModifyPartitionInfo(); - ((ModifyPartitionInfo) data).readFields(in); + data = ModifyPartitionInfo.read(in); isRead = true; break; } @@ -457,8 +456,7 @@ public void readFields(DataInput in) throws IOException { case OperationType.OP_COLOCATE_BACKENDS_PER_BUCKETSEQ: case OperationType.OP_COLOCATE_MARK_UNSTABLE: case OperationType.OP_COLOCATE_MARK_STABLE: { - data = new ColocatePersistInfo(); - ((ColocatePersistInfo) data).readFields(in); + data = ColocatePersistInfo.read(in); isRead = true; break; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/DeleteHandler.java b/fe/fe-core/src/main/java/org/apache/doris/load/DeleteHandler.java index 1c80d720799bc1..aeb810575ed441 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/DeleteHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/DeleteHandler.java @@ -75,15 +75,15 @@ import org.apache.doris.transaction.TransactionState.TxnSourceType; import org.apache.doris.transaction.TransactionStatus; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.gson.annotations.SerializedName; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; @@ -174,7 +174,8 @@ public void process(DeleteStmt stmt) throws DdlException, QueryStateException { throw new DdlException("Partition does not exist. 
name: " + partName); } partitions.add(partition); - partitionReplicaNum.put(partition.getId(), ((OlapTable) table).getPartitionInfo().getReplicationNum(partition.getId())); + partitionReplicaNum.put(partition.getId(), + ((OlapTable) table).getPartitionInfo().getReplicaAllocation(partition.getId()).getTotalReplicaNum()); } List deleteConditions = Lists.newArrayList(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/LoadChecker.java b/fe/fe-core/src/main/java/org/apache/doris/load/LoadChecker.java index 0b8d847b97e12e..b4b44d342eed2c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/LoadChecker.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/LoadChecker.java @@ -51,12 +51,12 @@ import org.apache.doris.transaction.TransactionState; import org.apache.doris.transaction.TransactionStatus; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - import com.google.common.collect.Maps; import com.google.common.collect.Sets; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -396,7 +396,7 @@ private Set submitPushTasks(LoadJob job, Database db) { return null; } - short replicationNum = table.getPartitionInfo().getReplicationNum(partition.getId()); + short replicationNum = table.getPartitionInfo().getReplicaAllocation(partition.getId()).getTotalReplicaNum(); // check all indices (base + roll up (not include ROLLUP state index)) List indices = partition.getMaterializedIndices(IndexExtState.ALL); for (MaterializedIndex index : indices) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLoadJob.java index a9ab2d35ea6de5..e6666a78a04002 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/SparkLoadJob.java @@ -88,9 
+88,6 @@ import org.apache.doris.transaction.TransactionState.TxnCoordinator; import org.apache.doris.transaction.TransactionState.TxnSourceType; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.collect.Lists; @@ -98,6 +95,9 @@ import com.google.common.collect.Sets; import com.google.gson.annotations.SerializedName; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + import java.io.DataInput; import java.io.DataOutput; import java.io.File; @@ -445,7 +445,7 @@ private Set submitPushTasks() throws UserException { } hasLoadPartitions = true; - int quorumReplicaNum = olapTable.getPartitionInfo().getReplicationNum(partitionId) / 2 + 1; + int quorumReplicaNum = olapTable.getPartitionInfo().getReplicaAllocation(partitionId).getTotalReplicaNum() / 2 + 1; List indexes = partition.getMaterializedIndices(IndexExtState.ALL); for (MaterializedIndex index : indexes) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java index 31a2dc4ab8c164..187951bfe65492 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java @@ -26,6 +26,7 @@ import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.Replica; import org.apache.doris.catalog.Replica.ReplicaState; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.Table; import org.apache.doris.catalog.Tablet; import org.apache.doris.catalog.Tablet.TabletStatus; @@ -535,7 +536,7 @@ private static void deleteFromMeta(ListMultimap tabletDeleteFromMeta continue; } - short replicationNum = olapTable.getPartitionInfo().getReplicationNum(partition.getId()); + short replicationNum = 
olapTable.getPartitionInfo().getReplicaAllocation(partition.getId()).getTotalReplicaNum(); long indexId = tabletMeta.getIndexId(); MaterializedIndex index = partition.getIndex(indexId); @@ -976,7 +977,7 @@ private static void addReplica(long tabletId, TTabletInfo backendTabletInfo, lon if (partition == null) { throw new MetaNotFoundException("partition[" + partitionId + "] does not exist"); } - short replicationNum = olapTable.getPartitionInfo().getReplicationNum(partition.getId()); + ReplicaAllocation replicaAlloc = olapTable.getPartitionInfo().getReplicaAllocation(partition.getId()); MaterializedIndex materializedIndex = partition.getIndex(indexId); if (materializedIndex == null) { @@ -1012,7 +1013,7 @@ private static void addReplica(long tabletId, TTabletInfo backendTabletInfo, lon List aliveBeIdsInCluster = infoService.getClusterBackendIds(db.getClusterName(), true); Pair status = tablet.getHealthStatusWithPriority(infoService, db.getClusterName(), visibleVersion, visibleVersionHash, - replicationNum, aliveBeIdsInCluster); + replicaAlloc, aliveBeIdsInCluster); if (status.first == TabletStatus.VERSION_INCOMPLETE || status.first == TabletStatus.REPLICA_MISSING || status.first == TabletStatus.UNRECOVERABLE) { @@ -1065,7 +1066,7 @@ private static void addReplica(long tabletId, TTabletInfo backendTabletInfo, lon } } throw new MetaNotFoundException( - "replica is enough[" + tablet.getReplicas().size() + "-" + replicationNum + "]"); + "replica is enough[" + tablet.getReplicas().size() + "-" + replicaAlloc.toCreateStmt() + "]"); } } finally { olapTable.writeUnlock(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/ColocatePersistInfo.java b/fe/fe-core/src/main/java/org/apache/doris/persist/ColocatePersistInfo.java index b25b3fb0d6eed3..cc555132355ffa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/ColocatePersistInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/ColocatePersistInfo.java @@ -20,50 +20,59 @@ import 
org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.ColocateTableIndex.GroupId; import org.apache.doris.common.FeMetaVersion; +import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; +import org.apache.doris.persist.gson.GsonUtils; +import org.apache.doris.resource.Tag; -import com.google.common.collect.Lists; +import com.clearspring.analytics.util.Lists; +import com.google.common.collect.Maps; +import com.google.gson.annotations.SerializedName; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.Map; /** * PersistInfo for ColocateTableIndex */ public class ColocatePersistInfo implements Writable { + @SerializedName(value = "groupId") private GroupId groupId; + @SerializedName(value = "tableId") private long tableId; - private List> backendsPerBucketSeq = Lists.newArrayList(); + @SerializedName(value = "backendsPerBucketSeq") + private Map>> backendsPerBucketSeq = Maps.newHashMap(); public ColocatePersistInfo() { } - public static ColocatePersistInfo createForAddTable(GroupId groupId, long tableId, List> backendsPerBucketSeq) { + public static ColocatePersistInfo createForAddTable(GroupId groupId, long tableId, Map>> backendsPerBucketSeq) { return new ColocatePersistInfo(groupId, tableId, backendsPerBucketSeq); } public static ColocatePersistInfo createForBackendsPerBucketSeq(GroupId groupId, - List> backendsPerBucketSeq) { + Map>> backendsPerBucketSeq) { return new ColocatePersistInfo(groupId, -1L, backendsPerBucketSeq); } public static ColocatePersistInfo createForMarkUnstable(GroupId groupId) { - return new ColocatePersistInfo(groupId, -1L, new ArrayList<>()); + return new ColocatePersistInfo(groupId, -1L, Maps.newHashMap()); } public static ColocatePersistInfo createForMarkStable(GroupId groupId) { - return new ColocatePersistInfo(groupId, -1L, new ArrayList<>()); + return new ColocatePersistInfo(groupId, -1L, 
Maps.newHashMap()); } public static ColocatePersistInfo createForRemoveTable(long tableId) { - return new ColocatePersistInfo(new GroupId(-1, -1), tableId, new ArrayList<>()); + return new ColocatePersistInfo(new GroupId(-1, -1), tableId, Maps.newHashMap()); } - private ColocatePersistInfo(GroupId groupId, long tableId, List> backendsPerBucketSeq) { + private ColocatePersistInfo(GroupId groupId, long tableId, Map>> backendsPerBucketSeq) { this.groupId = groupId; this.tableId = tableId; this.backendsPerBucketSeq = backendsPerBucketSeq; @@ -77,27 +86,28 @@ public GroupId getGroupId() { return groupId; } - public List> getBackendsPerBucketSeq() { + public Map>> getBackendsPerBucketSeq() { return backendsPerBucketSeq; } + public static ColocatePersistInfo read(DataInput in) throws IOException { + if (Catalog.getCurrentCatalogJournalVersion() < FeMetaVersion.VERSION_100) { + ColocatePersistInfo info = new ColocatePersistInfo(); + info.readFields(in); + return info; + } else { + String json = Text.readString(in); + return GsonUtils.GSON.fromJson(json, ColocatePersistInfo.class); + } + } + @Override public void write(DataOutput out) throws IOException { - out.writeLong(tableId); - groupId.write(out); - // out.writeLong(groupId); - // out.writeLong(dbId); - int size = backendsPerBucketSeq.size(); - out.writeInt(size); - for (List beList : backendsPerBucketSeq) { - out.writeInt(beList.size()); - for (Long be : beList) { - out.writeLong(be); - } - } + Text.writeString(out, GsonUtils.GSON.toJson(this)); } - public void readFields(DataInput in) throws IOException { + @Deprecated + private void readFields(DataInput in) throws IOException { tableId = in.readLong(); if (Catalog.getCurrentCatalogJournalVersion() < FeMetaVersion.VERSION_55) { long grpId = in.readLong(); @@ -108,14 +118,16 @@ public void readFields(DataInput in) throws IOException { } int size = in.readInt(); - backendsPerBucketSeq = new ArrayList<>(); + backendsPerBucketSeq = Maps.newHashMap(); + List> 
backendsPerBucketSeqList = Lists.newArrayList(); + backendsPerBucketSeq.put(Tag.DEFAULT_BACKEND_TAG, backendsPerBucketSeqList); for (int i = 0; i < size; i++) { int beListSize = in.readInt(); List beLists = new ArrayList<>(); for (int j = 0; j < beListSize; j++) { beLists.add(in.readLong()); } - backendsPerBucketSeq.add(beLists); + backendsPerBucketSeqList.add(beLists); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java index 91d71f52270232..f81cdb5978e9bd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java @@ -424,6 +424,11 @@ public static void loadJournal(Catalog catalog, JournalEntity journal) { Catalog.getCurrentSystemInfo().replayDropBackend(be); break; } + case OperationType.OP_MODIFY_BACKEND: { + Backend be = (Backend) journal.getData(); + Catalog.getCurrentSystemInfo().replayModifyBackend(be); + break; + } case OperationType.OP_BACKEND_STATE_CHANGE: { Backend be = (Backend) journal.getData(); Catalog.getCurrentSystemInfo().updateBackendState(be); @@ -1046,6 +1051,10 @@ public void logDropBackend(Backend be) { logEdit(OperationType.OP_DROP_BACKEND, be); } + public void logModifyBackend(Backend be) { + logEdit(OperationType.OP_MODIFY_BACKEND, be); + } + public void logAddFrontend(Frontend fe) { logEdit(OperationType.OP_ADD_FRONTEND, fe); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/ModifyPartitionInfo.java b/fe/fe-core/src/main/java/org/apache/doris/persist/ModifyPartitionInfo.java index 7996906d55cdb3..9cdc8bb5f52799 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/ModifyPartitionInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/ModifyPartitionInfo.java @@ -19,8 +19,12 @@ import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.DataProperty; +import org.apache.doris.catalog.ReplicaAllocation; import 
org.apache.doris.common.FeMetaVersion; +import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; +import org.apache.doris.persist.gson.GsonUtils; + import com.google.gson.annotations.SerializedName; import java.io.DataInput; @@ -37,23 +41,25 @@ public class ModifyPartitionInfo implements Writable { private long partitionId; @SerializedName(value = "dataProperty") private DataProperty dataProperty; - @SerializedName(value = "replicationNum") + @Deprecated private short replicationNum; @SerializedName(value = "isInMemory") private boolean isInMemory; + @SerializedName(value = "replicaAlloc") + private ReplicaAllocation replicaAlloc; public ModifyPartitionInfo() { // for persist } public ModifyPartitionInfo(long dbId, long tableId, long partitionId, - DataProperty dataProperty, short replicationNum, + DataProperty dataProperty, ReplicaAllocation replicaAlloc, boolean isInMemory) { this.dbId = dbId; this.tableId = tableId; this.partitionId = partitionId; this.dataProperty = dataProperty; - this.replicationNum = replicationNum; + this.replicaAlloc = replicaAlloc; this.isInMemory = isInMemory; } @@ -73,8 +79,8 @@ public DataProperty getDataProperty() { return dataProperty; } - public short getReplicationNum() { - return replicationNum; + public ReplicaAllocation getReplicaAlloc() { + return replicaAlloc; } public boolean isInMemory() { @@ -82,9 +88,14 @@ public boolean isInMemory() { } public static ModifyPartitionInfo read(DataInput in) throws IOException { - ModifyPartitionInfo info = new ModifyPartitionInfo(); - info.readFields(in); - return info; + if (Catalog.getCurrentCatalogJournalVersion() < FeMetaVersion.VERSION_100) { + ModifyPartitionInfo info = new ModifyPartitionInfo(); + info.readFields(in); + return info; + } else { + String json = Text.readString(in); + return GsonUtils.GSON.fromJson(json, ModifyPartitionInfo.class); + } } @Override @@ -97,32 +108,21 @@ public boolean equals(Object other) { } ModifyPartitionInfo otherInfo = 
(ModifyPartitionInfo) other; return dbId == otherInfo.getDbId() && tableId == otherInfo.getTableId() && - dataProperty.equals(otherInfo.getDataProperty()) && replicationNum == otherInfo.getReplicationNum() + dataProperty.equals(otherInfo.getDataProperty()) && replicaAlloc.equals(otherInfo.replicaAlloc) && isInMemory == otherInfo.isInMemory(); } @Override public void write(DataOutput out) throws IOException { - out.writeLong(dbId); - out.writeLong(tableId); - out.writeLong(partitionId); - - if (dataProperty == null) { - out.writeBoolean(false); - } else { - out.writeBoolean(true); - dataProperty.write(out); - } - - out.writeShort(replicationNum); - out.writeBoolean(isInMemory); + Text.writeString(out, GsonUtils.GSON.toJson(this)); } - public void readFields(DataInput in) throws IOException { + @Deprecated + private void readFields(DataInput in) throws IOException { dbId = in.readLong(); tableId = in.readLong(); partitionId = in.readLong(); - + boolean hasDataProperty = in.readBoolean(); if (hasDataProperty) { dataProperty = DataProperty.read(in); @@ -131,9 +131,13 @@ public void readFields(DataInput in) throws IOException { } replicationNum = in.readShort(); + if (replicationNum > 0) { + replicaAlloc = new ReplicaAllocation(replicationNum); + } else { + replicaAlloc = ReplicaAllocation.NOT_SET; + } if (Catalog.getCurrentCatalogJournalVersion() >= FeMetaVersion.VERSION_72) { isInMemory = in.readBoolean(); } } - } diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java b/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java index 1685505e1fb001..cfd6f5c57a6732 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java @@ -129,6 +129,7 @@ public class OperationType { public static final short OP_UPDATE_CLUSTER_AND_BACKENDS = 88; public static final short OP_CREATE_REPOSITORY = 89; public static final short OP_DROP_REPOSITORY = 90; + 
public static final short OP_MODIFY_BACKEND = 91; //colocate table public static final short OP_COLOCATE_ADD_TABLE = 94; diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/PartitionPersistInfo.java b/fe/fe-core/src/main/java/org/apache/doris/persist/PartitionPersistInfo.java index d482e793641d97..7b666410371845 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/PartitionPersistInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/PartitionPersistInfo.java @@ -23,6 +23,7 @@ import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.PartitionItem; import org.apache.doris.catalog.PartitionKey; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.common.FeMetaVersion; import org.apache.doris.common.io.Writable; import org.apache.doris.common.util.RangeUtils; @@ -41,7 +42,7 @@ public class PartitionPersistInfo implements Writable { private Range range; private PartitionItem listPartitionItem; private DataProperty dataProperty; - private short replicationNum; + private ReplicaAllocation replicaAlloc; private boolean isInMemory = false; private boolean isTempPartition = false; @@ -49,7 +50,7 @@ public PartitionPersistInfo() { } public PartitionPersistInfo(long dbId, long tableId, Partition partition, Range range, - PartitionItem listPartitionItem, DataProperty dataProperty, short replicationNum, + PartitionItem listPartitionItem, DataProperty dataProperty, ReplicaAllocation replicaAlloc, boolean isInMemory, boolean isTempPartition) { this.dbId = dbId; this.tableId = tableId; @@ -59,7 +60,7 @@ public PartitionPersistInfo(long dbId, long tableId, Partition partition, Range< this.listPartitionItem = listPartitionItem; this.dataProperty = dataProperty; - this.replicationNum = replicationNum; + this.replicaAlloc = replicaAlloc; this.isInMemory = isInMemory; this.isTempPartition = isTempPartition; } @@ -87,9 +88,9 @@ public PartitionItem getListPartitionItem() { public DataProperty getDataProperty() { 
return dataProperty; } - - public short getReplicationNum() { - return replicationNum; + + public ReplicaAllocation getReplicaAlloc() { + return replicaAlloc; } public boolean isInMemory() { @@ -108,7 +109,7 @@ public void write(DataOutput out) throws IOException { RangeUtils.writeRange(out, range); listPartitionItem.write(out); dataProperty.write(out); - out.writeShort(replicationNum); + replicaAlloc.write(out); out.writeBoolean(isInMemory); out.writeBoolean(isTempPartition); } @@ -126,7 +127,12 @@ public void readFields(DataInput in) throws IOException { } dataProperty = DataProperty.read(in); - replicationNum = in.readShort(); + if (Catalog.getCurrentCatalogJournalVersion() < FeMetaVersion.VERSION_100) { + this.replicaAlloc = new ReplicaAllocation(in.readShort()); + } else { + this.replicaAlloc = ReplicaAllocation.read(in); + } + if (Catalog.getCurrentCatalogJournalVersion() >= FeMetaVersion.VERSION_72) { isInMemory = in.readBoolean(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java index 8ca0cca730a2a8..0b06a69b4c1ab5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java @@ -17,7 +17,6 @@ package org.apache.doris.planner; -import org.apache.commons.lang.StringUtils; import org.apache.doris.analysis.SlotDescriptor; import org.apache.doris.analysis.TupleDescriptor; import org.apache.doris.catalog.Catalog; @@ -68,6 +67,8 @@ import com.google.common.collect.Range; import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang.StringUtils; + import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -126,7 +127,7 @@ public void complete() throws UserException { tSink.setTupleId(tupleDescriptor.getId().asInt()); int numReplicas = 1; for (Partition partition : dstTable.getPartitions()) { - numReplicas = 
dstTable.getPartitionInfo().getReplicationNum(partition.getId()); + numReplicas = dstTable.getPartitionInfo().getReplicaAllocation(partition.getId()).getTotalReplicaNum(); break; } tSink.setNumReplicas(numReplicas); @@ -316,7 +317,7 @@ private TOlapTableLocationParam createLocation(OlapTable table) throws UserExcep Multimap allBePathsMap = HashMultimap.create(); for (Long partitionId : partitionIds) { Partition partition = table.getPartition(partitionId); - int quorum = table.getPartitionInfo().getReplicationNum(partition.getId()) / 2 + 1; + int quorum = table.getPartitionInfo().getReplicaAllocation(partition.getId()).getTotalReplicaNum() / 2 + 1; for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.ALL)) { // we should ensure the replica backend is alive // otherwise, there will be a 'unknown node id, id=xxx' error for stream load diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/MultiLoadMgr.java b/fe/fe-core/src/main/java/org/apache/doris/qe/MultiLoadMgr.java index bd78e0dd6a3798..bb0f994a9201af 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/MultiLoadMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/MultiLoadMgr.java @@ -19,13 +19,13 @@ import org.apache.doris.analysis.Analyzer; import org.apache.doris.analysis.BrokerDesc; -import org.apache.doris.analysis.Separator; import org.apache.doris.analysis.DataDescription; import org.apache.doris.analysis.Expr; import org.apache.doris.analysis.ImportWhereStmt; import org.apache.doris.analysis.LabelName; import org.apache.doris.analysis.LoadStmt; import org.apache.doris.analysis.PartitionNames; +import org.apache.doris.analysis.Separator; import org.apache.doris.analysis.SqlParser; import org.apache.doris.analysis.SqlScanner; import org.apache.doris.catalog.Catalog; @@ -91,8 +91,8 @@ public void startMulti(String fullDbName, String label, Map prop throw new LabelAlreadyUsedException(label); } MultiLoadDesc multiLoadDesc = new MultiLoadDesc(multiLabel, properties); - 
List backendIds = Catalog.getCurrentSystemInfo().seqChooseBackendIds(1, - true, false, ConnectContext.get().getClusterName()); + List backendIds = Catalog.getCurrentSystemInfo().seqChooseBackendIdsByStorageMediumAndTag(1, + true, false, ConnectContext.get().getClusterName(), null, null); if (backendIds == null) { throw new DdlException("No backend alive."); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index bcc1ddf23ba4ea..816974be710777 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -90,6 +90,7 @@ import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.Replica; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.Table; import org.apache.doris.catalog.Tablet; @@ -1737,8 +1738,10 @@ private void handleShowDynamicPartition() { } DynamicPartitionProperty dynamicPartitionProperty = olapTable.getTableProperty().getDynamicPartitionProperty(); String tableName = olapTable.getName(); - int replicationNum = dynamicPartitionProperty.getReplicationNum(); - replicationNum = (replicationNum == DynamicPartitionProperty.NOT_SET_REPLICATION_NUM) ? 
olapTable.getDefaultReplicationNum() : replicationNum; + ReplicaAllocation replicaAlloc = dynamicPartitionProperty.getReplicaAllocation(); + if (replicaAlloc.isNotSet()) { + replicaAlloc = olapTable.getDefaultReplicaAllocation(); + } rows.add(Lists.newArrayList( tableName, String.valueOf(dynamicPartitionProperty.getEnable()), @@ -1747,7 +1750,7 @@ private void handleShowDynamicPartition() { String.valueOf(dynamicPartitionProperty.getEnd()), dynamicPartitionProperty.getPrefix(), String.valueOf(dynamicPartitionProperty.getBuckets()), - String.valueOf(replicationNum), + replicaAlloc.toCreateStmt(), dynamicPartitionProperty.getStartOfInfo(), dynamicPartitionScheduler.getRuntimeInfo(olapTable.getId(), DynamicPartitionScheduler.LAST_UPDATE_TIME), dynamicPartitionScheduler.getRuntimeInfo(olapTable.getId(), DynamicPartitionScheduler.LAST_SCHEDULER_TIME), diff --git a/fe/fe-core/src/main/java/org/apache/doris/resource/Tag.java b/fe/fe-core/src/main/java/org/apache/doris/resource/Tag.java index c01eb9dc1bb1f7..d2504429ec5c35 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/resource/Tag.java +++ b/fe/fe-core/src/main/java/org/apache/doris/resource/Tag.java @@ -61,6 +61,7 @@ public class Tag implements Writable { public static final String VALUE_STORE = "store"; public static final String VALUE_COMPUTATION = "computation"; public static final String VALUE_DEFAULT_CLUSTER = "default_cluster"; + public static final String VALUE_DEFAULT_TAG = "default"; public static final ImmutableSet RESERVED_TAG_TYPE = ImmutableSet.of( TYPE_ROLE, TYPE_FUNCTION, TYPE_LOCATION); @@ -69,6 +70,12 @@ public class Tag implements Writable { VALUE_DEFAULT_CLUSTER); private static final String TAG_REGEX = "^[a-z][a-z0-9_]{0,32}$"; + public static final Tag DEFAULT_BACKEND_TAG; + + static { + DEFAULT_BACKEND_TAG = new Tag(TYPE_LOCATION, VALUE_DEFAULT_TAG); + } + @SerializedName(value = "type") public String type; @SerializedName(value = "value") @@ -98,7 +105,7 @@ public boolean equals(Object 
other) { return false; } Tag otherTag = (Tag) other; - return type.equalsIgnoreCase(otherTag.type) && value.equalsIgnoreCase(otherTag.value); + return type.equals(otherTag.type) && value.equals(otherTag.value); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java b/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java index 869323cfc1d18b..6c2069baa9a8a0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java +++ b/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java @@ -25,6 +25,7 @@ import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; import org.apache.doris.persist.gson.GsonUtils; +import org.apache.doris.resource.Tag; import org.apache.doris.system.HeartbeatResponse.HbStatus; import org.apache.doris.thrift.TDisk; import org.apache.doris.thrift.TStorageMedium; @@ -112,6 +113,8 @@ public enum BackendState { // additional backendStatus information for BE, display in JSON format @SerializedName("backendStatus") private BackendStatus backendStatus = new BackendStatus(); + @SerializedName("tag") + private Tag tag = Tag.DEFAULT_BACKEND_TAG; public Backend() { this.host = ""; @@ -128,7 +131,6 @@ public Backend() { this.ownerClusterName = ""; this.backendState = BackendState.free.ordinal(); - this.decommissionType = DecommissionType.SystemDecommission.ordinal(); } @@ -697,5 +699,13 @@ public class BackendStatus { // the last time when the stream load status was reported by backend public long lastStreamLoadTime = -1; } + + public void setTag(Tag tag) { + this.tag = tag; + } + + public Tag getTag() { + return tag; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java b/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java index 2a2ed2b55c4d47..7501f60add04bb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java +++ b/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java 
@@ -17,9 +17,11 @@ package org.apache.doris.system; +import org.apache.doris.analysis.ModifyBackendClause; import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.DiskInfo; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.cluster.Cluster; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.DdlException; @@ -27,7 +29,9 @@ import org.apache.doris.common.FeMetaVersion; import org.apache.doris.common.Pair; import org.apache.doris.common.Status; +import org.apache.doris.common.UserException; import org.apache.doris.metric.MetricRepo; +import org.apache.doris.resource.Tag; import org.apache.doris.system.Backend.BackendState; import org.apache.doris.thrift.TStatusCode; import org.apache.doris.thrift.TStorageMedium; @@ -60,6 +64,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; +import java.util.stream.Stream; public class SystemInfoService { private static final Logger LOG = LogManager.getLogger(SystemInfoService.class); @@ -103,18 +108,18 @@ public SystemInfoService() { } // for deploy manager - public void addBackends(List> hostPortPairs, boolean isFree) throws DdlException { - addBackends(hostPortPairs, isFree, ""); + public void addBackends(List> hostPortPairs, boolean isFree) throws UserException { + addBackends(hostPortPairs, isFree, "", Tag.DEFAULT_BACKEND_TAG); } - + /** * @param hostPortPairs : backend's host and port * @param isFree : if true the backend is not owned by any cluster * @param destCluster : if not null or empty backend will be added to destCluster * @throws DdlException */ - public void addBackends(List> hostPortPairs, - boolean isFree, String destCluster) throws DdlException { + public void addBackends(List> hostPortPairs, + boolean isFree, String destCluster, Tag tag) throws UserException { for (Pair pair : hostPortPairs) { // check is already exist 
if (getBackendWithHeartbeatPort(pair.first, pair.second) != null) { @@ -123,7 +128,7 @@ public void addBackends(List> hostPortPairs, } for (Pair pair : hostPortPairs) { - addBackend(pair.first, pair.second, isFree, destCluster); + addBackend(pair.first, pair.second, isFree, destCluster, tag); } } @@ -144,7 +149,8 @@ private void setBackendOwner(Backend backend, String clusterName) { } // Final entry of adding backend - private void addBackend(String host, int heartbeatPort, boolean isFree, String destCluster) throws DdlException { + private void addBackend(String host, int heartbeatPort, boolean isFree, String destCluster, + Tag tag) throws UserException { Backend newBackend = new Backend(Catalog.getCurrentCatalog().getNextId(), host, heartbeatPort); // update idToBackend Map copiedBackends = Maps.newHashMap(idToBackendRef); @@ -159,7 +165,7 @@ private void addBackend(String host, int heartbeatPort, boolean isFree, String d idToReportVersionRef = newIdToReportVersion; if (!Strings.isNullOrEmpty(destCluster)) { - // add backend to destCluster + // add backend to destCluster setBackendOwner(newBackend, destCluster); } else if (!isFree) { // add backend to DEFAULT_CLUSTER @@ -168,6 +174,9 @@ private void addBackend(String host, int heartbeatPort, boolean isFree, String d // backend is free } + // set tags + newBackend.setTag(tag); + // log Catalog.getCurrentCatalog().getEditLog().logAddBackend(newBackend); LOG.info("finished to add {} ", newBackend); @@ -727,15 +736,41 @@ private Map> getHostBackendsMap(boolean needAlive, boolean return classMap; } - public List seqChooseBackendIdsByStorageMedium(int backendNum, boolean needAlive, boolean isCreate, - String clusterName, TStorageMedium storageMedium) { - final List backends = getClusterBackends(clusterName).stream().filter(v -> !v.diskExceedLimitByStorageMedium(storageMedium)).collect(Collectors.toList()); - return seqChooseBackendIds(backendNum, needAlive, isCreate, clusterName, backends); + + // Find enough backend 
to allocate replica of a tablet. + // filters include: tag, cluster, storage medium + public Map> chooseBackendIdByFilters(ReplicaAllocation replicaAlloc, String clusterName, TStorageMedium storageMedium) + throws DdlException { + Map> chosenBackendIds = Maps.newHashMap(); + Map allocMap = replicaAlloc.getAllocMap(); + short totalReplicaNum = 0; + for (Map.Entry entry : allocMap.entrySet()) { + List beIds = Catalog.getCurrentSystemInfo().seqChooseBackendIdsByStorageMediumAndTag(entry.getValue(), + true, true, clusterName, storageMedium, entry.getKey()); + if (beIds == null) { + throw new DdlException("Failed to find enough host with storage medium and tag(" + + (storageMedium == null ? "NaN" : storageMedium) + "/" + entry.getKey() + + ") in all backends. need: " + entry.getValue()); + } + chosenBackendIds.put(entry.getKey(), beIds); + totalReplicaNum += beIds.size(); + } + Preconditions.checkState(totalReplicaNum == replicaAlloc.getTotalReplicaNum()); + return chosenBackendIds; } - public List seqChooseBackendIds(int backendNum, boolean needAlive, boolean isCreate, - String clusterName) { - final List backends = getClusterBackends(clusterName).stream().filter(v -> !v.diskExceedLimit()).collect(Collectors.toList()); + public List seqChooseBackendIdsByStorageMediumAndTag(int backendNum, boolean needAlive, boolean isCreate, + String clusterName, TStorageMedium storageMedium, Tag tag) { + Stream beStream = getClusterBackends(clusterName).stream(); + if (storageMedium == null) { + beStream = beStream.filter(v -> !v.diskExceedLimit()); + } else { + beStream = beStream.filter(v -> !v.diskExceedLimitByStorageMedium(storageMedium)); + } + if (tag != null) { + beStream = beStream.filter(v -> v.getTag().equals(tag)); + } + final List backends = beStream.collect(Collectors.toList()); return seqChooseBackendIds(backendNum, needAlive, isCreate, clusterName, backends); } @@ -771,8 +806,12 @@ public synchronized List seqChooseBackendIds(int backendNum, boolean needA } // host -> 
BE list + List sourceBackend = srcBackends; + if (sourceBackend == null) { + sourceBackend = getClusterBackends(clusterName); + } Map> backendMaps = Maps.newHashMap(); - for (Backend backend : srcBackends) { + for (Backend backend : sourceBackend) { if (backendMaps.containsKey(backend.getHost())) { backendMaps.get(backend.getHost()).add(backend); } else { @@ -782,7 +821,6 @@ public synchronized List seqChooseBackendIds(int backendNum, boolean needA } } - // if more than one backend exists in same host, select a backend at random List backends = Lists.newArrayList(); for (List list : backendMaps.values()) { @@ -1152,5 +1190,44 @@ public void updatePathInfo(List addedDisks, List removedDisk pathHashToDishInfoRef = newPathInfos; LOG.debug("update path infos: {}", newPathInfos); } + + public void modifyBackends(ModifyBackendClause alterClause) throws UserException { + List> hostPortPairs = alterClause.getHostPortPairs(); + List backends = Lists.newArrayList(); + for (Pair pair : hostPortPairs) { + Backend be = getBackendWithHeartbeatPort(pair.first, pair.second); + if (be == null) { + throw new DdlException("backend does not exists[" + pair.first + ":" + pair.second + "]"); + } + backends.add(be); + } + + Tag tag = alterClause.getTag(); + for (Backend be : backends) { + if (!be.getTag().equals(tag)) { + be.setTag(tag); + Catalog.getCurrentCatalog().getEditLog().logModifyBackend(be); + LOG.info("finished to modify backend {} ", be); + } + } + } + + public void replayModifyBackend(Backend backend) { + Backend memBe = getBackend(backend.getId()); + memBe.setTag(backend.getTag()); + LOG.debug("replay modify backend: {}", backend); + } + + // Check if there is enough suitable BE for replica allocation + public void checkReplicaAllocation(String cluster, ReplicaAllocation replicaAlloc) throws DdlException { + List backends = getClusterBackends(cluster); + for (Map.Entry entry : replicaAlloc.getAllocMap().entrySet()) { + if (backends.stream().filter(b -> 
b.getTag().equals(entry.getKey())).count() + < entry.getValue()) { + throw new DdlException("Failed to find enough host with tag(" + entry.getKey() + + ") in all backends. need: " + entry.getValue()); + } + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java b/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java index 121ff60332708a..81f80bc84d16d1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java @@ -495,7 +495,7 @@ public void commitTransaction(List tableList, long transactionId, List errorReplicaIds) thr transactionState.setErrorMsg(errMsg); return; } - int quorumReplicaNum = partitionInfo.getReplicationNum(partitionId) / 2 + 1; + int quorumReplicaNum = partitionInfo.getReplicaAllocation(partitionId).getTotalReplicaNum() / 2 + 1; List allIndices; if (transactionState.getLoadedTblIndexes().isEmpty()) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/alter/AlterJobV2Test.java b/fe/fe-core/src/test/java/org/apache/doris/alter/AlterJobV2Test.java index 1b28317a368871..add00cc7e5846a 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/alter/AlterJobV2Test.java +++ b/fe/fe-core/src/test/java/org/apache/doris/alter/AlterJobV2Test.java @@ -53,7 +53,7 @@ public static void beforeClass() throws Exception { FeConstants.default_scheduler_interval_millisecond = 1000; FeConstants.runningUnitTest = true; - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); Config.enable_alpha_rowset = true; // create connect context diff --git a/fe/fe-core/src/test/java/org/apache/doris/alter/AlterTest.java b/fe/fe-core/src/test/java/org/apache/doris/alter/AlterTest.java index 6aa71484c56701..8cc186699cee47 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/alter/AlterTest.java +++ 
b/fe/fe-core/src/test/java/org/apache/doris/alter/AlterTest.java @@ -39,14 +39,14 @@ import org.apache.doris.thrift.TStorageMedium; import org.apache.doris.utframe.UtFrameUtils; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; - import java.io.File; import java.util.List; import java.util.Map; @@ -64,7 +64,7 @@ public static void beforeClass() throws Exception { FeConstants.default_scheduler_interval_millisecond = 100; Config.dynamic_partition_enable = true; Config.dynamic_partition_check_interval_seconds = 1; - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); // create connect context connectContext = UtFrameUtils.createDefaultCtx(); @@ -270,28 +270,28 @@ public void testConflictAlterOperations() throws Exception { alterTable(stmt, false); // set table's default replication num - Assert.assertEquals(Short.valueOf("1"), tbl.getDefaultReplicationNum()); + Assert.assertEquals((short) 1, tbl.getDefaultReplicaAllocation().getTotalReplicaNum()); stmt = "alter table test.tbl1 set ('default.replication_num' = '3');"; alterTable(stmt, false); - Assert.assertEquals(Short.valueOf("3"), tbl.getDefaultReplicationNum()); + Assert.assertEquals((short) 3, tbl.getDefaultReplicaAllocation().getTotalReplicaNum()); // set range table's real replication num Partition p1 = tbl.getPartition("p1"); - Assert.assertEquals(Short.valueOf("1"), Short.valueOf(tbl.getPartitionInfo().getReplicationNum(p1.getId()))); + Assert.assertEquals(Short.valueOf("1"), Short.valueOf(tbl.getPartitionInfo().getReplicaAllocation(p1.getId()).getTotalReplicaNum())); stmt = "alter table test.tbl1 set ('replication_num' = '3');"; alterTable(stmt, true); - Assert.assertEquals(Short.valueOf("1"), 
Short.valueOf(tbl.getPartitionInfo().getReplicationNum(p1.getId()))); + Assert.assertEquals(Short.valueOf("1"), Short.valueOf(tbl.getPartitionInfo().getReplicaAllocation(p1.getId()).getTotalReplicaNum())); // set un-partitioned table's real replication num OlapTable tbl2 = (OlapTable) db.getTable("tbl2"); Partition partition = tbl2.getPartition(tbl2.getName()); - Assert.assertEquals(Short.valueOf("1"), Short.valueOf(tbl2.getPartitionInfo().getReplicationNum(partition.getId()))); + Assert.assertEquals(Short.valueOf("1"), Short.valueOf(tbl2.getPartitionInfo().getReplicaAllocation(partition.getId()).getTotalReplicaNum())); stmt = "alter table test.tbl2 set ('replication_num' = '3');"; - alterTable(stmt, false); - Assert.assertEquals(Short.valueOf("3"), Short.valueOf(tbl2.getPartitionInfo().getReplicationNum(partition.getId()))); + alterTable(stmt, true); + // Assert.assertEquals(Short.valueOf("3"), Short.valueOf(tbl2.getPartitionInfo().getReplicaAllocation(partition.getId()).getTotalReplicaNum())); Thread.sleep(5000); // sleep to wait dynamic partition scheduler run - // add partition without set replication num + // add partition without set replication num, and default num is 3. 
stmt = "alter table test.tbl1 add partition p4 values less than('2020-04-10')"; alterTable(stmt, true); @@ -311,16 +311,16 @@ public void testBatchUpdatePartitionProperties() throws Exception { Partition p4 = tbl4.getPartition("p4"); // batch update replication_num property - String stmt = "alter table test.tbl4 modify partition (p1, p2, p4) set ('replication_num' = '3')"; + String stmt = "alter table test.tbl4 modify partition (p1, p2, p4) set ('replication_num' = '1')"; List partitionList = Lists.newArrayList(p1, p2, p4); for (Partition partition : partitionList) { - Assert.assertEquals(Short.valueOf("1"), Short.valueOf(tbl4.getPartitionInfo().getReplicationNum(partition.getId()))); + Assert.assertEquals(Short.valueOf("1"), Short.valueOf(tbl4.getPartitionInfo().getReplicaAllocation(partition.getId()).getTotalReplicaNum())); } alterTable(stmt, false); for (Partition partition : partitionList) { - Assert.assertEquals(Short.valueOf("3"), Short.valueOf(tbl4.getPartitionInfo().getReplicationNum(partition.getId()))); + Assert.assertEquals(Short.valueOf("1"), Short.valueOf(tbl4.getPartitionInfo().getReplicaAllocation(partition.getId()).getTotalReplicaNum())); } - Assert.assertEquals(Short.valueOf("1"), Short.valueOf(tbl4.getPartitionInfo().getReplicationNum(p3.getId()))); + Assert.assertEquals(Short.valueOf("1"), Short.valueOf(tbl4.getPartitionInfo().getReplicaAllocation(p3.getId()).getTotalReplicaNum())); // batch update in_memory property stmt = "alter table test.tbl4 modify partition (p1, p2, p3) set ('in_memory' = 'true')"; @@ -355,7 +355,7 @@ public void testBatchUpdatePartitionProperties() throws Exception { partitionList = Lists.newArrayList(p1, p2, p3, p4); alterTable(stmt, false); for (Partition partition : partitionList) { - Assert.assertEquals(Short.valueOf("1"), Short.valueOf(tbl4.getPartitionInfo().getReplicationNum(partition.getId()))); + Assert.assertEquals(Short.valueOf("1"), 
Short.valueOf(tbl4.getPartitionInfo().getReplicaAllocation(partition.getId()).getTotalReplicaNum())); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/alter/BatchRollupJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/alter/BatchRollupJobTest.java index 898deb33a21d29..3e6833855b5973 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/alter/BatchRollupJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/alter/BatchRollupJobTest.java @@ -49,7 +49,7 @@ public class BatchRollupJobTest { @BeforeClass public static void setup() throws Exception { - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); ctx = UtFrameUtils.createDefaultCtx(); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/AdminSetConfigStmtTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/AdminSetConfigStmtTest.java index 83e0ef3a0c6b8e..f6ce7409ac2a70 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/AdminSetConfigStmtTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/AdminSetConfigStmtTest.java @@ -40,7 +40,7 @@ public class AdminSetConfigStmtTest { @BeforeClass public static void beforeClass() throws Exception { - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); // create connect context connectContext = UtFrameUtils.createDefaultCtx(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/AggregateTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/AggregateTest.java index 753e255f33bf76..3f0028cbff9dd0 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/AggregateTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/AggregateTest.java @@ -42,7 +42,7 @@ public class AggregateTest { @BeforeClass public static void beforeClass() throws Exception{ FeConstants.runningUnitTest = true; - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); 
dorisAssert = new DorisAssert(); dorisAssert.withDatabase(DB_NAME).useDatabase(DB_NAME); String createTableSQL = "create table " + DB_NAME + "." + TABLE_NAME + " (empid int, name varchar, " + diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/InsertStmtTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/InsertStmtTest.java index cbd4b67112f3ec..1651552c95206c 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/InsertStmtTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/InsertStmtTest.java @@ -56,7 +56,7 @@ public static void tearDown() throws Exception { @BeforeClass public static void setUp() throws Exception { - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); String createTblStmtStr = "create table db.tbl(kk1 int, kk2 varchar(32), kk3 int, kk4 int) " + "AGGREGATE KEY(kk1, kk2,kk3,kk4) distributed by hash(kk1) buckets 3 properties('replication_num' = '1');"; dorisAssert = new DorisAssert(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/ListPartitionPrunerTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/ListPartitionPrunerTest.java index 4c4154ff860769..b93cf41f79b295 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/ListPartitionPrunerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/ListPartitionPrunerTest.java @@ -46,7 +46,7 @@ public static void tearDown() throws Exception { public static void setUp() throws Exception { Config.enable_batch_delete_by_default = true; FeConstants.runningUnitTest = true; - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); String createSinglePartColWithSinglePartKey = "create table test.t1\n" + "(k1 int not null, k2 varchar(128), k3 int, v1 int, v2 int)\n" diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java index 
17b107cd413f89..17f75e19541c93 100755 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java @@ -58,7 +58,7 @@ public static void tearDown() throws Exception { public static void setUp() throws Exception { Config.enable_batch_delete_by_default = true; Config.enable_http_server_v2 = false; - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); String createTblStmtStr = "create table db1.tbl1(k1 varchar(32), k2 varchar(32), k3 varchar(32), k4 int, k5 largeint) " + "AGGREGATE KEY(k1, k2,k3,k4,k5) distributed by hash(k1) buckets 3 properties('replication_num' = '1');"; String createBaseAllStmtStr = "create table db1.baseall(k1 int, k2 varchar(32)) distributed by hash(k1) " diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/SetVariableTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/SetVariableTest.java index 465234850e53cf..f27c2279d6b3a1 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/SetVariableTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/SetVariableTest.java @@ -38,7 +38,7 @@ public class SetVariableTest { @BeforeClass public static void beforeClass() throws Exception { - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); // create connect context connectContext = UtFrameUtils.createDefaultCtx(); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/StmtRewriterTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/StmtRewriterTest.java index 039a1901ade7b1..6ee7e44121298d 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/StmtRewriterTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/StmtRewriterTest.java @@ -42,7 +42,7 @@ public class StmtRewriterTest { @BeforeClass public static void beforeClass() throws Exception{ FeConstants.runningUnitTest = true; - 
UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); dorisAssert = new DorisAssert(); dorisAssert.withDatabase(DB_NAME).useDatabase(DB_NAME); String createTableSQL = "create table " + DB_NAME + "." + TABLE_NAME + " (empid int, name varchar, " + diff --git a/fe/fe-core/src/test/java/org/apache/doris/backup/CatalogMocker.java b/fe/fe-core/src/test/java/org/apache/doris/backup/CatalogMocker.java index d184f22ac84ca4..1ee3322c66346c 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/backup/CatalogMocker.java +++ b/fe/fe-core/src/test/java/org/apache/doris/backup/CatalogMocker.java @@ -41,6 +41,7 @@ import org.apache.doris.catalog.RangePartitionItem; import org.apache.doris.catalog.Replica; import org.apache.doris.catalog.Replica.ReplicaState; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.SinglePartitionInfo; import org.apache.doris.catalog.Tablet; @@ -238,7 +239,7 @@ public static Database mockDb() throws AnalysisException { Partition partition = new Partition(TEST_SINGLE_PARTITION_ID, TEST_SINGLE_PARTITION_NAME, baseIndex, distributionInfo); PartitionInfo partitionInfo = new SinglePartitionInfo(); - partitionInfo.setReplicationNum(TEST_SINGLE_PARTITION_ID, (short) 3); + partitionInfo.setReplicaAllocation(TEST_SINGLE_PARTITION_ID, new ReplicaAllocation((short) 3)); partitionInfo.setIsInMemory(TEST_SINGLE_PARTITION_ID, false); DataProperty dataProperty = new DataProperty(TStorageMedium.HDD); partitionInfo.setDataProperty(TEST_SINGLE_PARTITION_ID, dataProperty); @@ -301,23 +302,23 @@ public static Database mockDb() throws AnalysisException { PartitionKey rangeP2Lower = PartitionKey.createPartitionKey(Lists.newArrayList(new PartitionValue("10")), - Lists.newArrayList(TEST_TBL_BASE_SCHEMA.get(0))); + Lists.newArrayList(TEST_TBL_BASE_SCHEMA.get(0))); PartitionKey rangeP2Upper = PartitionKey.createPartitionKey(Lists.newArrayList(new 
PartitionValue("20")), - Lists.newArrayList(TEST_TBL_BASE_SCHEMA.get(0))); + Lists.newArrayList(TEST_TBL_BASE_SCHEMA.get(0))); Range rangeP2 = Range.closedOpen(rangeP2Lower, rangeP2Upper); PartitionItem item2 = new RangePartitionItem(rangeP2); rangePartitionInfo.setItem(TEST_PARTITION1_ID, false, item2); - rangePartitionInfo.setReplicationNum(TEST_PARTITION1_ID, (short) 3); - rangePartitionInfo.setReplicationNum(TEST_PARTITION2_ID, (short) 3); + rangePartitionInfo.setReplicaAllocation(TEST_PARTITION1_ID, new ReplicaAllocation((short) 3)); + rangePartitionInfo.setReplicaAllocation(TEST_PARTITION2_ID, new ReplicaAllocation((short) 3)); DataProperty dataPropertyP1 = new DataProperty(TStorageMedium.HDD); DataProperty dataPropertyP2 = new DataProperty(TStorageMedium.HDD); rangePartitionInfo.setDataProperty(TEST_PARTITION1_ID, dataPropertyP1); rangePartitionInfo.setDataProperty(TEST_PARTITION2_ID, dataPropertyP2); OlapTable olapTable2 = new OlapTable(TEST_TBL2_ID, TEST_TBL2_NAME, TEST_TBL_BASE_SCHEMA, - KeysType.AGG_KEYS, rangePartitionInfo, distributionInfo2); + KeysType.AGG_KEYS, rangePartitionInfo, distributionInfo2); Deencapsulation.setField(olapTable2, "baseIndexId", TEST_TBL2_ID); Tablet baseTabletP1 = new Tablet(TEST_BASE_TABLET_P1_ID); diff --git a/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java index bf7fdf6c2e06c8..d54dcf906d8853 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/backup/RestoreJobTest.java @@ -19,8 +19,8 @@ import org.apache.doris.analysis.StorageBackend; import org.apache.doris.backup.BackupJobInfo.BackupIndexInfo; -import org.apache.doris.backup.BackupJobInfo.BackupPartitionInfo; import org.apache.doris.backup.BackupJobInfo.BackupOlapTableInfo; +import org.apache.doris.backup.BackupJobInfo.BackupPartitionInfo; import org.apache.doris.backup.BackupJobInfo.BackupTabletInfo; 
import org.apache.doris.backup.RestoreJob.RestoreJobState; import org.apache.doris.catalog.Catalog; @@ -29,6 +29,7 @@ import org.apache.doris.catalog.MaterializedIndex.IndexExtState; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.Resource; import org.apache.doris.catalog.Table; import org.apache.doris.catalog.Tablet; @@ -37,6 +38,7 @@ import org.apache.doris.common.MarkedCountDownLatch; import org.apache.doris.common.jmockit.Deencapsulation; import org.apache.doris.persist.EditLog; +import org.apache.doris.resource.Tag; import org.apache.doris.system.SystemInfoService; import org.apache.doris.task.AgentTask; import org.apache.doris.task.AgentTaskQueue; @@ -47,6 +49,7 @@ import org.apache.doris.thrift.TFinishTaskRequest; import org.apache.doris.thrift.TStatus; import org.apache.doris.thrift.TStatusCode; +import org.apache.doris.thrift.TStorageMedium; import org.apache.doris.thrift.TTaskType; import com.google.common.collect.Lists; @@ -158,11 +161,12 @@ public void setUp() throws AnalysisException { new Expectations() { { - systemInfoService.seqChooseBackendIds(anyInt, anyBoolean, anyBoolean, anyString); + systemInfoService.seqChooseBackendIdsByStorageMediumAndTag(anyInt, anyBoolean, anyBoolean, anyString, + (TStorageMedium) any, (Tag) any); minTimes = 0; result = new Delegate() { public synchronized List seqChooseBackendIds(int backendNum, boolean needAlive, - boolean isCreate, String clusterName) { + boolean isCreate, String clusterName) { List beIds = Lists.newArrayList(); beIds.add(CatalogMocker.BACKEND1_ID); beIds.add(CatalogMocker.BACKEND2_ID); @@ -245,9 +249,9 @@ boolean await(long timeout, TimeUnit unit) { // drop this table, cause we want to try restoring this table db.dropTable(expectedRestoreTbl.getName()); - + job = new RestoreJob(label, "2018-01-01 01:01:01", db.getId(), db.getFullName(), - jobInfo, false, 3, 100000, -1, catalog, 
repo.getId()); + jobInfo, false, new ReplicaAllocation((short) 3), 100000, -1, catalog, repo.getId()); List
tbls = Lists.newArrayList(); List resources = Lists.newArrayList(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/AdminStmtTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/AdminStmtTest.java index d4dff175a6c26c..976d1c5d2a1042 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/AdminStmtTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/AdminStmtTest.java @@ -54,7 +54,7 @@ public class AdminStmtTest { @BeforeClass public static void beforeClass() throws Exception { - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); // create connect context connectContext = UtFrameUtils.createDefaultCtx(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/CatalogOperationTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/CatalogOperationTest.java index a85a5d21adad1e..a0ada8de2aa62e 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/CatalogOperationTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/CatalogOperationTest.java @@ -47,7 +47,7 @@ public class CatalogOperationTest { @BeforeClass public static void beforeClass() throws Exception { FeConstants.default_scheduler_interval_millisecond = 1000; - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); // create connect context connectContext = UtFrameUtils.createDefaultCtx(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/CatalogTestUtil.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/CatalogTestUtil.java index 0ec5a1b352bef3..80d60296c102ae 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/CatalogTestUtil.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/CatalogTestUtil.java @@ -213,7 +213,7 @@ public static Database createSimpleDb(long dbId, long tableId, long partitionId, // table PartitionInfo partitionInfo = new SinglePartitionInfo(); partitionInfo.setDataProperty(partitionId, 
DataProperty.DEFAULT_DATA_PROPERTY); - partitionInfo.setReplicationNum(partitionId, (short) 3); + partitionInfo.setReplicaAllocation(partitionId, new ReplicaAllocation((short) 3)); OlapTable table = new OlapTable(tableId, testTable1, columns, KeysType.AGG_KEYS, partitionInfo, distributionInfo); table.addPartition(partition); diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/ColocateTableTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/ColocateTableTest.java index 3accf552267889..d50acbb9897fe3 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/ColocateTableTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/ColocateTableTest.java @@ -24,10 +24,12 @@ import org.apache.doris.common.DdlException; import org.apache.doris.common.jmockit.Deencapsulation; import org.apache.doris.qe.ConnectContext; +import org.apache.doris.resource.Tag; +import org.apache.doris.utframe.UtFrameUtils; import com.google.common.collect.Multimap; +import com.google.common.collect.Table; -import org.apache.doris.utframe.UtFrameUtils; import org.junit.After; import org.junit.AfterClass; import org.junit.Assert; @@ -38,7 +40,6 @@ import org.junit.rules.ExpectedException; import java.io.File; -import java.util.Collections; import java.util.List; import java.util.Map; import java.util.UUID; @@ -58,7 +59,7 @@ public class ColocateTableTest { @BeforeClass public static void beforeClass() throws Exception { - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); connectContext = UtFrameUtils.createDefaultCtx(); } @@ -110,7 +111,7 @@ public void testCreateOneTable() throws Exception { Assert.assertEquals(1, Deencapsulation.>getField(index, "group2Tables").size()); Assert.assertEquals(1, index.getAllGroupIds().size()); Assert.assertEquals(1, Deencapsulation.>getField(index, "table2Group").size()); - Assert.assertEquals(1, Deencapsulation.>>>getField(index, "group2BackendsPerBucketSeq").size()); + 
Assert.assertEquals(1, Deencapsulation.>>>getField(index, "group2BackendsPerBucketSeq").size()); Assert.assertEquals(1, Deencapsulation.>getField(index, "group2Schema").size()); Assert.assertEquals(0, index.getUnstableGroupIds().size()); @@ -120,8 +121,8 @@ public void testCreateOneTable() throws Exception { Assert.assertEquals(dbId, index.getGroup(tableId).dbId); GroupId groupId = index.getGroup(tableId); - List backendIds = index.getBackendsPerBucketSeq(groupId).get(0); - Assert.assertEquals(1, backendIds.size()); + Map>> backendIds = index.getBackendsPerBucketSeq(groupId); + Assert.assertEquals(1, backendIds.get(Tag.DEFAULT_BACKEND_TAG).get(0).size()); String fullGroupName = dbId + "_" + groupName; Assert.assertEquals(tableId, index.getTableIdByGroup(fullGroupName)); @@ -129,7 +130,7 @@ public void testCreateOneTable() throws Exception { Assert.assertNotNull(groupSchema); Assert.assertEquals(dbId, groupSchema.getGroupId().dbId); Assert.assertEquals(1, groupSchema.getBucketsNum()); - Assert.assertEquals(1, groupSchema.getReplicationNum()); + Assert.assertEquals((short) 1, groupSchema.getReplicaAlloc().getTotalReplicaNum()); } @Test @@ -166,7 +167,7 @@ public void testCreateTwoTableWithSameGroup() throws Exception { Assert.assertEquals(2, Deencapsulation.>getField(index, "group2Tables").size()); Assert.assertEquals(1, index.getAllGroupIds().size()); Assert.assertEquals(2, Deencapsulation.>getField(index, "table2Group").size()); - Assert.assertEquals(1, Deencapsulation.>>>getField(index, "group2BackendsPerBucketSeq").size()); + Assert.assertEquals(1, Deencapsulation.>>>getField(index, "group2BackendsPerBucketSeq").size()); Assert.assertEquals(1, Deencapsulation.>getField(index, "group2Schema").size()); Assert.assertEquals(0, index.getUnstableGroupIds().size()); @@ -181,7 +182,7 @@ public void testCreateTwoTableWithSameGroup() throws Exception { Assert.assertEquals(1, index.getAllGroupIds().size()); Assert.assertEquals(1, Deencapsulation.>getField(index, 
"table2Group").size()); Assert.assertEquals(1, - Deencapsulation.>>>getField(index, "group2BackendsPerBucketSeq").size()); + Deencapsulation.>>>getField(index, "group2BackendsPerBucketSeq").size()); Assert.assertEquals(0, index.getUnstableGroupIds().size()); Assert.assertFalse(index.isColocateTable(firstTblId)); @@ -194,7 +195,7 @@ public void testCreateTwoTableWithSameGroup() throws Exception { Assert.assertEquals(0, index.getAllGroupIds().size()); Assert.assertEquals(0, Deencapsulation.>getField(index, "table2Group").size()); Assert.assertEquals(0, - Deencapsulation.>>>getField(index, "group2BackendsPerBucketSeq").size()); + Deencapsulation.>>>getField(index, "group2BackendsPerBucketSeq").size()); Assert.assertEquals(0, index.getUnstableGroupIds().size()); Assert.assertFalse(index.isColocateTable(firstTblId)); @@ -246,7 +247,7 @@ public void testReplicationNum() throws Exception { ");"); expectedEx.expect(DdlException.class); - expectedEx.expectMessage("Colocate tables must have same replication num: 1"); + expectedEx.expectMessage("Colocate tables must have same replication allocation: tag.location.default: 1"); createTable("create table " + dbName + "." 
+ tableName2 + " (\n" + " `k1` int NULL COMMENT \"\",\n" + " `k2` varchar(10) NULL COMMENT \"\"\n" + diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java index 00248b0486acbf..67a85a9d8d774f 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java @@ -51,7 +51,7 @@ public class CreateFunctionTest { @BeforeClass public static void setup() throws Exception { - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); FeConstants.runningUnitTest = true; } diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateTableLikeTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateTableLikeTest.java index a0eae7aada4bb1..9dfc635834efdd 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateTableLikeTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateTableLikeTest.java @@ -17,7 +17,6 @@ package org.apache.doris.catalog; -import avro.shaded.com.google.common.collect.Lists; import org.apache.doris.analysis.CreateDbStmt; import org.apache.doris.analysis.CreateTableLikeStmt; import org.apache.doris.analysis.CreateTableStmt; @@ -25,6 +24,7 @@ import org.apache.doris.common.ExceptionChecker; import org.apache.doris.qe.ConnectContext; import org.apache.doris.utframe.UtFrameUtils; + import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; @@ -34,6 +34,8 @@ import java.util.List; import java.util.UUID; +import avro.shaded.com.google.common.collect.Lists; + /** * @author wangcong * @version 1.0 @@ -46,7 +48,7 @@ public class CreateTableLikeTest { @BeforeClass public static void beforeClass() throws Exception { - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); // create connect context connectContext = 
UtFrameUtils.createDefaultCtx(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateTableTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateTableTest.java index 2ebbc40cb3dc60..20524ebf7a3c17 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateTableTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateTableTest.java @@ -43,7 +43,7 @@ public class CreateTableTest { @BeforeClass public static void beforeClass() throws Exception { - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); // create connect context connectContext = UtFrameUtils.createDefaultCtx(); @@ -239,7 +239,8 @@ public void testAbnormal() throws DdlException { + "properties('replication_num' = '1', 'short_key' = '4');")); ExceptionChecker - .expectThrowsWithMsg(DdlException.class, "Failed to find enough host in all backends. need: 3", + .expectThrowsWithMsg(DdlException.class, "Failed to find enough host with storage medium and " + + "tag(NaN/{\"location\" : \"default\"}) in all backends. need: 3", () -> createTable("create table test.atbl5\n" + "(k1 int, k2 int, k3 int)\n" + "duplicate key(k1, k2, k3)\n" + "distributed by hash(k1) buckets 1\n" + "properties('replication_num' = '3');")); @@ -256,7 +257,8 @@ public void testAbnormal() throws DdlException { ConfigBase.setMutableConfig("enable_strict_storage_medium_check", "true"); ExceptionChecker - .expectThrowsWithMsg(DdlException.class, "Failed to find enough host with storage medium is SSD in all backends. need: 1", + .expectThrowsWithMsg(DdlException.class, "Failed to find enough host with storage medium and " + + "tag(SSD/{\"location\" : \"default\"}) in all backends. 
need: 1", () -> createTable("create table test.tb7(key1 int, key2 varchar(10)) distributed by hash(key1) \n" + "buckets 1 properties('replication_num' = '1', 'storage_medium' = 'ssd');")); diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateViewTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateViewTest.java index 3e495c4998d6b2..860a3ce0aeae79 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateViewTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateViewTest.java @@ -41,7 +41,7 @@ public class CreateViewTest { @BeforeClass public static void beforeClass() throws Exception { - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); // create connect context connectContext = UtFrameUtils.createDefaultCtx(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/DropDbTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/DropDbTest.java index 8a75300fac59d0..2a09504e3159ba 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/DropDbTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/DropDbTest.java @@ -25,12 +25,12 @@ import org.apache.doris.common.ExceptionChecker; import org.apache.doris.qe.ConnectContext; import org.apache.doris.utframe.UtFrameUtils; + import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; - import java.io.File; import java.util.List; import java.util.UUID; @@ -42,7 +42,7 @@ public class DropDbTest { @BeforeClass public static void beforeClass() throws Exception { - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); // create connect context connectContext = UtFrameUtils.createDefaultCtx(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/DropPartitionTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/DropPartitionTest.java index ba2ef863f0db03..0bf79970b6b8ff 100644 --- 
a/fe/fe-core/src/test/java/org/apache/doris/catalog/DropPartitionTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/DropPartitionTest.java @@ -42,7 +42,7 @@ public class DropPartitionTest { @BeforeClass public static void beforeClass() throws Exception { - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); // create connect context connectContext = UtFrameUtils.createDefaultCtx(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/DropTableTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/DropTableTest.java index 4c12e5380ed735..779be5936651e5 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/DropTableTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/DropTableTest.java @@ -25,6 +25,7 @@ import org.apache.doris.common.ExceptionChecker; import org.apache.doris.qe.ConnectContext; import org.apache.doris.utframe.UtFrameUtils; + import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; @@ -41,7 +42,7 @@ public class DropTableTest { @BeforeClass public static void beforeClass() throws Exception { - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); // create connect context connectContext = UtFrameUtils.createDefaultCtx(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/DynamicPartitionTableTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/DynamicPartitionTableTest.java index 06f01c04ca79c2..4aefba33743540 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/DynamicPartitionTableTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/DynamicPartitionTableTest.java @@ -57,7 +57,7 @@ public static void beforeClass() throws Exception { FeConstants.default_scheduler_interval_millisecond = 1000; FeConstants.runningUnitTest = true; - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); // create connect context 
connectContext = UtFrameUtils.createDefaultCtx(); @@ -107,7 +107,7 @@ public void testNormal() throws Exception { createTable(createOlapTblStmt); Database db = Catalog.getCurrentCatalog().getDb("default_cluster:test"); OlapTable table = (OlapTable) db.getTable("dynamic_partition_normal"); - Assert.assertEquals(table.getTableProperty().getDynamicPartitionProperty().getReplicationNum(), DynamicPartitionProperty.NOT_SET_REPLICATION_NUM); + Assert.assertTrue(table.getTableProperty().getDynamicPartitionProperty().getReplicaAllocation().isNotSet()); } @Test @@ -447,7 +447,7 @@ public void testSetDynamicPartitionReplicationNum() throws Exception { createTable(createOlapTblStmt); Database db = Catalog.getCurrentCatalog().getDb("default_cluster:test"); OlapTable table = (OlapTable) db.getTable(tableName); - Assert.assertEquals(table.getTableProperty().getDynamicPartitionProperty().getReplicationNum(), 2); + Assert.assertEquals(2, table.getTableProperty().getDynamicPartitionProperty().getReplicaAllocation().getTotalReplicaNum()); } @Test diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/ModifyBackendTagTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/ModifyBackendTagTest.java new file mode 100644 index 00000000000000..4dd731624e6fc5 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/ModifyBackendTagTest.java @@ -0,0 +1,184 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.catalog; + +import org.apache.doris.analysis.AlterSystemStmt; +import org.apache.doris.analysis.AlterTableStmt; +import org.apache.doris.analysis.CreateDbStmt; +import org.apache.doris.analysis.CreateTableStmt; +import org.apache.doris.clone.DynamicPartitionScheduler; +import org.apache.doris.common.DdlException; +import org.apache.doris.common.ExceptionChecker; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.qe.DdlExecutor; +import org.apache.doris.resource.Tag; +import org.apache.doris.system.Backend; +import org.apache.doris.system.SystemInfoService; +import org.apache.doris.utframe.UtFrameUtils; + +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.File; +import java.util.List; +import java.util.Map; +import java.util.UUID; + +public class ModifyBackendTagTest { + + private static String runningDir = "fe/mocked/ModifyBackendTagTest/" + UUID.randomUUID().toString() + "/"; + private static ConnectContext connectContext; + + @BeforeClass + public static void beforeClass() throws Exception { + UtFrameUtils.createDorisCluster(runningDir); + // create connect context + connectContext = UtFrameUtils.createDefaultCtx(); + // create database + String createDbStmtStr = "create database test;"; + CreateDbStmt createDbStmt = (CreateDbStmt) UtFrameUtils.parseAndAnalyzeStmt(createDbStmtStr, connectContext); + Catalog.getCurrentCatalog().createDb(createDbStmt); + } + + @AfterClass + public static void tearDown() { + File file = new 
File(runningDir); + file.delete(); + } + + @Test + public void testModifyBackend() throws Exception { + SystemInfoService infoService = Catalog.getCurrentSystemInfo(); + List backends = infoService.getClusterBackends(SystemInfoService.DEFAULT_CLUSTER); + Assert.assertEquals(1, backends.size()); + String beHostPort = backends.get(0).getHost() + ":" + backends.get(0).getHeartbeatPort(); + + // modify backend tag + String stmtStr = "alter system modify backend \"" + beHostPort + "\" set ('tag.location' = 'zone1')"; + AlterSystemStmt stmt = (AlterSystemStmt) UtFrameUtils.parseAndAnalyzeStmt(stmtStr, connectContext); + DdlExecutor.execute(Catalog.getCurrentCatalog(), stmt); + backends = infoService.getClusterBackends(SystemInfoService.DEFAULT_CLUSTER); + Assert.assertEquals(1, backends.size()); + + // create table + String createStr = "create table test.tbl1(\n" + + "k1 int\n" + + ") distributed by hash(k1)\n" + + "buckets 3 properties(\n" + + "\"replication_num\" = \"1\"\n" + + ");"; + CreateTableStmt createStmt = (CreateTableStmt) UtFrameUtils.parseAndAnalyzeStmt(createStr, connectContext); + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "Failed to find enough host with storage medium and tag(NaN/{\"location\" : \"default\"}) in all backends. 
need: 1", + () -> DdlExecutor.execute(Catalog.getCurrentCatalog(), createStmt)); + + createStr = "create table test.tbl1(\n" + + "k1 int\n" + + ") distributed by hash(k1)\n" + + "buckets 3 properties(\n" + + "\"replication_allocation\" = \"tag.location.zone1: 1\"\n" + + ");"; + CreateTableStmt createStmt2 = (CreateTableStmt) UtFrameUtils.parseAndAnalyzeStmt(createStr, connectContext); + ExceptionChecker.expectThrowsNoException(() -> DdlExecutor.execute(Catalog.getCurrentCatalog(), createStmt2)); + + // create dynamic partition tbl + createStr = "create table test.tbl3(\n" + + "k1 date, k2 int\n" + + ") partition by range(k1)()\n" + + "distributed by hash(k1)\n" + + "buckets 3 properties(\n" + + " \"dynamic_partition.enable\" = \"true\",\n" + + " \"dynamic_partition.time_unit\" = \"DAY\",\n" + + " \"dynamic_partition.start\" = \"-3\",\n" + + " \"dynamic_partition.end\" = \"3\",\n" + + " \"dynamic_partition.prefix\" = \"p\",\n" + + " \"dynamic_partition.buckets\" = \"1\",\n" + + " \"dynamic_partition.replication_num\" = \"1\"\n" + + ");"; + CreateTableStmt createStmt3 = (CreateTableStmt) UtFrameUtils.parseAndAnalyzeStmt(createStr, connectContext); + // although there is no exception throw, but partition create failed, because there is no BE + // with "default" tag + ExceptionChecker.expectThrowsNoException(() -> DdlExecutor.execute(Catalog.getCurrentCatalog(), createStmt3)); + Database db = Catalog.getCurrentCatalog().getDb("default_cluster:test"); + Table tbl3 = db.getTable("tbl3"); + String err = Catalog.getCurrentCatalog().getDynamicPartitionScheduler().getRuntimeInfo(tbl3.getId(), DynamicPartitionScheduler.CREATE_PARTITION_MSG); + Assert.assertTrue(err.contains("Failed to find enough host with storage medium and tag")); + + createStr = "create table test.tbl4(\n" + + "k1 date, k2 int\n" + + ") partition by range(k1)()\n" + + "distributed by hash(k1)\n" + + "buckets 3 properties(\n" + + " \"dynamic_partition.enable\" = \"true\",\n" + + " 
\"dynamic_partition.time_unit\" = \"DAY\",\n" + + " \"dynamic_partition.start\" = \"-3\",\n" + + " \"dynamic_partition.end\" = \"3\",\n" + + " \"dynamic_partition.prefix\" = \"p\",\n" + + " \"dynamic_partition.buckets\" = \"1\",\n" + + " \"dynamic_partition.replication_allocation\" = \"tag.location.zone1:1\"\n" + + ");"; + CreateTableStmt createStmt4 = (CreateTableStmt) UtFrameUtils.parseAndAnalyzeStmt(createStr, connectContext); + ExceptionChecker.expectThrowsNoException(() -> DdlExecutor.execute(Catalog.getCurrentCatalog(), createStmt4)); + DynamicPartitionScheduler scheduler = Catalog.getCurrentCatalog().getDynamicPartitionScheduler(); + OlapTable tbl = (OlapTable) db.getTable("tbl4"); + PartitionInfo partitionInfo = tbl.getPartitionInfo(); + Assert.assertEquals(4, partitionInfo.idToItem.size()); + ReplicaAllocation replicaAlloc = new ReplicaAllocation(); + replicaAlloc.put(Tag.create(Tag.TYPE_LOCATION, "zone1"), (short) 1); + for (ReplicaAllocation allocation : partitionInfo.idToReplicaAllocation.values()) { + Assert.assertEquals(replicaAlloc, allocation); + } + + ReplicaAllocation defaultAlloc = tbl.getDefaultReplicaAllocation(); + Assert.assertEquals(ReplicaAllocation.DEFAULT_ALLOCATION, defaultAlloc); + TableProperty tableProperty = tbl.getTableProperty(); + Map tblProperties = tableProperty.getProperties(); + Assert.assertFalse(tblProperties.containsKey("default.replication_allocation")); + + // modify default replica + String alterStr = "alter table test.tbl4 set ('default.replication_allocation' = 'tag.location.zonex:1')"; + AlterTableStmt alterStmt = (AlterTableStmt) UtFrameUtils.parseAndAnalyzeStmt(alterStr, connectContext); + ExceptionChecker.expectThrowsNoException(() -> DdlExecutor.execute(Catalog.getCurrentCatalog(), alterStmt)); + defaultAlloc = tbl.getDefaultReplicaAllocation(); + ReplicaAllocation expectedAlloc = new ReplicaAllocation(); + expectedAlloc.put(Tag.create(Tag.TYPE_LOCATION, "zonex"), (short) 1); + Assert.assertEquals(expectedAlloc, 
defaultAlloc); + tblProperties = tableProperty.getProperties(); + Assert.assertTrue(tblProperties.containsKey("default.replication_allocation")); + + // modify partition replica with wrong zone + String partName = tbl.getPartitionNames().stream().findFirst().get(); + alterStr = "alter table test.tbl4 modify partition " + partName + + " set ('replication_allocation' = 'tag.location.zonex:1')"; + AlterTableStmt alterStmt2 = (AlterTableStmt) UtFrameUtils.parseAndAnalyzeStmt(alterStr, connectContext); + ExceptionChecker.expectThrowsWithMsg(DdlException.class, + "Failed to find enough host with tag({\"location\" : \"zonex\"}) in all backends. need: 1", + () -> DdlExecutor.execute(Catalog.getCurrentCatalog(), alterStmt2)); + tblProperties = tableProperty.getProperties(); + Assert.assertTrue(tblProperties.containsKey("default.replication_allocation")); + + alterStr = "alter table test.tbl4 modify partition " + partName + + " set ('replication_allocation' = 'tag.location.zone1:1')"; + AlterTableStmt alterStmt3 = (AlterTableStmt) UtFrameUtils.parseAndAnalyzeStmt(alterStr, connectContext); + ExceptionChecker.expectThrowsNoException(() -> DdlExecutor.execute(Catalog.getCurrentCatalog(), alterStmt3)); + tblProperties = tableProperty.getProperties(); + Assert.assertTrue(tblProperties.containsKey("default.replication_allocation")); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/RecoverTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/RecoverTest.java index e2ca0e8b28dc98..508be5e48d28b8 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/RecoverTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/RecoverTest.java @@ -47,7 +47,7 @@ public class RecoverTest { @BeforeClass public static void beforeClass() throws Exception { - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); // create connect context connectContext = UtFrameUtils.createDefaultCtx(); diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/catalog/ReplicaAllocationTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/ReplicaAllocationTest.java new file mode 100644 index 00000000000000..85db811db0e3e1 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/ReplicaAllocationTest.java @@ -0,0 +1,159 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.catalog; + +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.ExceptionChecker; +import org.apache.doris.common.FeConstants; +import org.apache.doris.common.util.PropertyAnalyzer; +import org.apache.doris.meta.MetaContext; +import org.apache.doris.resource.Tag; + +import com.google.common.collect.Maps; + +import org.junit.Assert; +import org.junit.Test; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.Map; + +public class ReplicaAllocationTest { + + @Test + public void testNormal() throws AnalysisException { + // DEFAULT_ALLOCATION + ReplicaAllocation replicaAlloc = ReplicaAllocation.DEFAULT_ALLOCATION; + Assert.assertFalse(replicaAlloc.isNotSet()); + Assert.assertTrue(replicaAlloc.equals(ReplicaAllocation.DEFAULT_ALLOCATION)); + Assert.assertFalse(replicaAlloc.isEmpty()); + Assert.assertEquals(3, replicaAlloc.getTotalReplicaNum()); + Assert.assertEquals("tag.location.default: 3", replicaAlloc.toCreateStmt()); + + // NOT SET + replicaAlloc = ReplicaAllocation.NOT_SET; + Assert.assertTrue(replicaAlloc.isNotSet()); + Assert.assertFalse(replicaAlloc.equals(ReplicaAllocation.DEFAULT_ALLOCATION)); + Assert.assertTrue(replicaAlloc.isEmpty()); + Assert.assertEquals(0, replicaAlloc.getTotalReplicaNum()); + Assert.assertEquals("", replicaAlloc.toCreateStmt()); + + // set replica num + replicaAlloc = new ReplicaAllocation((short) 5); + Assert.assertFalse(replicaAlloc.isNotSet()); + Assert.assertFalse(replicaAlloc.equals(ReplicaAllocation.DEFAULT_ALLOCATION)); + Assert.assertFalse(replicaAlloc.isEmpty()); + Assert.assertEquals(5, replicaAlloc.getTotalReplicaNum()); + Assert.assertEquals("tag.location.default: 5", replicaAlloc.toCreateStmt()); + + // set replica num with tag + replicaAlloc = new ReplicaAllocation(); + replicaAlloc.put(Tag.create(Tag.TYPE_LOCATION, 
"zone1"), (short) 3); + replicaAlloc.put(Tag.create(Tag.TYPE_LOCATION, "zone2"), (short) 2); + Assert.assertFalse(replicaAlloc.isNotSet()); + Assert.assertFalse(replicaAlloc.isEmpty()); + Assert.assertEquals(5, replicaAlloc.getTotalReplicaNum()); + Assert.assertEquals("tag.location.zone2: 2, tag.location.zone1: 3", replicaAlloc.toCreateStmt()); + } + + @Test + public void testPropertyAnalyze() throws AnalysisException { + Map properties = Maps.newHashMap(); + properties.put(PropertyAnalyzer.PROPERTIES_REPLICATION_NUM, "3"); + ReplicaAllocation replicaAlloc = PropertyAnalyzer.analyzeReplicaAllocation(properties, ""); + Assert.assertEquals(ReplicaAllocation.DEFAULT_ALLOCATION, replicaAlloc); + Assert.assertTrue(properties.isEmpty()); + + // not set + properties = Maps.newHashMap(); + replicaAlloc = PropertyAnalyzer.analyzeReplicaAllocation(properties, ""); + Assert.assertEquals(ReplicaAllocation.NOT_SET, replicaAlloc); + + properties = Maps.newHashMap(); + properties.put("default." + PropertyAnalyzer.PROPERTIES_REPLICATION_NUM, "3"); + replicaAlloc = PropertyAnalyzer.analyzeReplicaAllocation(properties, "default"); + Assert.assertEquals(ReplicaAllocation.DEFAULT_ALLOCATION, replicaAlloc); + Assert.assertTrue(properties.isEmpty()); + + properties = Maps.newHashMap(); + properties.put(PropertyAnalyzer.PROPERTIES_REPLICATION_ALLOCATION, "tag.location.zone2: 2, tag.location.zone1: 3"); + replicaAlloc = PropertyAnalyzer.analyzeReplicaAllocation(properties, ""); + Assert.assertNotEquals(ReplicaAllocation.DEFAULT_ALLOCATION, replicaAlloc); + Assert.assertFalse(replicaAlloc.isNotSet()); + Assert.assertFalse(replicaAlloc.isEmpty()); + Assert.assertEquals(5, replicaAlloc.getTotalReplicaNum()); + Assert.assertEquals("tag.location.zone2: 2, tag.location.zone1: 3", replicaAlloc.toCreateStmt()); + Assert.assertTrue(properties.isEmpty()); + + properties = Maps.newHashMap(); + properties.put("dynamic_partition." 
+ PropertyAnalyzer.PROPERTIES_REPLICATION_ALLOCATION, "tag.location.zone2: 1, tag.location.zone1: 3"); + replicaAlloc = PropertyAnalyzer.analyzeReplicaAllocation(properties, "dynamic_partition"); + Assert.assertNotEquals(ReplicaAllocation.DEFAULT_ALLOCATION, replicaAlloc); + Assert.assertFalse(replicaAlloc.isNotSet()); + Assert.assertFalse(replicaAlloc.isEmpty()); + Assert.assertEquals(4, replicaAlloc.getTotalReplicaNum()); + Assert.assertEquals("tag.location.zone2: 1, tag.location.zone1: 3", replicaAlloc.toCreateStmt()); + Assert.assertTrue(properties.isEmpty()); + } + + @Test + public void testAbnormal() { + final Map properties = Maps.newHashMap(); + properties.put(PropertyAnalyzer.PROPERTIES_REPLICATION_ALLOCATION, "3"); + ExceptionChecker.expectThrowsWithMsg(AnalysisException.class, "Invalid replication allocation property: 3", + ()->PropertyAnalyzer.analyzeReplicaAllocation(properties, "")); + + properties.clear(); + properties.put(PropertyAnalyzer.PROPERTIES_REPLICATION_ALLOCATION, "tag.location.12321:1"); + ExceptionChecker.expectThrowsWithMsg(AnalysisException.class, "Invalid tag format: location:12321", + ()->PropertyAnalyzer.analyzeReplicaAllocation(properties, "")); + } + + @Test + public void testPersist() throws IOException, AnalysisException { + MetaContext metaContext = new MetaContext(); + metaContext.setMetaVersion(FeConstants.meta_version); + metaContext.setThreadLocalInfo(); + + // 1. Write objects to file + File file = new File("./replicaInfo"); + file.createNewFile(); + DataOutputStream dos = new DataOutputStream(new FileOutputStream(file)); + + ReplicaAllocation replicaAlloc = new ReplicaAllocation(); + replicaAlloc.put(Tag.create(Tag.TYPE_LOCATION, "zone1"), (short) 3); + replicaAlloc.put(Tag.create(Tag.TYPE_LOCATION, "zone2"), (short) 2); + replicaAlloc.write(dos); + + dos.flush(); + dos.close(); + + // 2. 
Read objects from file + DataInputStream dis = new DataInputStream(new FileInputStream(file)); + ReplicaAllocation newAlloc = ReplicaAllocation.read(dis); + Assert.assertEquals(replicaAlloc, newAlloc); + + // 3. delete files + dis.close(); + file.delete(); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/TablePropertyTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/TablePropertyTest.java index fe14c4c8d5ab11..e2005f4f1e366e 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/TablePropertyTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/TablePropertyTest.java @@ -18,6 +18,9 @@ package org.apache.doris.catalog; +import org.apache.doris.common.FeMetaVersion; +import org.apache.doris.meta.MetaContext; + import org.junit.After; import org.junit.Assert; import org.junit.Test; @@ -41,6 +44,9 @@ public void tearDown() { @Test public void testNormal() throws IOException { + MetaContext metaContext = new MetaContext(); + metaContext.setMetaVersion(FeMetaVersion.VERSION_CURRENT); + metaContext.setThreadLocalInfo(); // 1. 
Write objects to file File file = new File(fileName); file.createNewFile(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java index 0507b96fa99f96..f9a1811c4d9fd8 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/TabletTest.java @@ -17,18 +17,13 @@ package org.apache.doris.catalog; -import mockit.Expectations; -import mockit.Mocked; - import org.apache.doris.catalog.Replica.ReplicaState; import org.apache.doris.common.FeConstants; import org.apache.doris.common.Pair; -import org.apache.doris.system.Backend; import org.apache.doris.thrift.TStorageMedium; import com.google.common.collect.Sets; -import org.apache.arrow.flatbuf.Bool; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -38,8 +33,9 @@ import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; -import java.util.Set; -import java.util.concurrent.atomic.AtomicInteger; + +import mockit.Expectations; +import mockit.Mocked; public class TabletTest { @@ -179,7 +175,8 @@ private final void testTabletColocateHealthStatus0(Tablet.TabletStatus exceptedT } tablet.addReplica(new Replica(replicaId++, pair.first, versionAndSuccessVersion, 0L, 0, 200000L, 3000L, ReplicaState.NORMAL, lastFailVersion, 0, versionAndSuccessVersion, 0)); } - Assert.assertEquals(tablet.getColocateHealthStatus(100L, 3, Sets.newHashSet(1L, 2L, 3L)), exceptedTabletStatus); + Assert.assertEquals(tablet.getColocateHealthStatus(100L, new ReplicaAllocation((short) 3), + Sets.newHashSet(1L, 2L, 3L)), exceptedTabletStatus); } @Test diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/TempPartitionTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/TempPartitionTest.java index 268ef36d1b68fd..541b6eb0b185c9 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/TempPartitionTest.java +++ 
b/fe/fe-core/src/test/java/org/apache/doris/catalog/TempPartitionTest.java @@ -69,7 +69,7 @@ public class TempPartitionTest { @BeforeClass public static void setup() throws Exception { FeConstants.default_scheduler_interval_millisecond = 100; - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); ctx = UtFrameUtils.createDefaultCtx(); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/clone/ColocateTableCheckerAndBalancerTest.java b/fe/fe-core/src/test/java/org/apache/doris/clone/ColocateTableCheckerAndBalancerTest.java index 3b5695affd328e..9bdc39eb694a78 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/clone/ColocateTableCheckerAndBalancerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/clone/ColocateTableCheckerAndBalancerTest.java @@ -17,18 +17,16 @@ package org.apache.doris.clone; -import mockit.Delegate; -import mockit.Expectations; -import mockit.Mocked; - import org.apache.doris.catalog.ColocateGroupSchema; import org.apache.doris.catalog.ColocateTableIndex; import org.apache.doris.catalog.ColocateTableIndex.GroupId; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.TabletInvertedIndex; import org.apache.doris.common.Config; import org.apache.doris.common.jmockit.Deencapsulation; +import org.apache.doris.resource.Tag; import org.apache.doris.system.Backend; import org.apache.doris.system.SystemInfoService; @@ -45,6 +43,10 @@ import java.util.Map; import java.util.Set; +import mockit.Delegate; +import mockit.Expectations; +import mockit.Mocked; + public class ColocateTableCheckerAndBalancerTest { private ColocateTableCheckerAndBalancer balancer = ColocateTableCheckerAndBalancer.getInstance(); @@ -89,7 +91,9 @@ private ColocateTableIndex createColocateIndex(GroupId groupId, List flatL ColocateTableIndex colocateTableIndex = new ColocateTableIndex(); int replicationNum = 3; List> 
backendsPerBucketSeq = Lists.partition(flatList, replicationNum); - colocateTableIndex.addBackendsPerBucketSeq(groupId, backendsPerBucketSeq); + Map>> backendsPerBucketSeqMap = Maps.newHashMap(); + backendsPerBucketSeqMap.put(Tag.DEFAULT_BACKEND_TAG, backendsPerBucketSeq); + colocateTableIndex.addBackendsPerBucketSeq(groupId, backendsPerBucketSeqMap); return colocateTableIndex; } @@ -134,7 +138,8 @@ public void testBalance(@Mocked SystemInfoService infoService, GroupId groupId = new GroupId(10000, 10001); List distributionCols = Lists.newArrayList(); distributionCols.add(new Column("k1", PrimitiveType.INT)); - ColocateGroupSchema groupSchema = new ColocateGroupSchema(groupId, distributionCols, 5, (short) 3); + ColocateGroupSchema groupSchema = new ColocateGroupSchema(groupId, distributionCols, 5, + ReplicaAllocation.DEFAULT_ALLOCATION); Map group2Schema = Maps.newHashMap(); group2Schema.put(groupId, groupSchema); @@ -205,7 +210,8 @@ public void testFixBalanceEndlessLoop(@Mocked SystemInfoService infoService, GroupId groupId = new GroupId(10000, 10001); List distributionCols = Lists.newArrayList(); distributionCols.add(new Column("k1", PrimitiveType.INT)); - ColocateGroupSchema groupSchema = new ColocateGroupSchema(groupId, distributionCols, 5, (short) 1); + ColocateGroupSchema groupSchema = new ColocateGroupSchema(groupId, distributionCols, 5, + new ReplicaAllocation((short) 1)); Map group2Schema = Maps.newHashMap(); group2Schema.put(groupId, groupSchema); @@ -249,7 +255,7 @@ BackendLoadStatistic delegate(Long beId) { }; GroupId groupId = new GroupId(10000, 10001); List distributionCols = Lists.newArrayList(); - ColocateGroupSchema groupSchema = new ColocateGroupSchema(groupId, distributionCols, 5, (short) 1); + ColocateGroupSchema groupSchema = new ColocateGroupSchema(groupId, distributionCols, 5, new ReplicaAllocation((short) 1)); Map group2Schema = Maps.newHashMap(); group2Schema.put(groupId, groupSchema); diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/clone/RebalanceTest.java b/fe/fe-core/src/test/java/org/apache/doris/clone/RebalanceTest.java index a6ea1e490ca5c8..0ef5a3140b97ab 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/clone/RebalanceTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/clone/RebalanceTest.java @@ -17,11 +17,6 @@ package org.apache.doris.clone; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import mockit.Delegate; -import mockit.Expectations; -import mockit.Mocked; import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DataProperty; @@ -33,6 +28,7 @@ import org.apache.doris.catalog.Partition; import org.apache.doris.catalog.RangePartitionInfo; import org.apache.doris.catalog.Replica; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.Tablet; import org.apache.doris.catalog.TabletInvertedIndex; import org.apache.doris.common.AnalysisException; @@ -50,6 +46,10 @@ import org.apache.doris.thrift.TStorageMedium; import org.apache.doris.thrift.TStorageType; import org.apache.doris.thrift.TTabletInfo; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -65,6 +65,9 @@ import java.util.stream.Collectors; import java.util.stream.LongStream; +import mockit.Delegate; +import mockit.Expectations; +import mockit.Mocked; import static com.google.common.collect.MoreCollectors.onlyElement; public class RebalanceTest { @@ -169,7 +172,8 @@ private void createPartitionsForTable(OlapTable olapTable, MaterializedIndex ind long id = 31 + idx; Partition partition = new Partition(id, "p" + idx, index, new HashDistributionInfo()); olapTable.addPartition(partition); - olapTable.getPartitionInfo().addPartition(id, new DataProperty(TStorageMedium.HDD), (short) 3, false); + 
olapTable.getPartitionInfo().addPartition(id, new DataProperty(TStorageMedium.HDD), + ReplicaAllocation.DEFAULT_ALLOCATION, false); }); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/cluster/SystemInfoServiceTest.java b/fe/fe-core/src/test/java/org/apache/doris/cluster/SystemInfoServiceTest.java index 5473264e0c4760..f26aa069b3c66c 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/cluster/SystemInfoServiceTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/cluster/SystemInfoServiceTest.java @@ -17,8 +17,6 @@ package org.apache.doris.cluster; -import mockit.Expectations; -import mockit.Mocked; import org.apache.doris.analysis.AddBackendClause; import org.apache.doris.analysis.Analyzer; import org.apache.doris.analysis.DropBackendClause; @@ -29,6 +27,7 @@ import org.apache.doris.common.AnalysisException; import org.apache.doris.common.DdlException; import org.apache.doris.common.FeConstants; +import org.apache.doris.common.UserException; import org.apache.doris.persist.EditLog; import org.apache.doris.qe.ConnectContext; import org.apache.doris.system.Backend; @@ -48,6 +47,9 @@ import java.io.FileOutputStream; import java.io.IOException; +import mockit.Expectations; +import mockit.Mocked; + public class SystemInfoServiceTest { @Mocked @@ -206,7 +208,7 @@ public void validHostAndPortTest4() throws Exception { } @Test - public void addBackendTest() throws AnalysisException { + public void addBackendTest() throws UserException { clearAllBackend(); AddBackendClause stmt = new AddBackendClause(Lists.newArrayList("192.168.0.1:1234")); stmt.analyze(analyzer); @@ -235,7 +237,7 @@ public void addBackendTest() throws AnalysisException { } @Test - public void removeBackendTest() throws AnalysisException { + public void removeBackendTest() throws UserException { clearAllBackend(); AddBackendClause stmt = new AddBackendClause(Lists.newArrayList("192.168.0.1:1234")); stmt.analyze(analyzer); diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/common/util/UnitTestUtil.java b/fe/fe-core/src/test/java/org/apache/doris/common/util/UnitTestUtil.java index b019b7509c2f4a..f0b932b4c03ca6 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/common/util/UnitTestUtil.java +++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/UnitTestUtil.java @@ -31,6 +31,7 @@ import org.apache.doris.catalog.RandomDistributionInfo; import org.apache.doris.catalog.Replica; import org.apache.doris.catalog.Replica.ReplicaState; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.SinglePartitionInfo; import org.apache.doris.catalog.Tablet; @@ -44,10 +45,10 @@ import org.apache.doris.thrift.TDisk; import org.apache.doris.thrift.TStorageMedium; import org.apache.doris.thrift.TStorageType; +import org.apache.doris.thrift.TTabletType; import com.google.common.collect.Maps; -import org.apache.doris.thrift.TTabletType; import org.junit.Assert; import java.lang.reflect.Method; @@ -110,7 +111,7 @@ public static Database createDb(long dbId, long tableId, long partitionId, long // table PartitionInfo partitionInfo = new SinglePartitionInfo(); partitionInfo.setDataProperty(partitionId, DataProperty.DEFAULT_DATA_PROPERTY); - partitionInfo.setReplicationNum(partitionId, (short) 3); + partitionInfo.setReplicaAllocation(partitionId, new ReplicaAllocation((short) 3)); partitionInfo.setIsInMemory(partitionId, false); partitionInfo.setTabletType(partitionId, TTabletType.TABLET_TYPE_DISK); OlapTable table = new OlapTable(tableId, TABLE_NAME, columns, diff --git a/fe/fe-core/src/test/java/org/apache/doris/http/DorisHttpTestCase.java b/fe/fe-core/src/test/java/org/apache/doris/http/DorisHttpTestCase.java index 94fa7f08c032e2..e4578dac959a6e 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/http/DorisHttpTestCase.java +++ b/fe/fe-core/src/test/java/org/apache/doris/http/DorisHttpTestCase.java @@ -32,6 +32,7 @@ import 
org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.RandomDistributionInfo; import org.apache.doris.catalog.Replica; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.SinglePartitionInfo; import org.apache.doris.catalog.Tablet; import org.apache.doris.catalog.TabletInvertedIndex; @@ -152,7 +153,7 @@ public static OlapTable newTable(String name) { // table PartitionInfo partitionInfo = new SinglePartitionInfo(); partitionInfo.setDataProperty(testPartitionId, DataProperty.DEFAULT_DATA_PROPERTY); - partitionInfo.setReplicationNum(testPartitionId, (short) 3); + partitionInfo.setReplicaAllocation(testPartitionId, new ReplicaAllocation((short) 3)); OlapTable table = new OlapTable(testTableId, name, columns, KeysType.AGG_KEYS, partitionInfo, distributionInfo); table.addPartition(partition); @@ -170,7 +171,7 @@ private static EsTable newEsTable(String name) { columns.add(k2); PartitionInfo partitionInfo = new SinglePartitionInfo(); partitionInfo.setDataProperty(testPartitionId + 100, DataProperty.DEFAULT_DATA_PROPERTY); - partitionInfo.setReplicationNum(testPartitionId + 100, (short) 3); + partitionInfo.setReplicaAllocation(testPartitionId + 100, ReplicaAllocation.DEFAULT_ALLOCATION); EsTable table = null; Map props = new HashMap<>(); props.put(EsTable.HOSTS, "http://node-1:8080"); diff --git a/fe/fe-core/src/test/java/org/apache/doris/load/loadv2/SparkLoadJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/load/loadv2/SparkLoadJobTest.java index 1046e5dcd6bc88..c17cb52a4dd411 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/load/loadv2/SparkLoadJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/load/loadv2/SparkLoadJobTest.java @@ -33,6 +33,7 @@ import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.RangePartitionInfo; import org.apache.doris.catalog.Replica; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.ResourceMgr; import 
org.apache.doris.catalog.SparkResource; import org.apache.doris.catalog.Table; @@ -62,13 +63,13 @@ import org.apache.doris.transaction.TransactionState; import org.apache.doris.transaction.TransactionState.LoadJobSourceType; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; - import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.File; @@ -346,7 +347,7 @@ public void testUpdateEtlStatusFinishedAndCommitTransaction( long fileSize = 6L; filePathToSize.put(filePath, fileSize); PartitionInfo partitionInfo = new RangePartitionInfo(); - partitionInfo.addPartition(partitionId, null, (short) 1, false); + partitionInfo.addPartition(partitionId, null, new ReplicaAllocation((short) 1), false); new Expectations() { { diff --git a/fe/fe-core/src/test/java/org/apache/doris/persist/BatchModifyPartitionsInfoTest.java b/fe/fe-core/src/test/java/org/apache/doris/persist/BatchModifyPartitionsInfoTest.java index 45efb08312fea2..b828fe01e79c3e 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/persist/BatchModifyPartitionsInfoTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/persist/BatchModifyPartitionsInfoTest.java @@ -18,7 +18,9 @@ package org.apache.doris.persist; import org.apache.doris.catalog.DataProperty; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.common.AnalysisException; + import com.google.common.collect.Lists; import org.junit.After; @@ -59,7 +61,7 @@ public void testSerializeBatchModifyPartitionsInfo() throws IOException, Analysi List partitionIds = Lists.newArrayList(PARTITION_ID_1, PARTITION_ID_2, PARTITION_ID_3); for (long partitionId : partitionIds) { ModifyInfos.add(new ModifyPartitionInfo(DB_ID, TB_ID, partitionId, - DataProperty.DEFAULT_DATA_PROPERTY, (short) 3, true)); + DataProperty.DEFAULT_DATA_PROPERTY, 
ReplicaAllocation.DEFAULT_ALLOCATION, true)); } BatchModifyPartitionsInfo batchModifyPartitionsInfo = new BatchModifyPartitionsInfo(ModifyInfos); diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/ColocatePlanTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/ColocatePlanTest.java index a2fd126c14ac0b..2132a88ab08784 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/planner/ColocatePlanTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/ColocatePlanTest.java @@ -25,15 +25,14 @@ import org.apache.doris.utframe.UtFrameUtils; import org.apache.commons.lang.StringUtils; - -import java.io.File; -import java.util.UUID; - import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; +import java.io.File; +import java.util.UUID; + public class ColocatePlanTest { private static final String COLOCATE_ENABLE = "colocate: true"; private static String runningDir = "fe/mocked/DemoTest/" + UUID.randomUUID().toString() + "/"; @@ -42,7 +41,7 @@ public class ColocatePlanTest { @BeforeClass public static void setUp() throws Exception { FeConstants.runningUnitTest = true; - UtFrameUtils.createMinDorisCluster(runningDir, 2); + UtFrameUtils.createDorisCluster(runningDir, 2); ctx = UtFrameUtils.createDefaultCtx(); String createDbStmtStr = "create database db1;"; CreateDbStmt createDbStmt = (CreateDbStmt) UtFrameUtils.parseAndAnalyzeStmt(createDbStmtStr, ctx); diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/DistributedPlannerTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/DistributedPlannerTest.java index 9bf0ed8d2c6a25..0d304e992c53f1 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/planner/DistributedPlannerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/DistributedPlannerTest.java @@ -52,7 +52,7 @@ public class DistributedPlannerTest { @BeforeClass public static void setUp() throws Exception { - UtFrameUtils.createMinDorisCluster(runningDir); + 
UtFrameUtils.createDorisCluster(runningDir); ctx = UtFrameUtils.createDefaultCtx(); String createDbStmtStr = "create database db1;"; CreateDbStmt createDbStmt = (CreateDbStmt) UtFrameUtils.parseAndAnalyzeStmt(createDbStmtStr, ctx); diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/MaterializedViewFunctionTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/MaterializedViewFunctionTest.java index c6ed6385ed23aa..0a8ef63eb574bc 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/planner/MaterializedViewFunctionTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/MaterializedViewFunctionTest.java @@ -56,7 +56,7 @@ public class MaterializedViewFunctionTest { public static void beforeClass() throws Exception { FeConstants.default_scheduler_interval_millisecond = 10; FeConstants.runningUnitTest = true; - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); dorisAssert = new DorisAssert(); dorisAssert.withEnableMV().withDatabase(HR_DB_NAME).useDatabase(HR_DB_NAME); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/OlapTableSinkTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/OlapTableSinkTest.java index 0e4e764c8ba0a8..544b00615737da 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/planner/OlapTableSinkTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/OlapTableSinkTest.java @@ -31,6 +31,7 @@ import org.apache.doris.catalog.PartitionType; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.RangePartitionInfo; +import org.apache.doris.catalog.ReplicaAllocation; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.SinglePartitionInfo; import org.apache.doris.common.UserException; @@ -86,7 +87,7 @@ private TupleDescriptor getTuple() { public void testSinglePartition() throws UserException { TupleDescriptor tuple = getTuple(); SinglePartitionInfo partInfo = new SinglePartitionInfo(); - 
partInfo.setReplicationNum(2, (short) 3); + partInfo.setReplicaAllocation(2, new ReplicaAllocation((short) 3)); MaterializedIndex index = new MaterializedIndex(2, MaterializedIndex.IndexState.NORMAL); HashDistributionInfo distInfo = new HashDistributionInfo( 2, Lists.newArrayList(new Column("k1", PrimitiveType.BIGINT))); diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/PlannerTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/PlannerTest.java index 135a50b46a89e2..edb7ba03097e17 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/planner/PlannerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/PlannerTest.java @@ -47,7 +47,7 @@ public void tearDown() throws Exception { @BeforeClass public static void setUp() throws Exception { - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); ctx = UtFrameUtils.createDefaultCtx(); String createDbStmtStr = "create database db1;"; CreateDbStmt createDbStmt = (CreateDbStmt) UtFrameUtils.parseAndAnalyzeStmt(createDbStmtStr, ctx); diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java index 53fede1e79fb63..99d1e6913e4751 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java @@ -64,7 +64,7 @@ public class QueryPlanTest { @BeforeClass public static void beforeClass() throws Exception { - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); // create connect context connectContext = UtFrameUtils.createDefaultCtx(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/plugin/PluginMgrTest.java b/fe/fe-core/src/test/java/org/apache/doris/plugin/PluginMgrTest.java index a87b7c65ec1dcf..e5a1885350388d 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/plugin/PluginMgrTest.java +++ 
b/fe/fe-core/src/test/java/org/apache/doris/plugin/PluginMgrTest.java @@ -17,12 +17,6 @@ package org.apache.doris.plugin; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; - -import avro.shaded.com.google.common.collect.Maps; import org.apache.doris.analysis.InstallPluginStmt; import org.apache.doris.catalog.Catalog; import org.apache.doris.common.Config; @@ -44,13 +38,19 @@ import java.nio.file.Files; import java.util.UUID; +import avro.shaded.com.google.common.collect.Maps; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + public class PluginMgrTest { private static String runningDir = "fe/mocked/PluginMgrTest/" + UUID.randomUUID().toString() + "/"; @BeforeClass public static void beforeClass() throws Exception { - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); } @AfterClass diff --git a/fe/fe-core/src/test/java/org/apache/doris/qe/AuditEventProcessorTest.java b/fe/fe-core/src/test/java/org/apache/doris/qe/AuditEventProcessorTest.java index 3d2a45ad29a4cf..0c10a80f8b04c3 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/qe/AuditEventProcessorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/qe/AuditEventProcessorTest.java @@ -20,11 +20,8 @@ import org.apache.doris.catalog.Catalog; import org.apache.doris.common.util.DigitalVersion; import org.apache.doris.plugin.AuditEvent; -import org.apache.doris.plugin.PluginInfo; -import org.apache.doris.plugin.AuditEvent.AuditEventBuilder; import org.apache.doris.plugin.AuditEvent.EventType; -import org.apache.doris.qe.AuditEventProcessor; -import org.apache.doris.qe.AuditLogBuilder; +import org.apache.doris.plugin.PluginInfo; import org.apache.doris.utframe.UtFrameUtils; import org.junit.AfterClass; @@ 
-42,7 +39,7 @@ public class AuditEventProcessorTest { @BeforeClass public static void beforeClass() throws Exception { - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); } @AfterClass diff --git a/fe/fe-core/src/test/java/org/apache/doris/qe/MultiLoadMgrTest.java b/fe/fe-core/src/test/java/org/apache/doris/qe/MultiLoadMgrTest.java index a65788f654f8a0..34d56c2b8d1235 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/qe/MultiLoadMgrTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/qe/MultiLoadMgrTest.java @@ -20,7 +20,9 @@ import org.apache.doris.backup.CatalogMocker; import org.apache.doris.catalog.Catalog; import org.apache.doris.common.DdlException; +import org.apache.doris.resource.Tag; import org.apache.doris.system.SystemInfoService; +import org.apache.doris.thrift.TStorageMedium; import com.google.common.collect.Lists; @@ -32,8 +34,6 @@ import mockit.Delegate; import mockit.Expectations; -import mockit.Mock; -import mockit.MockUp; import mockit.Mocked; @@ -62,11 +62,13 @@ public void setUp() { }; new Expectations() { { - systemInfoService.seqChooseBackendIds(anyInt, anyBoolean, anyBoolean, anyString); + systemInfoService.seqChooseBackendIdsByStorageMediumAndTag(anyInt, anyBoolean, anyBoolean, anyString, + (TStorageMedium) any, (Tag) any); minTimes = 0; result = new Delegate() { - public synchronized List seqChooseBackendIds(int backendNum, boolean needAlive, - boolean isCreate, String clusterName) { + public synchronized List seqChooseBackendIdsByStorageMediumAndTag(int backendNum, boolean needAlive, + boolean isCreate, String clusterName, TStorageMedium medium, + Tag tag) { List beIds = Lists.newArrayList(); beIds.add(CatalogMocker.BACKEND1_ID); beIds.add(CatalogMocker.BACKEND2_ID); @@ -77,6 +79,7 @@ public synchronized List seqChooseBackendIds(int backendNum, boolean needA } }; } + @Test public void testStartNormal() throws DdlException { MultiLoadMgr mgr = new MultiLoadMgr(); diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/qe/VariableMgrTest.java b/fe/fe-core/src/test/java/org/apache/doris/qe/VariableMgrTest.java index a053c382e00d2b..f0b3e266300f6c 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/qe/VariableMgrTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/qe/VariableMgrTest.java @@ -17,9 +17,6 @@ package org.apache.doris.qe; -import mockit.Expectations; -import mockit.Mocked; - import org.apache.doris.analysis.IntLiteral; import org.apache.doris.analysis.SetType; import org.apache.doris.analysis.SetVar; @@ -41,6 +38,9 @@ import java.util.List; +import mockit.Expectations; +import mockit.Mocked; + public class VariableMgrTest { private static final Logger LOG = LoggerFactory.getLogger(VariableMgrTest.class); @Mocked diff --git a/fe/fe-core/src/test/java/org/apache/doris/rewrite/ExtractCommonFactorsRuleFunctionTest.java b/fe/fe-core/src/test/java/org/apache/doris/rewrite/ExtractCommonFactorsRuleFunctionTest.java index e1e7838c6ade20..0e0036cb14b0df 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/rewrite/ExtractCommonFactorsRuleFunctionTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/rewrite/ExtractCommonFactorsRuleFunctionTest.java @@ -23,14 +23,13 @@ import org.apache.doris.utframe.UtFrameUtils; import org.apache.commons.lang3.StringUtils; - -import java.util.UUID; - import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; +import java.util.UUID; + public class ExtractCommonFactorsRuleFunctionTest { private static String baseDir = "fe"; private static String runningDir = baseDir + "/mocked/ExtractCommonFactorsRuleFunctionTest/" @@ -44,7 +43,7 @@ public class ExtractCommonFactorsRuleFunctionTest { public static void beforeClass() throws Exception { FeConstants.default_scheduler_interval_millisecond = 10; FeConstants.runningUnitTest = true; - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); dorisAssert = new 
DorisAssert(); dorisAssert.withDatabase(DB_NAME).useDatabase(DB_NAME); String createTableSQL = "create table " + DB_NAME + "." + TABLE_NAME_1 diff --git a/fe/fe-core/src/test/java/org/apache/doris/utframe/AnotherDemoTest.java b/fe/fe-core/src/test/java/org/apache/doris/utframe/AnotherDemoTest.java index a84a2e53b70eb1..ab40979b73b13c 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/utframe/AnotherDemoTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/utframe/AnotherDemoTest.java @@ -17,44 +17,29 @@ package org.apache.doris.utframe; -import com.google.common.collect.ImmutableMap; import org.apache.doris.analysis.CreateDbStmt; import org.apache.doris.analysis.CreateTableStmt; import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Database; -import org.apache.doris.catalog.DiskInfo; import org.apache.doris.catalog.OlapTable; import org.apache.doris.common.DdlException; import org.apache.doris.common.FeConstants; -import org.apache.doris.common.Pair; import org.apache.doris.planner.OlapScanNode; import org.apache.doris.planner.PlanFragment; import org.apache.doris.planner.Planner; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.StmtExecutor; -import org.apache.doris.system.Backend; -import org.apache.doris.system.SystemInfoService; -import org.apache.doris.thrift.TNetworkAddress; -import org.apache.doris.utframe.MockedBackendFactory.DefaultBeThriftServiceImpl; -import org.apache.doris.utframe.MockedBackendFactory.DefaultHeartbeatServiceImpl; -import org.apache.doris.utframe.MockedBackendFactory.DefaultPBackendServiceImpl; import org.apache.doris.utframe.MockedFrontend.EnvVarNotSetException; import org.apache.doris.utframe.MockedFrontend.FeStartException; import org.apache.doris.utframe.MockedFrontend.NotInitException; -import com.google.common.base.Strings; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; - import org.junit.AfterClass; import org.junit.Assert; import 
org.junit.BeforeClass; import org.junit.Test; import java.io.IOException; -import java.nio.file.Files; import java.util.List; -import java.util.Map; import java.util.UUID; /* @@ -82,7 +67,7 @@ public class AnotherDemoTest { public static void beforeClass() throws EnvVarNotSetException, IOException, FeStartException, NotInitException, DdlException, InterruptedException { FeConstants.default_scheduler_interval_millisecond = 10; - UtFrameUtils.createMinDorisCluster(runningDir, 1); + UtFrameUtils.createDorisCluster(runningDir, 1); } @AfterClass diff --git a/fe/fe-core/src/test/java/org/apache/doris/utframe/DemoTest.java b/fe/fe-core/src/test/java/org/apache/doris/utframe/DemoTest.java index 906d6c1f8222cb..e3aeb20e110068 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/utframe/DemoTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/utframe/DemoTest.java @@ -65,7 +65,7 @@ public class DemoTest { public static void beforeClass() throws EnvVarNotSetException, IOException, FeStartException, NotInitException, DdlException, InterruptedException { FeConstants.default_scheduler_interval_millisecond = 10; - UtFrameUtils.createMinDorisCluster(runningDir); + UtFrameUtils.createDorisCluster(runningDir); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/utframe/UtFrameUtils.java b/fe/fe-core/src/test/java/org/apache/doris/utframe/UtFrameUtils.java index 8b416dc9638592..3f2ffcac46e4d1 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/utframe/UtFrameUtils.java +++ b/fe/fe-core/src/test/java/org/apache/doris/utframe/UtFrameUtils.java @@ -150,12 +150,12 @@ public static int startFEServer(String runningDir) throws EnvVarNotSetException, return fe_rpc_port; } - public static void createMinDorisCluster(String runningDir) throws InterruptedException, NotInitException, + public static void createDorisCluster(String runningDir) throws InterruptedException, NotInitException, IOException, DdlException, EnvVarNotSetException, FeStartException { - 
createMinDorisCluster(runningDir, 1); + createDorisCluster(runningDir, 1); } - public static void createMinDorisCluster(String runningDir, int backendNum) throws EnvVarNotSetException, IOException, + public static void createDorisCluster(String runningDir, int backendNum) throws EnvVarNotSetException, IOException, FeStartException, NotInitException, DdlException, InterruptedException { int fe_rpc_port = startFEServer(runningDir); for (int i = 0; i < backendNum; i++) {