InternalSchemaInitializer.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.catalog;
import org.apache.doris.analysis.AlterClause;
import org.apache.doris.analysis.AlterTableStmt;
import org.apache.doris.analysis.ColumnDef;
import org.apache.doris.analysis.ColumnNullableType;
import org.apache.doris.analysis.CreateTableStmt;
import org.apache.doris.analysis.DbName;
import org.apache.doris.analysis.DistributionDesc;
import org.apache.doris.analysis.DropTableStmt;
import org.apache.doris.analysis.HashDistributionDesc;
import org.apache.doris.analysis.KeysDesc;
import org.apache.doris.analysis.ModifyColumnClause;
import org.apache.doris.analysis.ModifyPartitionClause;
import org.apache.doris.analysis.PartitionDesc;
import org.apache.doris.analysis.RangePartitionDesc;
import org.apache.doris.analysis.TableName;
import org.apache.doris.analysis.TypeDef;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.PropertyAnalyzer;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.ha.FrontendNodeType;
import org.apache.doris.nereids.trees.plans.commands.AlterTableCommand;
import org.apache.doris.nereids.trees.plans.commands.CreateDatabaseCommand;
import org.apache.doris.nereids.trees.plans.commands.info.AddColumnsOp;
import org.apache.doris.nereids.trees.plans.commands.info.AlterTableOp;
import org.apache.doris.nereids.trees.plans.commands.info.ReorderColumnsOp;
import org.apache.doris.nereids.trees.plans.commands.info.TableNameInfo;
import org.apache.doris.plugin.audit.AuditLoader;
import org.apache.doris.statistics.StatisticConstants;
import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
public class InternalSchemaInitializer extends Thread {
private static final Logger LOG = LogManager.getLogger(InternalSchemaInitializer.class);
public InternalSchemaInitializer() {
super("InternalSchemaInitializer");
}
public void run() {
if (!FeConstants.enableInternalSchemaDb) {
return;
}
modifyColumnStatsTblSchema();
while (!created()) {
try {
FrontendNodeType feType = Env.getCurrentEnv().getFeType();
if (feType.equals(FrontendNodeType.INIT) || feType.equals(FrontendNodeType.UNKNOWN)) {
LOG.warn("FE is not ready");
Thread.sleep(Config.resource_not_ready_sleep_seconds * 1000);
continue;
}
Thread.currentThread()
.join(Config.resource_not_ready_sleep_seconds * 1000L);
createDb();
createTbl();
} catch (Throwable e) {
LOG.warn("Statistics storage initiated failed, will try again later", e);
try {
Thread.sleep(Config.resource_not_ready_sleep_seconds * 1000);
} catch (InterruptedException ex) {
LOG.info("Sleep interrupted. {}", ex.getMessage());
}
}
}
LOG.info("Internal schema is initialized");
Optional<Database> op
= Env.getCurrentEnv().getInternalCatalog().getDb(StatisticConstants.DB_NAME);
if (!op.isPresent()) {
LOG.warn("Internal DB got deleted!");
return;
}
Database database = op.get();
modifyTblReplicaCount(database, StatisticConstants.TABLE_STATISTIC_TBL_NAME);
modifyTblReplicaCount(database, StatisticConstants.PARTITION_STATISTIC_TBL_NAME);
modifyTblReplicaCount(database, AuditLoader.AUDIT_LOG_TABLE);
}
public void modifyColumnStatsTblSchema() {
while (true) {
try {
Table table = findStatsTable();
if (table == null) {
break;
}
table.writeLock();
try {
doSchemaChange(table);
break;
} finally {
table.writeUnlock();
}
} catch (Throwable t) {
LOG.warn("Failed to do schema change for stats table. Try again later.", t);
}
try {
Thread.sleep(Config.resource_not_ready_sleep_seconds * 1000);
} catch (InterruptedException t) {
// IGNORE
}
}
}
public Table findStatsTable() {
// 1. check database exist
Optional<Database> dbOpt = Env.getCurrentEnv().getInternalCatalog().getDb(FeConstants.INTERNAL_DB_NAME);
if (!dbOpt.isPresent()) {
return null;
}
// 2. check table exist
Database db = dbOpt.get();
Optional<Table> tableOp = db.getTable(StatisticConstants.TABLE_STATISTIC_TBL_NAME);
return tableOp.orElse(null);
}
public void doSchemaChange(Table table) throws UserException {
List<AlterClause> clauses = getModifyColumnClauses(table);
if (!clauses.isEmpty()) {
TableName tableName = new TableName(InternalCatalog.INTERNAL_CATALOG_NAME,
StatisticConstants.DB_NAME, table.getName());
AlterTableStmt alter = new AlterTableStmt(tableName, clauses);
Env.getCurrentEnv().alterTable(alter);
}
}
public List<AlterClause> getModifyColumnClauses(Table table) {
List<AlterClause> clauses = Lists.newArrayList();
for (Column col : table.fullSchema) {
if (col.isKey() && col.getType().isVarchar()
&& col.getType().getLength() < StatisticConstants.MAX_NAME_LEN) {
TypeDef typeDef = new TypeDef(
ScalarType.createVarchar(StatisticConstants.MAX_NAME_LEN), col.isAllowNull());
ColumnNullableType nullableType =
col.isAllowNull() ? ColumnNullableType.NULLABLE : ColumnNullableType.NOT_NULLABLE;
ColumnDef columnDef = new ColumnDef(col.getName(), typeDef, true, null,
nullableType, -1, new ColumnDef.DefaultValue(false, null), "");
try {
columnDef.analyze(true);
} catch (AnalysisException e) {
LOG.warn("Failed to analyze column {}", col.getName());
continue;
}
ModifyColumnClause clause = new ModifyColumnClause(columnDef, null, null, Maps.newHashMap());
clause.setColumn(columnDef.toColumn());
clauses.add(clause);
}
}
return clauses;
}
@VisibleForTesting
public static void modifyTblReplicaCount(Database database, String tblName) {
if (Config.isCloudMode()
|| Config.min_replication_num_per_tablet >= StatisticConstants.STATISTIC_INTERNAL_TABLE_REPLICA_NUM
|| Config.max_replication_num_per_tablet < StatisticConstants.STATISTIC_INTERNAL_TABLE_REPLICA_NUM) {
return;
}
while (true) {
int backendNum = Env.getCurrentSystemInfo().getStorageBackendNumFromDiffHosts(true);
if (FeConstants.runningUnitTest) {
backendNum = Env.getCurrentSystemInfo().getAllBackendIds().size();
}
if (backendNum >= StatisticConstants.STATISTIC_INTERNAL_TABLE_REPLICA_NUM) {
try {
OlapTable tbl = (OlapTable) StatisticsUtil.findTable(InternalCatalog.INTERNAL_CATALOG_NAME,
StatisticConstants.DB_NAME, tblName);
tbl.writeLock();
try {
if (tbl.getTableProperty().getReplicaAllocation().getTotalReplicaNum()
>= StatisticConstants.STATISTIC_INTERNAL_TABLE_REPLICA_NUM) {
return;
}
if (!tbl.isPartitionedTable()) {
Map<String, String> props = new HashMap<>();
props.put(PropertyAnalyzer.PROPERTIES_REPLICATION_ALLOCATION, "tag.location.default: "
+ StatisticConstants.STATISTIC_INTERNAL_TABLE_REPLICA_NUM);
Env.getCurrentEnv().modifyTableReplicaAllocation(database, tbl, props);
} else {
TableName tableName = new TableName(InternalCatalog.INTERNAL_CATALOG_NAME,
StatisticConstants.DB_NAME, tbl.getName());
// 1. modify table's default replica num
Map<String, String> props = new HashMap<>();
props.put("default." + PropertyAnalyzer.PROPERTIES_REPLICATION_NUM,
"" + StatisticConstants.STATISTIC_INTERNAL_TABLE_REPLICA_NUM);
Env.getCurrentEnv().modifyTableDefaultReplicaAllocation(database, tbl, props);
// 2. modify each partition's replica num
List<AlterClause> clauses = Lists.newArrayList();
props.clear();
props.put(PropertyAnalyzer.PROPERTIES_REPLICATION_NUM,
"" + StatisticConstants.STATISTIC_INTERNAL_TABLE_REPLICA_NUM);
clauses.add(ModifyPartitionClause.createStarClause(props, false));
AlterTableStmt alter = new AlterTableStmt(tableName, clauses);
Env.getCurrentEnv().alterTable(alter);
}
} finally {
tbl.writeUnlock();
}
break;
} catch (Throwable t) {
LOG.warn("Failed to scale replica of stats tbl:{} to 3", tblName, t);
}
}
try {
Thread.sleep(Config.resource_not_ready_sleep_seconds * 1000);
} catch (InterruptedException t) {
// IGNORE
}
}
}
@VisibleForTesting
public static void createTbl() throws UserException {
// statistics
Env.getCurrentEnv().getInternalCatalog().createTable(
buildStatisticsTblStmt(StatisticConstants.TABLE_STATISTIC_TBL_NAME,
Lists.newArrayList("id", "catalog_id", "db_id", "tbl_id", "idx_id", "col_id", "part_id")));
Env.getCurrentEnv().getInternalCatalog().createTable(
buildStatisticsTblStmt(StatisticConstants.PARTITION_STATISTIC_TBL_NAME,
Lists.newArrayList("catalog_id", "db_id", "tbl_id", "idx_id", "part_name", "part_id",
"col_id")));
// audit table
Env.getCurrentEnv().getInternalCatalog().createTable(buildAuditTblStmt());
}
@VisibleForTesting
public static void createDb() {
CreateDatabaseCommand command = new CreateDatabaseCommand(true,
new DbName("internal", FeConstants.INTERNAL_DB_NAME), null);
try {
Env.getCurrentEnv().createDb(command);
} catch (DdlException e) {
LOG.warn("Failed to create database: {}, will try again later",
FeConstants.INTERNAL_DB_NAME, e);
}
}
private static CreateTableStmt buildStatisticsTblStmt(String statsTableName, List<String> uniqueKeys)
throws UserException {
TableName tableName = new TableName("", FeConstants.INTERNAL_DB_NAME, statsTableName);
String engineName = "olap";
KeysDesc keysDesc = new KeysDesc(KeysType.UNIQUE_KEYS, uniqueKeys);
DistributionDesc distributionDesc = new HashDistributionDesc(
StatisticConstants.STATISTIC_TABLE_BUCKET_COUNT, uniqueKeys);
Map<String, String> properties = new HashMap<String, String>() {
{
put(PropertyAnalyzer.PROPERTIES_REPLICATION_NUM, String.valueOf(
Math.max(1, Config.min_replication_num_per_tablet)));
}
};
PropertyAnalyzer.getInstance().rewriteForceProperties(properties);
CreateTableStmt createTableStmt = new CreateTableStmt(true, false,
tableName, InternalSchema.getCopiedSchema(statsTableName),
engineName, keysDesc, null, distributionDesc,
properties, null, "Doris internal statistics table, DO NOT MODIFY IT", null);
StatisticsUtil.analyze(createTableStmt);
return createTableStmt;
}
private static CreateTableStmt buildAuditTblStmt() throws UserException {
TableName tableName = new TableName("",
FeConstants.INTERNAL_DB_NAME, AuditLoader.AUDIT_LOG_TABLE);
String engineName = "olap";
ArrayList<String> dupKeys = Lists.newArrayList("query_id", "time", "client_ip");
KeysDesc keysDesc = new KeysDesc(KeysType.DUP_KEYS, dupKeys);
// partition
PartitionDesc partitionDesc = new RangePartitionDesc(Lists.newArrayList("time"), Lists.newArrayList());
// distribution
int bucketNum = 2;
DistributionDesc distributionDesc = new HashDistributionDesc(bucketNum, Lists.newArrayList("query_id"));
Map<String, String> properties = new HashMap<String, String>() {
{
put("dynamic_partition.time_unit", "DAY");
put("dynamic_partition.start", "-30");
put("dynamic_partition.end", "3");
put("dynamic_partition.prefix", "p");
put("dynamic_partition.buckets", String.valueOf(bucketNum));
put("dynamic_partition.enable", "true");
put("replication_num", String.valueOf(Math.max(1,
Config.min_replication_num_per_tablet)));
}
};
PropertyAnalyzer.getInstance().rewriteForceProperties(properties);
CreateTableStmt createTableStmt = new CreateTableStmt(true, false,
tableName, InternalSchema.getCopiedSchema(AuditLoader.AUDIT_LOG_TABLE),
engineName, keysDesc, partitionDesc, distributionDesc,
properties, null, "Doris internal audit table, DO NOT MODIFY IT", null);
StatisticsUtil.analyze(createTableStmt);
return createTableStmt;
}
private boolean created() {
// 1. check database exist
Optional<Database> optionalDatabase =
Env.getCurrentEnv().getInternalCatalog()
.getDb(FeConstants.INTERNAL_DB_NAME);
if (!optionalDatabase.isPresent()) {
return false;
}
Database db = optionalDatabase.get();
Optional<Table> optionalTable = db.getTable(StatisticConstants.TABLE_STATISTIC_TBL_NAME);
if (!optionalTable.isPresent()) {
return false;
}
// 2. check statistic tables
Table statsTbl = optionalTable.get();
Optional<Column> optionalColumn =
statsTbl.fullSchema.stream().filter(c -> c.getName().equals("count")).findFirst();
if (!optionalColumn.isPresent() || !optionalColumn.get().isAllowNull()) {
try {
Env.getCurrentEnv().getInternalCatalog()
.dropTable(new DropTableStmt(true, new TableName(null,
StatisticConstants.DB_NAME, StatisticConstants.TABLE_STATISTIC_TBL_NAME), true));
} catch (Exception e) {
LOG.warn("Failed to drop outdated table", e);
}
return false;
}
optionalTable = db.getTable(StatisticConstants.PARTITION_STATISTIC_TBL_NAME);
if (!optionalTable.isPresent()) {
return false;
}
// 3. check audit table
optionalTable = db.getTable(AuditLoader.AUDIT_LOG_TABLE);
if (!optionalTable.isPresent()) {
return false;
}
// 4. check and update audit table schema
OlapTable auditTable = (OlapTable) optionalTable.get();
// 5. check if we need to add new columns
return alterAuditSchemaIfNeeded(auditTable);
}
private boolean alterAuditSchemaIfNeeded(OlapTable auditTable) {
List<ColumnDef> expectedSchema = InternalSchema.AUDIT_SCHEMA;
List<String> expectedColumnNames = expectedSchema.stream()
.map(ColumnDef::getName)
.map(String::toLowerCase)
.collect(Collectors.toList());
List<Column> currentColumns = auditTable.getBaseSchema();
List<String> currentColumnNames = currentColumns.stream()
.map(Column::getName)
.map(String::toLowerCase)
.collect(Collectors.toList());
// check if all expected columns are exists and in the right order
if (currentColumnNames.size() >= expectedColumnNames.size()
&& expectedColumnNames.equals(currentColumnNames.subList(0, expectedColumnNames.size()))) {
return true;
}
List<AlterTableOp> alterClauses = Lists.newArrayList();
// add new columns
List<Column> addColumns = Lists.newArrayList();
for (ColumnDef expected : expectedSchema) {
if (!currentColumnNames.contains(expected.getName().toLowerCase())) {
addColumns.add(new Column(expected.getName(), expected.getType(), expected.isAllowNull()));
}
}
if (!addColumns.isEmpty()) {
AddColumnsOp addColumnsOp = new AddColumnsOp(null, Maps.newHashMap(), addColumns);
alterClauses.add(addColumnsOp);
}
// reorder columns
List<String> removedColumnNames = Lists.newArrayList(currentColumnNames);
removedColumnNames.removeAll(expectedColumnNames);
List<String> newColumnOrders = Lists.newArrayList(expectedColumnNames);
newColumnOrders.addAll(removedColumnNames);
ReorderColumnsOp reorderColumnsOp = new ReorderColumnsOp(newColumnOrders, null, Maps.newHashMap());
alterClauses.add(reorderColumnsOp);
TableNameInfo auditTableName = new TableNameInfo(InternalCatalog.INTERNAL_CATALOG_NAME,
FeConstants.INTERNAL_DB_NAME, AuditLoader.AUDIT_LOG_TABLE);
AlterTableCommand alterTableCommand = new AlterTableCommand(auditTableName, alterClauses);
try {
Env.getCurrentEnv().alterTable(alterTableCommand);
} catch (Exception e) {
LOG.warn("Failed to alter audit table schema", e);
return false;
}
return true;
}
}