InternalSchemaInitializer.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.catalog;
import org.apache.doris.analysis.AlterClause;
import org.apache.doris.analysis.AlterTableStmt;
import org.apache.doris.analysis.ColumnDef;
import org.apache.doris.analysis.ColumnNullableType;
import org.apache.doris.analysis.ColumnPosition;
import org.apache.doris.analysis.CreateTableStmt;
import org.apache.doris.analysis.DbName;
import org.apache.doris.analysis.DistributionDesc;
import org.apache.doris.analysis.DropTableStmt;
import org.apache.doris.analysis.HashDistributionDesc;
import org.apache.doris.analysis.KeysDesc;
import org.apache.doris.analysis.ModifyColumnClause;
import org.apache.doris.analysis.ModifyPartitionClause;
import org.apache.doris.analysis.PartitionDesc;
import org.apache.doris.analysis.RangePartitionDesc;
import org.apache.doris.analysis.TableName;
import org.apache.doris.analysis.TypeDef;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.PropertyAnalyzer;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.ha.FrontendNodeType;
import org.apache.doris.nereids.trees.plans.commands.CreateDatabaseCommand;
import org.apache.doris.plugin.audit.AuditLoader;
import org.apache.doris.statistics.StatisticConstants;
import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
public class InternalSchemaInitializer extends Thread {
private static final Logger LOG = LogManager.getLogger(InternalSchemaInitializer.class);
public InternalSchemaInitializer() {
super("InternalSchemaInitializer");
}
public void run() {
if (!FeConstants.enableInternalSchemaDb) {
return;
}
modifyColumnStatsTblSchema();
while (!created()) {
try {
FrontendNodeType feType = Env.getCurrentEnv().getFeType();
if (feType.equals(FrontendNodeType.INIT) || feType.equals(FrontendNodeType.UNKNOWN)) {
LOG.warn("FE is not ready");
Thread.sleep(Config.resource_not_ready_sleep_seconds * 1000);
continue;
}
Thread.currentThread()
.join(Config.resource_not_ready_sleep_seconds * 1000L);
createDb();
createTbl();
} catch (Throwable e) {
LOG.warn("Statistics storage initiated failed, will try again later", e);
try {
Thread.sleep(Config.resource_not_ready_sleep_seconds * 1000);
} catch (InterruptedException ex) {
LOG.info("Sleep interrupted. {}", ex.getMessage());
}
}
}
LOG.info("Internal schema is initialized");
Optional<Database> op
= Env.getCurrentEnv().getInternalCatalog().getDb(StatisticConstants.DB_NAME);
if (!op.isPresent()) {
LOG.warn("Internal DB got deleted!");
return;
}
Database database = op.get();
modifyTblReplicaCount(database, StatisticConstants.TABLE_STATISTIC_TBL_NAME);
modifyTblReplicaCount(database, StatisticConstants.PARTITION_STATISTIC_TBL_NAME);
modifyTblReplicaCount(database, AuditLoader.AUDIT_LOG_TABLE);
}
public void modifyColumnStatsTblSchema() {
while (true) {
try {
Table table = findStatsTable();
if (table == null) {
break;
}
table.writeLock();
try {
doSchemaChange(table);
break;
} finally {
table.writeUnlock();
}
} catch (Throwable t) {
LOG.warn("Failed to do schema change for stats table. Try again later.", t);
}
try {
Thread.sleep(Config.resource_not_ready_sleep_seconds * 1000);
} catch (InterruptedException t) {
// IGNORE
}
}
}
public Table findStatsTable() {
// 1. check database exist
Optional<Database> dbOpt = Env.getCurrentEnv().getInternalCatalog().getDb(FeConstants.INTERNAL_DB_NAME);
if (!dbOpt.isPresent()) {
return null;
}
// 2. check table exist
Database db = dbOpt.get();
Optional<Table> tableOp = db.getTable(StatisticConstants.TABLE_STATISTIC_TBL_NAME);
return tableOp.orElse(null);
}
public void doSchemaChange(Table table) throws UserException {
List<AlterClause> clauses = getModifyColumnClauses(table);
if (!clauses.isEmpty()) {
TableName tableName = new TableName(InternalCatalog.INTERNAL_CATALOG_NAME,
StatisticConstants.DB_NAME, table.getName());
AlterTableStmt alter = new AlterTableStmt(tableName, clauses);
Env.getCurrentEnv().alterTable(alter);
}
}
public List<AlterClause> getModifyColumnClauses(Table table) {
List<AlterClause> clauses = Lists.newArrayList();
for (Column col : table.fullSchema) {
if (col.isKey() && col.getType().isVarchar()
&& col.getType().getLength() < StatisticConstants.MAX_NAME_LEN) {
TypeDef typeDef = new TypeDef(
ScalarType.createVarchar(StatisticConstants.MAX_NAME_LEN), col.isAllowNull());
ColumnNullableType nullableType =
col.isAllowNull() ? ColumnNullableType.NULLABLE : ColumnNullableType.NOT_NULLABLE;
ColumnDef columnDef = new ColumnDef(col.getName(), typeDef, true, null,
nullableType, -1, new ColumnDef.DefaultValue(false, null), "");
try {
columnDef.analyze(true);
} catch (AnalysisException e) {
LOG.warn("Failed to analyze column {}", col.getName());
continue;
}
ModifyColumnClause clause = new ModifyColumnClause(columnDef, null, null, Maps.newHashMap());
clause.setColumn(columnDef.toColumn());
clauses.add(clause);
}
}
return clauses;
}
@VisibleForTesting
public static void modifyTblReplicaCount(Database database, String tblName) {
if (Config.isCloudMode()
|| Config.min_replication_num_per_tablet >= StatisticConstants.STATISTIC_INTERNAL_TABLE_REPLICA_NUM
|| Config.max_replication_num_per_tablet < StatisticConstants.STATISTIC_INTERNAL_TABLE_REPLICA_NUM) {
return;
}
while (true) {
int backendNum = Env.getCurrentSystemInfo().getStorageBackendNumFromDiffHosts(true);
if (FeConstants.runningUnitTest) {
backendNum = Env.getCurrentSystemInfo().getAllBackendIds().size();
}
if (backendNum >= StatisticConstants.STATISTIC_INTERNAL_TABLE_REPLICA_NUM) {
try {
OlapTable tbl = (OlapTable) StatisticsUtil.findTable(InternalCatalog.INTERNAL_CATALOG_NAME,
StatisticConstants.DB_NAME, tblName);
tbl.writeLock();
try {
if (tbl.getTableProperty().getReplicaAllocation().getTotalReplicaNum()
>= StatisticConstants.STATISTIC_INTERNAL_TABLE_REPLICA_NUM) {
return;
}
if (!tbl.isPartitionedTable()) {
Map<String, String> props = new HashMap<>();
props.put(PropertyAnalyzer.PROPERTIES_REPLICATION_ALLOCATION, "tag.location.default: "
+ StatisticConstants.STATISTIC_INTERNAL_TABLE_REPLICA_NUM);
Env.getCurrentEnv().modifyTableReplicaAllocation(database, tbl, props);
} else {
TableName tableName = new TableName(InternalCatalog.INTERNAL_CATALOG_NAME,
StatisticConstants.DB_NAME, tbl.getName());
// 1. modify table's default replica num
Map<String, String> props = new HashMap<>();
props.put("default." + PropertyAnalyzer.PROPERTIES_REPLICATION_NUM,
"" + StatisticConstants.STATISTIC_INTERNAL_TABLE_REPLICA_NUM);
Env.getCurrentEnv().modifyTableDefaultReplicaAllocation(database, tbl, props);
// 2. modify each partition's replica num
List<AlterClause> clauses = Lists.newArrayList();
props.clear();
props.put(PropertyAnalyzer.PROPERTIES_REPLICATION_NUM,
"" + StatisticConstants.STATISTIC_INTERNAL_TABLE_REPLICA_NUM);
clauses.add(ModifyPartitionClause.createStarClause(props, false));
AlterTableStmt alter = new AlterTableStmt(tableName, clauses);
Env.getCurrentEnv().alterTable(alter);
}
} finally {
tbl.writeUnlock();
}
break;
} catch (Throwable t) {
LOG.warn("Failed to scale replica of stats tbl:{} to 3", tblName, t);
}
}
try {
Thread.sleep(Config.resource_not_ready_sleep_seconds * 1000);
} catch (InterruptedException t) {
// IGNORE
}
}
}
@VisibleForTesting
public static void createTbl() throws UserException {
// statistics
Env.getCurrentEnv().getInternalCatalog().createTable(
buildStatisticsTblStmt(StatisticConstants.TABLE_STATISTIC_TBL_NAME,
Lists.newArrayList("id", "catalog_id", "db_id", "tbl_id", "idx_id", "col_id", "part_id")));
Env.getCurrentEnv().getInternalCatalog().createTable(
buildStatisticsTblStmt(StatisticConstants.PARTITION_STATISTIC_TBL_NAME,
Lists.newArrayList("catalog_id", "db_id", "tbl_id", "idx_id", "part_name", "part_id",
"col_id")));
// audit table
Env.getCurrentEnv().getInternalCatalog().createTable(buildAuditTblStmt());
}
@VisibleForTesting
public static void createDb() {
CreateDatabaseCommand command = new CreateDatabaseCommand(true,
new DbName("internal", FeConstants.INTERNAL_DB_NAME), null);
try {
Env.getCurrentEnv().createDb(command);
} catch (DdlException e) {
LOG.warn("Failed to create database: {}, will try again later",
FeConstants.INTERNAL_DB_NAME, e);
}
}
private static CreateTableStmt buildStatisticsTblStmt(String statsTableName, List<String> uniqueKeys)
throws UserException {
TableName tableName = new TableName("", FeConstants.INTERNAL_DB_NAME, statsTableName);
String engineName = "olap";
KeysDesc keysDesc = new KeysDesc(KeysType.UNIQUE_KEYS, uniqueKeys);
DistributionDesc distributionDesc = new HashDistributionDesc(
StatisticConstants.STATISTIC_TABLE_BUCKET_COUNT, uniqueKeys);
Map<String, String> properties = new HashMap<String, String>() {
{
put(PropertyAnalyzer.PROPERTIES_REPLICATION_NUM, String.valueOf(
Math.max(1, Config.min_replication_num_per_tablet)));
}
};
PropertyAnalyzer.getInstance().rewriteForceProperties(properties);
CreateTableStmt createTableStmt = new CreateTableStmt(true, false,
tableName, InternalSchema.getCopiedSchema(statsTableName),
engineName, keysDesc, null, distributionDesc,
properties, null, "Doris internal statistics table, DO NOT MODIFY IT", null);
StatisticsUtil.analyze(createTableStmt);
return createTableStmt;
}
private static CreateTableStmt buildAuditTblStmt() throws UserException {
TableName tableName = new TableName("",
FeConstants.INTERNAL_DB_NAME, AuditLoader.AUDIT_LOG_TABLE);
String engineName = "olap";
ArrayList<String> dupKeys = Lists.newArrayList("query_id", "time", "client_ip");
KeysDesc keysDesc = new KeysDesc(KeysType.DUP_KEYS, dupKeys);
// partition
PartitionDesc partitionDesc = new RangePartitionDesc(Lists.newArrayList("time"), Lists.newArrayList());
// distribution
int bucketNum = 2;
DistributionDesc distributionDesc = new HashDistributionDesc(bucketNum, Lists.newArrayList("query_id"));
Map<String, String> properties = new HashMap<String, String>() {
{
put("dynamic_partition.time_unit", "DAY");
put("dynamic_partition.start", "-30");
put("dynamic_partition.end", "3");
put("dynamic_partition.prefix", "p");
put("dynamic_partition.buckets", String.valueOf(bucketNum));
put("dynamic_partition.enable", "true");
put("replication_num", String.valueOf(Math.max(1,
Config.min_replication_num_per_tablet)));
}
};
PropertyAnalyzer.getInstance().rewriteForceProperties(properties);
CreateTableStmt createTableStmt = new CreateTableStmt(true, false,
tableName, InternalSchema.getCopiedSchema(AuditLoader.AUDIT_LOG_TABLE),
engineName, keysDesc, partitionDesc, distributionDesc,
properties, null, "Doris internal audit table, DO NOT MODIFY IT", null);
StatisticsUtil.analyze(createTableStmt);
return createTableStmt;
}
private boolean created() {
// 1. check database exist
Optional<Database> optionalDatabase =
Env.getCurrentEnv().getInternalCatalog()
.getDb(FeConstants.INTERNAL_DB_NAME);
if (!optionalDatabase.isPresent()) {
return false;
}
Database db = optionalDatabase.get();
Optional<Table> optionalStatsTbl = db.getTable(StatisticConstants.TABLE_STATISTIC_TBL_NAME);
if (!optionalStatsTbl.isPresent()) {
return false;
}
// 2. check statistic tables
Table statsTbl = optionalStatsTbl.get();
Optional<Column> optionalColumn =
statsTbl.fullSchema.stream().filter(c -> c.getName().equals("count")).findFirst();
if (!optionalColumn.isPresent() || !optionalColumn.get().isAllowNull()) {
try {
Env.getCurrentEnv().getInternalCatalog()
.dropTable(new DropTableStmt(true, new TableName(null,
StatisticConstants.DB_NAME, StatisticConstants.TABLE_STATISTIC_TBL_NAME), true));
} catch (Exception e) {
LOG.warn("Failed to drop outdated table", e);
}
return false;
}
optionalStatsTbl = db.getTable(StatisticConstants.PARTITION_STATISTIC_TBL_NAME);
if (!optionalStatsTbl.isPresent()) {
return false;
}
// 3. check audit table
optionalStatsTbl = db.getTable(AuditLoader.AUDIT_LOG_TABLE);
if (!optionalStatsTbl.isPresent()) {
return false;
}
// 4. check and update audit table schema
OlapTable auditTable = (OlapTable) optionalStatsTbl.get();
List<ColumnDef> expectedSchema = InternalSchema.AUDIT_SCHEMA;
// 5. check if we need to add new columns
List<AlterClause> alterClauses = Lists.newArrayList();
for (int i = 0; i < expectedSchema.size(); i++) {
ColumnDef def = expectedSchema.get(i);
if (auditTable.getColumn(def.getName()) == null) {
// add column if it doesn't exist
try {
ColumnDef columnDef = new ColumnDef(def.getName(), def.getTypeDef(), def.isAllowNull());
// find the previous column name to determine the position
String afterColumn = null;
if (i > 0) {
for (int j = i - 1; j >= 0; j--) {
String prevColName = expectedSchema.get(j).getName();
if (auditTable.getColumn(prevColName) != null) {
afterColumn = prevColName;
break;
}
}
}
ColumnPosition position = afterColumn == null ? ColumnPosition.FIRST :
new ColumnPosition(afterColumn);
ModifyColumnClause clause = new ModifyColumnClause(columnDef, position, null,
Maps.newHashMap());
clause.setColumn(columnDef.toColumn());
alterClauses.add(clause);
} catch (Exception e) {
LOG.warn("Failed to create alter clause for column: " + def.getName(), e);
return false;
}
}
}
// apply schema changes if needed
if (!alterClauses.isEmpty()) {
try {
TableName tableName = new TableName(InternalCatalog.INTERNAL_CATALOG_NAME,
FeConstants.INTERNAL_DB_NAME, AuditLoader.AUDIT_LOG_TABLE);
AlterTableStmt alterStmt = new AlterTableStmt(tableName, alterClauses);
Env.getCurrentEnv().alterTable(alterStmt);
} catch (Exception e) {
LOG.warn("Failed to alter audit table schema", e);
return false;
}
}
return true;
}
}