InternalSqlTemplate.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.statistics.util;
import org.apache.doris.common.InvalidFormatException;
import com.google.common.collect.Sets;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Template for building internal query SQL statements.
* e.g.:
* - "SELECT ${column} FROM ${table}"
* - ${column} and ${table} will be replaced with the actual executed table and column.
*/
public class InternalSqlTemplate {
/** Sample query or full query of statistics. */
public enum QueryType {
FULL,
SAMPLE
}
/** common parameters: tableName, columnName, partitionName */
public static final String TABLE = "table";
public static final String PARTITION = "partition";
public static final String COLUMN = "column";
public static final String PERCENT = "percent";
/** -------------------------- for statistics begin -------------------------- */
public static final String MIN_MAX_NDV_VALUE_SQL = "SELECT MIN(${column}) AS min_value, MAX(${column})"
+ " AS max_value, NDV(${column}) AS ndv FROM ${table};";
public static final String PARTITION_MIN_MAX_NDV_VALUE_SQL = "SELECT MIN(${column}) AS min_value,"
+ " MAX(${column}) AS max_value, NDV(${column}) AS ndv FROM ${table} PARTITIONS (${partition});";
public static final String ROW_COUNT_SQL = "SELECT COUNT(1) AS row_count FROM ${table};";
public static final String PARTITION_ROW_COUNT_SQL = "SELECT COUNT(1) AS row_count FROM ${table} PARTITION"
+ " (${partition});";
public static final String MAX_AVG_SIZE_SQL = "SELECT MAX(LENGTH(${column})) AS max_size,"
+ " AVG(LENGTH(${column})) AS avg_size FROM ${table};";
public static final String PARTITION_MAX_AVG_SIZE_SQL = "SELECT MAX(LENGTH(${column}))"
+ " AS max_size, AVG(LENGTH(${column})) AS avg_size FROM ${table} PARTITIONS (${partition});";
public static final String NUM_NULLS_SQL = "SELECT COUNT(1) AS num_nulls FROM ${table}"
+ " WHERE ${column} IS NULL;";
public static final String PARTITION_NUM_NULLS_SQL = "SELECT COUNT(1) AS num_nulls FROM"
+ " ${table} PARTITIONS (${partition}) WHERE ${column} IS NULL;";
// Sample SQL
public static final String SAMPLE_MIN_MAX_NDV_VALUE_SQL = "SELECT MIN(${column}) AS min_value, MAX(${column})"
+ " AS max_value, NDV(${column}) AS ndv FROM ${table} TABLESAMPLE(${percent} PERCENT);";
public static final String SAMPLE_PARTITION_MIN_MAX_NDV_VALUE_SQL = "SELECT MIN(${column}) AS min_value,"
+ " MAX(${column}) AS max_value, NDV(${column}) AS ndv FROM ${table} PARTITIONS (${partition})"
+ " TABLESAMPLE(${percent} PERCENT);";
public static final String SAMPLE_ROW_COUNT_SQL = "SELECT COUNT(1) AS row_count FROM ${table}"
+ " TABLESAMPLE(${percent} PERCENT);";
public static final String SAMPLE_PARTITION_ROW_COUNT_SQL = "SELECT COUNT(1) AS row_count FROM ${table}"
+ " PARTITIONS (${partition}) TABLESAMPLE(${percent} PERCENT);";
public static final String SAMPLE_MAX_AVG_SIZE_SQL = "SELECT MAX(LENGTH(${column})) AS max_size,"
+ " AVG(LENGTH(${column})) AS avg_size FROM ${table} TABLESAMPLE(${percent} PERCENT);";
public static final String SAMPLE_PARTITION_MAX_AVG_SIZE_SQL = "SELECT MAX(LENGTH(${column}))"
+ " AS max_size, AVG(LENGTH(${column})) AS avg_size FROM ${table} PARTITIONS (${partition})"
+ " TABLESAMPLE(${percent} PERCENT);";
public static final String SAMPLE_NUM_NULLS_SQL = "SELECT COUNT(1) AS num_nulls FROM ${table}"
+ " TABLESAMPLE(${percent} PERCENT) WHERE ${column} IS NULL;";
public static final String SAMPLE_PARTITION_NUM_NULLS_SQL = "SELECT COUNT(1) AS num_nulls FROM"
+ " ${table} PARTITIONS (${partition}) TABLESAMPLE(${percent} PERCENT) WHERE ${column} IS NULL;";
/** ---------------------------- for statistics end ---------------------------- */
private static final Logger LOG = LogManager.getLogger(InternalSqlTemplate.class);
private static final Pattern PATTERN = Pattern.compile("\\$\\{\\w+\\}");
/**
* Concatenate SQL statements based on templates and parameters. e.g.:
* template and parameters:
* 'SELECT ${col} FROM ${table} WHERE id = ${id};',
* parameters: {col=colName, table=tableName, id=1}
* result sql: 'SELECT colName FROM tableName WHERE id = 1;
* <p>
*
* @param template sql template
* @param params k,v parameter, if without parameter, params should be null
* @return SQL statement with parameters concatenated
*/
public static String processTemplate(String template, Map<String, String> params) throws InvalidFormatException {
Set<String> requiredParams = getTemplateParams(template);
if (!checkParams(requiredParams, params)) {
throw new InvalidFormatException("Wrong parameter format. need params: " + requiredParams);
}
Matcher matcher = PATTERN.matcher(template);
StringBuffer sb = new StringBuffer();
while (matcher.find()) {
String param = matcher.group();
String value = params.get(param.substring(2, param.length() - 1));
matcher.appendReplacement(sb, value == null ? "" : value);
}
matcher.appendTail(sb);
if (LOG.isDebugEnabled()) {
LOG.debug("Template:{}, params: {}, SQL: {}", template, params, sb.toString());
}
return sb.toString();
}
public static String buildStatsMinMaxNdvValueSql(Map<String, String> params, QueryType queryType)
throws InvalidFormatException {
if (queryType == QueryType.FULL) {
return processTemplate(MIN_MAX_NDV_VALUE_SQL, params);
}
return processTemplate(SAMPLE_MIN_MAX_NDV_VALUE_SQL, params);
}
public static String buildStatsPartitionMinMaxNdvValueSql(Map<String, String> params, QueryType queryType)
throws InvalidFormatException {
if (queryType == QueryType.FULL) {
return processTemplate(PARTITION_MIN_MAX_NDV_VALUE_SQL, params);
}
return processTemplate(SAMPLE_PARTITION_MIN_MAX_NDV_VALUE_SQL, params);
}
public static String buildStatsRowCountSql(Map<String, String> params, QueryType queryType)
throws InvalidFormatException {
if (queryType == QueryType.FULL) {
return processTemplate(ROW_COUNT_SQL, params);
}
return processTemplate(SAMPLE_ROW_COUNT_SQL, params);
}
public static String buildStatsPartitionRowCountSql(Map<String, String> params, QueryType queryType)
throws InvalidFormatException {
if (queryType == QueryType.FULL) {
return processTemplate(PARTITION_ROW_COUNT_SQL, params);
}
return processTemplate(SAMPLE_PARTITION_ROW_COUNT_SQL, params);
}
public static String buildStatsMaxAvgSizeSql(Map<String, String> params, QueryType queryType)
throws InvalidFormatException {
if (queryType == QueryType.FULL) {
return processTemplate(MAX_AVG_SIZE_SQL, params);
}
return processTemplate(SAMPLE_MAX_AVG_SIZE_SQL, params);
}
// SAMPLE_PARTITION_MAX_AVG_SIZE_SQL
public static String buildStatsPartitionMaxAvgSizeSql(Map<String, String> params, QueryType queryType)
throws InvalidFormatException {
if (queryType == QueryType.FULL) {
return processTemplate(PARTITION_MAX_AVG_SIZE_SQL, params);
}
return processTemplate(SAMPLE_PARTITION_MAX_AVG_SIZE_SQL, params);
}
public static String buildStatsNumNullsSql(Map<String, String> params, QueryType queryType)
throws InvalidFormatException {
if (queryType == QueryType.FULL) {
return processTemplate(NUM_NULLS_SQL, params);
}
return processTemplate(SAMPLE_NUM_NULLS_SQL, params);
}
public static String buildStatsPartitionNumNullsSql(Map<String, String> params, QueryType queryType)
throws InvalidFormatException {
if (queryType == QueryType.FULL) {
return processTemplate(PARTITION_NUM_NULLS_SQL, params);
}
return processTemplate(SAMPLE_PARTITION_NUM_NULLS_SQL, params);
}
private static Set<String> getTemplateParams(String template) {
Matcher matcher = PATTERN.matcher(template);
Set<String> requiredParams = Sets.newHashSet();
while (matcher.find()) {
String param = matcher.group();
String value = param.substring(2, param.length() - 1);
requiredParams.add(value);
}
return requiredParams;
}
private static boolean checkParams(Set<String> requiredParams, Map<String, String> params) {
if (params != null && !params.isEmpty()) {
Set<String> paramsSet = params.keySet();
return paramsSet.containsAll(requiredParams);
} else {
return requiredParams == null;
}
}
}