// FileFormatUtils.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.common.util;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.ScalarType;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.FeNameFormat;

import com.google.common.base.Strings;

import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.regex.Matcher;
public class FileFormatUtils {
/**
 * Tells whether a format name denotes one of the CSV-family text formats.
 *
 * <p>The name is compared case-insensitively against {@code FORMAT_CSV},
 * {@code FORMAT_CSV_WITH_NAMES}, {@code FORMAT_CSV_WITH_NAMES_AND_TYPES} and
 * {@code FORMAT_HIVE_TEXT} from {@link FileFormatConstants}.
 *
 * @param formatStr format name to classify
 * @return {@code true} if {@code formatStr} matches any of the four constants
 */
public static boolean isCsv(String formatStr) {
    if (FileFormatConstants.FORMAT_CSV.equalsIgnoreCase(formatStr)) {
        return true;
    }
    if (FileFormatConstants.FORMAT_CSV_WITH_NAMES.equalsIgnoreCase(formatStr)) {
        return true;
    }
    if (FileFormatConstants.FORMAT_CSV_WITH_NAMES_AND_TYPES.equalsIgnoreCase(formatStr)) {
        return true;
    }
    // Hive text files are handled together with the CSV variants.
    return FileFormatConstants.FORMAT_HIVE_TEXT.equalsIgnoreCase(formatStr);
}
/**
 * Parses a CSV schema description and appends one {@link Column} per entry to
 * {@code csvSchema}.
 *
 * <p>The description is a {@code ;}-separated list of {@code name:type} entries, for example
 * {@code "k1:int;k2:bigint;k3:decimal(10,2);k4:datetime(6)"}. Spaces inside an entry are
 * removed, and both the name and the type are lower-cased before use. Recognized types are
 * {@code tinyint}, {@code smallint}, {@code int}, {@code bigint}, {@code largeint},
 * {@code float}, {@code double}, {@code decimal(p,s)}, {@code date}, {@code datetime},
 * {@code datetime(s)}, {@code string} and {@code boolean}.
 *
 * <p>A null or empty description is a no-op. Columns are appended as they are parsed, so
 * entries that precede a failing one remain in {@code csvSchema} when an exception is thrown.
 *
 * <p>Public for unit test.
 *
 * @param csvSchema output list that receives the parsed columns
 * @param csvSchemaStr schema description; may be null or empty
 * @throws AnalysisException if an entry is not of the form {@code name:type}, the name fails
 *         {@code FeNameFormat.checkColumnName}, the type is unsupported or malformed, or any
 *         other error occurs while a column is being built
 */
public static void parseCsvSchema(List<Column> csvSchema, String csvSchemaStr)
        throws AnalysisException {
    if (Strings.isNullOrEmpty(csvSchemaStr)) {
        return;
    }
    String[] schemaStrs = csvSchemaStr.split(";");
    try {
        for (String schemaStr : schemaStrs) {
            String[] kv = schemaStr.replace(" ", "").split(":");
            if (kv.length != 2) {
                throw new AnalysisException("invalid csv schema: " + csvSchemaStr);
            }
            // Locale.ROOT keeps case folding independent of the JVM default locale; with a
            // Turkish default locale "INT" would otherwise lower-case to a dotless-i form and
            // be rejected as an unsupported type.
            String name = kv[0].toLowerCase(Locale.ROOT);
            FeNameFormat.checkColumnName(name);
            String type = kv[1].toLowerCase(Locale.ROOT);
            csvSchema.add(toCsvColumn(name, type));
        }
    } catch (Exception e) {
        // This also catches the AnalysisExceptions raised inside the loop, so every failure
        // message reaches the caller with the "invalid csv schema: " prefix.
        throw new AnalysisException("invalid csv schema: " + e.getMessage());
    }
}

/**
 * Builds the column for a single, already lower-cased {@code name:type} schema entry.
 *
 * @param name lower-cased column name
 * @param type lower-cased type text with spaces removed
 * @return the column for the given name and type
 * @throws AnalysisException if the type is unsupported, or is a decimal/datetime type that
 *         does not match the corresponding pattern in {@link FileFormatConstants}
 */
private static Column toCsvColumn(String name, String type) throws AnalysisException {
    // Types identified by an exact keyword.
    switch (type) {
        case "tinyint":
            return new Column(name, PrimitiveType.TINYINT, true);
        case "smallint":
            return new Column(name, PrimitiveType.SMALLINT, true);
        case "int":
            return new Column(name, PrimitiveType.INT, true);
        case "bigint":
            return new Column(name, PrimitiveType.BIGINT, true);
        case "largeint":
            return new Column(name, PrimitiveType.LARGEINT, true);
        case "float":
            return new Column(name, PrimitiveType.FLOAT, true);
        case "double":
            return new Column(name, PrimitiveType.DOUBLE, true);
        case "string":
            return new Column(name, PrimitiveType.STRING, true);
        case "boolean":
            return new Column(name, PrimitiveType.BOOLEAN, true);
        case "date":
            return new Column(name, ScalarType.createDateType(), false, null, true, null, "");
        default:
            break;
    }

    // Parameterized types: the arguments are extracted with the shared patterns.
    if (type.startsWith("decimal")) {
        // regex decimal(p, s)
        Matcher matcher = FileFormatConstants.DECIMAL_TYPE_PATTERN.matcher(type);
        if (!matcher.find()) {
            throw new AnalysisException("invalid decimal type: " + type);
        }
        int precision = Integer.parseInt(matcher.group(1));
        int scale = Integer.parseInt(matcher.group(2));
        return new Column(name, ScalarType.createDecimalV3Type(precision, scale), false, null, true, null,
                "");
    }
    if (type.startsWith("datetime")) {
        // A bare "datetime" means scale 0; otherwise the scale comes from datetime(s).
        int scale = 0;
        if (!type.equals("datetime")) {
            // regex datetime(s)
            Matcher matcher = FileFormatConstants.DATETIME_TYPE_PATTERN.matcher(type);
            if (!matcher.find()) {
                throw new AnalysisException("invalid datetime type: " + type);
            }
            scale = Integer.parseInt(matcher.group(1));
        }
        return new Column(name, ScalarType.createDatetimeV2Type(scale), false, null, true, null, "");
    }
    throw new AnalysisException("unsupported column type: " + type);
}
/**
 * Infers the file format from the suffix of a file name or path.
 *
 * <p>The check is case-insensitive and recognizes the suffixes {@code .avro}, {@code .orc}
 * and {@code .parquet}.
 *
 * @param filename file name or path to inspect; may be null
 * @return {@code "avro"}, {@code "orc"} or {@code "parquet"} when the name ends with the
 *         matching suffix; an empty {@code Optional} when the name is null or the format
 *         cannot be determined from it
 */
public static Optional<String> getFileFormatBySuffix(String filename) {
    // A missing name carries no suffix: report "unknown" instead of throwing an NPE.
    if (filename == null) {
        return Optional.empty();
    }
    // Locale.ROOT keeps the case folding independent of the JVM default locale.
    String lowered = filename.toLowerCase(Locale.ROOT);
    for (String format : new String[] {"avro", "orc", "parquet"}) {
        if (lowered.endsWith("." + format)) {
            return Optional.of(format);
        }
    }
    // Unable to get file format from file path
    return Optional.empty();
}
}