FileFormatProperties.java

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.datasource.property.fileformat;

import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.thrift.TFileAttributes;
import org.apache.doris.thrift.TFileCompressType;
import org.apache.doris.thrift.TFileFormatType;
import org.apache.doris.thrift.TResultFileSinkOptions;
import org.apache.doris.thrift.TTextSerdeType;

import java.util.Map;

public abstract class FileFormatProperties {
    public static final String PROP_FORMAT = "format";
    public static final String FORMAT_PARQUET = "parquet";
    public static final String FORMAT_CSV = "csv";
    public static final String FORMAT_CSV_WITH_NAMES = "csv_with_names";
    public static final String FORMAT_CSV_WITH_NAMES_AND_TYPES = "csv_with_names_and_types";
    public static final String FORMAT_HIVE_TEXT = "hive_text";
    public static final String FORMAT_ORC = "orc";
    public static final String FORMAT_JSON = "json";
    public static final String FORMAT_AVRO = "avro";
    public static final String FORMAT_WAL = "wal";
    public static final String FORMAT_ARROW = "arrow";
    public static final String PROP_COMPRESS_TYPE = "compress_type";

    protected String formatName;
    protected TFileFormatType fileFormatType;

    protected TFileCompressType compressionType;

    public FileFormatProperties(TFileFormatType fileFormatType, String formatName) {
        this.fileFormatType = fileFormatType;
        this.formatName = formatName;
    }

    /**
     * Analyze user properties
     * @param formatProperties properties specified by user
     * @param isRemoveOriginProperty if this param is set to true, then this method would remove the origin property
     * @throws AnalysisException
     */
    public abstract void analyzeFileFormatProperties(
            Map<String, String> formatProperties, boolean isRemoveOriginProperty)
            throws AnalysisException;

    /**
     * generate TResultFileSinkOptions according to the properties of specified file format
     * You must call method `analyzeFileFormatProperties` once before calling method `toTResultFileSinkOptions`
     */
    public abstract void fullTResultFileSinkOptions(TResultFileSinkOptions sinkOptions);

    /**
     * generate TFileAttributes according to the properties of specified file format
     * You must call method `analyzeFileFormatProperties` once before calling method `toTFileAttributes`
     */
    public abstract TFileAttributes toTFileAttributes();

    public static FileFormatProperties createFileFormatProperties(String formatString) {
        if (formatString == null) {
            throw new AnalysisException("formatString can not be null");
        }
        switch (formatString.toLowerCase()) {
            case FORMAT_CSV:
                return new CsvFileFormatProperties(formatString);
            case FORMAT_HIVE_TEXT:
                return new CsvFileFormatProperties(CsvFileFormatProperties.DEFAULT_HIVE_TEXT_COLUMN_SEPARATOR,
                        TTextSerdeType.HIVE_TEXT_SERDE, formatString);
            case FORMAT_CSV_WITH_NAMES:
                return new CsvFileFormatProperties(FORMAT_CSV_WITH_NAMES, formatString);
            case FORMAT_CSV_WITH_NAMES_AND_TYPES:
                return new CsvFileFormatProperties(FORMAT_CSV_WITH_NAMES_AND_TYPES, formatString);
            case FORMAT_PARQUET:
                return new ParquetFileFormatProperties();
            case FORMAT_ORC:
                return new OrcFileFormatProperties();
            case FORMAT_JSON:
                return new JsonFileFormatProperties();
            case FORMAT_AVRO:
                return new AvroFileFormatProperties();
            case FORMAT_WAL:
                return new WalFileFormatProperties();
            case FORMAT_ARROW:
                return new ArrowFileFormatProperties();
            default:
                throw new AnalysisException("format:" + formatString + " is not supported.");
        }
    }

    public static FileFormatProperties createFileFormatProperties(Map<String, String> formatProperties)
            throws AnalysisException {
        String formatString = formatProperties.getOrDefault(PROP_FORMAT, "csv");
        return createFileFormatProperties(formatString);
    }

    protected String getOrDefault(Map<String, String> props, String key, String defaultValue,
            boolean isRemove) {
        String value = props.getOrDefault(key, defaultValue);
        if (isRemove) {
            props.remove(key);
        }
        return value;
    }

    public TFileFormatType getFileFormatType() {
        return fileFormatType;
    }

    public TFileCompressType getCompressionType() {
        return compressionType;
    }

    public String getFormatName() {
        return formatName;
    }
}