// BaseExternalTableDataSink.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/apache/impala/blob/branch-2.9.0/fe/src/main/java/org/apache/impala/DataSink.java
// and modified by Doris
package org.apache.doris.planner;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.FsBroker;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.nereids.trees.plans.commands.insert.InsertCommandContext;
import org.apache.doris.thrift.TDataSink;
import org.apache.doris.thrift.TFileCompressType;
import org.apache.doris.thrift.TFileFormatType;
import org.apache.doris.thrift.TNetworkAddress;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * Base class for data sinks that target external tables.
 *
 * <p>It holds the thrift sink description, reports a random output partition and no
 * exchange node, and offers helpers that translate broker names, file format names and
 * compression names into their thrift representations. Concrete sinks declare which
 * file formats they accept and build the thrift sink in {@link #bindDataSink(Optional)}.
 */
public abstract class BaseExternalTableDataSink extends DataSink {

    /**
     * Thrift sink description handed out by {@link #toThrift()}. It is never assigned in
     * this class; subclasses are expected to populate it (presumably from
     * {@link #bindDataSink(Optional)}).
     */
    protected TDataSink tDataSink;

    /**
     * @return the current value of {@link #tDataSink}, which is {@code null} until a
     *         subclass has assigned it
     */
    @Override
    protected TDataSink toThrift() {
        return tDataSink;
    }

    /**
     * @return always {@code null} in this base class
     */
    @Override
    public PlanNodeId getExchNodeId() {
        return null;
    }

    /**
     * @return always {@link DataPartition#RANDOM}
     */
    @Override
    public DataPartition getOutputPartition() {
        return DataPartition.RANDOM;
    }

    /**
     * File format types supported by the current table.
     *
     * @return the set of formats that {@link #getTFileFormatType(String)} may return
     */
    protected abstract Set<TFileFormatType> supportedFileFormatTypes();

    /**
     * Resolves broker network addresses and returns them in random order.
     *
     * @param bindBroker name of the broker to look up, or {@code null} to use all brokers
     * @return a new, mutable list of broker addresses in shuffled order
     * @throws AnalysisException if the broker manager yields no broker
     */
    protected List<TNetworkAddress> getBrokerAddresses(String bindBroker) throws AnalysisException {
        List<FsBroker> brokers = bindBroker != null
                ? Env.getCurrentEnv().getBrokerMgr().getBrokers(bindBroker)
                : Env.getCurrentEnv().getBrokerMgr().getAllBrokers();
        if (brokers == null || brokers.isEmpty()) {
            throw new AnalysisException("No alive broker.");
        }
        // Shuffle a private list of addresses rather than the list handed out by the broker
        // manager: that list may be shared or unmodifiable, and reordering it in place
        // would either leak into other callers or fail outright.
        List<TNetworkAddress> addresses = brokers.stream()
                .map(broker -> new TNetworkAddress(broker.host, broker.port))
                .collect(Collectors.toCollection(ArrayList::new));
        Collections.shuffle(addresses);
        return addresses;
    }

    /**
     * Maps a format description to a thrift file format type by case-insensitive
     * substring match. The keywords are tried in the order {@code orc}, {@code parquet},
     * {@code text}; anything else, including {@code null}, is treated as
     * {@link TFileFormatType#FORMAT_UNKNOWN}.
     *
     * @param format format description to classify; may be {@code null}
     * @return the matching format type
     * @throws AnalysisException if the resolved type is not contained in
     *         {@link #supportedFileFormatTypes()}
     */
    protected TFileFormatType getTFileFormatType(String format) throws AnalysisException {
        // Locale.ROOT keeps the match independent of the JVM default locale; a null
        // format falls through to FORMAT_UNKNOWN instead of raising an NPE.
        String normalized = format == null ? "" : format.toLowerCase(Locale.ROOT);
        TFileFormatType fileFormatType;
        if (normalized.contains("orc")) {
            fileFormatType = TFileFormatType.FORMAT_ORC;
        } else if (normalized.contains("parquet")) {
            fileFormatType = TFileFormatType.FORMAT_PARQUET;
        } else if (normalized.contains("text")) {
            fileFormatType = TFileFormatType.FORMAT_CSV_PLAIN;
        } else {
            fileFormatType = TFileFormatType.FORMAT_UNKNOWN;
        }
        if (!supportedFileFormatTypes().contains(fileFormatType)) {
            throw new AnalysisException("Unsupported input format type: " + format);
        }
        return fileFormatType;
    }

    /**
     * Maps a compression codec name to a thrift compress type, ignoring case.
     *
     * @param compressType codec name such as {@code "snappy"} or {@code "zstd"}; may be
     *        {@code null}
     * @return the matching compress type, or {@link TFileCompressType#PLAIN} for
     *         {@code "uncompressed"}, {@code null} and any unrecognised name
     */
    protected TFileCompressType getTFileCompressType(String compressType) {
        if ("snappy".equalsIgnoreCase(compressType)) {
            return TFileCompressType.SNAPPYBLOCK;
        }
        if ("lz4".equalsIgnoreCase(compressType)) {
            return TFileCompressType.LZ4BLOCK;
        }
        if ("lzo".equalsIgnoreCase(compressType)) {
            return TFileCompressType.LZO;
        }
        if ("zlib".equalsIgnoreCase(compressType)) {
            return TFileCompressType.ZLIB;
        }
        if ("zstd".equalsIgnoreCase(compressType)) {
            return TFileCompressType.ZSTD;
        }
        if ("gzip".equalsIgnoreCase(compressType)) {
            return TFileCompressType.GZ;
        }
        if ("bzip2".equalsIgnoreCase(compressType)) {
            return TFileCompressType.BZ2;
        }
        // "uncompressed" and every name not listed above share the plain fallback.
        return TFileCompressType.PLAIN;
    }

    /**
     * Checks the sink parameters and generates the thrift data sink sent to BE.
     *
     * @param insertCtx insert info context, if any
     * @throws AnalysisException if the sink parameters fail validation
     */
    public abstract void bindDataSink(Optional<InsertCommandContext> insertCtx) throws AnalysisException;
}