FilesetScanNode.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.planner;
import org.apache.doris.analysis.TupleDescriptor;
import org.apache.doris.catalog.FilesetTable;
import org.apache.doris.common.UserException;
import org.apache.doris.datasource.FederationBackendPolicy;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.system.Backend;
import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TExternalScanRange;
import org.apache.doris.thrift.TFileFormatType;
import org.apache.doris.thrift.TFileRangeDesc;
import org.apache.doris.thrift.TFileScanNode;
import org.apache.doris.thrift.TFileScanRange;
import org.apache.doris.thrift.TFileScanRangeParams;
import org.apache.doris.thrift.TFileScanSlotInfo;
import org.apache.doris.thrift.TNetworkAddress;
import org.apache.doris.thrift.TPlanNode;
import org.apache.doris.thrift.TPlanNodeType;
import org.apache.doris.thrift.TScanRange;
import org.apache.doris.thrift.TScanRangeLocation;
import org.apache.doris.thrift.TScanRangeLocations;
import org.apache.doris.thrift.TTableFormatFileDesc;
import com.google.common.collect.Lists;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Scan node for FilesetTable.
* The BE FilesetReader is invoked via the file_scanner when
* table_format_type == "fileset".
*/
public class FilesetScanNode extends ScanNode {
private static final Logger LOG = LogManager.getLogger(FilesetScanNode.class);
private final FilesetTable filesetTable;
public FilesetScanNode(PlanNodeId id, TupleDescriptor desc, ScanContext scanContext) {
super(id, desc, "SCAN FILESET", scanContext);
this.filesetTable = (FilesetTable) desc.getTable();
}
@Override
public void finalizeForNereids() throws UserException {
createScanRangeLocations();
}
@Override
protected void toThrift(TPlanNode msg) {
msg.node_type = TPlanNodeType.FILE_SCAN_NODE;
TFileScanNode fileScanNode = new TFileScanNode();
fileScanNode.setTupleId(desc.getId().asInt());
fileScanNode.setTableName(filesetTable.getName());
msg.file_scan_node = fileScanNode;
}
@Override
protected void createScanRangeLocations() throws UserException {
FederationBackendPolicy backendPolicy = new FederationBackendPolicy();
backendPolicy.init();
int numBEs = backendPolicy.numBackends();
// Each shard independently lists the full directory and
// then keeps only its hash-partition, so too many shards on one BE
// cause S3 throttling (503 Slow Down) and extreme long-tail latency.
int parallelPerBE = Math.min(getParallelExecInstanceNum(), 4);
int totalShards = parallelPerBE * numBEs;
// Build shared TFileScanRangeParams (same for all shards)
TFileScanRangeParams scanParams = new TFileScanRangeParams();
scanParams.setDestTupleId(desc.getId().asInt());
scanParams.setNumOfColumnsFromFile(desc.getTable().getBaseSchema(false).size());
scanParams.setSrcTupleId(-1);
for (org.apache.doris.analysis.SlotDescriptor slot : desc.getSlots()) {
TFileScanSlotInfo slotInfo = new TFileScanSlotInfo();
slotInfo.setSlotId(slot.getId().asInt());
slotInfo.setIsFileSlot(true);
scanParams.addToRequiredSlots(slotInfo);
}
scanRangeLocations = Lists.newArrayListWithCapacity(totalShards);
for (int shardId = 0; shardId < totalShards; shardId++) {
// Per-shard fileset_params with shard routing info
Map<String, String> filesetParams = new HashMap<>(filesetTable.getBackendProperties());
filesetParams.put("table_path", filesetTable.getTablePath());
filesetParams.put("file_type", filesetTable.getFileType().name());
filesetParams.put("file_pattern", filesetTable.getFilePattern());
filesetParams.put("shard_id", String.valueOf(shardId));
filesetParams.put("total_shards", String.valueOf(totalShards));
TTableFormatFileDesc tableFormatFileDesc = new TTableFormatFileDesc();
tableFormatFileDesc.setTableFormatType("fileset");
tableFormatFileDesc.setFilesetParams(filesetParams);
TFileRangeDesc rangeDesc = new TFileRangeDesc();
rangeDesc.setPath(filesetTable.getTablePath());
rangeDesc.setStartOffset(0);
rangeDesc.setSize(-1);
rangeDesc.setFileType(filesetTable.getFileType());
rangeDesc.setFormatType(TFileFormatType.FORMAT_MULTIDATA);
rangeDesc.setTableFormatParams(tableFormatFileDesc);
TFileScanRange fileScanRange = new TFileScanRange();
fileScanRange.addToRanges(rangeDesc);
fileScanRange.setParams(scanParams);
TExternalScanRange externalScanRange = new TExternalScanRange();
externalScanRange.setFileScanRange(fileScanRange);
TScanRange scanRange = new TScanRange();
scanRange.setExtScanRange(externalScanRange);
// Round-robin assignment across BEs
Backend be = backendPolicy.getNextBe();
scanBackendIds.add(be.getId());
TScanRangeLocations locations = new TScanRangeLocations();
TScanRangeLocation location = new TScanRangeLocation();
location.setServer(new TNetworkAddress(be.getHost(), be.getBePort()));
location.setBackendId(be.getId());
locations.addToLocations(location);
locations.setScanRange(scanRange);
scanRangeLocations.add(locations);
}
}
private int getParallelExecInstanceNum() {
ConnectContext context = ConnectContext.get();
if (context != null) {
return Math.max(context.getSessionVariable()
.getParallelExecInstanceNum(scanContext.getClusterName()), 1);
}
return 1;
}
@Override
public List<TScanRangeLocations> getScanRangeLocations(long maxScanRangeLength) {
return scanRangeLocations;
}
@Override
public int getNumInstances() {
return scanRangeLocations != null ? scanRangeLocations.size() : 1;
}
@Override
public int getScanRangeNum() {
return scanRangeLocations != null ? scanRangeLocations.size() : 1;
}
@Override
public String getNodeExplainString(String prefix, TExplainLevel detailLevel) {
StringBuilder output = new StringBuilder();
output.append(prefix).append("TABLE: ").append(filesetTable.getName()).append("\n");
output.append(prefix).append("LOCATION: ").append(filesetTable.getLocation()).append("\n");
int shards = scanRangeLocations != null ? scanRangeLocations.size() : 1;
output.append(prefix).append("SHARDS: ").append(shards)
.append(" (").append(numScanBackends()).append(" BEs)").append("\n");
output.append(prefix).append(String.format("cardinality=%s", cardinality)).append("\n");
return output.toString();
}
}