// ExternalUtil.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.datasource;
import org.apache.doris.analysis.SlotDescriptor;
import org.apache.doris.catalog.ArrayType;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.MapType;
import org.apache.doris.catalog.StructField;
import org.apache.doris.catalog.StructType;
import org.apache.doris.catalog.Type;
import org.apache.doris.thrift.TFileScanRangeParams;
import org.apache.doris.thrift.schema.external.TArrayField;
import org.apache.doris.thrift.schema.external.TField;
import org.apache.doris.thrift.schema.external.TFieldPtr;
import org.apache.doris.thrift.schema.external.TMapField;
import org.apache.doris.thrift.schema.external.TNestedField;
import org.apache.doris.thrift.schema.external.TSchema;
import org.apache.doris.thrift.schema.external.TStructField;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Static helpers that translate Doris column metadata into the thrift external-schema
 * structures ({@link TSchema} / {@link TField}) and attach them to a
 * {@link TFileScanRangeParams}.
 *
 * <p>Two flavours are provided: one that describes every given {@link Column} in full, and one
 * that describes only the given {@link SlotDescriptor}s, following each slot's own type when
 * descending into nested columns.
 */
public class ExternalUtil {

    /**
     * Builds the thrift field for a single column, recursing into its children for
     * struct, array and map types.
     *
     * @param column column to describe
     * @return thrift field carrying the column's name, unique id, nullability, type and,
     *         for nested types, the nested child fields
     */
    private static TField getExternalSchema(Column column) {
        TField root = new TField();
        root.setName(column.getName());
        root.setId(column.getUniqueId());
        root.setIsOptional(column.isAllowNull());
        root.setType(column.getType().toColumnTypeThrift());

        // Only attached to root when the column is a struct, array or map.
        TNestedField nestedField = new TNestedField();
        if (column.getType().isStructType()) {
            nestedField.setStructField(getExternalSchema(column.getChildren()));
            root.setNestedField(nestedField);
        } else if (column.getType().isArrayType()) {
            // The array item is read from child 0.
            TArrayField listField = new TArrayField();
            TFieldPtr fieldPtr = new TFieldPtr();
            fieldPtr.setFieldPtr(getExternalSchema(column.getChildren().get(0)));
            listField.setItemField(fieldPtr);
            nestedField.setArrayField(listField);
            root.setNestedField(nestedField);
        } else if (column.getType().isMapType()) {
            // The map key is read from child 0 and the map value from child 1.
            TMapField mapField = new TMapField();
            TFieldPtr keyPtr = new TFieldPtr();
            keyPtr.setFieldPtr(getExternalSchema(column.getChildren().get(0)));
            mapField.setKeyField(keyPtr);
            TFieldPtr valuePtr = new TFieldPtr();
            valuePtr.setFieldPtr(getExternalSchema(column.getChildren().get(1)));
            mapField.setValueField(valuePtr);
            nestedField.setMapField(mapField);
            root.setNestedField(nestedField);
        }
        return root;
    }

    /**
     * Builds a thrift struct field containing one child field per column, in list order.
     *
     * @param columns columns to describe
     * @return struct field wrapping the per-column fields
     */
    private static TStructField getExternalSchema(List<Column> columns) {
        TStructField structField = new TStructField();
        for (Column child : columns) {
            TFieldPtr fieldPtr = new TFieldPtr();
            fieldPtr.setFieldPtr(getExternalSchema(child));
            structField.addToFields(fieldPtr);
        }
        return structField;
    }

    /**
     * Sets {@code schemaId} as the current schema id on {@code params} and appends a schema
     * describing all of {@code columns} to its history schema info.
     *
     * @param params   TFileScanRangeParams to fill
     * @param schemaId Schema ID
     * @param columns  columns that make up the schema
     */
    public static void initSchemaInfo(TFileScanRangeParams params, Long schemaId, List<Column> columns) {
        params.setCurrentSchemaId(schemaId);
        TSchema tSchema = new TSchema();
        tSchema.setSchemaId(schemaId);
        tSchema.setRootField(getExternalSchema(columns));
        params.addToHistorySchemaInfo(tSchema);
    }

    /**
     * Initialize schema info based on SlotDescriptors, only including columns that are actually needed.
     * For nested columns, only include sub-columns that are accessed according to pruned type.
     *
     * @param params      TFileScanRangeParams to fill
     * @param schemaId    Schema ID
     * @param slots       List of SlotDescriptors that are actually needed
     * @param nameMapping NameMapping from Iceberg table properties (may be null or empty)
     */
    public static void initSchemaInfo(TFileScanRangeParams params, Long schemaId,
            List<SlotDescriptor> slots, Map<Integer, List<String>> nameMapping) {
        params.setCurrentSchemaId(schemaId);
        TSchema tSchema = new TSchema();
        tSchema.setSchemaId(schemaId);
        tSchema.setRootField(getExternalSchema(slots, nameMapping));
        params.addToHistorySchemaInfo(tSchema);
    }

    /**
     * Builds a thrift struct field with one child per slot, skipping slots whose column name
     * starts with {@link Column#GLOBAL_ROWID_COL}.
     *
     * @param slots       slots to describe
     * @param nameMapping optional name mapping keyed by column unique id (may be null)
     * @return struct field wrapping the per-slot fields
     */
    private static TStructField getExternalSchema(List<SlotDescriptor> slots,
            Map<Integer, List<String>> nameMapping) {
        TStructField structField = new TStructField();
        for (SlotDescriptor slot : slots) {
            String colName = slot.getColumn().getName();
            if (colName.startsWith(Column.GLOBAL_ROWID_COL)) {
                continue;
            }
            TFieldPtr fieldPtr = new TFieldPtr();
            // The slot's own type decides which nested children are emitted.
            TField field = getExternalSchema(slot.getType(), slot.getColumn(), nameMapping);
            fieldPtr.setFieldPtr(field);
            structField.addToFields(fieldPtr);
        }
        return structField;
    }

    /**
     * Builds the thrift field for {@code dorisColumn}, using {@code columnType} to decide which
     * nested children to descend into.
     *
     * <p>Name, unique id, nullability and the thrift type are taken from {@code dorisColumn};
     * the nesting structure (struct fields, array item, map key/value) follows
     * {@code columnType}.
     *
     * @param columnType  type that drives the nested traversal
     * @param dorisColumn column supplying the field metadata and child columns
     * @param nameMapping optional name mapping keyed by column unique id (may be null)
     * @return thrift field for the column
     */
    private static TField getExternalSchema(Type columnType, Column dorisColumn,
            Map<Integer, List<String>> nameMapping) {
        TField root = new TField();
        root.setName(dorisColumn.getName());
        root.setId(dorisColumn.getUniqueId());
        root.setIsOptional(dorisColumn.isAllowNull());
        root.setType(dorisColumn.getType().toColumnTypeThrift());
        if (nameMapping != null && nameMapping.containsKey(dorisColumn.getUniqueId())) {
            // for iceberg set name mapping.
            root.setNameMapping(new ArrayList<>(nameMapping.get(dorisColumn.getUniqueId())));
        }

        // Only attached to root when columnType is a struct, array or map.
        TNestedField nestedField = new TNestedField();
        if (columnType.isStructType()) {
            StructType dorisStructType = (StructType) columnType;
            TStructField structField = new TStructField();

            // Index the column's children by name so each struct field can find its column.
            Map<String, Column> subNameToSubColumn = new HashMap<>();
            for (Column subColumn : dorisColumn.getChildren()) {
                subNameToSubColumn.put(subColumn.getName(), subColumn);
            }

            for (StructField subField : dorisStructType.getFields()) {
                TFieldPtr fieldPtr = new TFieldPtr();
                // NOTE(review): assumes every field of columnType has a same-named child column;
                // a missing name yields null here and the recursive call will dereference it.
                Column subColumn = subNameToSubColumn.get(subField.getName());
                fieldPtr.setFieldPtr(getExternalSchema(subField.getType(), subColumn, nameMapping));
                structField.addToFields(fieldPtr);
            }
            nestedField.setStructField(structField);
            root.setNestedField(nestedField);
        } else if (columnType.isArrayType()) {
            ArrayType dorisArrayType = (ArrayType) columnType;
            TArrayField listField = new TArrayField();
            TFieldPtr fieldPtr = new TFieldPtr();
            fieldPtr.setFieldPtr(getExternalSchema(
                    dorisArrayType.getItemType(), dorisColumn.getChildren().get(0), nameMapping));
            listField.setItemField(fieldPtr);
            nestedField.setArrayField(listField);
            root.setNestedField(nestedField);
        } else if (columnType.isMapType()) {
            MapType dorisMapType = (MapType) columnType;
            TMapField mapField = new TMapField();
            TFieldPtr keyPtr = new TFieldPtr();
            keyPtr.setFieldPtr(getExternalSchema(
                    dorisMapType.getKeyType(), dorisColumn.getChildren().get(0), nameMapping));
            mapField.setKeyField(keyPtr);
            TFieldPtr valuePtr = new TFieldPtr();
            // The value child must be traversed with the map's VALUE type; using the key type
            // here would drop or mis-shape nested children of the value.
            valuePtr.setFieldPtr(getExternalSchema(
                    dorisMapType.getValueType(), dorisColumn.getChildren().get(1), nameMapping));
            mapField.setValueField(valuePtr);
            nestedField.setMapField(mapField);
            root.setNestedField(nestedField);
        }
        return root;
    }
}