TupleDescriptor.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/apache/impala/blob/branch-2.9.0/fe/src/main/java/org/apache/impala/TupleDescriptor.java
// and modified by Doris
package org.apache.doris.analysis;
import org.apache.doris.catalog.ColumnStats;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.thrift.TTupleDescriptor;
import com.google.common.base.Joiner;
import com.google.common.base.MoreObjects;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
public class TupleDescriptor {
private static final Logger LOG = LogManager.getLogger(TupleDescriptor.class);
private final TupleId id;
private final String debugName; // debug only
private final ArrayList<SlotDescriptor> slots;
// underlying table, if there is one
private TableIf table;
// underlying table, if there is one
private TableRef ref;
// All legal aliases of this tuple.
private String[] aliases;
// If true, requires that aliases_.length() == 1. However, aliases_.length() == 1
// does not imply an explicit alias because nested collection refs have only a
// single implicit alias.
private boolean hasExplicitAlias;
// if false, this tuple doesn't need to be materialized
private boolean isMaterialized = true;
private int byteSize; // of all slots plus null indicators
// This cardinality is only used to mock slot ndv.
// Only tuple of olap scan node has this value.
private long cardinality;
private float avgSerializedSize; // in bytes; includes serialization overhead
private int tableId = -1;
public TupleDescriptor(TupleId id) {
this.id = id;
this.slots = new ArrayList<SlotDescriptor>();
this.debugName = "";
this.cardinality = -1;
}
public TupleDescriptor(TupleId id, String debugName) {
this.id = id;
this.slots = new ArrayList<SlotDescriptor>();
this.debugName = debugName;
this.cardinality = -1;
}
public void addSlot(SlotDescriptor desc) {
desc.setSlotOffset(slots.size());
slots.add(desc);
}
public TupleId getId() {
return id;
}
public TableRef getRef() {
return ref;
}
public void setRef(TableRef tableRef) {
ref = tableRef;
}
public ArrayList<SlotDescriptor> getSlots() {
return slots;
}
public void setTableId(int id) {
tableId = id;
}
/**
* get slot desc by slot id.
*
* @param slotId slot id
* @return this slot's desc
*/
public SlotDescriptor getSlot(int slotId) {
for (SlotDescriptor slotDesc : slots) {
if (slotDesc.getId().asInt() == slotId) {
return slotDesc;
}
}
return null;
}
public long getCardinality() {
return cardinality;
}
public void setCardinality(long cardinality) {
this.cardinality = cardinality;
}
public ArrayList<SlotDescriptor> getMaterializedSlots() {
ArrayList<SlotDescriptor> result = Lists.newArrayList();
for (SlotDescriptor slot : slots) {
if (slot.isMaterialized()) {
result.add(slot);
}
}
return result;
}
public ArrayList<SlotId> getMaterializedSlotIds() {
ArrayList<SlotId> result = Lists.newArrayList();
for (SlotDescriptor slot : slots) {
if (slot.isMaterialized()) {
result.add(slot.getId());
}
}
return result;
}
public ArrayList<SlotId> getAllSlotIds() {
ArrayList<SlotId> result = Lists.newArrayList();
for (SlotDescriptor slot : slots) {
result.add(slot.getId());
}
return result;
}
/**
* Return slot descriptor corresponding to column referenced in the context
* of tupleDesc, or null if no such reference exists.
*/
public SlotDescriptor getColumnSlot(String columnName) {
for (SlotDescriptor slotDesc : slots) {
if (slotDesc.getColumn() != null && slotDesc.getColumn().getName().equalsIgnoreCase(columnName)) {
return slotDesc;
}
}
return null;
}
public boolean hasVariantCol() {
for (SlotDescriptor slotDesc : slots) {
if (slotDesc.getColumn() != null && slotDesc.getColumn().getType().isVariantType()) {
return true;
}
}
return false;
}
public TableIf getTable() {
return table;
}
public void setTable(TableIf tbl) {
table = tbl;
}
public int getByteSize() {
return byteSize;
}
public void setIsMaterialized(boolean value) {
isMaterialized = value;
}
public boolean isMaterialized() {
return isMaterialized;
}
public float getAvgSerializedSize() {
return avgSerializedSize;
}
public void setAliases(String[] aliases, boolean hasExplicitAlias) {
this.aliases = aliases;
this.hasExplicitAlias = hasExplicitAlias;
}
public boolean hasExplicitAlias() {
return hasExplicitAlias;
}
public String getAlias() {
return (aliases != null) ? aliases[0] : null;
}
public String getLastAlias() {
return (aliases != null) ? aliases[aliases.length - 1] : null;
}
public TableName getAliasAsName() {
return (aliases != null) ? new TableName(aliases[0]) : null;
}
public TTupleDescriptor toThrift() {
TTupleDescriptor ttupleDesc = new TTupleDescriptor(id.asInt(), 0, 0);
if (table != null && table.getId() >= 0) {
ttupleDesc.setTableId((int) table.getId());
}
if (tableId > 0) {
ttupleDesc.setTableId(tableId);
}
return ttupleDesc;
}
/**
* This function is mainly used to calculate the statistics of the tuple and the layout information.
* Generally, it occurs after the plan node materializes the slot and before calculating the plan node statistics.
* PlanNode.init() {
* materializedSlot();
* tupleDesc.computeStatAndMemLayout();
* computeStat();
* }
*/
public void computeStatAndMemLayout() {
computeStat();
computeMemLayout();
}
/**
* This function is mainly used to evaluate the statistics of the tuple,
* such as the average size of each row.
* This function will be used before the computeStat() of the plan node
* and is the pre-work for evaluating the statistics of the plan node.
*
* This function is theoretically only called once when the plan node is init.
* However, the current code structure is relatively confusing
* In order to ensure that even if it is wrongly called a second time, no error will occur,
* so it will be initialized again at the beginning of the function.
*
* @deprecated In the future this function will be changed to a private function.
*/
@Deprecated
public void computeStat() {
// init stat
avgSerializedSize = 0;
// compute stat
for (SlotDescriptor d : slots) {
if (!d.isMaterialized()) {
continue;
}
ColumnStats stats = d.getStats();
if (stats.hasAvgSerializedSize()) {
avgSerializedSize += d.getStats().getAvgSerializedSize();
} else {
// TODO: for computed slots, try to come up with stats estimates
avgSerializedSize += d.getType().getSlotSize();
}
}
}
/**
* @deprecated In the future this function will be changed to a private function.
*/
@Deprecated
public void computeMemLayout() {
// sort slots by size
List<List<SlotDescriptor>> slotsBySize = Lists.newArrayListWithCapacity(PrimitiveType.getMaxSlotSize());
for (int i = 0; i <= PrimitiveType.getMaxSlotSize(); ++i) {
slotsBySize.add(new ArrayList<SlotDescriptor>());
}
// populate slotsBySize; also compute avgSerializedSize
for (SlotDescriptor d : slots) {
if (d.isMaterialized()) {
slotsBySize.get(d.getType().getSlotSize()).add(d);
}
}
// we shouldn't have anything of size 0
Preconditions.checkState(slotsBySize.get(0).isEmpty());
// slotIdx is the index into the resulting tuple struct. The first (smallest) field
// is 0, next is 1, etc.
int slotIdx = 0;
for (int slotSize = 1; slotSize <= PrimitiveType.getMaxSlotSize(); ++slotSize) {
if (slotsBySize.get(slotSize).isEmpty()) {
continue;
}
for (SlotDescriptor d : slotsBySize.get(slotSize)) {
d.setByteSize(slotSize);
d.setSlotIdx(slotIdx++);
byteSize += slotSize;
}
}
}
/**
* Returns true if tuples of type 'this' can be assigned to tuples of type 'desc'
* (checks that both have the same number of slots and that slots are of the same type)
*/
public boolean isCompatible(TupleDescriptor desc) {
if (slots.size() != desc.slots.size()) {
return false;
}
for (int i = 0; i < slots.size(); ++i) {
if (slots.get(i).getType() != desc.slots.get(i).getType()) {
return false;
}
}
return true;
}
/**
* Materialize all slots.
*/
public void materializeSlots() {
for (SlotDescriptor slot : slots) {
slot.setIsMaterialized(true);
}
}
public void getTableIdToColumnNames(Map<Long, Set<String>> tableIdToColumnNames) {
for (SlotDescriptor slotDescriptor : slots) {
if (!slotDescriptor.isMaterialized()) {
continue;
}
if (slotDescriptor.getColumn() != null) {
TupleDescriptor parent = slotDescriptor.getParent();
Preconditions.checkState(parent != null);
TableIf table = parent.getTable();
Preconditions.checkState(table != null);
Long tableId = table.getId();
Set<String> columnNames = tableIdToColumnNames.get(tableId);
if (columnNames == null) {
columnNames = new TreeSet<>(String.CASE_INSENSITIVE_ORDER);
tableIdToColumnNames.put(tableId, columnNames);
}
columnNames.add(slotDescriptor.getColumn().getName());
} else {
for (Expr expr : slotDescriptor.getSourceExprs()) {
expr.getTableIdToColumnNames(tableIdToColumnNames);
}
}
}
}
public Set<String> getColumnNames() {
Map<Long, Set<String>> columnNamesInQueryOutput = Maps.newHashMap();
getTableIdToColumnNames(columnNamesInQueryOutput);
Set<String> columnNames = Sets.newHashSet();
for (Set<String> names : columnNamesInQueryOutput.values()) {
columnNames.addAll(names);
}
return columnNames;
}
@Override
public String toString() {
String tblStr = (table == null ? "null" : table.getName());
List<String> slotStrings = Lists.newArrayList();
for (SlotDescriptor slot : slots) {
slotStrings.add(slot.debugString());
}
return MoreObjects.toStringHelper(this).add("id", id.asInt()).add("tbl", tblStr)
.add("is_materialized", isMaterialized).add("slots", "[" + Joiner.on(", ").join(slotStrings) + "]")
.toString();
}
public String debugString() {
// TODO(zc):
// String tblStr = (getTable() == null ? "null" : getTable().getFullName());
String tblStr = (getTable() == null ? "null" : getTable().getName());
List<String> slotStrings = Lists.newArrayList();
for (SlotDescriptor slot : slots) {
slotStrings.add(slot.debugString());
}
return MoreObjects.toStringHelper(this)
.add("id", id.asInt())
.add("name", debugName)
.add("tbl", tblStr)
.add("is_materialized", isMaterialized)
.add("slots", "[" + Joiner.on(", ").join(slotStrings) + "]")
.toString();
}
public String getExplainString() {
StringBuilder builder = new StringBuilder();
String prefix = " ";
String tblStr = (getTable() == null ? "null" : getTable().getName());
builder.append(MoreObjects.toStringHelper(this)
.add("id", id.asInt())
.add("tbl", tblStr));
builder.append("\n");
for (SlotDescriptor slot : slots) {
if (slot.isMaterialized()) {
builder.append(slot.getExplainString(prefix)).append("\n");
}
}
return builder.toString();
}
}