// PaimonExternalTable.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.datasource.paimon;
import org.apache.doris.analysis.TableScanParams;
import org.apache.doris.analysis.TableSnapshot;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.MTMV;
import org.apache.doris.catalog.PartitionItem;
import org.apache.doris.catalog.PartitionType;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.common.DdlException;
import org.apache.doris.datasource.CacheException;
import org.apache.doris.datasource.ExternalTable;
import org.apache.doris.datasource.SchemaCacheValue;
import org.apache.doris.datasource.metacache.EngineMtmvSupport;
import org.apache.doris.datasource.mvcc.MvccSnapshot;
import org.apache.doris.datasource.mvcc.MvccTable;
import org.apache.doris.datasource.systable.PaimonSysTable;
import org.apache.doris.datasource.systable.SysTable;
import org.apache.doris.mtmv.MTMVBaseTableIf;
import org.apache.doris.mtmv.MTMVRefreshContext;
import org.apache.doris.mtmv.MTMVRelatedTableIf;
import org.apache.doris.mtmv.MTMVSnapshotIf;
import org.apache.doris.statistics.AnalysisInfo;
import org.apache.doris.statistics.BaseAnalysisTask;
import org.apache.doris.statistics.ExternalAnalysisTask;
import org.apache.doris.thrift.THiveTable;
import org.apache.doris.thrift.TTableDescriptor;
import org.apache.doris.thrift.TTableType;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.paimon.CoreOptions;
import org.apache.paimon.Snapshot;
import org.apache.paimon.partition.Partition;
import org.apache.paimon.schema.TableSchema;
import org.apache.paimon.table.DataTable;
import org.apache.paimon.table.Table;
import org.apache.paimon.table.source.Split;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
public class PaimonExternalTable extends ExternalTable implements MTMVRelatedTableIf, MTMVBaseTableIf, MvccTable {
// Class-wide log4j logger; used in this class for snapshot/branch lookup failures and row-count diagnostics.
private static final Logger LOG = LogManager.getLogger(PaimonExternalTable.class);
/**
 * Creates a Paimon external table by forwarding all arguments to the parent constructor
 * together with the fixed table type {@code TableType.PAIMON_EXTERNAL_TABLE}.
 *
 * @param id table id, forwarded to the parent constructor
 * @param name table name, forwarded to the parent constructor
 * @param remoteName remote table name, forwarded to the parent constructor
 * @param catalog the Paimon catalog this table belongs to
 * @param db the Paimon database this table belongs to
 */
public PaimonExternalTable(long id, String name, String remoteName, PaimonExternalCatalog catalog,
PaimonExternalDatabase db) {
super(id, name, remoteName, catalog, db, TableType.PAIMON_EXTERNAL_TABLE);
}
/**
 * Returns the catalog type reported by the owning {@link PaimonExternalCatalog}.
 *
 * @return the result of {@code getCatalogType()} on this table's catalog
 */
public String getPaimonCatalogType() {
    PaimonExternalCatalog paimonCatalog = (PaimonExternalCatalog) catalog;
    return paimonCatalog.getCatalogType();
}
/**
 * Runs the parent initialization and then records that this table object has been created.
 *
 * <p>The method is {@code synchronized}, so concurrent callers on the same instance are serialized.
 */
@Override
protected synchronized void makeSureInitialized() {
    super.makeSureInitialized();
    // Only write the flag on the first call; later calls leave it untouched.
    if (!objectCreated) {
        objectCreated = true;
    }
}
/**
 * Returns the Paimon table to read for the given (optional) MVCC snapshot.
 *
 * @param snapshot pinned snapshot, or empty to use the latest table
 * @return the table held by the snapshot cache value when a snapshot is present,
 *         otherwise the table returned by {@code PaimonUtils.getPaimonTable(this)}
 */
public Table getPaimonTable(Optional<MvccSnapshot> snapshot) {
    if (!snapshot.isPresent()) {
        // Normal query: read from the latest table cache.
        return PaimonUtils.getPaimonTable(this);
    }
    // MTMV scenario: read the table bound to the snapshot cache value.
    return getOrFetchSnapshotCacheValue(snapshot).getSnapshot().getTable();
}
/**
 * Resolves the snapshot cache value to use for a read, based on the optional time-travel
 * snapshot and the optional scan parameters.
 *
 * <ul>
 *   <li>table snapshot present, or scan params select a tag: pinned snapshot with its own schema;</li>
 *   <li>scan params select a branch: latest snapshot and schema of that branch;</li>
 *   <li>otherwise: latest snapshot and latest schema from {@code PaimonUtils}.</li>
 * </ul>
 *
 * @param tableSnapshot optional time-travel specification (timestamp, snapshot id or tag name)
 * @param scanParams optional scan parameters (tag or branch)
 * @return the resolved snapshot cache value
 * @throws RuntimeException if resolving a pinned snapshot or a branch fails; the cause is preserved
 */
private PaimonSnapshotCacheValue getPaimonSnapshotCacheValue(Optional<TableSnapshot> tableSnapshot,
        Optional<TableScanParams> scanParams) {
    makeSureInitialized();
    // Current limitation: cannot specify both table snapshot and scan parameters simultaneously.
    // If both are supplied, the pinned-snapshot path below is taken first.
    if (tableSnapshot.isPresent() || (scanParams.isPresent() && scanParams.get().isTag())) {
        return loadPinnedSnapshotCacheValue(tableSnapshot, scanParams);
    }
    if (scanParams.isPresent() && scanParams.get().isBranch()) {
        return loadBranchSnapshotCacheValue(scanParams.get());
    }
    // Otherwise, use the latest snapshot and the latest schema.
    return PaimonUtils.getLatestSnapshotCacheValue(this);
}

/**
 * Builds a cache value for an explicitly requested snapshot (by time, snapshot id or tag),
 * using the schema that belongs to that snapshot rather than the latest schema.
 */
private PaimonSnapshotCacheValue loadPinnedSnapshotCacheValue(Optional<TableSnapshot> tableSnapshot,
        Optional<TableScanParams> scanParams) {
    try {
        Table baseTable = getBasePaimonTable();
        DataTable dataTable = (DataTable) baseTable;
        Snapshot snapshot;
        Map<String, String> scanOptions = new HashMap<>();
        if (tableSnapshot.isPresent()) {
            TableSnapshot snapshotOpt = tableSnapshot.get();
            String value = snapshotOpt.getValue();
            if (snapshotOpt.getType() == TableSnapshot.VersionType.TIME) {
                snapshot = PaimonUtil.getPaimonSnapshotByTimestamp(
                        dataTable, value, PaimonUtil.isDigitalString(value));
                scanOptions.put(CoreOptions.SCAN_SNAPSHOT_ID.key(), String.valueOf(snapshot.id()));
            } else if (PaimonUtil.isDigitalString(value)) {
                // A purely numeric version is treated as a snapshot id.
                snapshot = PaimonUtil.getPaimonSnapshotBySnapshotId(dataTable, value);
                scanOptions.put(CoreOptions.SCAN_SNAPSHOT_ID.key(), String.valueOf(snapshot.id()));
            } else {
                // Any other version string is treated as a tag name.
                snapshot = PaimonUtil.getPaimonSnapshotByTag(dataTable, value);
                scanOptions.put(CoreOptions.SCAN_TAG_NAME.key(), value);
            }
        } else {
            // Reached only when the caller saw scan params that select a tag.
            String tagName = PaimonUtil.extractBranchOrTagName(scanParams.get());
            snapshot = PaimonUtil.getPaimonSnapshotByTag(dataTable, tagName);
            scanOptions.put(CoreOptions.SCAN_TAG_NAME.key(), tagName);
        }
        Table scanTable = baseTable.copy(scanOptions);
        PaimonSchemaCacheValue schema = PaimonMetadataCache.buildSchemaCacheValue(catalog, scanTable,
                snapshot.schemaId());
        return new PaimonSnapshotCacheValue(PaimonPartitionInfo.EMPTY,
                new PaimonSnapshot(snapshot.id(), snapshot.schemaId(), scanTable), schema);
    } catch (Exception e) {
        LOG.warn("Failed to get Paimon snapshot for table {}", getOrBuildNameMapping().getFullLocalName(), e);
        throw new RuntimeException(
                "Failed to get Paimon snapshot: " + (e.getMessage() == null ? "unknown cause" : e.getMessage()),
                e);
    }
}

/**
 * Builds a cache value for the latest snapshot and latest schema of the branch selected by
 * {@code branchParams}.
 */
private PaimonSnapshotCacheValue loadBranchSnapshotCacheValue(TableScanParams branchParams) {
    try {
        Table baseTable = getBasePaimonTable();
        String branch = PaimonUtil.resolvePaimonBranch(branchParams, baseTable);
        Table table = ((PaimonExternalCatalog) catalog).getPaimonTable(getOrBuildNameMapping(), branch, null);
        long latestSnapshotId = table.latestSnapshot().map(Snapshot::id)
                .orElse(PaimonSnapshot.INVALID_SNAPSHOT_ID);
        // Branches in Paimon can have independent schemas and snapshots.
        // TODO: Add time travel support for paimon branch tables.
        DataTable dataTable = (DataTable) table;
        // Falls back to schema id 0 when the branch reports no latest schema.
        long schemaId = dataTable.schemaManager().latest().map(TableSchema::id).orElse(0L);
        PaimonSchemaCacheValue schema = PaimonMetadataCache.buildSchemaCacheValue(catalog, dataTable, schemaId);
        return new PaimonSnapshotCacheValue(PaimonPartitionInfo.EMPTY,
                new PaimonSnapshot(latestSnapshotId, schemaId, dataTable), schema);
    } catch (Exception e) {
        LOG.warn("Failed to get Paimon branch for table {}", getOrBuildNameMapping().getFullLocalName(), e);
        throw new RuntimeException(
                "Failed to get Paimon branch: " + (e.getMessage() == null ? "unknown cause" : e.getMessage()),
                e);
    }
}
/**
 * Builds the thrift descriptor for this table, described as a {@code HIVE_TABLE}.
 *
 * @return descriptor carrying the table id, column count, table name and database name
 * @throws IllegalArgumentException if the catalog type is not one of hms, filesystem, dlf or rest
 */
@Override
public TTableDescriptor toThrift() {
    List<Column> columns = getFullSchema();
    String catalogType = getPaimonCatalogType();
    boolean supported = PaimonExternalCatalog.PAIMON_HMS.equals(catalogType)
            || PaimonExternalCatalog.PAIMON_FILESYSTEM.equals(catalogType)
            || PaimonExternalCatalog.PAIMON_DLF.equals(catalogType)
            || PaimonExternalCatalog.PAIMON_REST.equals(catalogType);
    if (!supported) {
        throw new IllegalArgumentException(
                "Currently only supports hms/dlf/rest/filesystem catalog, do not support :"
                        + catalogType);
    }
    TTableDescriptor descriptor = new TTableDescriptor(getId(), TTableType.HIVE_TABLE, columns.size(), 0,
            getName(), dbName);
    descriptor.setHiveTable(new THiveTable(dbName, name, new HashMap<>()));
    return descriptor;
}
/**
 * Creates the statistics analysis task for this table.
 *
 * @param info analysis request description
 * @return a new {@link ExternalAnalysisTask} built from {@code info}
 */
@Override
public BaseAnalysisTask createAnalysisTask(AnalysisInfo info) {
    makeSureInitialized();
    BaseAnalysisTask analysisTask = new ExternalAnalysisTask(info);
    return analysisTask;
}
/**
 * Computes the table row count by planning a scan on the base Paimon table and summing
 * the row counts of all resulting splits.
 *
 * @return the summed row count when it is positive, otherwise {@code UNKNOWN_ROW_COUNT}
 */
@Override
public long fetchRowCount() {
    makeSureInitialized();
    long totalRows = getBasePaimonTable().newReadBuilder().newScan().plan().splits().stream()
            .mapToLong(Split::rowCount)
            .sum();
    if (totalRows > 0) {
        return totalRows;
    }
    if (totalRows == 0) {
        LOG.info("Paimon table {} row count is 0, return -1", name);
    }
    return UNKNOWN_ROW_COUNT;
}
/**
 * Hook invoked before an MTMV refresh. This implementation is a no-op: the body is empty.
 *
 * @param mtmv the materialized view about to be refreshed (unused here)
 * @throws DdlException declared by the signature; never thrown by this implementation
 */
@Override
public void beforeMTMVRefresh(MTMV mtmv) throws DdlException {
}
/**
 * Returns the partition items of this table for the given snapshot, as produced by
 * {@code EngineMtmvSupport.getAndCopyPartitionItems}.
 *
 * @param snapshot optional MVCC snapshot to resolve partitions against
 * @return map from partition name to partition item
 */
@Override
public Map<String, PartitionItem> getAndCopyPartitionItems(Optional<MvccSnapshot> snapshot) {
    Map<String, PartitionItem> partitionItems = EngineMtmvSupport.getAndCopyPartitionItems(this, snapshot);
    return partitionItems;
}
/**
 * Reports how this table is partitioned under the given snapshot.
 *
 * @param snapshot optional MVCC snapshot
 * @return {@code LIST} when the partition info is valid and at least one partition column
 *         exists, otherwise {@code UNPARTITIONED}
 */
@Override
public PartitionType getPartitionType(Optional<MvccSnapshot> snapshot) {
    if (isPartitionInvalid(snapshot)) {
        return PartitionType.UNPARTITIONED;
    }
    if (getPartitionColumns(snapshot).isEmpty()) {
        return PartitionType.UNPARTITIONED;
    }
    return PartitionType.LIST;
}
/**
 * Returns the lower-cased names of the partition columns visible under the given snapshot.
 *
 * <p>Lower-casing uses {@link Locale#ROOT} so the result does not depend on the JVM default
 * locale (for example, Turkish locales map {@code 'I'} to a dotless {@code 'ı'}).
 *
 * @param snapshot optional MVCC snapshot
 * @return set of lower-cased partition column names; empty when there are no partition columns
 */
@Override
public Set<String> getPartitionColumnNames(Optional<MvccSnapshot> snapshot) {
    return getPartitionColumns(snapshot).stream()
            .map(column -> column.getName().toLowerCase(Locale.ROOT))
            .collect(Collectors.toSet());
}
/**
 * Returns the partition columns for the given snapshot.
 *
 * @param snapshot optional MVCC snapshot
 * @return the partition columns from the schema cache value, or an empty list when the
 *         partition info is flagged invalid
 */
@Override
public List<Column> getPartitionColumns(Optional<MvccSnapshot> snapshot) {
    if (!isPartitionInvalid(snapshot)) {
        return getPaimonSchemaCacheValue(snapshot).getPartitionColumns();
    }
    return Collections.emptyList();
}
/**
 * Tells whether the partition info attached to the resolved snapshot cache value is flagged invalid.
 *
 * @param snapshot optional MVCC snapshot
 * @return the {@code isPartitionInvalid()} flag of the snapshot's partition info
 */
public boolean isPartitionInvalid(Optional<MvccSnapshot> snapshot) {
    return getOrFetchSnapshotCacheValue(snapshot).getPartitionInfo().isPartitionInvalid();
}
/**
 * Returns the MTMV snapshot of a single partition, delegating to {@code EngineMtmvSupport}.
 *
 * @param partitionName name of the partition
 * @param context refresh context (not used by this implementation)
 * @param snapshot optional MVCC snapshot
 * @return the partition's MTMV snapshot
 * @throws AnalysisException propagated from the delegate
 */
@Override
public MTMVSnapshotIf getPartitionSnapshot(String partitionName, MTMVRefreshContext context,
        Optional<MvccSnapshot> snapshot) throws AnalysisException {
    MTMVSnapshotIf partitionSnapshot = EngineMtmvSupport.getPartitionSnapshot(this, partitionName, snapshot);
    return partitionSnapshot;
}
/**
 * Returns the table-level MTMV snapshot; the refresh context is ignored and the call is
 * forwarded to {@link #getTableSnapshot(Optional)}.
 *
 * @param context refresh context (not used by this implementation)
 * @param snapshot optional MVCC snapshot
 * @return the table's MTMV snapshot
 * @throws AnalysisException propagated from the delegate
 */
@Override
public MTMVSnapshotIf getTableSnapshot(MTMVRefreshContext context, Optional<MvccSnapshot> snapshot)
        throws AnalysisException {
    MTMVSnapshotIf tableLevelSnapshot = getTableSnapshot(snapshot);
    return tableLevelSnapshot;
}
/**
 * Returns the Paimon partitions keyed by name for the given snapshot.
 *
 * @param snapshot optional MVCC snapshot
 * @return map from partition name to Paimon {@link Partition}
 * @throws RuntimeException wrapping any {@link AnalysisException} raised by the delegate
 */
public Map<String, Partition> getPartitionSnapshot(
        Optional<MvccSnapshot> snapshot) {
    Map<String, Partition> partitionsByName;
    try {
        partitionsByName = EngineMtmvSupport.getPaimonPartitionSnapshot(this, snapshot);
    } catch (AnalysisException e) {
        throw new RuntimeException(e);
    }
    return partitionsByName;
}
/**
 * Returns the table-level MTMV snapshot, delegating to {@code EngineMtmvSupport}.
 *
 * @param snapshot optional MVCC snapshot
 * @return the table's MTMV snapshot
 * @throws AnalysisException propagated from the delegate
 */
@Override
public MTMVSnapshotIf getTableSnapshot(Optional<MvccSnapshot> snapshot) throws AnalysisException {
    MTMVSnapshotIf tableLevelSnapshot = EngineMtmvSupport.getTableSnapshot(this, snapshot);
    return tableLevelSnapshot;
}
/**
 * Returns the largest {@code lastFileCreationTime} across all partitions of the latest snapshot.
 *
 * @return the maximum last-file-creation time, or {@code 0} when there are no partitions
 */
@Override
public long getNewestUpdateVersionOrTime() {
    PaimonSnapshotCacheValue latestValue = getPaimonSnapshotCacheValue(Optional.empty(), Optional.empty());
    boolean seenAny = false;
    long newest = 0;
    for (Partition partition : latestValue.getPartitionInfo().getNameToPartition().values()) {
        long createdAt = partition.lastFileCreationTime();
        // The first partition always seeds the maximum, even if its value is negative.
        if (!seenAny || createdAt > newest) {
            newest = createdAt;
            seenAny = true;
        }
    }
    return newest;
}
/**
 * Reports whether partition columns of this table may be null.
 *
 * @return always {@code true}; see the trade-off described in the method body
 */
@Override
public boolean isPartitionColumnAllowNull() {
// Paimon will write to the 'null' partition regardless of whether the value is null or 'null'.
// That is inconsistent with Doris' empty partition logic, so strictly speaking this should return false.
// However, when Spark creates Paimon tables, specifying 'not null' does not take effect.
// In order to successfully create the materialized view, true is returned here.
// The cost is that when Paimon writes a null partition value, the materialized view cannot detect this data.
return true;
}
/**
 * Loads the MVCC snapshot for a query by resolving the snapshot cache value and wrapping it
 * in a {@link PaimonMvccSnapshot}.
 *
 * @param tableSnapshot optional time-travel specification
 * @param scanParams optional scan parameters (tag or branch)
 * @return a new {@link PaimonMvccSnapshot} holding the resolved cache value
 */
@Override
public MvccSnapshot loadSnapshot(Optional<TableSnapshot> tableSnapshot, Optional<TableScanParams> scanParams) {
    PaimonSnapshotCacheValue resolved = getPaimonSnapshotCacheValue(tableSnapshot, scanParams);
    return new PaimonMvccSnapshot(resolved);
}
/**
 * Returns the partition items keyed by partition name, delegating to
 * {@code EngineMtmvSupport.getPartitionItems}.
 *
 * @param snapshot optional MVCC snapshot
 * @return map from partition name to partition item
 */
@Override
public Map<String, PartitionItem> getNameToPartitionItems(Optional<MvccSnapshot> snapshot) {
    Map<String, PartitionItem> itemsByName = EngineMtmvSupport.getPartitionItems(this, snapshot);
    return itemsByName;
}
/**
 * Reports whether this table supports internal partition pruning.
 *
 * @return always {@code true}
 */
@Override
public boolean supportInternalPartitionPruned() {
return true;
}
/**
 * Returns the full column list of this table, after making sure the table is initialized.
 *
 * @return the schema returned by the parent implementation
 */
@Override
public List<Column> getFullSchema() {
    makeSureInitialized();
    List<Column> fullSchema = super.getFullSchema();
    return fullSchema;
}
/**
 * Maps an optional MVCC snapshot to the schema version token used for schema lookups.
 *
 * @param snapshot optional MVCC snapshot
 * @return the schema id stored in the snapshot when it is a {@link PaimonMvccSnapshot},
 *         otherwise {@code CURRENT_SCHEMA_VERSION_TOKEN}
 */
@Override
public long resolveSchemaVersionToken(Optional<MvccSnapshot> snapshot) {
    if (snapshot.isPresent() && snapshot.get() instanceof PaimonMvccSnapshot) {
        PaimonMvccSnapshot paimonSnapshot = (PaimonMvccSnapshot) snapshot.get();
        return paimonSnapshot.getSnapshotCacheValue().getSnapshot().getSchemaId();
    }
    // Absent snapshot or a non-Paimon snapshot type: fall back to the current schema.
    return CURRENT_SCHEMA_VERSION_TOKEN;
}
/**
 * Loads the schema cache value for the given version token.
 *
 * <p>Also refreshes the table's update time to the current wall-clock time.
 *
 * @param versionToken a schema id, or {@code CURRENT_SCHEMA_VERSION_TOKEN} to use the schema id
 *        of the latest snapshot cache value
 * @return the schema built for the resolved schema id
 * @throws CacheException if building the schema cache value fails
 */
@Override
public Optional<SchemaCacheValue> loadSchemaByVersion(long versionToken) {
    makeSureInitialized();
    setUpdateTime(System.currentTimeMillis());
    final long schemaId;
    if (versionToken != CURRENT_SCHEMA_VERSION_TOKEN) {
        schemaId = versionToken;
    } else {
        schemaId = getOrFetchSnapshotCacheValue(Optional.empty()).getSnapshot().getSchemaId();
    }
    try {
        return Optional.of(PaimonMetadataCache.buildSchemaCacheValue(catalog, getBasePaimonTable(), schemaId));
    } catch (Exception e) {
        throw new CacheException("failed to initSchema for: %s.%s.%s.%s",
                e, getCatalog().getName(), getDbName(), getName(), schemaId);
    }
}
/**
 * Resolves the schema cache value that matches the snapshot cache value of the given snapshot.
 *
 * @param snapshot optional MVCC snapshot
 * @return the schema cache value returned by {@code PaimonUtils.getSchemaCacheValue}
 */
private PaimonSchemaCacheValue getPaimonSchemaCacheValue(Optional<MvccSnapshot> snapshot) {
    return PaimonUtils.getSchemaCacheValue(this, getOrFetchSnapshotCacheValue(snapshot));
}
/**
 * Returns the snapshot cache value carried by the given snapshot, or fetches one through
 * {@code PaimonUtils.getSnapshotCacheValue} when no snapshot is supplied.
 *
 * @param snapshot optional MVCC snapshot; when present it is cast to {@link PaimonMvccSnapshot}
 * @return the snapshot cache value
 */
private PaimonSnapshotCacheValue getOrFetchSnapshotCacheValue(Optional<MvccSnapshot> snapshot) {
    if (snapshot.isPresent()) {
        PaimonMvccSnapshot pinned = (PaimonMvccSnapshot) snapshot.get();
        return pinned.getSnapshotCacheValue();
    }
    // Use new lazy-loading snapshot cache API
    return PaimonUtils.getSnapshotCacheValue(snapshot, this);
}
/**
 * Returns the system tables supported for Paimon tables.
 *
 * @return {@code PaimonSysTable.SUPPORTED_SYS_TABLES}
 */
@Override
public Map<String, SysTable> getSupportedSysTables() {
    makeSureInitialized();
    Map<String, SysTable> supported = PaimonSysTable.SUPPORTED_SYS_TABLES;
    return supported;
}
/**
 * Returns the comment of the base Paimon table.
 *
 * @return the table comment, or an empty string when the table has no comment
 */
@Override
public String getComment() {
    // Read the optional comment once and default it, instead of isPresent()/get() on two separate calls.
    return getBasePaimonTable().comment().orElse("");
}
/**
 * Returns the table properties of the base Paimon table.
 *
 * @return for a {@link DataTable}, a copy of its core options plus the comma-joined primary
 *         keys under {@code CoreOptions.PRIMARY_KEY} when any exist; otherwise an empty map
 */
public Map<String, String> getTableProperties() {
    Table table = getBasePaimonTable();
    if (!(table instanceof DataTable)) {
        return Collections.emptyMap();
    }
    DataTable dataTable = (DataTable) table;
    // LinkedHashMap keeps the iteration order of the core options map.
    Map<String, String> properties = new LinkedHashMap<>(dataTable.coreOptions().toMap());
    List<String> primaryKeys = dataTable.primaryKeys();
    if (!primaryKeys.isEmpty()) {
        properties.put(CoreOptions.PRIMARY_KEY.key(), String.join(",", primaryKeys));
    }
    return properties;
}
/**
 * Tells whether the base Paimon table declares any partition keys.
 *
 * @return {@code true} when the table's partition key list is non-empty
 */
@Override
public boolean isPartitionedTable() {
    makeSureInitialized();
    List<String> partitionKeys = getBasePaimonTable().partitionKeys();
    return !partitionKeys.isEmpty();
}
/**
 * Returns the base Paimon table for this external table via {@code PaimonUtils.getPaimonTable}.
 *
 * @return the Paimon table without any snapshot-specific options applied by this class
 */
private Table getBasePaimonTable() {
    Table baseTable = PaimonUtils.getPaimonTable(this);
    return baseTable;
}
}