// HudiExternalMetaCache.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.datasource.hudi;
import org.apache.doris.catalog.Env;
import org.apache.doris.common.Config;
import org.apache.doris.common.util.Util;
import org.apache.doris.datasource.CacheException;
import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.datasource.ExternalCatalog;
import org.apache.doris.datasource.ExternalTable;
import org.apache.doris.datasource.NameMapping;
import org.apache.doris.datasource.SchemaCacheValue;
import org.apache.doris.datasource.TablePartitionValues;
import org.apache.doris.datasource.TablePartitionValues.TablePartitionKey;
import org.apache.doris.datasource.hive.HMSExternalCatalog;
import org.apache.doris.datasource.hive.HMSExternalTable;
import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper;
import org.apache.doris.datasource.metacache.AbstractExternalMetaCache;
import org.apache.doris.datasource.metacache.CacheSpec;
import org.apache.doris.datasource.metacache.MetaCacheEntry;
import org.apache.doris.datasource.metacache.MetaCacheEntryDef;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.engine.HoodieLocalEngineContext;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.view.FileSystemViewManager;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.storage.hadoop.HadoopStorageConfiguration;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.stream.Collectors;
/**
* Hudi engine implementation of {@link AbstractExternalMetaCache}.
*
* <p>Registered entries:
* <ul>
* <li>{@code partition}: mutable {@link TablePartitionValues} holder keyed by table and
* partition column types</li>
* <li>{@code fs_view}: {@link HoodieTableFileSystemView} for timeline-aware file listing</li>
* <li>{@code meta_client}: {@link HoodieTableMetaClient} for table metadata access</li>
* <li>{@code schema}: Hudi schema cache keyed by table identity + timestamp</li>
* </ul>
*
* <p>Partition cache values are updated in place with read/write locks to avoid rebuilding
* large partition maps on every request.
*
* <p>Invalidation behavior:
* <ul>
* <li>db/table invalidation clears all four entries for matching keys</li>
* <li>partition-level invalidation currently falls back to table-level invalidation</li>
* </ul>
*/
public class HudiExternalMetaCache extends AbstractExternalMetaCache {
    private static final Logger LOG = LogManager.getLogger(HudiExternalMetaCache.class);

    /** Engine name passed to the superclass and used in the compatibility property key. */
    public static final String ENGINE = "hudi";
    /** Entry name of the partition-values cache. */
    public static final String ENTRY_PARTITION = "partition";
    /** Entry name of the file-system-view cache. */
    public static final String ENTRY_FS_VIEW = "fs_view";
    /** Entry name of the meta-client cache. */
    public static final String ENTRY_META_CLIENT = "meta_client";
    /** Entry name of the schema cache. */
    public static final String ENTRY_SCHEMA = "schema";

    // Cache spec of the schema entry, built from Config.external_cache_expire_time_seconds_after_access
    // and Config.max_external_schema_cache_num. The values are read once, at class initialization.
    private static final CacheSpec SCHEMA_CACHE_SPEC = CacheSpec.fromTtlValue(
            null, Config.external_cache_expire_time_seconds_after_access, Config.max_external_schema_cache_num);

    private final MetaCacheEntryDef<TablePartitionKey, TablePartitionValues> partitionEntryDef;
    private final MetaCacheEntryDef<HudiFsViewCacheKey, HoodieTableFileSystemView> fsViewEntryDef;
    private final MetaCacheEntryDef<HudiMetaClientCacheKey, HoodieTableMetaClient> metaClientEntryDef;
    private final MetaCacheEntryDef<HudiSchemaCacheKey, SchemaCacheValue> schemaEntryDef;

    /**
     * Builds the four Hudi entry definitions ({@code partition}, {@code fs_view}, {@code meta_client},
     * {@code schema}) and registers them with the base class.
     *
     * @param refreshExecutor executor handed to the superclass constructor
     */
    public HudiExternalMetaCache(ExecutorService refreshExecutor) {
        super(ENGINE, refreshExecutor);
        // The partition entry starts from an empty holder; getPartitionValues() fills it in place.
        partitionEntryDef = MetaCacheEntryDef.of(ENTRY_PARTITION, TablePartitionKey.class, TablePartitionValues.class,
                key -> new TablePartitionValues(), DEFAULT_ENTRY_CACHE_SPEC);
        fsViewEntryDef = MetaCacheEntryDef.of(ENTRY_FS_VIEW, HudiFsViewCacheKey.class, HoodieTableFileSystemView.class,
                this::createFsView, DEFAULT_ENTRY_CACHE_SPEC);
        metaClientEntryDef = MetaCacheEntryDef.of(ENTRY_META_CLIENT, HudiMetaClientCacheKey.class,
                HoodieTableMetaClient.class, this::createHoodieTableMetaClient, DEFAULT_ENTRY_CACHE_SPEC);
        schemaEntryDef = MetaCacheEntryDef.of(ENTRY_SCHEMA, HudiSchemaCacheKey.class, SchemaCacheValue.class,
                this::loadSchemaCacheValue, SCHEMA_CACHE_SPEC);
        registerMetaCacheEntryDef(partitionEntryDef);
        registerMetaCacheEntryDef(fsViewEntryDef);
        registerMetaCacheEntryDef(metaClientEntryDef);
        registerMetaCacheEntryDef(schemaEntryDef);
    }

    /**
     * Looks up the {@link HoodieTableMetaClient} in the {@code meta_client} entry of the given catalog.
     * The entry is registered with {@link #createHoodieTableMetaClient} as its value factory.
     *
     * @param catalogId catalog whose entry is queried
     * @param nameMapping table identity, part of the cache key
     * @param hudiBasePath base path of the Hudi table, part of the cache key
     * @param conf Hadoop configuration, part of the cache key
     * @return the meta client returned by the cache entry
     */
    public HoodieTableMetaClient getHoodieTableMetaClient(
            long catalogId, NameMapping nameMapping, String hudiBasePath, Configuration conf) {
        return metaClientEntry(catalogId).get(HudiMetaClientCacheKey.of(nameMapping, hudiBasePath, conf));
    }

    /**
     * Looks up the {@link HoodieTableFileSystemView} in the {@code fs_view} entry of the given catalog.
     * The entry is registered with {@link #createFsView} as its value factory.
     *
     * @param catalogId catalog whose entry is queried
     * @param dbName database name, part of the cache key
     * @param tbName table name, part of the cache key
     * @param hudiClient meta client of the table, part of the cache key
     * @return the file system view returned by the cache entry
     */
    public HoodieTableFileSystemView getFsView(
            long catalogId, String dbName, String tbName, HoodieTableMetaClient hudiClient) {
        return fsViewEntry(catalogId).get(HudiFsViewCacheKey.of(dbName, tbName, hudiClient));
    }

    /**
     * Looks up the schema cached for a table at a given timestamp. The catalog is taken from
     * {@code nameMapping.getCtlId()}.
     *
     * @param nameMapping table identity
     * @param timestamp timestamp that, together with the table identity, forms the cache key
     * @return the cached value, cast to {@link HudiSchemaCacheValue}
     */
    public HudiSchemaCacheValue getHudiSchemaCacheValue(NameMapping nameMapping, long timestamp) {
        SchemaCacheValue schemaCacheValue = schemaEntry(nameMapping.getCtlId())
                .get(new HudiSchemaCacheKey(nameMapping, timestamp));
        return (HudiSchemaCacheValue) schemaCacheValue;
    }

    /**
     * Returns the partition values of a table as of {@code timestamp}.
     *
     * <p>An empty holder is returned when the table has no partition fields or no completed instant.
     * When {@code timestamp} equals the latest completed instant the call is delegated to
     * {@link #getPartitionValues}, which uses the shared cache. Otherwise a fresh, uncached holder
     * is built from {@code HudiPartitionUtils.getPartitionNamesBeforeOrEquals}.
     *
     * @param table the Hudi table
     * @param tableMetaClient meta client used to read the table config and timeline
     * @param timestamp snapshot instant as a decimal string; parsed with {@link Long#parseLong(String)}
     * @param useHiveSyncPartition forwarded to {@link #getPartitionValues} on the latest-instant path
     * @return partition values for the requested snapshot
     */
    public TablePartitionValues getSnapshotPartitionValues(HMSExternalTable table,
            HoodieTableMetaClient tableMetaClient, String timestamp, boolean useHiveSyncPartition) {
        TablePartitionValues partitionValues = new TablePartitionValues();
        Option<String[]> partitionColumns = tableMetaClient.getTableConfig().getPartitionFields();
        if (!partitionColumns.isPresent() || partitionColumns.get().length == 0) {
            return partitionValues;
        }
        HoodieTimeline timeline = tableMetaClient.getCommitsAndCompactionTimeline().filterCompletedInstants();
        Option<HoodieInstant> lastInstant = timeline.lastInstant();
        if (!lastInstant.isPresent()) {
            return partitionValues;
        }
        long lastTimestamp = Long.parseLong(lastInstant.get().requestedTime());
        // Parse once and reuse. This is deliberately done after the early returns above so an
        // unparsable timestamp still only fails for tables that reach this point.
        long snapshotTimestamp = Long.parseLong(timestamp);
        if (snapshotTimestamp == lastTimestamp) {
            return getPartitionValues(table, tableMetaClient, useHiveSyncPartition);
        }
        List<String> partitionNameAndValues = HudiPartitionUtils.getPartitionNamesBeforeOrEquals(timeline, timestamp);
        List<String> partitionNames = Arrays.asList(partitionColumns.get());
        partitionValues.addPartitions(partitionNameAndValues,
                partitionNameAndValues.stream().map(p -> HudiPartitionUtils.parsePartitionValues(partitionNames, p))
                        .collect(Collectors.toList()), table.getHudiPartitionColumnTypes(snapshotTimestamp),
                Collections.nCopies(partitionNameAndValues.size(), 0L));
        partitionValues.setLastUpdateTimestamp(snapshotTimestamp);
        return partitionValues;
    }

    /**
     * Returns the cached partition values of a table, refreshing them in place when the latest
     * completed instant is newer than the holder's last update timestamp.
     *
     * <p>An empty, uncached holder is returned when the table has no partition fields or no completed
     * instant. Otherwise the holder is fetched from the {@code partition} entry. Freshness is checked
     * first under the read lock and again under the write lock before any reload.
     *
     * @param table the Hudi table; its catalog is cast to {@link HMSExternalCatalog} when a reload is needed
     * @param tableMetaClient meta client used to read the table config, timeline and partition names
     * @param useHiveSyncPartition when {@code true}, partition names are listed through the catalog client
     *         first, falling back to {@code HudiPartitionUtils.getAllPartitionNames} if that list is empty
     * @return the cached (possibly refreshed) partition values
     * @throws CacheException wrapping any exception raised while fetching or refreshing the cached holder
     */
    public TablePartitionValues getPartitionValues(HMSExternalTable table, HoodieTableMetaClient tableMetaClient,
            boolean useHiveSyncPartition) throws CacheException {
        TablePartitionValues partitionValues = new TablePartitionValues();
        Option<String[]> partitionColumns = tableMetaClient.getTableConfig().getPartitionFields();
        if (!partitionColumns.isPresent() || partitionColumns.get().length == 0) {
            return partitionValues;
        }
        HoodieTimeline timeline = tableMetaClient.getCommitsAndCompactionTimeline().filterCompletedInstants();
        Option<HoodieInstant> lastInstant = timeline.lastInstant();
        if (!lastInstant.isPresent()) {
            return partitionValues;
        }
        try {
            long lastTimestamp = Long.parseLong(lastInstant.get().requestedTime());
            partitionValues = partitionEntry(table.getCatalog().getId()).get(
                    new TablePartitionKey(table.getDbName(), table.getName(),
                            table.getHudiPartitionColumnTypes(lastTimestamp)));
            // Fast path: the holder is already at least as new as the latest completed instant.
            partitionValues.readLock().lock();
            try {
                long lastUpdateTimestamp = partitionValues.getLastUpdateTimestamp();
                if (lastTimestamp <= lastUpdateTimestamp) {
                    return partitionValues;
                }
            } finally {
                partitionValues.readLock().unlock();
            }

            partitionValues.writeLock().lock();
            try {
                // Re-check: the read lock was released above, so another thread may have refreshed the
                // holder in between. Without this, the partitions would be reloaded redundantly and an
                // older instant could move the last update timestamp backwards.
                if (lastTimestamp <= partitionValues.getLastUpdateTimestamp()) {
                    return partitionValues;
                }
                HMSExternalCatalog catalog = (HMSExternalCatalog) table.getCatalog();
                List<String> partitionNames;
                if (useHiveSyncPartition) {
                    partitionNames = catalog.getClient()
                            .listPartitionNames(table.getRemoteDbName(), table.getRemoteName());
                    partitionNames = partitionNames.stream()
                            .map(FileUtils::unescapePathName)
                            .collect(Collectors.toList());
                    if (partitionNames.isEmpty()) {
                        LOG.warn("Failed to get partitions from hms api, switch it from hudi api.");
                        partitionNames = HudiPartitionUtils.getAllPartitionNames(tableMetaClient);
                    }
                } else {
                    partitionNames = HudiPartitionUtils.getAllPartitionNames(tableMetaClient);
                }
                List<String> partitionColumnsList = Arrays.asList(partitionColumns.get());
                // Replace the whole content: drop the old partitions, then add the freshly listed ones.
                partitionValues.cleanPartitions();
                partitionValues.addPartitions(partitionNames,
                        partitionNames.stream()
                                .map(p -> HudiPartitionUtils.parsePartitionValues(partitionColumnsList, p))
                                .collect(Collectors.toList()), table.getHudiPartitionColumnTypes(lastTimestamp),
                        Collections.nCopies(partitionNames.size(), 0L));
                partitionValues.setLastUpdateTimestamp(lastTimestamp);
                return partitionValues;
            } finally {
                partitionValues.writeLock().unlock();
            }
        } catch (Exception e) {
            LOG.warn("Failed to get hudi partitions", e);
            throw new CacheException("Failed to get hudi partitions: " + Util.getRootCauseMessage(e), e);
        }
    }

    /**
     * Invalidates, in all four entries of the catalog, every key whose database name equals {@code dbName}.
     * The meta-client and schema entries are matched on the local database name of their name mapping.
     */
    @Override
    public void invalidateDb(long catalogId, String dbName) {
        partitionEntry(catalogId).invalidateIf(key -> key.getDbName().equals(dbName));
        fsViewEntry(catalogId).invalidateIf(key -> key.getDbName().equals(dbName));
        metaClientEntry(catalogId).invalidateIf(
                key -> key.getNameMapping().getLocalDbName().equals(dbName));
        schemaEntry(catalogId).invalidateIf(
                key -> key.getNameMapping().getLocalDbName().equals(dbName));
    }

    /**
     * Invalidates, in all four entries of the catalog, every key that matches both {@code dbName} and
     * {@code tableName}. The meta-client and schema entries are matched on the local names of their
     * name mapping.
     */
    @Override
    public void invalidateTable(long catalogId, String dbName, String tableName) {
        partitionEntry(catalogId).invalidateIf(
                key -> key.getDbName().equals(dbName) && key.getTblName().equals(tableName));
        fsViewEntry(catalogId).invalidateIf(
                key -> key.getDbName().equals(dbName) && key.getTbName().equals(tableName));
        metaClientEntry(catalogId).invalidateIf(key -> key.getNameMapping().getLocalDbName().equals(dbName)
                && key.getNameMapping().getLocalTblName().equals(tableName));
        schemaEntry(catalogId).invalidateIf(key -> key.getNameMapping().getLocalDbName().equals(dbName)
                && key.getNameMapping().getLocalTblName().equals(tableName));
    }

    /**
     * Partition-level invalidation is not implemented separately: the {@code partitions} argument is
     * ignored and the whole table is invalidated.
     */
    @Override
    public void invalidatePartitions(long catalogId, String dbName, String tableName, List<String> partitions) {
        invalidateTable(catalogId, dbName, tableName);
    }

    /** Resolves the {@code partition} entry of the given catalog. */
    private MetaCacheEntry<TablePartitionKey, TablePartitionValues> partitionEntry(long catalogId) {
        return entry(catalogId, partitionEntryDef);
    }

    /** Resolves the {@code fs_view} entry of the given catalog. */
    private MetaCacheEntry<HudiFsViewCacheKey, HoodieTableFileSystemView> fsViewEntry(long catalogId) {
        return entry(catalogId, fsViewEntryDef);
    }

    /** Resolves the {@code meta_client} entry of the given catalog. */
    private MetaCacheEntry<HudiMetaClientCacheKey, HoodieTableMetaClient> metaClientEntry(long catalogId) {
        return entry(catalogId, metaClientEntryDef);
    }

    /** Resolves the {@code schema} entry of the given catalog. */
    private MetaCacheEntry<HudiSchemaCacheKey, SchemaCacheValue> schemaEntry(long catalogId) {
        return entry(catalogId, schemaEntryDef);
    }

    /**
     * Value factory of the {@code fs_view} entry: creates an in-memory file system view for the meta
     * client carried by the key, using a default-built {@link HoodieMetadataConfig}.
     */
    private HoodieTableFileSystemView createFsView(HudiFsViewCacheKey key) {
        HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().build();
        HoodieLocalEngineContext ctx = new HoodieLocalEngineContext(key.getClient().getStorageConf());
        return FileSystemViewManager.createInMemoryFileSystemView(ctx, key.getClient(), metadataConfig);
    }

    /**
     * Value factory of the {@code meta_client} entry: builds a {@link HoodieTableMetaClient} for the
     * key's base path inside {@code HiveMetaStoreClientHelper.ugiDoAs} with the key's configuration.
     */
    private HoodieTableMetaClient createHoodieTableMetaClient(HudiMetaClientCacheKey key) {
        // One placeholder for the one argument; the previous "{}.{}" left a literal "{}" in the log.
        LOG.debug("create hudi table meta client for {}", key.getNameMapping().getFullLocalName());
        HadoopStorageConfiguration hadoopStorageConfiguration = new HadoopStorageConfiguration(key.getConf());
        return HiveMetaStoreClientHelper.ugiDoAs(
                key.getConf(),
                () -> HoodieTableMetaClient.builder()
                        .setConf(hadoopStorageConfiguration)
                        .setBasePath(key.getHudiBasePath())
                        .build());
    }

    /**
     * Value factory of the {@code schema} entry: resolves the Doris table for the key and asks it to
     * initialize its schema for that key.
     *
     * @throws CacheException if the table cannot be resolved or it returns no schema value
     */
    private SchemaCacheValue loadSchemaCacheValue(HudiSchemaCacheKey key) {
        ExternalTable dorisTable = findExternalTable(key.getNameMapping());
        return dorisTable.initSchemaAndUpdateTime(key).orElseThrow(() ->
                new CacheException("failed to load hudi schema cache value for: %s.%s.%s, timestamp: %s",
                        null, key.getNameMapping().getCtlId(), key.getNameMapping().getLocalDbName(),
                        key.getNameMapping().getLocalTblName(), key.getTimestamp()));
    }

    /**
     * Maps the catalog property {@code ExternalCatalog.SCHEMA_CACHE_TTL_SECOND} to the property key
     * {@code meta.cache.hudi.schema.ttl-second}.
     */
    @Override
    protected Map<String, String> catalogPropertyCompatibilityMap() {
        return Collections.singletonMap(
                ExternalCatalog.SCHEMA_CACHE_TTL_SECOND,
                "meta.cache." + ENGINE + "." + ENTRY_SCHEMA + ".ttl-second");
    }

    /**
     * Resolves the Doris {@link ExternalTable} named by {@code nameMapping} through the current
     * environment's catalog manager.
     *
     * @throws CacheException if the catalog is not an {@link ExternalCatalog} or the table is not found
     */
    private ExternalTable findExternalTable(NameMapping nameMapping) {
        CatalogIf<?> catalog = Env.getCurrentEnv().getCatalogMgr().getCatalog(nameMapping.getCtlId());
        if (!(catalog instanceof ExternalCatalog)) {
            throw new CacheException("catalog %s is not external when loading hudi schema cache",
                    null, nameMapping.getCtlId());
        }
        ExternalCatalog externalCatalog = (ExternalCatalog) catalog;
        return externalCatalog.getDb(nameMapping.getLocalDbName())
                .flatMap(db -> db.getTable(nameMapping.getLocalTblName()))
                .orElseThrow(() -> new CacheException(
                        "table %s.%s.%s not found when loading hudi schema cache",
                        null, nameMapping.getCtlId(), nameMapping.getLocalDbName(),
                        nameMapping.getLocalTblName()));
    }
}