// HudiExternalMetaCache.java

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.datasource.hudi;

import org.apache.doris.catalog.Env;
import org.apache.doris.common.Config;
import org.apache.doris.common.util.Util;
import org.apache.doris.datasource.CacheException;
import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.datasource.ExternalCatalog;
import org.apache.doris.datasource.ExternalTable;
import org.apache.doris.datasource.NameMapping;
import org.apache.doris.datasource.SchemaCacheValue;
import org.apache.doris.datasource.TablePartitionValues;
import org.apache.doris.datasource.TablePartitionValues.TablePartitionKey;
import org.apache.doris.datasource.hive.HMSExternalCatalog;
import org.apache.doris.datasource.hive.HMSExternalTable;
import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper;
import org.apache.doris.datasource.metacache.AbstractExternalMetaCache;
import org.apache.doris.datasource.metacache.CacheSpec;
import org.apache.doris.datasource.metacache.MetaCacheEntry;
import org.apache.doris.datasource.metacache.MetaCacheEntryDef;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.engine.HoodieLocalEngineContext;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.view.FileSystemViewManager;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.storage.hadoop.HadoopStorageConfiguration;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.stream.Collectors;

/**
 * Hudi engine implementation of {@link AbstractExternalMetaCache}.
 *
 * <p>Registered entries:
 * <ul>
 *   <li>{@code partition}: mutable {@link TablePartitionValues} holder keyed by table and
 *   partition column types</li>
 *   <li>{@code fs_view}: {@link HoodieTableFileSystemView} for timeline-aware file listing</li>
 *   <li>{@code meta_client}: {@link HoodieTableMetaClient} for table metadata access</li>
 *   <li>{@code schema}: Hudi schema cache keyed by table identity + timestamp</li>
 * </ul>
 *
 * <p>Partition cache values are updated in place with read/write locks to avoid rebuilding
 * large partition maps on every request.
 *
 * <p>Invalidation behavior:
 * <ul>
 *   <li>db/table invalidation clears all four entries for matching keys</li>
 *   <li>partition-level invalidation currently falls back to table-level invalidation</li>
 * </ul>
 */
public class HudiExternalMetaCache extends AbstractExternalMetaCache {
    private static final Logger LOG = LogManager.getLogger(HudiExternalMetaCache.class);

    public static final String ENGINE = "hudi";
    public static final String ENTRY_PARTITION = "partition";
    public static final String ENTRY_FS_VIEW = "fs_view";
    public static final String ENTRY_META_CLIENT = "meta_client";
    public static final String ENTRY_SCHEMA = "schema";

    // Schema entries use a dedicated spec so their TTL/size can be tuned via FE config,
    // independently of the default spec shared by the other three entries.
    private static final CacheSpec SCHEMA_CACHE_SPEC = CacheSpec.fromTtlValue(
            null, Config.external_cache_expire_time_seconds_after_access, Config.max_external_schema_cache_num);

    private final MetaCacheEntryDef<TablePartitionKey, TablePartitionValues> partitionEntryDef;
    private final MetaCacheEntryDef<HudiFsViewCacheKey, HoodieTableFileSystemView> fsViewEntryDef;
    private final MetaCacheEntryDef<HudiMetaClientCacheKey, HoodieTableMetaClient> metaClientEntryDef;
    private final MetaCacheEntryDef<HudiSchemaCacheKey, SchemaCacheValue> schemaEntryDef;

    /**
     * Creates the Hudi meta cache and registers its four cache entry definitions
     * (partition, fs_view, meta_client, schema).
     *
     * @param refreshExecutor executor used by the parent cache for async refresh
     */
    public HudiExternalMetaCache(ExecutorService refreshExecutor) {
        super(ENGINE, refreshExecutor);
        // Partition values are created empty and then filled/updated in place under
        // their own read/write lock (see getPartitionValues).
        partitionEntryDef = MetaCacheEntryDef.of(ENTRY_PARTITION, TablePartitionKey.class, TablePartitionValues.class,
                key -> new TablePartitionValues(), DEFAULT_ENTRY_CACHE_SPEC);
        fsViewEntryDef = MetaCacheEntryDef.of(ENTRY_FS_VIEW, HudiFsViewCacheKey.class, HoodieTableFileSystemView.class,
                this::createFsView, DEFAULT_ENTRY_CACHE_SPEC);
        metaClientEntryDef = MetaCacheEntryDef.of(ENTRY_META_CLIENT, HudiMetaClientCacheKey.class,
                HoodieTableMetaClient.class, this::createHoodieTableMetaClient, DEFAULT_ENTRY_CACHE_SPEC);
        schemaEntryDef = MetaCacheEntryDef.of(ENTRY_SCHEMA, HudiSchemaCacheKey.class, SchemaCacheValue.class,
                this::loadSchemaCacheValue, SCHEMA_CACHE_SPEC);
        registerMetaCacheEntryDef(partitionEntryDef);
        registerMetaCacheEntryDef(fsViewEntryDef);
        registerMetaCacheEntryDef(metaClientEntryDef);
        registerMetaCacheEntryDef(schemaEntryDef);
    }

    /**
     * Returns the cached {@link HoodieTableMetaClient} for the given table/base path,
     * creating it on first access.
     */
    public HoodieTableMetaClient getHoodieTableMetaClient(
            long catalogId, NameMapping nameMapping, String hudiBasePath, Configuration conf) {
        return metaClientEntry(catalogId).get(HudiMetaClientCacheKey.of(nameMapping, hudiBasePath, conf));
    }

    /**
     * Returns the cached in-memory {@link HoodieTableFileSystemView} for the given table,
     * creating it on first access.
     */
    public HoodieTableFileSystemView getFsView(
            long catalogId, String dbName, String tbName, HoodieTableMetaClient hudiClient) {
        return fsViewEntry(catalogId).get(HudiFsViewCacheKey.of(dbName, tbName, hudiClient));
    }

    /**
     * Returns the cached Hudi schema value for the given table identity and timestamp.
     */
    public HudiSchemaCacheValue getHudiSchemaCacheValue(NameMapping nameMapping, long timestamp) {
        SchemaCacheValue schemaCacheValue = schemaEntry(nameMapping.getCtlId())
                .get(new HudiSchemaCacheKey(nameMapping, timestamp));
        return (HudiSchemaCacheValue) schemaCacheValue;
    }

    /**
     * Builds partition values as of a specific snapshot timestamp.
     *
     * <p>If the requested timestamp equals the latest completed instant, the (cached)
     * current partition values are returned; otherwise a fresh, uncached
     * {@link TablePartitionValues} is assembled from the timeline.
     *
     * @param table               the HMS external table
     * @param tableMetaClient     Hudi meta client for the table
     * @param timestamp           snapshot instant time (numeric string)
     * @param useHiveSyncPartition whether to prefer HMS-synced partition names (only
     *                            relevant when falling through to the latest snapshot)
     */
    public TablePartitionValues getSnapshotPartitionValues(HMSExternalTable table,
            HoodieTableMetaClient tableMetaClient, String timestamp, boolean useHiveSyncPartition) {
        TablePartitionValues partitionValues = new TablePartitionValues();
        Option<String[]> partitionColumns = tableMetaClient.getTableConfig().getPartitionFields();
        // Non-partitioned table: nothing to list.
        if (!partitionColumns.isPresent() || partitionColumns.get().length == 0) {
            return partitionValues;
        }
        HoodieTimeline timeline = tableMetaClient.getCommitsAndCompactionTimeline().filterCompletedInstants();
        Option<HoodieInstant> lastInstant = timeline.lastInstant();
        // No completed instants yet: table has no visible data.
        if (!lastInstant.isPresent()) {
            return partitionValues;
        }
        long snapshotTimestamp = Long.parseLong(timestamp);
        long lastTimestamp = Long.parseLong(lastInstant.get().requestedTime());
        if (snapshotTimestamp == lastTimestamp) {
            // Asking for the newest snapshot: reuse the cached current partition values.
            return getPartitionValues(table, tableMetaClient, useHiveSyncPartition);
        }
        List<String> partitionNameAndValues = HudiPartitionUtils.getPartitionNamesBeforeOrEquals(timeline, timestamp);
        List<String> partitionNames = Arrays.asList(partitionColumns.get());
        partitionValues.addPartitions(partitionNameAndValues,
                partitionNameAndValues.stream().map(p -> HudiPartitionUtils.parsePartitionValues(partitionNames, p))
                        .collect(Collectors.toList()), table.getHudiPartitionColumnTypes(snapshotTimestamp),
                Collections.nCopies(partitionNameAndValues.size(), 0L));
        partitionValues.setLastUpdateTimestamp(snapshotTimestamp);
        return partitionValues;
    }

    /**
     * Returns the current partition values for the table, refreshing the cached
     * {@link TablePartitionValues} in place when the Hudi timeline has advanced past
     * the cached timestamp.
     *
     * <p>The cached value is shared and guarded by its own read/write lock; a fast
     * read-locked check avoids rebuilding when the cache is already up to date, and
     * the staleness condition is re-checked after upgrading to the write lock so
     * concurrent callers do not redundantly rebuild the partition map.
     *
     * @param table               the HMS external table
     * @param tableMetaClient     Hudi meta client for the table
     * @param useHiveSyncPartition prefer HMS-synced partition names, falling back to
     *                            listing via the Hudi API when HMS returns nothing
     * @throws CacheException if partition listing or parsing fails
     */
    public TablePartitionValues getPartitionValues(HMSExternalTable table, HoodieTableMetaClient tableMetaClient,
            boolean useHiveSyncPartition) throws CacheException {
        TablePartitionValues partitionValues = new TablePartitionValues();
        Option<String[]> partitionColumns = tableMetaClient.getTableConfig().getPartitionFields();
        // Non-partitioned table: nothing to list.
        if (!partitionColumns.isPresent() || partitionColumns.get().length == 0) {
            return partitionValues;
        }
        HoodieTimeline timeline = tableMetaClient.getCommitsAndCompactionTimeline().filterCompletedInstants();
        Option<HoodieInstant> lastInstant = timeline.lastInstant();
        // No completed instants yet: table has no visible data.
        if (!lastInstant.isPresent()) {
            return partitionValues;
        }
        try {
            long lastTimestamp = Long.parseLong(lastInstant.get().requestedTime());
            partitionValues = partitionEntry(table.getCatalog().getId()).get(
                    new TablePartitionKey(table.getDbName(), table.getName(),
                            table.getHudiPartitionColumnTypes(lastTimestamp)));
            // Fast path: cache is already at (or beyond) the latest instant.
            partitionValues.readLock().lock();
            try {
                long lastUpdateTimestamp = partitionValues.getLastUpdateTimestamp();
                if (lastTimestamp <= lastUpdateTimestamp) {
                    return partitionValues;
                }
            } finally {
                partitionValues.readLock().unlock();
            }

            partitionValues.writeLock().lock();
            try {
                // Re-check under the write lock: another thread may have refreshed the
                // partitions while we were waiting to upgrade from the read lock.
                if (lastTimestamp <= partitionValues.getLastUpdateTimestamp()) {
                    return partitionValues;
                }
                HMSExternalCatalog catalog = (HMSExternalCatalog) table.getCatalog();
                List<String> partitionNames;
                if (useHiveSyncPartition) {
                    partitionNames = catalog.getClient()
                            .listPartitionNames(table.getRemoteDbName(), table.getRemoteName());
                    // HMS partition names are path-escaped; unescape to raw values.
                    partitionNames = partitionNames.stream()
                            .map(FileUtils::unescapePathName)
                            .collect(Collectors.toList());
                    if (partitionNames.isEmpty()) {
                        LOG.warn("Failed to get partitions from hms api, switch it from hudi api.");
                        partitionNames = HudiPartitionUtils.getAllPartitionNames(tableMetaClient);
                    }
                } else {
                    partitionNames = HudiPartitionUtils.getAllPartitionNames(tableMetaClient);
                }
                List<String> partitionColumnsList = Arrays.asList(partitionColumns.get());
                // Rebuild the shared value in place instead of re-inserting into the cache.
                partitionValues.cleanPartitions();
                partitionValues.addPartitions(partitionNames,
                        partitionNames.stream()
                                .map(p -> HudiPartitionUtils.parsePartitionValues(partitionColumnsList, p))
                                .collect(Collectors.toList()), table.getHudiPartitionColumnTypes(lastTimestamp),
                        Collections.nCopies(partitionNames.size(), 0L));
                partitionValues.setLastUpdateTimestamp(lastTimestamp);
                return partitionValues;
            } finally {
                partitionValues.writeLock().unlock();
            }
        } catch (Exception e) {
            LOG.warn("Failed to get hudi partitions", e);
            throw new CacheException("Failed to get hudi partitions: " + Util.getRootCauseMessage(e), e);
        }
    }

    /** Invalidates all four entry types for every key belonging to the given database. */
    @Override
    public void invalidateDb(long catalogId, String dbName) {
        partitionEntry(catalogId).invalidateIf(key -> key.getDbName().equals(dbName));
        fsViewEntry(catalogId).invalidateIf(key -> key.getDbName().equals(dbName));
        metaClientEntry(catalogId).invalidateIf(
                key -> key.getNameMapping().getLocalDbName().equals(dbName));
        schemaEntry(catalogId).invalidateIf(
                key -> key.getNameMapping().getLocalDbName().equals(dbName));
    }

    /** Invalidates all four entry types for every key belonging to the given table. */
    @Override
    public void invalidateTable(long catalogId, String dbName, String tableName) {
        partitionEntry(catalogId).invalidateIf(
                key -> key.getDbName().equals(dbName) && key.getTblName().equals(tableName));
        fsViewEntry(catalogId).invalidateIf(
                key -> key.getDbName().equals(dbName) && key.getTbName().equals(tableName));
        metaClientEntry(catalogId).invalidateIf(key -> key.getNameMapping().getLocalDbName().equals(dbName)
                && key.getNameMapping().getLocalTblName().equals(tableName));
        schemaEntry(catalogId).invalidateIf(key -> key.getNameMapping().getLocalDbName().equals(dbName)
                && key.getNameMapping().getLocalTblName().equals(tableName));
    }

    /**
     * Partition-level invalidation is not supported at finer granularity; falls back to
     * invalidating the whole table (the {@code partitions} argument is ignored).
     */
    @Override
    public void invalidatePartitions(long catalogId, String dbName, String tableName, List<String> partitions) {
        invalidateTable(catalogId, dbName, tableName);
    }

    private MetaCacheEntry<TablePartitionKey, TablePartitionValues> partitionEntry(long catalogId) {
        return entry(catalogId, partitionEntryDef);
    }

    private MetaCacheEntry<HudiFsViewCacheKey, HoodieTableFileSystemView> fsViewEntry(long catalogId) {
        return entry(catalogId, fsViewEntryDef);
    }

    private MetaCacheEntry<HudiMetaClientCacheKey, HoodieTableMetaClient> metaClientEntry(long catalogId) {
        return entry(catalogId, metaClientEntryDef);
    }

    private MetaCacheEntry<HudiSchemaCacheKey, SchemaCacheValue> schemaEntry(long catalogId) {
        return entry(catalogId, schemaEntryDef);
    }

    /** Cache loader: builds an in-memory file system view from the key's meta client. */
    private HoodieTableFileSystemView createFsView(HudiFsViewCacheKey key) {
        HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().build();
        HoodieLocalEngineContext ctx = new HoodieLocalEngineContext(key.getClient().getStorageConf());
        return FileSystemViewManager.createInMemoryFileSystemView(ctx, key.getClient(), metadataConfig);
    }

    /** Cache loader: builds a {@link HoodieTableMetaClient} under the catalog's UGI. */
    private HoodieTableMetaClient createHoodieTableMetaClient(HudiMetaClientCacheKey key) {
        // Single placeholder: getFullLocalName() already yields the full "db.tbl" name.
        LOG.debug("create hudi table meta client for {}", key.getNameMapping().getFullLocalName());
        HadoopStorageConfiguration hadoopStorageConfiguration = new HadoopStorageConfiguration(key.getConf());
        return HiveMetaStoreClientHelper.ugiDoAs(
                key.getConf(),
                () -> HoodieTableMetaClient.builder()
                        .setConf(hadoopStorageConfiguration)
                        .setBasePath(key.getHudiBasePath())
                        .build());
    }

    /**
     * Cache loader: resolves the external table for the key and loads its schema.
     *
     * @throws CacheException if the table cannot load a schema value
     */
    private SchemaCacheValue loadSchemaCacheValue(HudiSchemaCacheKey key) {
        ExternalTable dorisTable = findExternalTable(key.getNameMapping());
        return dorisTable.initSchemaAndUpdateTime(key).orElseThrow(() ->
                new CacheException("failed to load hudi schema cache value for: %s.%s.%s, timestamp: %s",
                        null, key.getNameMapping().getCtlId(), key.getNameMapping().getLocalDbName(),
                        key.getNameMapping().getLocalTblName(), key.getTimestamp()));
    }

    /**
     * Maps the legacy catalog property {@code schema.cache.ttl-second} to the
     * engine-scoped schema entry TTL property name.
     */
    @Override
    protected Map<String, String> catalogPropertyCompatibilityMap() {
        return Collections.singletonMap(
                ExternalCatalog.SCHEMA_CACHE_TTL_SECOND,
                "meta.cache." + ENGINE + "." + ENTRY_SCHEMA + ".ttl-second");
    }

    /**
     * Resolves the {@link ExternalTable} for the given name mapping via the catalog manager.
     *
     * @throws CacheException if the catalog is not external or the table does not exist
     */
    private ExternalTable findExternalTable(NameMapping nameMapping) {
        CatalogIf<?> catalog = Env.getCurrentEnv().getCatalogMgr().getCatalog(nameMapping.getCtlId());
        if (!(catalog instanceof ExternalCatalog)) {
            throw new CacheException("catalog %s is not external when loading hudi schema cache",
                    null, nameMapping.getCtlId());
        }
        ExternalCatalog externalCatalog = (ExternalCatalog) catalog;
        return externalCatalog.getDb(nameMapping.getLocalDbName())
                .flatMap(db -> db.getTable(nameMapping.getLocalTblName()))
                .orElseThrow(() -> new CacheException(
                        "table %s.%s.%s not found when loading hudi schema cache",
                        null, nameMapping.getCtlId(), nameMapping.getLocalDbName(),
                        nameMapping.getLocalTblName()));
    }
}