// HudiExternalMetaCache.java

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.datasource.hudi;

import org.apache.doris.common.util.Util;
import org.apache.doris.datasource.CacheException;
import org.apache.doris.datasource.ExternalCatalog;
import org.apache.doris.datasource.ExternalTable;
import org.apache.doris.datasource.NameMapping;
import org.apache.doris.datasource.SchemaCacheValue;
import org.apache.doris.datasource.TablePartitionValues;
import org.apache.doris.datasource.hive.HMSExternalCatalog;
import org.apache.doris.datasource.hive.HMSExternalTable;
import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper;
import org.apache.doris.datasource.metacache.AbstractExternalMetaCache;
import org.apache.doris.datasource.metacache.MetaCacheEntryDef;
import org.apache.doris.datasource.metacache.MetaCacheEntryInvalidation;

import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.engine.HoodieLocalEngineContext;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.view.FileSystemViewManager;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.storage.hadoop.HadoopStorageConfiguration;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.stream.Collectors;

/**
 * Hudi engine implementation of {@link AbstractExternalMetaCache}.
 *
 * <p>Registered entries:
 * <ul>
 *   <li>{@code partition}: partition metadata keyed by {@link HudiPartitionCacheKey}
 *       (table identity + snapshot timestamp + partition-listing mode)</li>
 *   <li>{@code fs_view}: {@link HoodieTableFileSystemView} keyed by {@link HudiFsViewCacheKey},
 *       which is built from a {@link NameMapping}</li>
 *   <li>{@code meta_client}: {@link HoodieTableMetaClient} keyed by {@link HudiMetaClientCacheKey},
 *       which is built from a {@link NameMapping}</li>
 *   <li>{@code schema}: Hudi schema cache keyed by {@link HudiSchemaCacheKey}
 *       (table identity + timestamp)</li>
 * </ul>
 *
 * <p>Invalidation behavior:
 * <ul>
 *   <li>db/table invalidation clears all four entries for matching keys</li>
 *   <li>partition-level invalidation currently falls back to table-level invalidation</li>
 * </ul>
 */
public class HudiExternalMetaCache extends AbstractExternalMetaCache {
    private static final Logger LOG = LogManager.getLogger(HudiExternalMetaCache.class);

    /** Engine name passed to the base class and used when resolving tables. */
    public static final String ENGINE = "hudi";
    /** Entry name of the partition-values cache. */
    public static final String ENTRY_PARTITION = "partition";
    /** Entry name of the file-system-view cache. */
    public static final String ENTRY_FS_VIEW = "fs_view";
    /** Entry name of the meta-client cache. */
    public static final String ENTRY_META_CLIENT = "meta_client";
    /** Entry name of the schema cache. */
    public static final String ENTRY_SCHEMA = "schema";

    private final EntryHandle<HudiPartitionCacheKey, TablePartitionValues> partitionEntry;
    private final EntryHandle<HudiFsViewCacheKey, HoodieTableFileSystemView> fsViewEntry;
    private final EntryHandle<HudiMetaClientCacheKey, HoodieTableMetaClient> metaClientEntry;
    private final EntryHandle<HudiSchemaCacheKey, SchemaCacheValue> schemaEntry;

    /**
     * Registers the four Hudi cache entries together with their loader functions.
     *
     * <p>The partition, fs_view and meta_client entries use {@code defaultEntryCacheSpec()};
     * the schema entry uses {@code defaultSchemaCacheSpec()}. Every entry is invalidated by the
     * {@link NameMapping} carried in its key.
     *
     * @param refreshExecutor executor handed to the base class
     */
    public HudiExternalMetaCache(ExecutorService refreshExecutor) {
        super(ENGINE, refreshExecutor);
        partitionEntry = registerEntry(MetaCacheEntryDef.of(ENTRY_PARTITION, HudiPartitionCacheKey.class,
                TablePartitionValues.class, this::loadPartitionValuesCacheValue, defaultEntryCacheSpec(),
                MetaCacheEntryInvalidation.forNameMapping(HudiPartitionCacheKey::getNameMapping)));
        fsViewEntry = registerEntry(MetaCacheEntryDef.of(ENTRY_FS_VIEW, HudiFsViewCacheKey.class,
                HoodieTableFileSystemView.class, this::createFsView, defaultEntryCacheSpec(),
                MetaCacheEntryInvalidation.forNameMapping(HudiFsViewCacheKey::getNameMapping)));
        metaClientEntry = registerEntry(MetaCacheEntryDef.of(ENTRY_META_CLIENT, HudiMetaClientCacheKey.class,
                HoodieTableMetaClient.class, this::createHoodieTableMetaClient, defaultEntryCacheSpec(),
                MetaCacheEntryInvalidation.forNameMapping(HudiMetaClientCacheKey::getNameMapping)));
        schemaEntry = registerEntry(MetaCacheEntryDef.of(ENTRY_SCHEMA, HudiSchemaCacheKey.class,
                SchemaCacheValue.class, this::loadSchemaCacheValue, defaultSchemaCacheSpec(),
                MetaCacheEntryInvalidation.forNameMapping(HudiSchemaCacheKey::getNameMapping)));
    }

    /**
     * Looks up the {@link HoodieTableMetaClient} of a table in the {@code meta_client} entry of the
     * catalog identified by {@code nameMapping.getCtlId()}.
     *
     * @param nameMapping identity of the Hudi table
     * @return the meta client stored under the key built from {@code nameMapping}
     */
    public HoodieTableMetaClient getHoodieTableMetaClient(NameMapping nameMapping) {
        return metaClientEntry.get(nameMapping.getCtlId()).get(HudiMetaClientCacheKey.of(nameMapping));
    }

    /**
     * Looks up the {@link HoodieTableFileSystemView} of a table in the {@code fs_view} entry of the
     * catalog identified by {@code nameMapping.getCtlId()}.
     *
     * @param nameMapping identity of the Hudi table
     * @return the file system view stored under the key built from {@code nameMapping}
     */
    public HoodieTableFileSystemView getFsView(NameMapping nameMapping) {
        return fsViewEntry.get(nameMapping.getCtlId()).get(HudiFsViewCacheKey.of(nameMapping));
    }

    /**
     * Looks up the schema of a table at a given timestamp in the {@code schema} entry.
     *
     * @param nameMapping identity of the Hudi table
     * @param timestamp timestamp that, together with {@code nameMapping}, forms the cache key
     * @return the cached value cast to {@link HudiSchemaCacheValue}
     * @throws ClassCastException if the cached value is not a {@link HudiSchemaCacheValue}
     */
    public HudiSchemaCacheValue getHudiSchemaCacheValue(NameMapping nameMapping, long timestamp) {
        SchemaCacheValue schemaCacheValue = schemaEntry.get(nameMapping.getCtlId())
                .get(new HudiSchemaCacheKey(nameMapping, timestamp));
        return (HudiSchemaCacheValue) schemaCacheValue;
    }

    /**
     * Looks up the partition values of a table for an explicitly requested snapshot timestamp.
     *
     * @param table the Hudi table
     * @param timestamp snapshot timestamp as a decimal string; parsed with {@link Long#parseLong(String)}
     * @param useHiveSyncPartition partition-listing mode, part of the cache key
     * @return the partition values stored under the resulting key
     * @throws NumberFormatException if {@code timestamp} is not a parsable {@code long}
     */
    public TablePartitionValues getSnapshotPartitionValues(HMSExternalTable table,
            String timestamp, boolean useHiveSyncPartition) {
        return partitionEntry.get(table.getCatalog().getId()).get(
                HudiPartitionCacheKey.of(table.getOrBuildNameMapping(), Long.parseLong(timestamp),
                        useHiveSyncPartition));
    }

    /**
     * Looks up the partition values of a table as of its latest completed instant.
     *
     * <p>Returns a fresh, empty {@link TablePartitionValues} without touching the partition entry
     * when the table declares no partition fields or its commits-and-compaction timeline has no
     * completed instant.
     *
     * @param table the Hudi table
     * @param useHiveSyncPartition partition-listing mode, part of the cache key
     * @return the partition values keyed by the requested time of the latest completed instant
     * @throws CacheException declared by the original signature; presumably propagated from the
     *         underlying cache loaders
     */
    public TablePartitionValues getPartitionValues(HMSExternalTable table, boolean useHiveSyncPartition)
            throws CacheException {
        NameMapping nameMapping = table.getOrBuildNameMapping();
        HoodieTableMetaClient tableMetaClient = getHoodieTableMetaClient(nameMapping);
        Option<String[]> partitionColumns = tableMetaClient.getTableConfig().getPartitionFields();
        if (!partitionColumns.isPresent() || partitionColumns.get().length == 0) {
            // Non-partitioned table: nothing to cache.
            return new TablePartitionValues();
        }
        HoodieTimeline timeline = tableMetaClient.getCommitsAndCompactionTimeline().filterCompletedInstants();
        Option<HoodieInstant> lastInstant = timeline.lastInstant();
        if (!lastInstant.isPresent()) {
            // No completed instant yet, so there is no snapshot to key the lookup by.
            return new TablePartitionValues();
        }
        long lastTimestamp = Long.parseLong(lastInstant.get().requestedTime());
        return partitionEntry.get(table.getCatalog().getId()).get(
                HudiPartitionCacheKey.of(nameMapping, lastTimestamp, useHiveSyncPartition));
    }

    /**
     * Loader of the {@code fs_view} entry: builds an in-memory file system view on top of the
     * table's meta client, using a default-built {@link HoodieMetadataConfig}.
     *
     * @param key cache key carrying the table identity
     * @return a new in-memory {@link HoodieTableFileSystemView}
     */
    private HoodieTableFileSystemView createFsView(HudiFsViewCacheKey key) {
        // Go through the meta_client entry so the view and other callers share one meta client.
        HoodieTableMetaClient tableMetaClient = getHoodieTableMetaClient(key.getNameMapping());
        HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().build();
        HoodieLocalEngineContext ctx = new HoodieLocalEngineContext(tableMetaClient.getStorageConf());
        return FileSystemViewManager.createInMemoryFileSystemView(ctx, tableMetaClient, metadataConfig);
    }

    /**
     * Loader of the {@code meta_client} entry: builds a {@link HoodieTableMetaClient} rooted at the
     * table's storage location, using the catalog's configuration.
     *
     * <p>The client is built inside {@link HiveMetaStoreClientHelper#ugiDoAs}.
     *
     * @param key cache key carrying the table identity
     * @return a new meta client for the table
     * @throws CacheException if the resolved table is not an {@link HMSExternalTable}
     */
    private HoodieTableMetaClient createHoodieTableMetaClient(HudiMetaClientCacheKey key) {
        // One placeholder for the single full-local-name argument.
        LOG.debug("create hudi table meta client for {}", key.getNameMapping().getFullLocalName());
        HMSExternalTable hudiTable = findHudiTable(key.getNameMapping());
        HadoopStorageConfiguration hadoopStorageConfiguration =
                new HadoopStorageConfiguration(hudiTable.getCatalog().getConfiguration());
        return HiveMetaStoreClientHelper.ugiDoAs(
                hudiTable.getCatalog().getConfiguration(),
                () -> HoodieTableMetaClient.builder()
                        .setConf(hadoopStorageConfiguration)
                        .setBasePath(hudiTable.getRemoteTable().getSd().getLocation())
                        .build());
    }

    /**
     * Loader of the {@code partition} entry: resolves the table and its meta client from the key
     * and delegates to {@link #loadPartitionValues}.
     *
     * @param key cache key carrying table identity, timestamp and partition-listing mode
     * @return the loaded partition values
     */
    private TablePartitionValues loadPartitionValuesCacheValue(HudiPartitionCacheKey key) {
        HMSExternalTable hudiTable = findHudiTable(key.getNameMapping());
        HoodieTableMetaClient tableMetaClient = getHoodieTableMetaClient(key.getNameMapping());
        return loadPartitionValues(hudiTable, tableMetaClient, key.getTimestamp(), key.isUseHiveSyncPartition());
    }

    /**
     * Builds the {@link TablePartitionValues} of a table for the given timestamp.
     *
     * <p>Returns an empty value when the table declares no partition fields. Otherwise the
     * partition names are listed via {@link #loadPartitionNames} and each name is parsed into its
     * column values.
     *
     * @param table the Hudi table
     * @param tableMetaClient meta client of {@code table}
     * @param timestamp snapshot timestamp to load partitions for
     * @param useHiveSyncPartition whether the metastore may be used to list partition names
     * @return the populated partition values, with the last-update timestamp set to {@code timestamp}
     * @throws CacheException wrapping any exception raised while loading
     */
    private TablePartitionValues loadPartitionValues(HMSExternalTable table, HoodieTableMetaClient tableMetaClient,
            long timestamp, boolean useHiveSyncPartition) {
        try {
            TablePartitionValues partitionValues = new TablePartitionValues();
            Option<String[]> partitionColumns = tableMetaClient.getTableConfig().getPartitionFields();
            if (!partitionColumns.isPresent() || partitionColumns.get().length == 0) {
                return partitionValues;
            }
            HoodieTimeline timeline = tableMetaClient.getCommitsAndCompactionTimeline().filterCompletedInstants();
            List<String> partitionNames = loadPartitionNames(table, tableMetaClient, timeline, timestamp,
                    useHiveSyncPartition);
            List<String> partitionColumnsList = Arrays.asList(partitionColumns.get());
            List<List<String>> parsedValues = partitionNames.stream()
                    .map(partition -> HudiPartitionUtils.parsePartitionValues(partitionColumnsList, partition))
                    .collect(Collectors.toList());
            // NOTE(review): the last argument supplies one 0L per partition; presumably a
            // per-partition update time -- confirm against TablePartitionValues.addPartitions.
            partitionValues.addPartitions(partitionNames,
                    parsedValues,
                    table.getHudiPartitionColumnTypes(timestamp),
                    Collections.nCopies(partitionNames.size(), 0L));
            partitionValues.setLastUpdateTimestamp(timestamp);
            return partitionValues;
        } catch (Exception e) {
            LOG.warn("Failed to get hudi partitions", e);
            // The first CacheException argument is used as a format string elsewhere in this class,
            // so the root-cause text is passed as a %s argument: a literal '%' in it (e.g. from an
            // escaped partition path) must not be interpreted as a format specifier.
            throw new CacheException("Failed to get hudi partitions: %s", e, Util.getRootCauseMessage(e));
        }
    }

    /**
     * Lists the partition names of a table as of {@code timestamp}.
     *
     * <ul>
     *   <li>No completed instant on {@code timeline}: empty list.</li>
     *   <li>{@code timestamp} differs from the latest completed instant: names are derived from the
     *       timeline up to and including {@code timestamp}.</li>
     *   <li>{@code timestamp} is the latest instant and {@code useHiveSyncPartition} is false:
     *       names come from {@link HudiPartitionUtils#getAllPartitionNames}.</li>
     *   <li>Otherwise: names are listed from the metastore and unescaped; if that list is empty the
     *       Hudi listing is used instead.</li>
     * </ul>
     *
     * @param table the Hudi table
     * @param tableMetaClient meta client of {@code table}
     * @param timeline completed commits-and-compaction timeline of the table
     * @param timestamp snapshot timestamp to list partitions for
     * @param useHiveSyncPartition whether the metastore may be used to list partition names
     * @return partition names; empty when the timeline has no completed instant
     * @throws Exception if any underlying listing call fails
     */
    private List<String> loadPartitionNames(HMSExternalTable table, HoodieTableMetaClient tableMetaClient,
            HoodieTimeline timeline, long timestamp, boolean useHiveSyncPartition) throws Exception {
        Option<HoodieInstant> lastInstant = timeline.lastInstant();
        if (!lastInstant.isPresent()) {
            return Collections.emptyList();
        }
        long lastTimestamp = Long.parseLong(lastInstant.get().requestedTime());
        if (timestamp != lastTimestamp) {
            // Not the latest snapshot: reconstruct the partition set from the timeline.
            return HudiPartitionUtils.getPartitionNamesBeforeOrEquals(timeline, String.valueOf(timestamp));
        }
        if (!useHiveSyncPartition) {
            return HudiPartitionUtils.getAllPartitionNames(tableMetaClient);
        }
        HMSExternalCatalog catalog = (HMSExternalCatalog) table.getCatalog();
        List<String> partitionNames = catalog.getClient()
                .listPartitionNames(table.getRemoteDbName(), table.getRemoteName());
        partitionNames = partitionNames.stream().map(FileUtils::unescapePathName).collect(Collectors.toList());
        if (partitionNames.isEmpty()) {
            // The metastore returned nothing; fall back to listing through Hudi.
            LOG.warn("Failed to get partitions from hms api, switch it from hudi api.");
            return HudiPartitionUtils.getAllPartitionNames(tableMetaClient);
        }
        return partitionNames;
    }

    /**
     * Resolves the table behind {@code nameMapping} and requires it to be an {@link HMSExternalTable}.
     *
     * @param nameMapping identity of the table
     * @return the resolved table
     * @throws CacheException if the resolved table is not an {@link HMSExternalTable}
     */
    private HMSExternalTable findHudiTable(NameMapping nameMapping) {
        ExternalTable dorisTable = findExternalTable(nameMapping, ENGINE);
        if (!(dorisTable instanceof HMSExternalTable)) {
            throw new CacheException("table %s.%s.%s is not hms external table when loading hudi cache",
                    null, nameMapping.getCtlId(), nameMapping.getLocalDbName(), nameMapping.getLocalTblName());
        }
        return (HMSExternalTable) dorisTable;
    }

    /**
     * Loader of the {@code schema} entry: asks the resolved table to initialize its schema for the key.
     *
     * @param key cache key carrying table identity and timestamp
     * @return the schema cache value produced by the table
     * @throws CacheException if the table produces no schema value for {@code key}
     */
    private SchemaCacheValue loadSchemaCacheValue(HudiSchemaCacheKey key) {
        ExternalTable dorisTable = findExternalTable(key.getNameMapping(), ENGINE);
        return dorisTable.initSchemaAndUpdateTime(key).orElseThrow(() ->
                new CacheException("failed to load hudi schema cache value for: %s.%s.%s, timestamp: %s",
                        null, key.getNameMapping().getCtlId(), key.getNameMapping().getLocalDbName(),
                        key.getNameMapping().getLocalTblName(), key.getTimestamp()));
    }

    /**
     * Maps the catalog property {@link ExternalCatalog#SCHEMA_CACHE_TTL_SECOND} to the
     * {@code schema} entry.
     *
     * @return a single-element compatibility map built by {@code singleCompatibilityMap}
     */
    @Override
    protected Map<String, String> catalogPropertyCompatibilityMap() {
        return singleCompatibilityMap(ExternalCatalog.SCHEMA_CACHE_TTL_SECOND, ENTRY_SCHEMA);
    }
}