// HiveEngineCache.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.datasource.hive;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.PartitionType;
import org.apache.doris.common.Config;
import org.apache.doris.datasource.ExternalCatalog;
import org.apache.doris.datasource.ExternalTable;
import org.apache.doris.datasource.SchemaCacheValue;
import org.apache.doris.datasource.metacache.CacheModule;
import org.apache.doris.datasource.metacache.CacheSpec;
import org.apache.doris.datasource.metacache.EngineMetaCache;
import org.apache.doris.datasource.metacache.EnginePartitionInfo;
import org.apache.doris.datasource.metacache.EngineSnapshot;
import org.apache.doris.datasource.mvcc.MvccSnapshot;
import com.github.benmanes.caffeine.cache.stats.CacheStats;
import com.google.common.collect.ImmutableList;
import org.apache.commons.collections4.CollectionUtils;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
/**
 * Engine-level cache adapter for Hive.
 *
 * <p>Wraps the existing {@link HiveMetaStoreCache} into the unified
 * {@link EngineMetaCache} framework. Engine-specific operations like
 * incremental partition management ({@code addPartitionsCache},
 * {@code dropPartitionsCache}, {@code refreshAffectedPartitions}) and
 * file listing ({@code getFilesByPartitions}) remain accessible through
 * {@link #getMetaStoreCache()}.
 *
 * <p>Three {@link CacheModule}s are exposed after {@link #init(ExternalCatalog)}:
 * {@code partition-values}, {@code partition} and {@code file}. All of them are backed by the
 * same {@link HiveMetaStoreCache}. Catalog- and database-level invalidation is delegated by the
 * {@code partition-values} module only; the other two modules are intentional no-ops for
 * invalidation so the shared cache is not invalidated more than once. Table-level invalidation
 * goes through {@link #invalidateTable(ExternalTable)}.
 */
public class HiveEngineCache implements EngineMetaCache {
    /** Engine type identifier returned by {@link #getEngineType()} and used when building cache specs. */
    public static final String ENGINE_TYPE = "hive";

    private static final String PARTITION_VALUES_MODULE_NAME = "partition-values";
    private static final String PARTITION_MODULE_NAME = "partition";
    private static final String FILE_MODULE_NAME = "file";

    private final HiveMetaStoreCache metaStoreCache;

    /** Modules exposed by this engine cache; empty until {@link #init(ExternalCatalog)} has been called. */
    private List<CacheModule> cacheModules = ImmutableList.of();

    /**
     * Creates the adapter around an existing Hive metastore cache.
     *
     * @param metaStoreCache the cache every operation of this adapter delegates to
     * @throws NullPointerException if {@code metaStoreCache} is null
     */
    public HiveEngineCache(HiveMetaStoreCache metaStoreCache) {
        this.metaStoreCache = Objects.requireNonNull(metaStoreCache, "metaStoreCache cannot be null");
    }

    /** Returns {@link #ENGINE_TYPE}. */
    @Override
    public String getEngineType() {
        return ENGINE_TYPE;
    }

    /**
     * Builds the three cache modules for the given catalog, replacing any previously built list.
     *
     * @param catalog catalog whose properties are used to build each module's {@link CacheSpec}
     * @throws NullPointerException if {@code catalog} is null
     */
    @Override
    public void init(ExternalCatalog catalog) {
        ExternalCatalog nonNullCatalog = Objects.requireNonNull(catalog, "catalog cannot be null");
        // The partition-values module comes first; it is the only one that performs
        // catalog/database level invalidation on the shared HiveMetaStoreCache.
        this.cacheModules = ImmutableList.of(
                new HivePartitionValuesModule(nonNullCatalog, metaStoreCache),
                new HivePartitionModule(nonNullCatalog, metaStoreCache),
                new HiveFileModule(nonNullCatalog, metaStoreCache));
    }

    /** Returns the modules built by {@link #init(ExternalCatalog)}, or an empty list before initialization. */
    @Override
    public List<CacheModule> getCacheModules() {
        return cacheModules;
    }

    /**
     * Invalidates the cached metadata of a single table.
     *
     * @param table table to invalidate
     * @throws NullPointerException if {@code table} is null
     */
    @Override
    public void invalidateTable(ExternalTable table) {
        Objects.requireNonNull(table, "table cannot be null");
        // Use HiveMetaStoreCache's table invalidation which requires NameMapping
        metaStoreCache.invalidateTableCache(table.getOrBuildNameMapping());
    }

    /**
     * Returns the partition values of a Hive table wrapped as {@link EnginePartitionInfo}.
     *
     * @param table    table to inspect; must be an {@link HMSExternalTable} with HIVE DLA type
     * @param snapshot optional MVCC snapshot forwarded to the table
     * @return a {@link HivePartition} holding the table's partition values
     * @throws NullPointerException     if {@code table} is null
     * @throws IllegalArgumentException if {@code table} is not a Hive table
     */
    @Override
    public EnginePartitionInfo getPartitionInfo(ExternalTable table, Optional<MvccSnapshot> snapshot) {
        HMSExternalTable hmsTable = asHiveTable(table);
        return new HivePartition(hmsTable.getHivePartitionValues(snapshot));
    }

    /**
     * Computes a snapshot marker for a Hive table.
     *
     * <ul>
     *   <li>Unpartitioned table: the table name paired with the table's last DDL time.</li>
     *   <li>Partitioned table: the name and last-modified time of the partition with the greatest
     *       last-modified time. Only timestamps strictly greater than 0 are considered, and on a
     *       tie the first partition encountered wins.</li>
     *   <li>Partitioned table with no partitions, or with no partition having a positive
     *       last-modified time: the table name paired with {@code 0}.</li>
     * </ul>
     *
     * @param table    table to inspect; must be an {@link HMSExternalTable} with HIVE DLA type
     * @param snapshot optional MVCC snapshot forwarded to the table
     * @return a {@link HiveSnapshotMeta} as described above
     * @throws NullPointerException     if {@code table} is null
     * @throws IllegalArgumentException if {@code table} is not a Hive table
     */
    @Override
    public EngineSnapshot getSnapshot(ExternalTable table, Optional<MvccSnapshot> snapshot) {
        HMSExternalTable hmsTable = asHiveTable(table);
        if (hmsTable.getPartitionType(snapshot) == PartitionType.UNPARTITIONED) {
            return new HiveSnapshotMeta(hmsTable.getName(), hmsTable.getLastDdlTime());
        }

        HiveMetaStoreCache.HivePartitionValues partitionValues = hmsTable.getHivePartitionValues(snapshot);
        // Fully qualified: the nested HiveEngineCache.HivePartition shadows the package-level class here.
        List<org.apache.doris.datasource.hive.HivePartition> partitions = metaStoreCache.getAllPartitionsWithCache(
                hmsTable, new ArrayList<>(partitionValues.getPartitionValuesMap().values()));
        if (CollectionUtils.isEmpty(partitions)) {
            return new HiveSnapshotMeta(hmsTable.getName(), 0L);
        }

        List<Column> partitionColumns = hmsTable.getPartitionColumns(snapshot);
        org.apache.doris.datasource.hive.HivePartition newestPartition = null;
        long newestTimestamp = 0L;
        for (org.apache.doris.datasource.hive.HivePartition candidate : partitions) {
            long modifiedTime = candidate.getLastModifiedTime();
            // Strict comparison: non-positive timestamps are ignored and ties keep the earlier partition.
            if (modifiedTime > newestTimestamp) {
                newestTimestamp = modifiedTime;
                newestPartition = candidate;
            }
        }
        if (newestPartition == null) {
            // No partition carried a positive last-modified time; fall back to the table name.
            return new HiveSnapshotMeta(hmsTable.getName(), 0L);
        }
        return new HiveSnapshotMeta(newestPartition.getPartitionName(partitionColumns), newestTimestamp);
    }

    /**
     * Not supported for Hive.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public SchemaCacheValue getSchema(ExternalTable table, long schemaVersion) {
        throw new UnsupportedOperationException("Hive does not support getSchema via engine cache");
    }

    /** Returns the wrapped {@link HiveMetaStoreCache} for engine-specific operations. */
    public HiveMetaStoreCache getMetaStoreCache() {
        return metaStoreCache;
    }

    /**
     * Validates that {@code table} is a Hive table and returns it as {@link HMSExternalTable}.
     *
     * @throws NullPointerException     if {@code table} is null
     * @throws IllegalArgumentException if {@code table} is not an {@link HMSExternalTable} or its
     *                                  DLA type is not {@code HIVE}
     */
    private HMSExternalTable asHiveTable(ExternalTable table) {
        // Reject null explicitly: "null instanceof X" is false, so without this guard a null table
        // would fail with a message-less NPE from table.getClass() while building the error text below.
        Objects.requireNonNull(table, "table cannot be null");
        if (!(table instanceof HMSExternalTable)) {
            throw new IllegalArgumentException("Expected HMSExternalTable for hive engine cache, but got "
                    + table.getClass().getSimpleName());
        }
        HMSExternalTable hmsTable = (HMSExternalTable) table;
        if (hmsTable.getDlaType() != HMSExternalTable.DLAType.HIVE) {
            throw new IllegalArgumentException("Expected HIVE DLA type for hive engine cache, but got "
                    + hmsTable.getDlaType() + ", table=" + hmsTable.getNameWithFullQualifiers());
        }
        return hmsTable;
    }

    /** {@link EnginePartitionInfo} carrying the partition values of a Hive table. */
    public static final class HivePartition implements EnginePartitionInfo {
        private final HiveMetaStoreCache.HivePartitionValues partitionValues;

        /**
         * @param partitionValues partition values to expose; stored as-is without copying
         */
        public HivePartition(HiveMetaStoreCache.HivePartitionValues partitionValues) {
            this.partitionValues = partitionValues;
        }

        /** Returns the partition values passed to the constructor. */
        public HiveMetaStoreCache.HivePartitionValues getPartitionValues() {
            return partitionValues;
        }
    }

    /** {@link EngineSnapshot} made of a name and a timestamp, as produced by {@link #getSnapshot}. */
    public static final class HiveSnapshotMeta implements EngineSnapshot {
        private final String partitionName;
        private final long timestamp;

        /**
         * @param partitionName partition name, or the table name when no partition applies
         * @param timestamp     timestamp associated with {@code partitionName}
         */
        public HiveSnapshotMeta(String partitionName, long timestamp) {
            this.partitionName = partitionName;
            this.timestamp = timestamp;
        }

        /** Returns the name passed to the constructor. */
        public String getPartitionName() {
            return partitionName;
        }

        /** Returns the timestamp passed to the constructor. */
        public long getTimestamp() {
            return timestamp;
        }
    }

    /**
     * Module for the partition-values cache. It is the only module that forwards catalog- and
     * database-level invalidation to the shared {@link HiveMetaStoreCache}.
     */
    private static class HivePartitionValuesModule implements CacheModule {
        private final HiveMetaStoreCache metaStoreCache;
        private final CacheSpec cacheSpec;

        HivePartitionValuesModule(ExternalCatalog catalog, HiveMetaStoreCache metaStoreCache) {
            this.metaStoreCache = metaStoreCache;
            // NOTE(review): the trailing arguments look like default enable flag / TTL / capacity
            // taken from Config -- confirm against CacheSpec.fromUnifiedProperties.
            this.cacheSpec = CacheSpec.fromUnifiedProperties(catalog.getProperties(),
                    ENGINE_TYPE, PARTITION_VALUES_MODULE_NAME, true,
                    Config.external_cache_expire_time_seconds_after_access,
                    Config.max_hive_partition_table_cache_num);
        }

        @Override
        public String getName() {
            return PARTITION_VALUES_MODULE_NAME;
        }

        @Override
        public CacheSpec getSpec() {
            return cacheSpec;
        }

        /** Delegates to {@code HiveMetaStoreCache.invalidateAll()}. */
        @Override
        public void invalidateAll() {
            metaStoreCache.invalidateAll();
        }

        /** Delegates to {@code HiveMetaStoreCache.invalidateDbCache(dbName)}. */
        @Override
        public void invalidateDb(String dbName) {
            metaStoreCache.invalidateDbCache(dbName);
        }

        @Override
        public void invalidateTable(String dbName, String tableName) {
            // Table-level invalidation is handled by HiveEngineCache.invalidateTable()
            // which uses the proper NameMapping. This is a no-op to avoid double invalidation.
        }

        @Override
        public CacheStats getStats() {
            return metaStoreCache.getPartitionValuesCacheStats();
        }

        @Override
        public long getEstimatedSize() {
            return metaStoreCache.getPartitionValuesCacheEstimatedSize();
        }
    }

    /**
     * Module for the partition cache. Reports statistics only; every invalidation method is a
     * no-op because invalidation is performed elsewhere on the shared {@link HiveMetaStoreCache}.
     */
    private static class HivePartitionModule implements CacheModule {
        private final HiveMetaStoreCache metaStoreCache;
        private final CacheSpec cacheSpec;

        HivePartitionModule(ExternalCatalog catalog, HiveMetaStoreCache metaStoreCache) {
            this.metaStoreCache = metaStoreCache;
            this.cacheSpec = CacheSpec.fromUnifiedProperties(catalog.getProperties(),
                    ENGINE_TYPE, PARTITION_MODULE_NAME, true,
                    Config.external_cache_expire_time_seconds_after_access,
                    Config.max_hive_partition_cache_num);
        }

        @Override
        public String getName() {
            return PARTITION_MODULE_NAME;
        }

        @Override
        public CacheSpec getSpec() {
            return cacheSpec;
        }

        @Override
        public void invalidateAll() {
            // Already handled by partition-values module's invalidateAll()
        }

        @Override
        public void invalidateDb(String dbName) {
            // Already handled by partition-values module's invalidateDb()
        }

        @Override
        public void invalidateTable(String dbName, String tableName) {
            // Table-level invalidation is handled by HiveEngineCache.invalidateTable()
        }

        @Override
        public CacheStats getStats() {
            return metaStoreCache.getPartitionCacheStats();
        }

        @Override
        public long getEstimatedSize() {
            return metaStoreCache.getPartitionCacheEstimatedSize();
        }
    }

    /**
     * Module for the file cache. Reports statistics only; every invalidation method is a
     * no-op because invalidation is performed elsewhere on the shared {@link HiveMetaStoreCache}.
     */
    private static class HiveFileModule implements CacheModule {
        private final HiveMetaStoreCache metaStoreCache;
        private final CacheSpec cacheSpec;

        HiveFileModule(ExternalCatalog catalog, HiveMetaStoreCache metaStoreCache) {
            this.metaStoreCache = metaStoreCache;
            this.cacheSpec = CacheSpec.fromUnifiedProperties(catalog.getProperties(),
                    ENGINE_TYPE, FILE_MODULE_NAME, true,
                    Config.external_cache_expire_time_seconds_after_access,
                    Config.max_external_file_cache_num);
        }

        @Override
        public String getName() {
            return FILE_MODULE_NAME;
        }

        @Override
        public CacheSpec getSpec() {
            return cacheSpec;
        }

        @Override
        public void invalidateAll() {
            // Already handled by partition-values module's invalidateAll()
        }

        @Override
        public void invalidateDb(String dbName) {
            // Already handled by partition-values module's invalidateDb()
        }

        @Override
        public void invalidateTable(String dbName, String tableName) {
            // Table-level invalidation is handled by HiveEngineCache.invalidateTable()
        }

        @Override
        public CacheStats getStats() {
            return metaStoreCache.getFileCacheStats();
        }

        @Override
        public long getEstimatedSize() {
            return metaStoreCache.getFileCacheEstimatedSize();
        }
    }
}