HudiPartitionProcessor.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.datasource.hudi.source;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimelineUtils;
import org.apache.hudi.metadata.HoodieTableMetadata;
import org.apache.hudi.metadata.HoodieTableMetadataUtil;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Base class for resolving partition information of a Hudi table.
 *
 * <p>The concrete methods read partition columns and partition paths through the Hudi
 * meta client / timeline APIs, and {@link #parsePartitionValues(List, String)} turns a
 * relative partition path into per-column values. The abstract {@code clean*} hooks are
 * implemented by subclasses; their exact semantics are defined there.
 */
public abstract class HudiPartitionProcessor {

    /** Cleans up all partition state held by the implementation (semantics defined by the subclass). */
    public abstract void cleanUp();

    /**
     * Cleans up partition state belonging to one database (semantics defined by the subclass).
     *
     * @param dbName name of the database
     */
    public abstract void cleanDatabasePartitions(String dbName);

    /**
     * Cleans up partition state belonging to one table (semantics defined by the subclass).
     *
     * @param dbName  name of the database
     * @param tblName name of the table
     */
    public abstract void cleanTablePartitions(String dbName, String tblName);

    /**
     * Returns the partition fields recorded in the table config.
     *
     * @param tableMetaClient meta client of the Hudi table
     * @return the configured partition fields, or an empty array when the table config
     *         declares none (instead of failing on an empty {@code Option})
     */
    public String[] getPartitionColumns(HoodieTableMetaClient tableMetaClient) {
        return tableMetaClient.getTableConfig().getPartitionFields().orElse(new String[0]);
    }

    /**
     * Lists all partition paths of the table via {@link HoodieTableMetadata}.
     *
     * <p>The metadata table is enabled only when
     * {@link HoodieTableMetadataUtil#isFilesPartitionAvailable} reports it as available.
     * The {@link HoodieTableMetadata} handle is closed before returning so that it does
     * not leak whatever resources it holds.
     *
     * @param tableMetaClient meta client of the Hudi table
     * @return all partition paths reported by the table metadata
     * @throws IOException if listing the partitions fails, or if releasing the metadata
     *                     handle fails with a checked exception
     */
    public List<String> getAllPartitionNames(HoodieTableMetaClient tableMetaClient) throws IOException {
        HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder()
                .enable(HoodieTableMetadataUtil.isFilesPartitionAvailable(tableMetaClient))
                .build();
        String basePath = tableMetaClient.getBasePathV2().toString();

        try (HoodieTableMetadata tableMetadata = HoodieTableMetadata.create(
                new HudiLocalEngineContext(tableMetaClient.getStorageConf()), tableMetaClient.getStorage(),
                metadataConfig,
                basePath, true)) {
            return tableMetadata.getAllPartitionPaths();
        } catch (IOException | RuntimeException e) {
            // Propagate failures unchanged so callers see the same exception types as before.
            throw e;
        } catch (Exception e) {
            // Only AutoCloseable#close() can raise a checked, non-IO exception here.
            throw new IOException("Failed to release Hudi table metadata of " + basePath, e);
        }
    }

    /**
     * Returns the partition paths written by the instants at or before {@code timestamp}.
     *
     * @param timeline  timeline to select instants from and to read commit metadata with
     * @param timestamp inclusive upper bound passed to {@code findInstantsBeforeOrEquals}
     * @return the written partition paths as a new list
     * @throws RuntimeException wrapping the {@link IOException} if commit metadata cannot be read
     */
    public List<String> getPartitionNamesBeforeOrEquals(HoodieTimeline timeline, String timestamp) {
        return collectWrittenPartitions(timeline.findInstantsBeforeOrEquals(timestamp), timeline);
    }

    /**
     * Returns the partition paths written by the instants selected by
     * {@code timeline.findInstantsInRange(startTimestamp, endTimestamp)}.
     *
     * @param timeline       timeline to select instants from and to read commit metadata with
     * @param startTimestamp range start, passed through to {@code findInstantsInRange}
     * @param endTimestamp   range end, passed through to {@code findInstantsInRange}
     * @return the written partition paths as a new list
     * @throws RuntimeException wrapping the {@link IOException} if commit metadata cannot be read
     */
    public List<String> getPartitionNamesInRange(HoodieTimeline timeline, String startTimestamp, String endTimestamp) {
        return collectWrittenPartitions(timeline.findInstantsInRange(startTimestamp, endTimestamp), timeline);
    }

    /**
     * Parses the per-column partition values out of a relative partition path.
     *
     * <p>Each path fragment may be hive-style ({@code column=value}) or a bare value; a
     * leading {@code column=} prefix is stripped when present. When there is exactly one
     * partition column and the number of {@code /}-separated fragments differs from one,
     * the whole path is mapped to that single column.
     *
     * @param partitionColumns ordered partition column names; empty for a non-partitioned table
     * @param partitionPath    relative partition path, fragments separated by {@code /}
     * @return one value per partition column, or an empty list for a non-partitioned table
     * @throws RuntimeException if there is more than one partition column and the number of
     *                          path fragments does not match the number of columns
     */
    public static List<String> parsePartitionValues(List<String> partitionColumns, String partitionPath) {
        if (partitionColumns.isEmpty()) {
            // This is a non-partitioned table
            return Collections.emptyList();
        }

        String[] partitionFragments = partitionPath.split("/");
        if (partitionFragments.length == partitionColumns.size()) {
            // One fragment per column: take each fragment's value, dropping the
            // "column=" prefix when the path is hive-style.
            // e.g. both "2021/02" and "year=2021/month=02" yield ["2021", "02"].
            List<String> partitionValues = new ArrayList<>(partitionFragments.length);
            for (int i = 0; i < partitionFragments.length; i++) {
                partitionValues.add(stripHiveStylePrefix(partitionColumns.get(i), partitionFragments[i]));
            }
            return partitionValues;
        }

        if (partitionColumns.size() == 1) {
            // The fragment count does not match, but there is only one partition column:
            // map the whole partition path to that column, which still allows partition pruning.
            // TODO: In hive, the specific characters like '=', '/' will be url encoded
            return Collections.singletonList(stripHiveStylePrefix(partitionColumns.get(0), partitionPath));
        }

        // More than one partition column and a mismatching fragment count: there is no
        // reliable way to map fragments to columns, so fail instead of guessing.
        throw new RuntimeException("Failed to parse partition values of path: " + partitionPath);
    }

    /**
     * Loads the commit metadata of every instant in {@code selected} and returns the
     * partition paths they wrote.
     */
    private List<String> collectWrittenPartitions(HoodieTimeline selected, HoodieTimeline timeline) {
        return new ArrayList<>(HoodieTableMetadataUtil.getWritePartitionPaths(
                selected.getInstants().stream().map(instant -> {
                    try {
                        return TimelineUtils.getCommitMetadata(instant, timeline);
                    } catch (IOException e) {
                        throw new RuntimeException(e.getMessage(), e);
                    }
                }).collect(Collectors.toList())));
    }

    /** Drops a leading {@code column=} from {@code fragment} if present; otherwise returns it unchanged. */
    private static String stripHiveStylePrefix(String column, String fragment) {
        String prefix = column + "=";
        return fragment.startsWith(prefix) ? fragment.substring(prefix.length()) : fragment;
    }
}