// PartitionNameParser.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// Copied from
// https://github.com/awslabs/aws-glue-data-catalog-client-for-apache-hive-metastore/blob/branch-3.4.0/
//
package com.amazonaws.glue.catalog.converters;
import com.amazonaws.glue.catalog.exceptions.InvalidPartitionNameException;
import com.google.common.collect.ImmutableSet;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
/**
 * Static helpers for building and parsing partition names of the form
 * {@code col1=val1/col2=val2}.
 *
 * <p>Building ({@link #getPartitionName}) concatenates columns and values verbatim,
 * whereas the parsing methods unescape {@code %XX} sequences in every column and value.
 */
public class PartitionNameParser {

    /** Separates a column from its value inside one partition segment. */
    private static final String PARTITION_DELIMITER = "=";

    /** Separates the {@code column=value} segments of a partition name. */
    private static final String PARTITION_NAME_DELIMITER = "/";

    // NOTE: the three constants below are not referenced by any method of this class;
    // they are retained unchanged.
    private static final char STORE_AS_NUMBER = 'n';
    private static final char STORE_AS_STRING = 's';
    private static final Set<String> NUMERIC_PARTITION_COLUMN_TYPES = ImmutableSet.of(
            "tinyint",
            "smallint",
            "int",
            "bigint"
    );

    /**
     * Builds a partition name by joining {@code column=value} segments with {@code "/"}.
     * Columns and values are paired by index and appended as-is (no escaping is applied).
     *
     * @param partitionColumns partition column names
     * @param partitionValues partition values, one per column
     * @return the partition name, e.g. {@code "year=2020/month=01"}
     * @throws IllegalArgumentException if either list is {@code null}, the lists differ in size,
     *         or the lists are empty
     */
    public static String getPartitionName(List<String> partitionColumns, List<String> partitionValues) {
        if (hasInvalidValues(partitionColumns, partitionValues) || hasInvalidSize(partitionColumns, partitionValues)) {
            throw new IllegalArgumentException("Partition is not well formed. Columns and values do no match.");
        }
        // Previously an empty pair of lists failed with an incidental IndexOutOfBoundsException
        // from get(0). An empty name cannot be parsed back by this class either, so report it
        // the same way as other malformed input.
        if (partitionColumns.isEmpty()) {
            throw new IllegalArgumentException("Partition is not well formed. Columns and values are empty.");
        }
        StringBuilder partitionName = new StringBuilder();
        for (int i = 0; i < partitionColumns.size(); i++) {
            if (i > 0) {
                partitionName.append(PARTITION_NAME_DELIMITER);
            }
            partitionName.append(getPartitionColumnName(partitionColumns.get(i), partitionValues.get(i)));
        }
        return partitionName.toString();
    }

    /** Returns {@code true} when either list is {@code null}. */
    private static boolean hasInvalidValues(List<String> partitionColumns, List<String> partitionValues) {
        return partitionColumns == null || partitionValues == null;
    }

    /** Returns {@code true} when the two (non-null) lists have different sizes. */
    private static boolean hasInvalidSize(List<String> partitionColumns, List<String> partitionValues) {
        return partitionColumns.size() != partitionValues.size();
    }

    /** Formats a single {@code column=value} segment. */
    private static String getPartitionColumnName(String partitionColumn, String partitionValue) {
        return partitionColumn + PARTITION_DELIMITER + partitionValue;
    }

    /**
     * Parses a partition name into an ordered column-to-value map.
     *
     * <p>The name is split on {@code "/"}; each segment is parsed as {@code column=value}
     * and both parts are unescaped with {@link #unescapePathName(String)}. Entries are inserted
     * in the order the segments appear in the name.
     *
     * @param partitionName partition name such as {@code "year=2020/month=01"}
     * @return insertion-ordered map of unescaped column names to unescaped values
     * @throws InvalidPartitionNameException if a segment does not split into exactly
     *         a column and a value around {@code "="}
     */
    public static LinkedHashMap<String, String> getPartitionColumns(String partitionName) {
        LinkedHashMap<String, String> partitionColumns = new LinkedHashMap<>();
        String[] partitions = partitionName.split(PARTITION_NAME_DELIMITER);
        for (String partition : partitions) {
            Entry<String, String> entry = getPartitionColumnValuePair(partition);
            partitionColumns.put(entry.getKey(), entry.getValue());
        }
        return partitionColumns;
    }

    /**
     * Decodes {@code %XX} escape sequences in a path name.
     *
     * <p>A {@code '%'} followed by at least two more characters is replaced, together with
     * those two characters, by the character whose code is their base-16 value, provided
     * {@code Integer.parseInt(.., 16)} accepts them and yields a non-negative number.
     * Otherwise the {@code '%'} is copied through unchanged.
     *
     * <p>Copied from
     * https://github.com/apache/hive/blob/master/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
     *
     * @param path the possibly escaped path name
     * @return the path name with recognised escape sequences decoded
     */
    public static String unescapePathName(String path) {
        int len = path.length();
        // Pre-allocate: the result is never longer than the input.
        StringBuilder sb = new StringBuilder(len);
        for (int i = 0; i < len; i++) {
            char c = path.charAt(i);
            if (c == '%' && i + 2 < len) {
                int code;
                try {
                    code = Integer.parseInt(path.substring(i + 1, i + 3), 16);
                } catch (NumberFormatException ignored) {
                    // Not a hex pair: fall through and emit the '%' literally.
                    code = -1;
                }
                if (code >= 0) {
                    sb.append((char) code);
                    // Skip the two hex characters that were just consumed.
                    i += 2;
                    continue;
                }
            }
            sb.append(c);
        }
        return sb.toString();
    }

    /**
     * Splits one {@code column=value} segment and unescapes both parts.
     *
     * @param partition a single segment of a partition name
     * @return entry holding the unescaped column (key) and unescaped value
     * @throws InvalidPartitionNameException if splitting on {@code "="} does not yield
     *         exactly two parts
     */
    private static AbstractMap.SimpleEntry<String, String> getPartitionColumnValuePair(String partition) {
        String[] splitPartition = partition.split(PARTITION_DELIMITER);
        if (splitPartition.length != 2) {
            throw new InvalidPartitionNameException(partition);
        }
        String column = unescapePathName(splitPartition[0]);
        String value = unescapePathName(splitPartition[1]);
        return new AbstractMap.SimpleEntry<>(column, value);
    }

    /**
     * Extracts only the values from a partition name, in the order they appear.
     *
     * @param partitionName partition name such as {@code "year=2020/month=01"}
     * @return the unescaped values of each {@code column=value} segment
     * @throws InvalidPartitionNameException if a segment does not split into exactly
     *         a column and a value around {@code "="}
     */
    public static List<String> getPartitionValuesFromName(String partitionName) {
        List<String> partitionValues = new ArrayList<>();
        String[] partitions = partitionName.split(PARTITION_NAME_DELIMITER);
        for (String partition : partitions) {
            Entry<String, String> entry = getPartitionColumnValuePair(partition);
            partitionValues.add(entry.getValue());
        }
        return partitionValues;
    }
}