HdfsPropertiesUtils.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.datasource.property.storage;
import org.apache.doris.common.UserException;
import org.apache.doris.datasource.property.storage.exception.StoragePropertiesException;
import org.apache.commons.lang3.StringUtils;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
/**
 * Static helpers that extract, validate, and normalize the {@code uri} property of
 * HDFS-style storage configurations.
 */
public class HdfsPropertiesUtils {
    private static final String URI_KEY = "uri";
    private static final String STANDARD_HDFS_PREFIX = "hdfs://";
    private static final String EMPTY_HDFS_PREFIX = "hdfs:///";
    private static final String BROKEN_HDFS_PREFIX = "hdfs:/";
    private static final String SCHEME_DELIM = "://";
    private static final String NONSTANDARD_SCHEME_DELIM = ":/";

    /**
     * Reads the single {@code uri} entry from {@code props} and returns its normalized form.
     *
     * @param props          property map; the {@code uri} key is matched case-insensitively
     * @param host           authority inserted when the URI carries no host
     * @param defaultFs      prefix prepended when the URI carries no scheme delimiter
     * @param supportSchemas accepted schemes; the URI scheme is lower-cased before the lookup
     * @return the normalized location, see
     *         {@link #validateAndNormalizeUri(String, String, String, Set)}
     * @throws UserException              if {@code props} is empty
     * @throws StoragePropertiesException if no usable {@code uri} value is present (missing, blank,
     *                                    or a comma-separated list), or the URI cannot be parsed
     */
    public static String validateAndGetUri(Map<String, String> props, String host, String defaultFs,
                                           Set<String> supportSchemas) throws UserException {
        if (props.isEmpty()) {
            throw new UserException("props is empty");
        }
        String uriStr = getUri(props);
        if (StringUtils.isBlank(uriStr)) {
            throw new StoragePropertiesException("props must contain uri");
        }
        return validateAndNormalizeUri(uriStr, host, defaultFs, supportSchemas);
    }

    /**
     * Tells whether the {@code uri} entry of {@code props} uses one of the supported schemes.
     *
     * @param props          property map holding the {@code uri} entry
     * @param supportSchemas accepted schemes; the URI scheme is lower-cased before the lookup
     * @return {@code false} when there is no usable {@code uri} value or its scheme is not
     *         supported, {@code true} otherwise
     * @throws IllegalArgumentException if the value is not a syntactically valid URI or has no scheme
     */
    public static boolean validateUriIsHdfsUri(Map<String, String> props,
                                               Set<String> supportSchemas) {
        String uriStr = getUri(props);
        if (StringUtils.isBlank(uriStr)) {
            return false;
        }
        URI uri = URI.create(uriStr);
        String schema = uri.getScheme();
        if (StringUtils.isBlank(schema)) {
            throw new IllegalArgumentException("Invalid uri: " + uriStr + ", extract schema is null");
        }
        return isSupportedSchema(schema, supportSchemas);
    }

    /**
     * Builds {@code scheme://authority} from a file path.
     *
     * <p>No check is made that the path actually has a scheme or an authority; a missing part is
     * concatenated as the literal text {@code null}.
     *
     * @param filePath path to inspect
     * @return {@code scheme://authority}, or {@code null} when {@code filePath} is blank
     * @throws IllegalArgumentException if {@code filePath} is not a syntactically valid URI
     */
    public static String extractDefaultFsFromPath(String filePath) {
        if (StringUtils.isBlank(filePath)) {
            return null;
        }
        URI uri = URI.create(filePath);
        return uri.getScheme() + "://" + uri.getAuthority();
    }

    /**
     * Builds {@code scheme://authority} from the {@code uri} entry of {@code props}.
     *
     * @param props          property map holding the {@code uri} entry
     * @param supportSchemas accepted schemes; the URI scheme is lower-cased before the lookup
     * @return {@code scheme://authority}, or {@code null} when there is no usable {@code uri} value
     *         or its scheme is not supported; a missing authority is concatenated as the literal
     *         text {@code null}
     * @throws IllegalArgumentException if the value is not a syntactically valid URI
     */
    public static String extractDefaultFsFromUri(Map<String, String> props, Set<String> supportSchemas) {
        String uriStr = getUri(props);
        if (StringUtils.isBlank(uriStr)) {
            return null;
        }
        URI uri = URI.create(uriStr);
        if (!isSupportedSchema(uri.getScheme(), supportSchemas)) {
            return null;
        }
        return uri.getScheme() + "://" + uri.getAuthority();
    }

    /**
     * Normalizes {@code uriStr}; delegates to
     * {@link #validateAndNormalizeUri(String, String, String, Set)}.
     */
    public static String convertUrlToFilePath(String uriStr, String host,
                                              String defaultFs, Set<String> supportSchemas) {
        return validateAndNormalizeUri(uriStr, host, defaultFs, supportSchemas);
    }

    /**
     * Normalizes {@code uriStr} without a default file system; delegates to
     * {@link #validateAndNormalizeUri(String, String, String, Set)}.
     */
    public static String convertUrlToFilePath(String uriStr, String host, Set<String> supportSchemas) {
        return validateAndNormalizeUri(uriStr, host, null, supportSchemas);
    }

    /**
     * Extracts the URI value from the given properties.
     *
     * <p>The first non-blank value whose key equals {@code uri} (ignoring case) is used.
     * If multiple URIs are specified (separated by commas), this method returns {@code null}.
     * Note: some storage systems may support multiple URIs (e.g., for load balancing or
     * multi-host), but in the HDFS scenario fs.defaultFS only supports a single URI, so such a
     * format is considered invalid for HDFS.
     *
     * @return the URI value, or {@code null} when absent, blank, or comma-separated
     */
    private static String getUri(Map<String, String> props) {
        String uriValue = props.entrySet().stream()
                // Constant on the left so that a null map key cannot raise a NullPointerException.
                .filter(e -> URI_KEY.equalsIgnoreCase(e.getKey()))
                .map(Map.Entry::getValue)
                .filter(StringUtils::isNotBlank)
                .findFirst()
                .orElse(null);
        if (uriValue == null) {
            return null;
        }
        String[] uris = uriValue.split(",");
        if (uris.length > 1) {
            return null;
        }
        return uriValue;
    }

    /**
     * Checks a scheme against the supported set, ignoring the case of {@code schema}.
     * Lower-casing uses {@link Locale#ROOT} so the result does not depend on the JVM default
     * locale.
     */
    private static boolean isSupportedSchema(String schema, Set<String> supportSchema) {
        return schema != null && supportSchema.contains(schema.toLowerCase(Locale.ROOT));
    }

    /**
     * Replaces {@code prefix} at the very start of {@code value} with {@code replacement};
     * returns {@code value} unchanged when it does not start with {@code prefix}.
     */
    private static String replaceLeadingPrefix(String value, String prefix, String replacement) {
        if (!value.startsWith(prefix)) {
            return value;
        }
        return replacement + value.substring(prefix.length());
    }

    /**
     * Validates and normalizes {@code location} with neither a host nor a default file system.
     *
     * @see #validateAndNormalizeUri(String, String, String, Set)
     */
    public static String validateAndNormalizeUri(String location, Set<String> supportedSchemas) {
        return validateAndNormalizeUri(location, null, null, supportedSchemas);
    }

    /**
     * Validates the scheme of {@code location} and returns it as a decoded location string.
     *
     * <p>The parameter order {@code (location, host, defaultFs, supportedSchemas)} matches
     * {@link #validateAndGetUri} and {@link #convertUrlToFilePath(String, String, String, Set)},
     * which forward their arguments positionally.
     *
     * <ul>
     *   <li>A location containing neither {@code ://} nor {@code :/} is prefixed with
     *       {@code defaultFs} when that is not blank.</li>
     *   <li>When the parsed URI has a host, the decoded location is returned as is.</li>
     *   <li>When it has no host, a leading {@code hdfs:/} is first rewritten to {@code hdfs://}.
     *       With a non-empty {@code host}, the host is then inserted after the leading
     *       {@code hdfs://}. Without one, a location starting with {@code hdfs:///} is rejected
     *       and any other leading {@code hdfs://} is replaced by {@code /}.</li>
     * </ul>
     *
     * @param location         URI or path to normalize
     * @param host             authority to insert when the URI has no host; may be null or empty
     * @param defaultFs        prefix for scheme-less locations; may be null or blank
     * @param supportedSchemas accepted schemes; the URI scheme is lower-cased before the lookup
     * @return the normalized, URL-decoded location
     * @throws IllegalArgumentException   if {@code location} is blank or its scheme is unsupported
     * @throws RuntimeException           if the location starts with {@code hdfs:///} and no host
     *                                    is given
     * @throws StoragePropertiesException if the location cannot be encoded or parsed as a URI
     */
    public static String validateAndNormalizeUri(String location, String host, String defaultFs,
                                                 Set<String> supportedSchemas) {
        if (StringUtils.isBlank(location)) {
            throw new IllegalArgumentException("Property 'uri' is required.");
        }
        if (!(location.contains(SCHEME_DELIM) || location.contains(NONSTANDARD_SCHEME_DELIM))
                && StringUtils.isNotBlank(defaultFs)) {
            location = defaultFs + location;
        }
        try {
            // Encode the location string, but keep '/' and ':' unescaped to preserve URI structure
            String newLocation = URLEncoder.encode(location, StandardCharsets.UTF_8.name())
                    .replace("%2F", "/")
                    .replace("%3A", ":");
            URI uri = new URI(newLocation).normalize();
            if (!isSupportedSchema(uri.getScheme(), supportedSchemas)) {
                throw new IllegalArgumentException("Unsupported schema: " + uri.getScheme());
            }
            // compatible with 'hdfs:///' or 'hdfs:/'
            if (StringUtils.isEmpty(uri.getHost())) {
                newLocation = URLDecoder.decode(newLocation, StandardCharsets.UTF_8.name());
                // Only the leading prefix is rewritten; String.replace would also rewrite any
                // later occurrence of the same text inside the path.
                if (newLocation.startsWith(BROKEN_HDFS_PREFIX) && !newLocation.startsWith(STANDARD_HDFS_PREFIX)) {
                    newLocation = replaceLeadingPrefix(newLocation, BROKEN_HDFS_PREFIX, STANDARD_HDFS_PREFIX);
                }
                if (StringUtils.isNotEmpty(host)) {
                    // Replace 'hdfs://key/' to 'hdfs://name_service/key/'
                    // Or hdfs:///abc to hdfs://name_service/abc
                    if (newLocation.startsWith(EMPTY_HDFS_PREFIX)) {
                        return replaceLeadingPrefix(newLocation, STANDARD_HDFS_PREFIX,
                                STANDARD_HDFS_PREFIX + host);
                    }
                    return replaceLeadingPrefix(newLocation, STANDARD_HDFS_PREFIX,
                            STANDARD_HDFS_PREFIX + host + "/");
                }
                // 'hdfs://null/' equals the 'hdfs:///'
                if (newLocation.startsWith(EMPTY_HDFS_PREFIX)) {
                    // Do not support hdfs:///location
                    throw new RuntimeException("Invalid location with empty host: " + newLocation);
                }
                // Replace 'hdfs://key/' to '/key/', try access local NameNode on BE.
                return replaceLeadingPrefix(newLocation, STANDARD_HDFS_PREFIX, "/");
            }
            // Normal case: decode and return the fully-qualified URI
            return URLDecoder.decode(newLocation, StandardCharsets.UTF_8.name());
        } catch (URISyntaxException | UnsupportedEncodingException e) {
            throw new StoragePropertiesException("Failed to parse URI: " + location, e);
        }
    }
}