// AzurePropertyUtils.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.datasource.property.storage;
import org.apache.doris.common.UserException;
import org.apache.doris.datasource.property.storage.exception.StoragePropertiesException;
import org.apache.commons.lang3.StringUtils;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Map;
/**
 * Static helpers for validating Azure Blob Storage locations and rewriting them into the unified
 * {@code s3://<container>/<path>} form.
 */
public class AzurePropertyUtils {

    /** Canonical scheme prefix of every normalized URI returned by this class. */
    private static final String S3_SCHEME = "s3://";

    /** Hadoop-style Azure schemes, shaped like {@code <scheme>://<container>@<host>/<path>}. */
    private static final String[] HDFS_STYLE_SCHEMES = {"wasb://", "wasbs://", "abfs://", "abfss://"};

    /** URL schemes, shaped like {@code <scheme>://<account>.blob.core.windows.net/<container>/<path>}. */
    private static final String[] HTTP_SCHEMES = {"https://", "http://"};

    /** Substring that must appear in the host of an HTTP(S) Azure Blob URL. */
    private static final String AZURE_BLOB_HOST_MARKER = ".blob.core.windows.net";

    /**
     * Validates and normalizes an Azure Blob Storage URI into a unified {@code s3://}-style format.
     * <p>
     * This method supports the following URI formats (scheme matching is case-insensitive):
     * <ul>
     *   <li>HDFS-style Azure URIs: {@code wasb://}, {@code wasbs://}, {@code abfs://}, {@code abfss://}</li>
     *   <li>HTTP(S)-style Azure Blob URLs:
     *       {@code https://<account>.blob.core.windows.net/<container>/<path>}</li>
     *   <li>Already normalized {@code s3://<container>/<path>} URIs, which are returned with only the
     *       scheme prefix canonicalized to lower case</li>
     * </ul>
     * <p>
     * The normalized output will always be in the form of:
     * <pre>{@code
     * s3://<container>/<path>
     * }</pre>
     * The case of the container and path is never altered.
     * <p>
     * Examples:
     * <ul>
     *   <li>{@code wasbs://container@account.blob.core.windows.net/data/file.txt}
     *       → {@code s3://container/data/file.txt}</li>
     *   <li>{@code https://account.blob.core.windows.net/container/file.csv}
     *       → {@code s3://container/file.csv}</li>
     * </ul>
     *
     * @param path the input Azure URI string to be validated and normalized
     * @return a normalized {@code s3://}-style URI
     * @throws StoragePropertiesException if the URI is blank, invalid, or unsupported
     * @throws UserException declared by the signature; nothing in this class throws it directly
     */
    public static String validateAndNormalizeUri(String path) throws UserException {
        if (StringUtils.isBlank(path)) {
            throw new StoragePropertiesException("Path cannot be null or empty");
        }
        // convertToS3Style rejects every scheme outside the supported set with the same
        // "Unsupported Azure URI scheme" error, so no separate scheme pre-check is needed here.
        return convertToS3Style(path);
    }

    /**
     * Converts an Azure Blob Storage URI into a unified {@code s3://<container>/<path>} format.
     * <p>
     * This method recognizes:
     * <ul>
     *   <li>{@code s3://} URIs, which are passed through with a lower-case scheme</li>
     *   <li>HDFS-style Azure URIs ({@code wasb://}, {@code wasbs://}, {@code abfs://}, {@code abfss://})</li>
     *   <li>HTTP(S)-style Azure Blob URLs ({@code https://<account>.blob.core.windows.net/...})</li>
     * </ul>
     *
     * @param uri the original Azure URI string
     * @return the normalized {@code s3://<container>/<path>} string
     * @throws StoragePropertiesException if the URI is blank, malformed, lacks a container, or uses an
     *         unsupported scheme
     */
    private static String convertToS3Style(String uri) {
        if (StringUtils.isBlank(uri)) {
            throw new StoragePropertiesException("URI is blank");
        }
        if (startsWithIgnoreCase(uri, S3_SCHEME)) {
            // Only the scheme is canonicalized: object keys are case-sensitive, so lower-casing the
            // whole URI would silently point at a different object.
            return S3_SCHEME + uri.substring(S3_SCHEME.length());
        }
        for (String scheme : HDFS_STYLE_SCHEMES) {
            if (startsWithIgnoreCase(uri, scheme)) {
                // Strip by prefix length so that upper/mixed-case schemes are removed as well.
                return convertHdfsStyle(uri, uri.substring(scheme.length()));
            }
        }
        for (String scheme : HTTP_SCHEMES) {
            if (startsWithIgnoreCase(uri, scheme)) {
                return convertHttpStyle(uri);
            }
        }
        throw new StoragePropertiesException("Unsupported Azure URI scheme: " + uri);
    }

    /**
     * Converts the scheme-less part of an HDFS-style Azure URI, i.e.
     * {@code <container>@<host>[/<path>]}, into {@code s3://<container>[/<path>]}.
     *
     * @param uri the complete original URI, used only for error messages
     * @param afterScheme everything that follows {@code <scheme>://}
     * @return the normalized {@code s3://} URI
     * @throws StoragePropertiesException if the authority has no {@code '@'} or the container is blank
     */
    private static String convertHdfsStyle(String uri, String afterScheme) {
        int slashIndex = afterScheme.indexOf('/');
        String authority = (slashIndex < 0) ? afterScheme : afterScheme.substring(0, slashIndex);
        String objectPath = (slashIndex < 0) ? "" : afterScheme.substring(slashIndex + 1);
        // Look for '@' only inside the authority: an '@' that merely appears in the object path must
        // not be mistaken for the container/host separator.
        int atIndex = authority.indexOf('@');
        if (atIndex < 0) {
            throw new StoragePropertiesException("Invalid Azure URI, missing '@': " + uri);
        }
        String container = authority.substring(0, atIndex);
        if (StringUtils.isBlank(container)) {
            throw new StoragePropertiesException("Invalid Azure URI, missing container: " + uri);
        }
        return buildS3Uri(container, objectPath);
    }

    /**
     * Converts an HTTP(S) Azure Blob URL of the form
     * {@code http(s)://<account>.blob.core.windows.net/<container>[/<path>]} into
     * {@code s3://<container>[/<path>]}. The path is taken from {@link URI#getPath()}, so it is the
     * decoded path component.
     *
     * @param uri the complete original URL
     * @return the normalized {@code s3://} URI
     * @throws StoragePropertiesException if the URL cannot be parsed, has no host, is not an Azure Blob
     *         host, or has no container segment
     */
    private static String convertHttpStyle(String uri) {
        URI parsed;
        try {
            parsed = new URI(uri);
        } catch (URISyntaxException e) {
            throw new StoragePropertiesException("Invalid HTTPS URI: " + uri, e);
        }
        String host = parsed.getHost();
        if (StringUtils.isBlank(host)) {
            throw new StoragePropertiesException("Invalid Azure HTTPS URI, missing host: " + uri);
        }
        // Typical Azure Blob domain: <account>.blob.core.windows.net
        if (!host.contains(AZURE_BLOB_HOST_MARKER)) {
            throw new StoragePropertiesException("Not an Azure Blob URL: " + uri);
        }
        // Path looks like /<container>/<path>; splitting with limit 3 yields ["", container, path].
        String urlPath = parsed.getPath();
        String[] parts = (urlPath == null) ? new String[0] : urlPath.split("/", 3);
        // A bare "/" or a leading "//" produces an empty container segment, which is not a valid
        // location and must be rejected instead of yielding "s3://".
        if (parts.length < 2 || StringUtils.isBlank(parts[1])) {
            throw new StoragePropertiesException("Invalid Azure Blob URL, missing container: " + uri);
        }
        String objectPath = (parts.length == 3) ? parts[2] : "";
        return buildS3Uri(parts[1], objectPath);
    }

    /**
     * Joins a container and an optional object path into {@code s3://<container>[/<path>]}.
     * A blank path produces the container-only form without a trailing slash.
     */
    private static String buildS3Uri(String container, String objectPath) {
        return StringUtils.isBlank(objectPath)
                ? S3_SCHEME + container
                : S3_SCHEME + container + "/" + objectPath;
    }

    /** Returns whether {@code value} begins with {@code prefix}, comparing characters case-insensitively. */
    private static boolean startsWithIgnoreCase(String value, String prefix) {
        return value.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /**
     * Extracts the "uri" entry from a properties map. The key is matched case-insensitively against
     * {@code StorageProperties.URI_KEY}; the value is returned as-is without further validation.
     *
     * <p>Example:
     * <pre>
     * Input : {"uri": "wasb://container@account.blob.core.windows.net/dir/file.txt"}
     * Output: "wasb://container@account.blob.core.windows.net/dir/file.txt"
     * </pre>
     *
     * @param props the configuration map expected to contain a "uri" key
     * @return the URI string from the map
     * @throws StoragePropertiesException if the map is null or empty, or contains no "uri" key mapped
     *         to a non-null value
     */
    public static String validateAndGetUri(Map<String, String> props) {
        if (props == null || props.isEmpty()) {
            throw new StoragePropertiesException("Properties map cannot be null or empty");
        }
        for (Map.Entry<String, String> entry : props.entrySet()) {
            // A null value is treated like a missing key so that callers get the documented
            // StoragePropertiesException instead of a NullPointerException.
            if (StorageProperties.URI_KEY.equalsIgnoreCase(entry.getKey()) && entry.getValue() != null) {
                return entry.getValue();
            }
        }
        throw new StoragePropertiesException("Properties must contain 'uri' key");
    }
}