// S3PropertyUtils.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.datasource.property.storage;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.S3URI;
import org.apache.doris.datasource.property.storage.exception.StoragePropertiesException;

import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
public class S3PropertyUtils {
private static final Logger LOG = LogManager.getLogger(S3PropertyUtils.class);
private static final String SCHEME_DELIM = "://";
private static final String S3_SCHEME_PREFIX = "s3://";
// S3-compatible schemes that can be converted to s3:// with simple string replacement
// Format: scheme://bucket/key -> s3://bucket/key
private static final String[] SIMPLE_S3_COMPATIBLE_SCHEMES = {
"s3a", "s3n", "oss", "cos", "cosn", "obs", "bos", "gs"
};
/**
* Constructs the S3 endpoint from a given URI in the props map.
*
* @param props the map containing the S3 URI, keyed by URI_KEY
* @param stringUsePathStyle whether to use path-style access ("true"/"false")
* @param stringForceParsingByStandardUri whether to force parsing using the standard URI format ("true"/"false")
* @return the extracted S3 endpoint or null if URI is invalid or parsing fails
* <p>
* Example:
* Input URI: "https://s3.us-west-1.amazonaws.com/my-bucket/my-key"
* Output: "s3.us-west-1.amazonaws.com"
*/
public static String constructEndpointFromUrl(Map<String, String> props,
String stringUsePathStyle,
String stringForceParsingByStandardUri) {
Optional<String> uriOptional = props.entrySet().stream()
.filter(e -> e.getKey().equalsIgnoreCase(StorageProperties.URI_KEY))
.map(Map.Entry::getValue)
.findFirst();
if (!uriOptional.isPresent()) {
return null;
}
String uri = uriOptional.get();
if (StringUtils.isBlank(uri)) {
return null;
}
boolean usePathStyle = Boolean.parseBoolean(stringUsePathStyle);
boolean forceParsingByStandardUri = Boolean.parseBoolean(stringForceParsingByStandardUri);
S3URI s3uri;
try {
s3uri = S3URI.create(uri, usePathStyle, forceParsingByStandardUri);
} catch (UserException e) {
throw new IllegalArgumentException("Invalid S3 URI: " + uri + ",usePathStyle: " + usePathStyle
+ " forceParsingByStandardUri: " + forceParsingByStandardUri, e);
}
return s3uri.getEndpoint().orElse(null);
}
/**
* Extracts the S3 region from a URI in the given props map.
*
* @param props the map containing the S3 URI, keyed by URI_KEY
* @param stringUsePathStyle whether to use path-style access ("true"/"false")
* @param stringForceParsingByStandardUri whether to force parsing using the standard URI format ("true"/"false")
* @return the extracted S3 region or null if URI is invalid or parsing fails
* <p>
* Example:
* Input URI: "https://s3.us-west-1.amazonaws.com/my-bucket/my-key"
* Output: "us-west-1"
*/
public static String constructRegionFromUrl(Map<String, String> props,
String stringUsePathStyle,
String stringForceParsingByStandardUri) {
Optional<String> uriOptional = props.entrySet().stream()
.filter(e -> e.getKey().equalsIgnoreCase(StorageProperties.URI_KEY))
.map(Map.Entry::getValue)
.findFirst();
if (!uriOptional.isPresent()) {
return null;
}
String uri = uriOptional.get();
if (StringUtils.isBlank(uri)) {
return null;
}
boolean usePathStyle = Boolean.parseBoolean(stringUsePathStyle);
boolean forceParsingByStandardUri = Boolean.parseBoolean(stringForceParsingByStandardUri);
S3URI s3uri = null;
try {
s3uri = S3URI.create(uri, usePathStyle, forceParsingByStandardUri);
} catch (UserException e) {
throw new IllegalArgumentException("Invalid S3 URI: " + uri + ",usePathStyle: " + usePathStyle
+ " forceParsingByStandardUri: " + forceParsingByStandardUri, e);
}
return s3uri.getRegion().orElse(null);
}
/**
* Validates and normalizes the given path into a standard S3 URI.
* If the input already starts with a known S3-compatible scheme (s3://, s3a://, oss://, etc.),
* it is returned as-is to avoid expensive regex parsing.
* Otherwise, it is parsed and converted into an S3-compatible URI format.
*
* @param path the raw S3-style path or full URI
* @param stringUsePathStyle whether to use path-style access ("true"/"false")
* @param stringForceParsingByStandardUri whether to force parsing using the standard URI format ("true"/"false")
* @return normalized S3 URI string like "s3://bucket/key"
* @throws UserException if the input path is blank or invalid
* <p>
* Example:
* Input: "https://s3.us-west-1.amazonaws.com/my-bucket/my-key"
* Output: "s3://my-bucket/my-key"
*/
public static String validateAndNormalizeUri(String path,
String stringUsePathStyle,
String stringForceParsingByStandardUri) throws UserException {
if (StringUtils.isBlank(path)) {
throw new StoragePropertiesException("path is null");
}
// Fast path 1: s3:// paths are already in the normalized format expected by BE
if (path.startsWith(S3_SCHEME_PREFIX)) {
return path;
}
// Fast path 2: simple S3-compatible schemes (oss://, cos://, s3a://, etc.)
// can be converted with simple string replacement: scheme://bucket/key -> s3://bucket/key
String normalized = trySimpleSchemeConversion(path);
if (normalized != null) {
return normalized;
}
// Full parsing path: for HTTP URLs and other complex formats
boolean usePathStyle = Boolean.parseBoolean(stringUsePathStyle);
boolean forceParsingByStandardUri = Boolean.parseBoolean(stringForceParsingByStandardUri);
S3URI s3uri = S3URI.create(path, usePathStyle, forceParsingByStandardUri);
return "s3" + S3URI.SCHEME_DELIM + s3uri.getBucket() + S3URI.PATH_DELIM + s3uri.getKey();
}
/**
* Try to convert simple S3-compatible scheme URIs to s3:// format using string replacement.
* This avoids expensive regex parsing for common cases like oss://bucket/key, s3a://bucket/key, etc.
*
* @param path the input path
* @return converted s3:// path if successful, null if the path doesn't match simple pattern
*/
private static String trySimpleSchemeConversion(String path) {
int delimIndex = path.indexOf(SCHEME_DELIM);
if (delimIndex <= 0) {
return null;
}
String scheme = path.substring(0, delimIndex).toLowerCase();
for (String compatibleScheme : SIMPLE_S3_COMPATIBLE_SCHEMES) {
if (compatibleScheme.equals(scheme)) {
String rest = path.substring(delimIndex + SCHEME_DELIM.length());
if (rest.isEmpty() || rest.startsWith(S3URI.PATH_DELIM) || rest.contains(SCHEME_DELIM)) {
return null;
}
// Simple conversion: replace scheme with "s3"
// e.g., "oss://bucket/key" -> "s3://bucket/key"
return S3_SCHEME_PREFIX + rest;
}
}
return null;
}
/**
* Extracts and returns the raw URI string from the given props map.
*
* @param props the map expected to contain a 'uri' entry
* @return the URI string from props
* @throws UserException if the map is empty or does not contain 'uri'
* <p>
* Example:
* Input: {"uri": "s3://my-bucket/my-key"}
* Output: "s3://my-bucket/my-key"
*/
public static String validateAndGetUri(Map<String, String> props) {
if (props.isEmpty()) {
throw new StoragePropertiesException("props is empty");
}
Optional<String> uriOptional = props.entrySet().stream()
.filter(e -> e.getKey().equalsIgnoreCase(StorageProperties.URI_KEY))
.map(Map.Entry::getValue)
.findFirst();
if (!uriOptional.isPresent()) {
throw new StoragePropertiesException("props must contain uri");
}
return uriOptional.get();
}
public static String convertPathToS3(String path) {
try {
URI orig = new URI(path);
URI s3url = new URI("s3", orig.getRawAuthority(),
orig.getRawPath(), orig.getRawQuery(), orig.getRawFragment());
return s3url.toString();
} catch (URISyntaxException e) {
return path;
}
}
}