// DefaultConnectorContext.java

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.connector;

import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.FsBroker;
import org.apache.doris.cloud.security.SecurityChecker;
import org.apache.doris.common.CatalogConfigFileUtils;
import org.apache.doris.common.Config;
import org.apache.doris.common.EnvUtils;
import org.apache.doris.common.util.LocationPath;
import org.apache.doris.connector.api.ConnectorHttpSecurityHook;
import org.apache.doris.connector.spi.ConnectorBrokerAddress;
import org.apache.doris.connector.spi.ConnectorContext;
import org.apache.doris.connector.spi.ConnectorMetaInvalidator;
import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.datasource.ExternalCatalog;
import org.apache.doris.datasource.credentials.CredentialUtils;
import org.apache.doris.datasource.property.storage.StorageProperties;
import org.apache.doris.filesystem.FileEntry;
import org.apache.doris.filesystem.FileIterator;
import org.apache.doris.filesystem.FileSystem;
import org.apache.doris.filesystem.Location;
import org.apache.doris.fs.FileSystemFactory;
import org.apache.doris.fs.SpiSwitchingFileSystem;
import org.apache.doris.kerberos.ExecutionAuthenticator;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Strings;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.Callable;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collectors;

/**
 * Default implementation of {@link ConnectorContext}.
 *
 * <p>Provides the minimal catalog-level context that connector providers need
 * during creation. Additional context fields can be added here as the SPI evolves.
 *
 * <p>The context holds the catalog name/id, an immutable environment map built once at construction
 * from FE configuration, a supplier of the {@link ExecutionAuthenticator} used for authenticated
 * calls, and a supplier of the catalog's parsed storage properties used for URI normalization,
 * backend credential translation and post-drop directory cleanup.
 */
public class DefaultConnectorContext implements ConnectorContext {

    private static final Logger LOG = LogManager.getLogger(DefaultConnectorContext.class);

    // Authenticator used by the constructors that are not given an explicit auth supplier.
    private static final ExecutionAuthenticator NOOP_AUTH = new ExecutionAuthenticator() {};

    private final String catalogName;
    private final long catalogId;
    // Unmodifiable snapshot of FE settings, built once by buildEnvironment().
    private final Map<String, String> environment;
    private final Supplier<ExecutionAuthenticator> authSupplier;
    // Lazily supplies the catalog's static storage-properties map for storage-URI normalization
    // (FIX-URI-NORMALIZE). Invoked at scan time only (catalog fully initialized). Empty for ctors
    // that do not wire it -- those callers (non-plugin catalogs) never invoke normalizeStorageUri.
    private final Supplier<Map<StorageProperties.Type, StorageProperties>> storagePropertiesSupplier;

    // Brackets an outgoing HTTP request with the SecurityChecker's SSRF checking start/stop calls.
    private final ConnectorHttpSecurityHook httpSecurityHook = new ConnectorHttpSecurityHook() {
        @Override
        public void beforeRequest(String url) throws Exception {
            SecurityChecker.getInstance().startSSRFChecking(url);
        }

        @Override
        public void afterRequest() {
            SecurityChecker.getInstance().stopSSRFChecking();
        }
    };

    /**
     * Creates a context with the no-op authenticator and an empty static storage map.
     *
     * @param catalogName catalog name, must not be null
     * @param catalogId catalog id
     */
    public DefaultConnectorContext(String catalogName, long catalogId) {
        this(catalogName, catalogId, () -> NOOP_AUTH);
    }

    /**
     * Creates a context with the given authenticator supplier and an empty static storage map.
     *
     * @param catalogName catalog name, must not be null
     * @param catalogId catalog id
     * @param authSupplier supplies the authenticator used by {@link #executeAuthenticated}, must not be null
     */
    public DefaultConnectorContext(String catalogName, long catalogId,
            Supplier<ExecutionAuthenticator> authSupplier) {
        this(catalogName, catalogId, authSupplier, Collections::emptyMap);
    }

    /**
     * Creates a fully wired context.
     *
     * @param catalogName catalog name, must not be null
     * @param catalogId catalog id
     * @param authSupplier supplies the authenticator used by {@link #executeAuthenticated}, must not be null
     * @param storagePropertiesSupplier supplies the catalog's static storage-properties map, must not be
     *        null; it is only invoked lazily by the storage-related methods, never by this constructor
     * @throws NullPointerException if any of the reference arguments is null
     */
    public DefaultConnectorContext(String catalogName, long catalogId,
            Supplier<ExecutionAuthenticator> authSupplier,
            Supplier<Map<StorageProperties.Type, StorageProperties>> storagePropertiesSupplier) {
        this.catalogName = Objects.requireNonNull(catalogName, "catalogName");
        this.catalogId = catalogId;
        this.authSupplier = Objects.requireNonNull(authSupplier, "authSupplier");
        this.storagePropertiesSupplier =
                Objects.requireNonNull(storagePropertiesSupplier, "storagePropertiesSupplier");
        this.environment = buildEnvironment();
    }

    @Override
    public String getCatalogName() {
        return catalogName;
    }

    @Override
    public long getCatalogId() {
        return catalogId;
    }

    @Override
    public Map<String, String> getEnvironment() {
        return environment;
    }

    @Override
    public ConnectorHttpSecurityHook getHttpSecurityHook() {
        return httpSecurityHook;
    }

    @Override
    public ConnectorMetaInvalidator getMetaInvalidator() {
        // A fresh invalidator bound to this catalog id is created on every call.
        return new ExternalMetaCacheInvalidator(catalogId);
    }

    @Override
    public String sanitizeJdbcUrl(String jdbcUrl) {
        try {
            return SecurityChecker.getInstance().getSafeJdbcUrl(jdbcUrl);
        } catch (Exception e) {
            // Surface the checker failure as unchecked, keeping the original exception as the cause.
            throw new RuntimeException("JDBC URL security check failed: " + e.getMessage(), e);
        }
    }

    @Override
    public <T> T executeAuthenticated(Callable<T> task) throws Exception {
        // The authenticator is re-resolved from the supplier on every call.
        return authSupplier.get().execute(task);
    }

    @Override
    public Map<String, String> loadHiveConfResources(String resources) {
        if (Strings.isNullOrEmpty(resources)) {
            return Collections.emptyMap();
        }
        // Reuse the EXACT legacy loader (same hadoop_config_dir base, comma-split, fail-if-missing)
        // so the file-resolution semantics are byte-identical to legacy HMSBaseProperties; only the
        // resolved key/values cross into the connector (no HiveConf/Configuration identity hazard).
        HiveConf hc = CatalogConfigFileUtils.loadHiveConfFromHiveConfDir(resources);
        Map<String, String> out = new HashMap<>();
        for (Map.Entry<String, String> e : hc) {   // HiveConf IS-A Iterable<Map.Entry<String,String>>
            out.put(e.getKey(), e.getValue());
        }
        return out;
    }

    @Override
    public Map<String, String> vendStorageCredentials(Map<String, String> rawVendedCredentials) {
        // Map the per-table vended token to the BE-facing AWS_* properties. Fail-soft (empty) on any
        // error, matching the legacy provider, so a malformed token degrades gracefully rather than
        // killing the scan. The outer try also covers getBackendPropertiesFromStorageMap so the
        // fail-soft boundary is byte-identical to the pre-refactor method; buildVendedStorageMap shares
        // the typed-map build with normalizeStorageUri (single source of truth -- no drift).
        try {
            Map<StorageProperties.Type, StorageProperties> map = buildVendedStorageMap(rawVendedCredentials);
            return map == null ? Collections.emptyMap()
                    : CredentialUtils.getBackendPropertiesFromStorageMap(map);
        } catch (Exception e) {
            LOG.warn("Failed to normalize vended credentials", e);
            return Collections.emptyMap();
        }
    }

    /**
     * Builds the vended {@link StorageProperties} typed map from a raw per-table token: filter to
     * cloud-storage props, run {@link StorageProperties#createAll} (normalizes arbitrary token key
     * shapes + derives region/endpoint), then index by {@link StorageProperties.Type}. Mirrors the
     * legacy {@code AbstractVendedCredentialsProvider} tail exactly, so the BE-credential overlay
     * ({@link #vendStorageCredentials}) and the URI normalization ({@link #normalizeStorageUri(String,
     * Map)}) derive the SAME credentials from the SAME token -- no drift. Returns {@code null} when the
     * token is null/empty, yields no cloud-storage props, or normalization throws -- replicating the
     * legacy provider's "return null -> Factory falls back to the base/static map" contract.
     *
     * @param rawVendedCredentials raw per-table vended token, may be null or empty
     * @return the typed map, or {@code null} when there is nothing usable (see above)
     */
    private Map<StorageProperties.Type, StorageProperties> buildVendedStorageMap(
            Map<String, String> rawVendedCredentials) {
        if (rawVendedCredentials == null || rawVendedCredentials.isEmpty()) {
            return null;
        }
        try {
            Map<String, String> filtered = CredentialUtils.filterCloudStorageProperties(rawVendedCredentials);
            if (filtered.isEmpty()) {
                return null;
            }
            List<StorageProperties> vended = StorageProperties.createAll(filtered);
            // toMap without a merge function throws on a duplicate Type; that lands in the catch below
            // and is reported as "no vended map" like any other normalization failure.
            return vended.stream()
                    .collect(Collectors.toMap(StorageProperties::getType, Function.identity()));
        } catch (Exception e) {
            LOG.warn("Failed to normalize vended credentials", e);
            return null;
        }
    }

    /**
     * Picks the storage map used for {@code LocationPath} resolution: the vended map built from
     * {@code rawVendedCredentials} when one can be built, otherwise the catalog's static map from the
     * storage supplier. The static supplier is only consulted when no vended map is available.
     *
     * @param rawVendedCredentials raw per-table vended token, may be null or empty
     * @return the vended map if present, else whatever the static storage supplier returns
     */
    private Map<StorageProperties.Type, StorageProperties> resolveEffectiveStorageMap(
            Map<String, String> rawVendedCredentials) {
        Map<StorageProperties.Type, StorageProperties> vended = buildVendedStorageMap(rawVendedCredentials);
        return vended != null ? vended : storagePropertiesSupplier.get();
    }

    @Override
    public Map<String, String> getBackendStorageProperties() {
        // Mirror legacy PaimonScanNode.getLocationProperties(): translate the catalog's parsed
        // StorageProperties map into BE-canonical scan keys (AWS_* for object stores, hadoop/dfs for
        // HDFS) via the SAME CredentialUtils.getBackendPropertiesFromStorageMap legacy/iceberg/hive use
        // -- single source of truth, no drift. The map is already validated at catalog creation, so this
        // does not throw; an empty map (non-plugin ctor / local-FS warehouse) yields an empty result
        // (no overlay) -- correct parity, unlike normalizeStorageUri which must fail-loud on a bad path.
        Map<StorageProperties.Type, StorageProperties> typed = storagePropertiesSupplier.get();
        if (typed == null) {
            // Treat a null supplier result like an empty map, consistent with getStorageProperties and
            // cleanupEmptyManagedLocation, so the "does not throw" contract above holds.
            typed = Collections.emptyMap();
        }
        return CredentialUtils.getBackendPropertiesFromStorageMap(typed);
    }

    @Override
    public List<org.apache.doris.filesystem.properties.StorageProperties> getStorageProperties() {
        // Hand the connector the catalog's storage bound as typed fe-filesystem StorageProperties
        // (design D-003): the connector derives its Hadoop/HiveConf config and BE creds from these
        // without importing fe-core or any provider. Source the catalog raw map from the existing
        // storage supplier's getOrigProps() (every parsed StorageProperties carries the full catalog
        // map -- StorageProperties.createAll passes it through), then bind it via the live
        // plugin-loaded FileSystemPluginManager. An empty supplier (non-plugin ctor / REST-vended /
        // credential-less warehouse) yields an empty list -- no static storage, correct parity.
        Map<StorageProperties.Type, StorageProperties> typed = storagePropertiesSupplier.get();
        if (typed == null || typed.isEmpty()) {
            return Collections.emptyList();
        }
        // Any entry will do: per the note above each one carries the same original catalog map.
        Map<String, String> rawCatalogProps = typed.values().iterator().next().getOrigProps();
        if (rawCatalogProps == null || rawCatalogProps.isEmpty()) {
            return Collections.emptyList();
        }
        return FileSystemFactory.bindAllStorageProperties(rawCatalogProps);
    }

    @Override
    public String normalizeStorageUri(String rawUri) {
        // No vended token -> normalize against the catalog's static storage map (behavior unchanged).
        return normalizeStorageUri(rawUri, null);
    }

    @Override
    public String normalizeStorageUri(String rawUri, Map<String, String> rawVendedCredentials) {
        if (Strings.isNullOrEmpty(rawUri)) {
            return rawUri;
        }
        // Mirror legacy PaimonScanNode's 2-arg LocationPath.of(path, storagePropertiesMap):
        // scheme-normalize (oss/cos/obs/s3a -> s3, OSS bucket.endpoint -> bucket) so BE's
        // scheme-dispatched S3 factory can open the file. The storage map follows legacy
        // VendedCredentialsFactory precedence: when the connector supplies a per-table vended token
        // (REST catalogs, whose static map is empty by design) the VENDED map REPLACES the static map;
        // otherwise the catalog's static storage map is used. Fail-loud (StoragePropertiesException
        // propagates) -- a path that cannot be normalized would otherwise silently corrupt reads (esp. a
        // deletion-vector path on merge-on-read). Single source of truth: the SAME LocationPath
        // normalization legacy/iceberg/hive use, so no drift.
        Map<StorageProperties.Type, StorageProperties> effective =
                resolveEffectiveStorageMap(rawVendedCredentials);
        return LocationPath.of(rawUri, effective).toStorageLocation().toString();
    }

    @Override
    public String getBackendFileType(String rawUri, Map<String, String> rawVendedCredentials) {
        // Same LocationPath build as normalizeStorageUri (vended-aware), then read the BE file type from
        // it -- authoritative over the scheme-only default because it also detects a broker-backed path via
        // the storage properties. Returns the TFileType enum NAME (the SPI stays Thrift-free). Mirrors
        // legacy IcebergTableSink.bindDataSink's
        // LocationPath.of(originalLocation, storagePropertiesMap).getTFileTypeForBE().
        Map<StorageProperties.Type, StorageProperties> effective =
                resolveEffectiveStorageMap(rawVendedCredentials);
        return LocationPath.of(rawUri, effective).getTFileTypeForBE().name();
    }

    @Override
    public List<ConnectorBrokerAddress> getBrokerAddresses() {
        // Engine-side resolution of the catalog's broker backend (the connector cannot reach BrokerMgr /
        // bindBrokerName). Mirrors legacy BaseExternalTableDataSink.getBrokerAddresses: the catalog's bound
        // broker name -> getBrokers(name) (or getAllBrokers() when unbound) -> host/port, shuffled for
        // load-balance. Returns empty when the broker manager reports none; the connector turns that into
        // a fail-loud "No alive broker." for a FILE_BROKER write (this hook is only consulted for that
        // target).
        CatalogIf<?> catalog = Env.getCurrentEnv().getCatalogMgr().getCatalog(catalogId);
        String bindBroker = catalog instanceof ExternalCatalog
                ? ((ExternalCatalog) catalog).bindBrokerName() : null;
        List<FsBroker> brokers = bindBroker != null
                ? Env.getCurrentEnv().getBrokerMgr().getBrokers(bindBroker)
                : Env.getCurrentEnv().getBrokerMgr().getAllBrokers();
        if (brokers == null || brokers.isEmpty()) {
            return Collections.emptyList();
        }
        // Shuffle a private copy: the list came from BrokerMgr, so it must neither be reordered in place
        // nor be assumed to be modifiable.
        List<FsBroker> shuffled = new ArrayList<>(brokers);
        Collections.shuffle(shuffled);
        List<ConnectorBrokerAddress> result = new ArrayList<>(shuffled.size());
        for (FsBroker broker : shuffled) {
            result.add(new ConnectorBrokerAddress(broker.host, broker.port));
        }
        return result;
    }

    @Override
    public void cleanupEmptyManagedLocation(String location, List<String> tableChildDirs) {
        // Engine-side companion to a connector drop: prune the empty directory shells the connector's drop
        // leaves behind. The connector decides WHEN (e.g. iceberg HMS-only) and captures the location before
        // the drop; here we own the fe-filesystem machinery it cannot reach (SpiSwitchingFileSystem from the
        // catalog's storage properties). Best-effort: a missing storage binding or any IO failure is logged,
        // never propagated -- cleanup is cosmetic and must not fail the completed drop. Conservative: a
        // directory is removed only when it contains no files (deleteEmptyDirectory aborts on the first file).
        if (Strings.isNullOrEmpty(location)) {
            return;
        }
        Map<StorageProperties.Type, StorageProperties> storageProperties = storagePropertiesSupplier.get();
        if (storageProperties == null || storageProperties.isEmpty()) {
            return;
        }
        try (FileSystem fs = new SpiSwitchingFileSystem(storageProperties)) {
            // No child dirs given -> treat the location itself as the only directory to prune.
            boolean deleted = (tableChildDirs == null || tableChildDirs.isEmpty())
                    ? deleteEmptyDirectory(fs, Location.of(location))
                    : deleteEmptyTableLocation(fs, Location.of(location), tableChildDirs);
            if (deleted) {
                LOG.info("Cleaned empty managed location {}", location);
            } else {
                LOG.info("Skip cleaning managed location {}, it still contains files", location);
            }
        } catch (Exception e) {
            LOG.warn("Failed to clean managed location {} after drop", location, e);
        }
    }

    /**
     * Deletes the engine-format child directories ({@code tableChildDirs}, e.g. iceberg
     * {@code ["data", "metadata"]}) under {@code location} first, then {@code location} itself -- each only
     * when empty. Port of legacy {@code IcebergMetadataOps.deleteEmptyTableLocation}.
     *
     * @param fs file system used for listing and deleting
     * @param location table root location
     * @param tableChildDirs child directory names resolved against {@code location}, must not be null
     * @return {@code true} if every child directory and then {@code location} itself were removed (or were
     *         already absent); {@code false} as soon as one of them could not be removed
     * @throws IOException if a file-system call fails
     */
    @VisibleForTesting
    static boolean deleteEmptyTableLocation(FileSystem fs, Location location, List<String> tableChildDirs)
            throws IOException {
        for (String childDir : tableChildDirs) {
            if (!deleteEmptyDirectory(fs, location.resolve(childDir))) {
                return false;
            }
        }
        return deleteEmptyDirectory(fs, location);
    }

    /**
     * Recursively removes {@code location} iff it (transitively) contains no files: it aborts (returns
     * {@code false}) on the first non-directory entry, so live data is never deleted. Port of legacy
     * {@code IcebergMetadataOps.deleteEmptyDirectory}.
     *
     * @param fs file system used for listing and deleting
     * @param location directory to remove
     * @return {@code true} if {@code location} does not exist afterwards (including when it never existed);
     *         {@code false} if a file was found or the directory is still present
     * @throws IOException if a file-system call fails
     */
    @VisibleForTesting
    static boolean deleteEmptyDirectory(FileSystem fs, Location location) throws IOException {
        if (!fs.exists(location)) {
            return true;
        }
        // Collect sub-directories first so the listing iterator is closed before recursing.
        List<Location> childDirectories = new ArrayList<>();
        try (FileIterator iterator = fs.list(location)) {
            while (iterator.hasNext()) {
                FileEntry entry = iterator.next();
                if (!entry.isDirectory()) {
                    return false;
                }
                childDirectories.add(entry.location());
            }
        }
        for (Location childDirectory : childDirectories) {
            if (!deleteEmptyDirectory(fs, childDirectory)) {
                return false;
            }
        }
        return deleteEmptyDirectoryMarker(fs, location);
    }

    /**
     * Deletes the (empty) directory marker for {@code location}. Port of legacy {@code IcebergMetadataOps}.
     *
     * @param fs file system used for deleting
     * @param location directory whose trailing-slash marker is deleted
     * @return {@code true} if {@code location} no longer exists after the attempt
     * @throws IOException if the final existence check fails
     */
    private static boolean deleteEmptyDirectoryMarker(FileSystem fs, Location location) throws IOException {
        Location directoryMarker = Location.of(withTrailingSlash(location.uri()));
        try {
            fs.delete(directoryMarker, false);
        } catch (IOException ignored) {
            // A failed delete is not decisive on its own: the existence check below determines the
            // outcome whether or not the delete call reported an error.
        }
        return !fs.exists(location);
    }

    /** Returns {@code uri} unchanged if it already ends with {@code '/'}, otherwise with one appended. */
    private static String withTrailingSlash(String uri) {
        return uri.endsWith("/") ? uri : uri + "/";
    }

    /**
     * Builds the unmodifiable environment map exposed through {@link #getEnvironment()} from the current
     * FE configuration values.
     */
    private static Map<String, String> buildEnvironment() {
        Map<String, String> env = new HashMap<>();
        String dorisHome = EnvUtils.getDorisHome();
        if (dorisHome != null) {
            env.put("doris_home", dorisHome);
        }
        env.put("jdbc_drivers_dir", Config.jdbc_drivers_dir);
        env.put("force_sqlserver_jdbc_encrypt_false",
                String.valueOf(Config.force_sqlserver_jdbc_encrypt_false));
        env.put("jdbc_driver_secure_path", Config.jdbc_driver_secure_path);
        // HMS metastore client socket-timeout default (C4): the metastore-spi cannot read FE Config
        // (no fe-common dependency), so the FE-configured value is threaded through the environment and
        // applied by HmsMetaStoreProperties.toHiveConfOverrides when the user has not overridden it.
        env.put("hive_metastore_client_timeout_second",
                String.valueOf(Config.hive_metastore_client_timeout_second));
        // The trino-connector plugin runs in an isolated classloader and cannot read FE
        // Config (it would see its own bundled copy with default values). Pass the
        // configured plugin dir through the engine environment instead.
        env.put("trino_connector_plugin_dir", Config.trino_connector_plugin_dir);
        return Collections.unmodifiableMap(env);
    }
}