IcebergHadoopExternalCatalog.java

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.datasource.iceberg;

import org.apache.doris.catalog.HdfsResource;
import org.apache.doris.datasource.CatalogProperty;
import org.apache.doris.datasource.property.PropertyConverter;
import org.apache.doris.datasource.property.storage.HdfsProperties;
import org.apache.doris.datasource.property.storage.StorageProperties;
import org.apache.doris.datasource.property.storage.StorageProperties.Type;

import com.google.common.base.Preconditions;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.CatalogProperties;
import org.apache.iceberg.hadoop.HadoopCatalog;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.Map;

public class IcebergHadoopExternalCatalog extends IcebergExternalCatalog {
    private static final Logger LOG = LogManager.getLogger(IcebergHadoopExternalCatalog.class);

    public IcebergHadoopExternalCatalog(long catalogId, String name, String resource, Map<String, String> props,
                                        String comment) {
        super(catalogId, name, comment);
        props = PropertyConverter.convertToMetaProperties(props);
        String warehouse = props.get(CatalogProperties.WAREHOUSE_LOCATION);
        Preconditions.checkArgument(StringUtils.isNotEmpty(warehouse),
                "Cannot initialize Iceberg HadoopCatalog because 'warehouse' must not be null or empty");
        catalogProperty = new CatalogProperty(resource, props);
        if (StringUtils.startsWith(warehouse, HdfsResource.HDFS_PREFIX)) {
            String nameService = StringUtils.substringBetween(warehouse, HdfsResource.HDFS_FILE_PREFIX, "/");
            if (StringUtils.isEmpty(nameService)) {
                throw new IllegalArgumentException("Unrecognized 'warehouse' location format"
                        + " because name service is required.");
            }
            catalogProperty.addProperty(HdfsResource.HADOOP_FS_NAME, HdfsResource.HDFS_FILE_PREFIX + nameService);
        }
    }

    @Override
    protected void initCatalog() {
        icebergCatalogType = ICEBERG_HADOOP;

        Configuration conf = getConfiguration();
        initS3Param(conf);
        // initialize hadoop catalog
        Map<String, String> catalogProperties = catalogProperty.getProperties();
        String warehouse = catalogProperty.getHadoopProperties().get(CatalogProperties.WAREHOUSE_LOCATION);
        HadoopCatalog hadoopCatalog = new HadoopCatalog();
        hadoopCatalog.setConf(conf);
        catalogProperties.put(CatalogProperties.WAREHOUSE_LOCATION, warehouse);

        // TODO: This is a temporary solution to support Iceberg with HDFS Kerberos authentication.
        // Because currently, DelegateFileIO only support hdfs file operation,
        // and all we want to solve is to use the hdfs file operation in Iceberg to support Kerberos authentication.
        // Later, we should always set FILE_IO_IMPL to DelegateFileIO for all kinds of storages.
        // So, here we strictly check the storage property, if only has one storage property and is kerberos hdfs,
        // then we will use this file io impl.
        Map<StorageProperties.Type, StorageProperties> storagePropertiesMap = catalogProperty.getStoragePropertiesMap();
        if (storagePropertiesMap.size() == 1) {
            HdfsProperties hdfsProperties = (HdfsProperties) storagePropertiesMap.get(Type.HDFS);
            if (hdfsProperties != null && hdfsProperties.isKerberos()) {
                catalogProperties.put(CatalogProperties.FILE_IO_IMPL,
                        "org.apache.doris.datasource.iceberg.fileio.DelegateFileIO");
                LOG.info("use DelegateFileIO for catalog: {}:{}", getName(), getId());
            }
        }

        try {
            this.catalog = preExecutionAuthenticator.execute(() -> {
                hadoopCatalog.initialize(getName(), catalogProperties);
                return hadoopCatalog;
            });
        } catch (Exception e) {
            throw new RuntimeException("Hadoop catalog init error!", e);
        }
    }
}