// ImportAction.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.httpv2.restv2;
import org.apache.doris.analysis.BrokerDesc;
import org.apache.doris.common.Config;
import org.apache.doris.common.UserException;
import org.apache.doris.common.parquet.ParquetReader;
import org.apache.doris.common.util.BrokerUtil;
import org.apache.doris.httpv2.entity.ResponseEntityBuilder;
import org.apache.doris.httpv2.rest.RestBaseController;
import org.apache.doris.thrift.TBrokerFileStatus;

import com.google.common.collect.Lists;
import lombok.Getter;
import lombok.Setter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RestController;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
/**
 * REST controller that previews ("reviews") the files matched by an import path.
 *
 * <p>For a given file URL and broker connection it reports how many files match and their
 * total size, and returns a small sample (at most {@link #MAX_SAMPLE_LINE} rows) of the first
 * matched file. Only CSV and PARQUET samples are supported.
 */
@RestController
@RequestMapping("/rest/v2")
public class ImportAction extends RestBaseController {

    private static final Logger LOG = LogManager.getLogger(ImportAction.class);

    /** Upper bound on the number of bytes read from a CSV file to build the sample. */
    private static final long MAX_READ_LEN_BYTES = 1024 * 1024; // 1MB

    private static final String FORMAT_CSV = "CSV";
    private static final String FORMAT_PARQUET = "PARQUET";
    // Declared but not referenced anywhere in this class: ORC files are currently rejected.
    private static final String FORMAT_ORC = "ORC";

    /** Maximum number of rows returned in a file sample. */
    private static final int MAX_SAMPLE_LINE = 50;

    /**
     * Previews the files matched by {@code fileInfo.fileUrl}.
     *
     * <p>Request body:
     * <pre>
     * {
     *     "fileInfo": {
     *         "columnSeparator": ",",   // required for CSV
     *         "fileUrl": "hdfs://127.0.0.1:50070/file/test/text*",
     *         "format": "CSV"           // CSV or PARQUET (case-insensitive)
     *     },
     *     "connectInfo": {              // broker used to access the file
     *         "brokerName" : "my_broker",
     *         "brokerProps" : {
     *             "username" : "yyy",
     *             "password" : "xxx"
     *         }
     *     }
     * }
     * </pre>
     *
     * @param body     the parsed request body
     * @param request  the incoming HTTP request
     * @param response the outgoing HTTP response
     * @return a redirect when HTTPS is required; otherwise a response wrapping a
     *         {@link FileReviewResponseVo}, or a common-error response when the request body is
     *         incomplete or a {@link UserException} is raised while inspecting the files
     */
    @RequestMapping(path = "/api/import/file_review", method = RequestMethod.POST)
    public Object fileReview(@RequestBody FileReviewRequestVo body,
            HttpServletRequest request, HttpServletResponse response) {
        if (needRedirect(request.getScheme())) {
            return redirectToHttps(request);
        }

        if (Config.enable_all_http_auth) {
            // The result is not inspected here; presumably a failed check is signalled by the
            // base controller itself (confirm in RestBaseController).
            executeCheckPassword(request, response);
        }

        // Reject incomplete requests explicitly instead of failing with a NullPointerException.
        FileInfo fileInfo = body.getFileInfo();
        if (fileInfo == null) {
            return ResponseEntityBuilder.okWithCommonError("Missing fileInfo in request body");
        }
        ConnectInfo connectInfo = body.getConnectInfo();
        if (connectInfo == null) {
            return ResponseEntityBuilder.okWithCommonError("Missing connectInfo in request body");
        }

        BrokerDesc brokerDesc = new BrokerDesc(connectInfo.getBrokerName(), connectInfo.getBrokerProps());
        List<TBrokerFileStatus> fileStatuses = Lists.newArrayList();
        try {
            // Resolve the (possibly wildcard) URL into the list of matching paths.
            BrokerUtil.parseFile(fileInfo.getFileUrl(), brokerDesc, fileStatuses);
            FileReviewResponseVo reviewResponseVo = createFileReviewResponse(brokerDesc, fileInfo, fileStatuses);
            return ResponseEntityBuilder.ok(reviewResponseVo);
        } catch (UserException e) {
            return ResponseEntityBuilder.okWithCommonError(e.getMessage());
        }
    }

    /**
     * Builds the review response: overall statistics plus a sample of the first matched file.
     *
     * @param brokerDesc   broker used to read the sample file
     * @param fileInfo     file description taken from the request
     * @param fileStatuses statuses of all paths matched by the file URL
     * @return the response; it carries no file sample when {@code fileStatuses} is empty
     * @throws UserException if a matched path is a directory, the format is neither CSV nor
     *                       PARQUET, the CSV column separator is missing, or the sample cannot
     *                       be read
     */
    private FileReviewResponseVo createFileReviewResponse(BrokerDesc brokerDesc, FileInfo fileInfo,
            List<TBrokerFileStatus> fileStatuses) throws UserException {
        FileReviewResponseVo responseVo = new FileReviewResponseVo();

        // Statistics over every matched path; directories are not allowed.
        FileReviewStatistic statistic = new FileReviewStatistic();
        statistic.setFileNumber(fileStatuses.size());
        long totalFileSize = 0;
        for (TBrokerFileStatus fStatus : fileStatuses) {
            if (fStatus.isDir) {
                throw new UserException("Not all matched paths are files: " + fStatus.path);
            }
            totalFileSize += fStatus.size;
        }
        statistic.setFileSize(totalFileSize);
        responseVo.setReviewStatistic(statistic);

        if (fileStatuses.isEmpty()) {
            return responseVo;
        }

        // Only the first matched file is sampled.
        TBrokerFileStatus sampleFile = fileStatuses.get(0);
        FileSample fileSample = new FileSample();
        fileSample.setSampleFileName(sampleFile.path);

        // Constant-first comparison keeps a missing format from raising a NullPointerException;
        // it simply falls through to the "unsupported format" error below.
        String format = fileInfo.getFormat();
        if (FORMAT_CSV.equalsIgnoreCase(format)) {
            String columnSeparator = fileInfo.getColumnSeparator();
            if (columnSeparator == null) {
                // Checked before any I/O so a bad request does not cost a broker read.
                throw new UserException("columnSeparator is required for CSV file format");
            }
            byte[] fileContentBytes = BrokerUtil.readFile(sampleFile.path, brokerDesc, MAX_READ_LEN_BYTES);
            parseContent(columnSeparator, "\n", fileContentBytes, fileSample);
        } else if (FORMAT_PARQUET.equalsIgnoreCase(format)) {
            try {
                // NOTE(review): the reader is never released here. If ParquetReader exposes a
                // close method, it should be invoked once sampling is done - confirm.
                ParquetReader parquetReader = ParquetReader.create(sampleFile.path, brokerDesc);
                parseParquet(parquetReader, fileSample);
            } catch (IOException e) {
                LOG.warn("failed to get sample data of parquet file: {}", sampleFile.path, e);
                throw new UserException("failed to get sample data of parquet file. " + e.getMessage());
            }
        } else {
            throw new UserException("Only support CSV or PARQUET file format");
        }

        responseVo.setFileSample(fileSample);
        return responseVo;
    }

    /**
     * Splits raw text content into rows and columns and stores up to {@link #MAX_SAMPLE_LINE}
     * rows in {@code fileSample}.
     *
     * <p>NOTE(review): both delimiters are handed to {@link String#split(String)} and are
     * therefore interpreted as regular expressions, so a separator such as {@code |} or
     * {@code .} is not matched literally. This is left unchanged because callers may rely on
     * regex escapes; consider quoting the separator once that has been confirmed.
     *
     * @param columnSeparator  column delimiter (regular expression), must not be null
     * @param lineDelimiter    line delimiter (regular expression)
     * @param fileContentBytes raw bytes of the sampled content, decoded as UTF-8
     * @param fileSample       sample object that receives the parsed rows
     */
    private void parseContent(String columnSeparator, String lineDelimiter, byte[] fileContentBytes,
            FileSample fileSample) {
        List<List<String>> sampleLines = Lists.newArrayList();
        int maxColSize = 0;

        // An empty file has no rows. Without this guard "".split(...) would yield a single
        // empty string and the sample would report one line with one empty column.
        if (fileContentBytes != null && fileContentBytes.length > 0) {
            // Decode explicitly as UTF-8 rather than with the platform default charset.
            String content = new String(fileContentBytes, StandardCharsets.UTF_8);
            for (String line : content.split(lineDelimiter)) {
                if (sampleLines.size() >= MAX_SAMPLE_LINE) {
                    break;
                }
                List<String> row = Lists.newArrayList(line.split(columnSeparator));
                sampleLines.add(row);
                maxColSize = Math.max(maxColSize, row.size());
            }
        }

        fileSample.setFileLineNumber(sampleLines.size());
        fileSample.setMaxColumnSize(maxColSize);
        fileSample.setSampleFileLines(sampleLines);
    }

    /**
     * Fills {@code fileSample} with the column names and up to {@link #MAX_SAMPLE_LINE} rows
     * obtained from the given parquet reader.
     *
     * @param reader     reader opened on the sample file
     * @param fileSample sample object that receives the schema and rows
     * @throws IOException if the reader fails
     */
    private void parseParquet(ParquetReader reader, FileSample fileSample) throws IOException {
        List<String> colNames = reader.getSchema(false);
        fileSample.setColNames(colNames);
        fileSample.setMaxColumnSize(colNames.size());

        List<List<String>> lines = reader.getLines(MAX_SAMPLE_LINE);
        fileSample.setSampleFileLines(lines);
        fileSample.setFileLineNumber(lines.size());
    }

    /** Request body of the file review API. */
    @Getter
    @Setter
    public static class FileReviewRequestVo {
        private FileInfo fileInfo;
        private ConnectInfo connectInfo;
    }

    /** Describes the file(s) to preview. */
    @Getter
    @Setter
    public static class FileInfo {
        // Column delimiter used to split CSV lines.
        private String columnSeparator;
        // Path of the file(s) to preview.
        private String fileUrl;
        // File format; compared case-insensitively against CSV and PARQUET.
        private String format;
    }

    /** Broker connection used to access the files. */
    @Getter
    @Setter
    public static class ConnectInfo {
        private String brokerName;
        private Map<String, String> brokerProps;
    }

    /** Response body of the file review API. */
    @Getter
    @Setter
    public static class FileReviewResponseVo {
        private FileReviewStatistic reviewStatistic;
        // Left unset when no file matched the requested URL.
        private FileSample fileSample;
    }

    /** Aggregate statistics over all matched files. */
    @Getter
    @Setter
    public static class FileReviewStatistic {
        // Number of matched files.
        private int fileNumber;
        // Sum of the sizes reported for the matched files.
        private long fileSize;
    }

    /** Sample rows taken from the first matched file. */
    @Getter
    @Setter
    public static class FileSample {
        private String sampleFileName;
        // Number of rows contained in sampleFileLines.
        private int fileLineNumber;
        // Largest column count among the sampled rows (CSV) or the schema width (PARQUET).
        private int maxColumnSize;
        // Column names; only populated for PARQUET samples.
        private List<String> colNames;
        private List<List<String>> sampleFileLines;
    }
}