Separator.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.analysis;
import org.apache.doris.common.AnalysisException;
import com.google.common.base.Strings;
import java.io.StringWriter;
public class Separator implements ParseNode {
private static final String HEX_STRING = "0123456789ABCDEF";
private final String oriSeparator;
private String separator;
public Separator(String separator) {
this.oriSeparator = separator;
this.separator = null;
}
public Separator(String separator, String oriSeparator) {
this.oriSeparator = oriSeparator;
this.separator = separator;
}
public String getOriSeparator() {
return oriSeparator;
}
public String getSeparator() {
return separator;
}
private static byte[] hexStrToBytes(String hexStr) {
String upperHexStr = hexStr.toUpperCase();
int length = upperHexStr.length() / 2;
char[] hexChars = upperHexStr.toCharArray();
byte[] bytes = new byte[length];
for (int i = 0; i < length; i++) {
int pos = i * 2;
bytes[i] = (byte) (charToByte(hexChars[pos]) << 4 | charToByte(hexChars[pos + 1]));
}
return bytes;
}
private static byte charToByte(char c) {
return (byte) HEX_STRING.indexOf(c);
}
public void analyze() throws AnalysisException {
analyze(null);
}
@Override
public void analyze(Analyzer analyzer) throws AnalysisException {
this.separator = convertSeparator(oriSeparator);
}
public static String convertSeparator(String originStr) throws AnalysisException {
if (Strings.isNullOrEmpty(originStr)) {
throw new AnalysisException("Column separator is null or empty");
}
if (originStr.toUpperCase().startsWith("\\X")) {
// convert \x01\x02\x0a to 01020a
String hexStr = originStr.replaceAll("(?i)\\\\X", "");
// check hex str
if (hexStr.isEmpty()) {
throw new AnalysisException("Hex str is empty");
}
for (char hexChar : hexStr.toUpperCase().toCharArray()) {
if (HEX_STRING.indexOf(hexChar) == -1) {
throw new AnalysisException("Hex str format error");
}
}
if (hexStr.length() % 2 != 0) {
throw new AnalysisException("Hex str length error");
}
// transform to separator
StringWriter writer = new StringWriter();
for (byte b : hexStrToBytes(hexStr)) {
writer.append((char) b);
}
return writer.toString();
} else {
return unescape(originStr);
}
}
// unescape some invisible string literal to char, such as "/t" to char '/t'
private static String unescape(String orig) {
StringBuilder sb = new StringBuilder();
boolean hadSlash = false;
for (int i = 0; i < orig.length(); i++) {
char ch = orig.charAt(i);
if (hadSlash) {
hadSlash = false;
switch (ch) {
case '\\':
sb.append('\\');
break;
case '\'':
sb.append('\'');
break;
case '\"':
sb.append('"');
break;
case 'r':
sb.append('\r');
break;
case 't':
sb.append('\t');
break;
case 'n':
sb.append('\n');
break;
default:
sb.append('\\').append(ch);
break;
}
continue;
} else if (ch == '\\') {
if (i == orig.length() - 1) {
sb.append(ch);
} else {
hadSlash = true;
}
continue;
}
sb.append(ch);
}
return sb.toString();
}
public String toSql() {
StringBuilder sb = new StringBuilder();
sb.append("'").append(oriSeparator).append("'");
return sb.toString();
}
@Override
public String toString() {
return toSql();
}
}