MarkDownParser.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.common;
import org.apache.doris.qe.help.HelpTopic;
import com.google.common.collect.Maps;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* A simple Markdown parser used to parse help topic files,
* e.g. sql-reference, sql-functions.
* <p>
* Each topic must have the following structure:
* ## Topic Name
* ### Description // required
* ### Example // optional
* ### Keywords // required
* other fields are optional
* <p>
* <p>
* Multiple topics are allowed in one file.
*/
public class MarkDownParser {
    /**
     * Progress of the parser through the heading hierarchy.
     * The constant names are historical: a topic heading is written with two pound signs
     * ("## ...") and a section heading with three ("### ...").
     */
    private enum ParseState {
        // No topic heading has been read yet.
        START,
        // A topic heading has been read, but none of its sections yet.
        PARSED_H1,
        // At least one section of the current topic has been read.
        PARSED_H2
    }

    private static final char SINGLE_POUND_SIGN = '#';
    // Every line starting with this prefix is a heading candidate.
    private static final String HEADING_PREFIX = "##";
    // Headings at this depth or deeper are kept as plain content of the enclosing item.
    private static final String IGNORED_HEADING_PREFIX = "####";
    // Number of pound signs that introduces a topic.
    private static final int TOPIC_LEVEL = 2;
    // Number of pound signs that introduces a section of a topic.
    private static final int SECTION_LEVEL = 3;

    // Parsed result: topic name -> (section name -> section content). Keys are case-insensitive.
    private final Map<String, Map<String, String>> documents;
    private final List<String> lines;
    // Index into lines of the next line that has not been consumed yet.
    private int nextToRead;
    private ParseState state;
    // Heading level of the item most recently returned by parseOneItem().
    private int headLevel;
    // Name of the topic currently being collected; null until the first topic heading is read.
    private String head;
    // Temp map used to store the sections of the topic currently being collected.
    private Map<String, String> keyValues;

    /**
     * Creates a parser over the given lines of a markdown document.
     *
     * @param lines the document content, one element per line
     */
    public MarkDownParser(List<String> lines) {
        this.lines = lines;
        documents = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
        nextToRead = 0;
        state = ParseState.START;
    }

    /**
     * Parses all lines and groups the sections found under their topic.
     *
     * @return a map from topic name to a map from section name to section content;
     *         both maps compare keys case-insensitively
     * @throws UserException if the first heading is not a topic heading, or if any topic
     *         lacks one of the sections required by {@link #checkStructure()}
     */
    public Map<String, Map<String, String>> parse() throws UserException {
        while (nextToRead < lines.size()) {
            Map.Entry<String, String> keyValue = parseOneItem();
            if (keyValue == null) {
                // No heading is left; parseOneItem() has already consumed the remaining lines.
                break;
            }
            switch (state) {
                case START:
                    if (headLevel != TOPIC_LEVEL) {
                        // State error
                        throw new UserException("Head first read is not h1.");
                    }
                    startTopic(keyValue.getKey());
                    break;
                case PARSED_H1:
                    if (headLevel == TOPIC_LEVEL) {
                        // The previous topic has no section at all. It is still stored, so that
                        // checkStructure() rejects it for lacking the required sections.
                        documents.put(head, keyValues);
                        startTopic(keyValue.getKey());
                    } else if (headLevel == SECTION_LEVEL) {
                        keyValues.put(keyValue.getKey(), keyValue.getValue());
                        state = ParseState.PARSED_H2;
                    } else {
                        throw new UserException("Unknown head level.");
                    }
                    break;
                case PARSED_H2:
                    if (headLevel == TOPIC_LEVEL) {
                        // One document read over.
                        documents.put(head, keyValues);
                        startTopic(keyValue.getKey());
                    } else if (headLevel == SECTION_LEVEL) {
                        keyValues.put(keyValue.getKey(), keyValue.getValue());
                    }
                    // Any other head level is ignored on purpose instead of throwing an exception.
                    break;
                default:
                    // State error
                    throw new UserException("Unknown parse state.");
            }
        }
        if (head != null) {
            // Store the last topic, which has no following topic heading to trigger the store.
            documents.put(head, keyValues);
        }
        checkStructure();
        return documents;
    }

    /**
     * Begins collecting a new topic with an empty, case-insensitive section map.
     * The state becomes PARSED_H1; after the first heading parseOneItem() only yields
     * levels 2 and 3 (deeper headings are folded into content), so this is equivalent
     * to staying in PARSED_H2 when coming from there.
     */
    private void startTopic(String topicName) {
        head = topicName;
        keyValues = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
        state = ParseState.PARSED_H1;
    }

    /**
     * Verifies that every parsed topic contains both the description and the keywords section.
     *
     * @throws DdlException naming the first topic that misses one of them
     */
    private void checkStructure() throws DdlException {
        for (Map.Entry<String, Map<String, String>> entry : documents.entrySet()) {
            Set<String> keys = entry.getValue().keySet();
            boolean complete = keys.contains(HelpTopic.DESCRIPTION) && keys.contains(HelpTopic.KEYWORDS);
            if (!complete) {
                throw new DdlException("Invalid help topic structure. title: " + entry.getKey() + ", keys: " + keys);
            }
        }
    }

    /**
     * Reads the next heading together with the content that follows it.
     * As a side effect, headLevel is set to the number of leading pound signs of that heading
     * and nextToRead is advanced past the consumed lines.
     *
     * @return an entry of (heading text, content), or null when no heading is left
     */
    private Map.Entry<String, String> parseOneItem() {
        // 1. Find the first heading line (start with ##)
        while (nextToRead < lines.size() && !lines.get(nextToRead).startsWith(HEADING_PREFIX)) {
            nextToRead++;
        }
        if (nextToRead >= lines.size()) {
            return null;
        }

        // 2. Get the level of this key
        String key = lines.get(nextToRead++);
        headLevel = 0;
        while (headLevel < key.length() && key.charAt(headLevel) == SINGLE_POUND_SIGN) {
            headLevel++;
        }

        // 3. Save all lines within this level until we meet the next ## or ###
        StringBuilder sb = new StringBuilder();
        while (nextToRead < lines.size()) {
            String line = lines.get(nextToRead);
            // Headings with four or more pound signs are treated as normal content.
            boolean isContent = !line.startsWith(HEADING_PREFIX) || line.startsWith(IGNORED_HEADING_PREFIX);
            if (!isContent) {
                // break if we meet next heading
                break;
            }
            sb.append(line).append('\n');
            nextToRead++;
        }

        // Note that multiple line breaks at content's end will be merged to be one,
        // and other whitespace characters will be deleted.
        // Also, the header in md file is like "## STREAM-LOAD", we need to convert it to "STREAM LOAD",
        // so that we can execute "help stream load" to show the help doc.
        String title = key.substring(headLevel).trim().replace('-', ' ');
        return Maps.immutableEntry(title, processWhitespace(sb));
    }

    /**
     * Replaces the trailing whitespace of the buffer with a single line break.
     * An empty buffer is returned unchanged as an empty string.
     */
    private String processWhitespace(StringBuilder sb) {
        int index = sb.length() - 1;
        while (index >= 0 && Character.isWhitespace(sb.charAt(index))) {
            index--;
        }
        if (index < sb.length() - 1) {
            sb.setLength(index + 1);
            sb.append('\n');
        }
        return sb.toString();
    }
}