/root/doris/be/src/util/mustache/mustache.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
2 | | // you may not use this file except in compliance with the License. |
3 | | // You may obtain a copy of the License at |
4 | | // |
5 | | // http://www.apache.org/licenses/LICENSE-2.0 |
6 | | // |
7 | | // Unless required by applicable law or agreed to in writing, software |
8 | | // distributed under the License is distributed on an "AS IS" BASIS, |
9 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
10 | | // See the License for the specific language governing permissions and |
11 | | // limitations under the License. |
12 | | |
13 | | #include "mustache.h" |
14 | | |
15 | | #include <rapidjson/allocators.h> |
16 | | #include <rapidjson/document.h> |
17 | | #include <rapidjson/encodings.h> |
18 | | #include <rapidjson/prettywriter.h> |
19 | | #include <rapidjson/rapidjson.h> |
20 | | #include <strings.h> |
21 | | |
22 | | #include <algorithm> |
23 | | #include <boost/algorithm/string/classification.hpp> |
24 | | #include <boost/algorithm/string/detail/classification.hpp> |
25 | | #include <boost/algorithm/string/join.hpp> |
26 | | #include <boost/algorithm/string/predicate_facade.hpp> |
27 | | #include <boost/algorithm/string/split.hpp> |
28 | | #include <boost/algorithm/string/trim.hpp> |
29 | | #include <boost/iterator/iterator_facade.hpp> |
30 | | #include <boost/type_index/type_index_facade.hpp> |
31 | | #include <fstream> // IWYU pragma: keep |
32 | | #include <iostream> |
33 | | #include <stack> |
34 | | #include <vector> |
35 | | |
36 | | #include "rapidjson/stringbuffer.h" |
37 | | #include "rapidjson/writer.h" |
38 | | |
39 | | using namespace rapidjson; |
40 | | using namespace boost::algorithm; |
41 | | |
42 | | namespace mustache { |
43 | | |
44 | | // TODO: |
45 | | // # Handle malformed templates better |
46 | | // # Better support for reading templates from files |
47 | | |
// The kinds of tag the renderer distinguishes. GetOperator() picks the kind from the
// leading character(s) of a tag's text; NONE means that no tag was found.
enum TagOperator {
    SUBSTITUTION,            // no operator prefix
    SECTION_START,           // '#'
    NEGATED_SECTION_START,   // '^'
    PREDICATE_SECTION_START, // '?'
    SECTION_END,             // '/'
    PARTIAL,                 // '>'
    COMMENT,                 // '!' not followed by '='
    LENGTH,                  // '%'
    EQUALITY,                // '='
    INEQUALITY,              // "!="
    LITERAL,                 // '~'
    NONE                     // no tag
};
62 | | |
63 | | struct OpCtx { |
64 | | TagOperator op; |
65 | | std::string tag_name; |
66 | | std::string tag_arg; |
67 | | bool escaped = false; |
68 | | }; |
69 | | |
70 | | struct ContextStack { |
71 | | const Value* value; |
72 | | const ContextStack* parent; |
73 | | }; |
74 | | |
75 | 0 | TagOperator GetOperator(const std::string& tag) { |
76 | 0 | if (tag.size() == 0) return SUBSTITUTION; |
77 | 0 | switch (tag[0]) { |
78 | 0 | case '#': |
79 | 0 | return SECTION_START; |
80 | 0 | case '^': |
81 | 0 | return NEGATED_SECTION_START; |
82 | 0 | case '?': |
83 | 0 | return PREDICATE_SECTION_START; |
84 | 0 | case '/': |
85 | 0 | return SECTION_END; |
86 | 0 | case '>': |
87 | 0 | return PARTIAL; |
88 | 0 | case '!': |
89 | 0 | if (tag.size() == 1 || tag[1] != '=') return COMMENT; |
90 | 0 | return INEQUALITY; |
91 | 0 | case '%': |
92 | 0 | return LENGTH; |
93 | 0 | case '~': |
94 | 0 | return LITERAL; |
95 | 0 | case '=': |
96 | 0 | return EQUALITY; |
97 | 0 | default: |
98 | 0 | return SUBSTITUTION; |
99 | 0 | } |
100 | 0 | } |
101 | | |
102 | | int EvaluateTag(const std::string& document, const std::string& document_root, int idx, |
103 | | const ContextStack* context, const OpCtx& op_ctx, std::stringstream* out); |
104 | | |
105 | | static bool RenderTemplate(const std::string& document, const std::string& document_root, |
106 | | const ContextStack* stack, std::stringstream* out); |
107 | | |
// Writes 'in' to 'out', replacing the five characters that are significant in HTML
// (& " ' < >) with their entity references. All other characters are copied unchanged.
void EscapeHtml(const std::string& in, std::stringstream* out) {
    for (const char c : in) {
        switch (c) {
        case '&':
            (*out) << "&amp;";
            break;
        case '"':
            (*out) << "&quot;";
            break;
        case '\'':
            (*out) << "&apos;";
            break;
        case '<':
            (*out) << "&lt;";
            break;
        case '>':
            (*out) << "&gt;";
            break;
        default:
            (*out) << c;
            break;
        }
    }
}
132 | | |
133 | 0 | void Dump(const rapidjson::Value& v) { |
134 | 0 | StringBuffer buffer; |
135 | 0 | Writer<StringBuffer> writer(buffer); |
136 | 0 | v.Accept(writer); |
137 | 0 | std::cout << buffer.GetString() << std::endl; |
138 | 0 | } |
139 | | |
// Appends the component occupying [begin, end) of 'path' to 'components'. The caller
// guarantees begin < end. A component that both starts and ends with '"' is stored
// without the surrounding quotes, and is dropped entirely when nothing lies between them.
static void AppendJsonPathComponent(const std::string& path, size_t begin, size_t end,
                                    std::vector<std::string>* components) {
    const size_t length = end - begin;
    const bool quoted = path[begin] == '"' && path[end - 1] == '"';
    if (!quoted) {
        components->push_back(path.substr(begin, length));
        return;
    }
    // length <= 2 means a lone quote or an empty pair ("") : nothing to add.
    if (length > 2) components->push_back(path.substr(begin + 1, length - 2));
}

// Breaks a dotted path into individual components and appends them to 'components'.
// This is not a plain split() because components may be quoted, e.g. "foo.bar".baz:
// '.' characters inside a quoted section are not delimiters, which allows JSON keys that
// contain periods. A character preceded by an unescaped backslash is never treated as a
// quote or a delimiter; the backslash itself is kept in the component text.
//
// A '.' that would produce an empty component is not treated as a delimiter and stays
// part of the following component.
void FindJsonPathComponents(const std::string& path, std::vector<std::string>* components) {
    bool in_quote = false;
    bool escape_this_char = false;
    size_t start = 0;
    for (size_t i = 0; i < path.size(); ++i) {
        const char c = path[i];
        if (c == '"' && !escape_this_char) in_quote = !in_quote;

        const bool is_delimiter = (c == '.' && !escape_this_char && !in_quote);
        if (is_delimiter && i > start) {
            AppendJsonPathComponent(path, start, i, components);
            start = i + 1;
        }

        escape_this_char = (c == '\\' && !escape_this_char);
    }

    // Whatever follows the last delimiter is the final component.
    if (path.size() > start) {
        AppendJsonPathComponent(path, start, path.size(), components);
    }
}
178 | | |
179 | | // Looks up the json entity at 'path' in 'parent_context', and places it in 'resolved'. If |
180 | | // the entity does not exist (i.e. the path is invalid), 'resolved' will be set to nullptr. |
181 | | void ResolveJsonContext(const std::string& path, const ContextStack* stack, |
182 | 0 | const Value** resolved) { |
183 | 0 | if (path == ".") { |
184 | 0 | *resolved = stack->value; |
185 | 0 | return; |
186 | 0 | } |
187 | 0 | std::vector<std::string> components; |
188 | 0 | FindJsonPathComponents(path, &components); |
189 | | |
190 | | // At each enclosing level of context, try to resolve the path. |
191 | 0 | for (; stack != nullptr; stack = stack->parent) { |
192 | 0 | const Value* cur = stack->value; |
193 | 0 | bool match = true; |
194 | 0 | for (const std::string& c : components) { |
195 | 0 | if (cur->IsObject() && cur->HasMember(c.c_str())) { |
196 | 0 | cur = &(*cur)[c.c_str()]; |
197 | 0 | } else { |
198 | 0 | match = false; |
199 | 0 | break; |
200 | 0 | } |
201 | 0 | } |
202 | 0 | if (match) { |
203 | 0 | *resolved = cur; |
204 | 0 | return; |
205 | 0 | } |
206 | 0 | } |
207 | 0 | *resolved = nullptr; |
208 | 0 | } |
209 | | |
210 | 0 | int FindNextTag(const std::string& document, int idx, OpCtx* op, std::stringstream* out) { |
211 | 0 | op->op = NONE; |
212 | 0 | while (idx < document.size()) { |
213 | 0 | if (document[idx] == '{' && idx < (document.size() - 3) && document[idx + 1] == '{') { |
214 | 0 | if (document[idx + 2] == '{') { |
215 | 0 | idx += 3; |
216 | 0 | op->escaped = true; |
217 | 0 | } else { |
218 | 0 | op->escaped = false; |
219 | 0 | idx += 2; // Now at start of template expression |
220 | 0 | } |
221 | 0 | std::stringstream expr; |
222 | 0 | while (idx < document.size()) { |
223 | 0 | if (document[idx] != '}') { |
224 | 0 | expr << document[idx]; |
225 | 0 | ++idx; |
226 | 0 | } else { |
227 | 0 | if (!op->escaped && idx < document.size() - 1 && document[idx + 1] == '}') { |
228 | 0 | ++idx; |
229 | 0 | break; |
230 | 0 | } else if (op->escaped && idx < document.size() - 2 && |
231 | 0 | document[idx + 1] == '}' && document[idx + 2] == '}') { |
232 | 0 | idx += 2; |
233 | 0 | break; |
234 | 0 | } else { |
235 | 0 | expr << '}'; |
236 | 0 | } |
237 | 0 | } |
238 | 0 | } |
239 | |
|
240 | 0 | std::string key = expr.str(); |
241 | 0 | trim(key); |
242 | 0 | if (key != ".") trim_if(key, is_any_of(".")); |
243 | 0 | if (key.size() == 0) continue; |
244 | 0 | op->op = GetOperator(key); |
245 | 0 | if (op->op != SUBSTITUTION) { |
246 | 0 | int len = op->op == INEQUALITY ? 2 : 1; |
247 | 0 | key = key.substr(len); |
248 | 0 | trim(key); |
249 | 0 | } |
250 | 0 | if (key.size() == 0) continue; |
251 | | |
252 | 0 | if (op->op == EQUALITY || op->op == INEQUALITY) { |
253 | | // Find an argument |
254 | 0 | std::vector<std::string> components; |
255 | 0 | split(components, key, is_any_of(" ")); |
256 | 0 | key = components[0]; |
257 | 0 | components.erase(components.begin()); |
258 | 0 | op->tag_arg = join(components, " "); |
259 | 0 | } |
260 | |
|
261 | 0 | op->tag_name = key; |
262 | 0 | return ++idx; |
263 | 0 | } else { |
264 | 0 | if (out != nullptr) (*out) << document[idx]; |
265 | 0 | } |
266 | 0 | ++idx; |
267 | 0 | } |
268 | 0 | return idx; |
269 | 0 | } |
270 | | |
271 | | // Evaluates a [PREDICATE_|NEGATED_]SECTION_START / SECTION_END pair by evaluating the tag |
272 | | // in 'parent_context'. False or non-existant values cause the entire section to be |
273 | | // skipped. True values cause the section to be evaluated as though it were a normal |
274 | | // section, but with the parent context being the root context for that section. |
275 | | // |
276 | | // If 'is_negation' is true, the behaviour is the opposite of the above: false values |
277 | | // cause the section to be normally evaluated etc. |
278 | | int EvaluateSection(const std::string& document, const std::string& document_root, int idx, |
279 | | const ContextStack* context_stack, const OpCtx& op_ctx, |
280 | 0 | std::stringstream* out) { |
281 | | // Precondition: idx is the immediate next character after an opening {{ #tag_name }} |
282 | 0 | const Value* context; |
283 | 0 | ResolveJsonContext(op_ctx.tag_name, context_stack, &context); |
284 | | |
285 | | // If we a) cannot resolve the context from the tag name or b) the context evaluates to |
286 | | // false, we should skip the contents of the template until a closing {{/tag_name}}. |
287 | 0 | bool skip_contents = false; |
288 | |
|
289 | 0 | if (op_ctx.op == NEGATED_SECTION_START || op_ctx.op == PREDICATE_SECTION_START || |
290 | 0 | op_ctx.op == SECTION_START) { |
291 | 0 | skip_contents = (context == nullptr || context->IsFalse()); |
292 | | |
293 | | // If the tag is a negative block (i.e. {{^tag_name}}), do the opposite: if the |
294 | | // context exists and is true, skip the contents, else echo them. |
295 | 0 | if (op_ctx.op == NEGATED_SECTION_START) { |
296 | 0 | context = context_stack->value; |
297 | 0 | skip_contents = !skip_contents; |
298 | 0 | } else if (op_ctx.op == PREDICATE_SECTION_START) { |
299 | 0 | context = context_stack->value; |
300 | 0 | } |
301 | 0 | } else if (op_ctx.op == INEQUALITY || op_ctx.op == EQUALITY) { |
302 | 0 | skip_contents = (context == nullptr || !context->IsString() || |
303 | 0 | strcasecmp(context->GetString(), op_ctx.tag_arg.c_str()) != 0); |
304 | 0 | if (op_ctx.op == INEQUALITY) skip_contents = !skip_contents; |
305 | 0 | context = context_stack->value; |
306 | 0 | } |
307 | |
|
308 | 0 | std::vector<const Value*> values; |
309 | 0 | if (!skip_contents && context != nullptr && context->IsArray()) { |
310 | 0 | for (int i = 0; i < context->Size(); ++i) { |
311 | 0 | values.push_back(&(*context)[i]); |
312 | 0 | } |
313 | 0 | } else { |
314 | 0 | values.push_back(skip_contents ? nullptr : context); |
315 | 0 | } |
316 | 0 | if (values.size() == 0) { |
317 | 0 | skip_contents = true; |
318 | 0 | values.push_back(nullptr); |
319 | 0 | } |
320 | |
|
321 | 0 | int start_idx = idx; |
322 | 0 | for (const Value* v : values) { |
323 | 0 | idx = start_idx; |
324 | 0 | std::stack<OpCtx> section_starts; |
325 | 0 | section_starts.push(op_ctx); |
326 | 0 | while (idx < document.size()) { |
327 | 0 | OpCtx next_ctx; |
328 | 0 | idx = FindNextTag(document, idx, &next_ctx, skip_contents ? nullptr : out); |
329 | 0 | if (skip_contents && |
330 | 0 | (next_ctx.op == SECTION_START || next_ctx.op == PREDICATE_SECTION_START || |
331 | 0 | next_ctx.op == NEGATED_SECTION_START)) { |
332 | 0 | section_starts.push(next_ctx); |
333 | 0 | } else if (next_ctx.op == SECTION_END) { |
334 | 0 | if (next_ctx.tag_name != section_starts.top().tag_name) return -1; |
335 | 0 | section_starts.pop(); |
336 | 0 | } |
337 | 0 | if (section_starts.empty()) break; |
338 | | |
339 | | // Don't need to evaluate any templates if we're skipping the contents |
340 | 0 | if (!skip_contents) { |
341 | 0 | ContextStack new_context = {v, context_stack}; |
342 | 0 | idx = EvaluateTag(document, document_root, idx, &new_context, next_ctx, out); |
343 | 0 | } |
344 | 0 | } |
345 | 0 | } |
346 | 0 | return idx; |
347 | 0 | } |
348 | | |
349 | | // Evaluates a SUBSTITUTION tag, by replacing its contents with the value of the tag's |
350 | | // name in 'parent_context'. |
351 | | int EvaluateSubstitution(const std::string& document, const int idx, |
352 | | const ContextStack* context_stack, const OpCtx& op_ctx, |
353 | 0 | std::stringstream* out) { |
354 | 0 | const Value* val; |
355 | 0 | ResolveJsonContext(op_ctx.tag_name, context_stack, &val); |
356 | 0 | if (val == nullptr) return idx; |
357 | 0 | if (val->IsString()) { |
358 | 0 | if (!op_ctx.escaped) { |
359 | 0 | EscapeHtml(val->GetString(), out); |
360 | 0 | } else { |
361 | | // TODO: Triple {{{ means don't escape |
362 | 0 | (*out) << val->GetString(); |
363 | 0 | } |
364 | 0 | } else if (val->IsInt64()) { |
365 | 0 | (*out) << val->GetInt64(); |
366 | 0 | } else if (val->IsInt()) { |
367 | 0 | (*out) << val->GetInt(); |
368 | 0 | } else if (val->IsDouble()) { |
369 | 0 | (*out) << val->GetDouble(); |
370 | 0 | } else if (val->IsBool()) { |
371 | 0 | (*out) << std::boolalpha << val->GetBool(); |
372 | 0 | } |
373 | 0 | return idx; |
374 | 0 | } |
375 | | |
376 | | // Evaluates a LENGTH tag by replacing its contents with the type-dependent 'size' of the |
377 | | // value. |
378 | | int EvaluateLength(const std::string& document, const int idx, const ContextStack* context_stack, |
379 | 0 | const std::string& tag_name, std::stringstream* out) { |
380 | 0 | const Value* val; |
381 | 0 | ResolveJsonContext(tag_name, context_stack, &val); |
382 | 0 | if (val == nullptr) return idx; |
383 | 0 | if (val->IsArray()) { |
384 | 0 | (*out) << val->Size(); |
385 | 0 | } else if (val->IsString()) { |
386 | 0 | (*out) << val->GetStringLength(); |
387 | 0 | }; |
388 | |
|
389 | 0 | return idx; |
390 | 0 | } |
391 | | |
392 | | int EvaluateLiteral(const std::string& document, const int idx, const ContextStack* context_stack, |
393 | 0 | const std::string& tag_name, std::stringstream* out) { |
394 | 0 | const Value* val; |
395 | 0 | ResolveJsonContext(tag_name, context_stack, &val); |
396 | 0 | if (val == nullptr) return idx; |
397 | 0 | if (!val->IsArray() && !val->IsObject()) return idx; |
398 | 0 | StringBuffer strbuf; |
399 | 0 | PrettyWriter<StringBuffer> writer(strbuf); |
400 | 0 | val->Accept(writer); |
401 | 0 | (*out) << strbuf.GetString(); |
402 | 0 | return idx; |
403 | 0 | } |
404 | | |
405 | | // Evaluates a 'partial' template by reading it fully from disk, then rendering it |
406 | | // directly into the current output with the current context. |
407 | | // |
408 | | // TODO: This could obviously be more efficient (and there are lots of file accesses in a |
409 | | // long list context). |
410 | | void EvaluatePartial(const std::string& tag_name, const std::string& document_root, |
411 | 0 | const ContextStack* stack, std::stringstream* out) { |
412 | 0 | std::stringstream ss; |
413 | 0 | ss << document_root << tag_name; |
414 | 0 | std::ifstream tmpl(ss.str().c_str()); |
415 | 0 | if (!tmpl.is_open()) { |
416 | 0 | ss << ".mustache"; |
417 | 0 | tmpl.open(ss.str().c_str()); |
418 | 0 | if (!tmpl.is_open()) return; |
419 | 0 | } |
420 | 0 | std::stringstream file_ss; |
421 | 0 | file_ss << tmpl.rdbuf(); |
422 | 0 | RenderTemplate(file_ss.str(), document_root, stack, out); |
423 | 0 | } |
424 | | |
425 | | // Given a tag name, and its operator, evaluate the tag in the given context and write the |
426 | | // output to 'out'. The heavy-lifting is delegated to specific Evaluate*() |
427 | | // methods. Returns the new cursor position within 'document', or -1 on error. |
428 | | int EvaluateTag(const std::string& document, const std::string& document_root, int idx, |
429 | 0 | const ContextStack* context, const OpCtx& op_ctx, std::stringstream* out) { |
430 | 0 | if (idx == -1) return idx; |
431 | 0 | switch (op_ctx.op) { |
432 | 0 | case SECTION_START: |
433 | 0 | case PREDICATE_SECTION_START: |
434 | 0 | case NEGATED_SECTION_START: |
435 | 0 | case EQUALITY: |
436 | 0 | case INEQUALITY: |
437 | 0 | return EvaluateSection(document, document_root, idx, context, op_ctx, out); |
438 | 0 | case SUBSTITUTION: |
439 | 0 | return EvaluateSubstitution(document, idx, context, op_ctx, out); |
440 | 0 | case COMMENT: |
441 | 0 | return idx; // Ignored |
442 | 0 | case PARTIAL: |
443 | 0 | EvaluatePartial(op_ctx.tag_name, document_root, context, out); |
444 | 0 | return idx; |
445 | 0 | case LENGTH: |
446 | 0 | return EvaluateLength(document, idx, context, op_ctx.tag_name, out); |
447 | 0 | case LITERAL: |
448 | 0 | return EvaluateLiteral(document, idx, context, op_ctx.tag_name, out); |
449 | 0 | case NONE: |
450 | 0 | return idx; // No tag was found |
451 | 0 | case SECTION_END: |
452 | 0 | return idx; |
453 | 0 | default: |
454 | 0 | std::cout << "Unknown tag: " << op_ctx.op << std::endl; |
455 | 0 | return -1; |
456 | 0 | } |
457 | 0 | } |
458 | | |
459 | | static bool RenderTemplate(const std::string& document, const std::string& document_root, |
460 | 0 | const ContextStack* stack, std::stringstream* out) { |
461 | 0 | int idx = 0; |
462 | 0 | while (idx < document.size() && idx != -1) { |
463 | 0 | OpCtx op; |
464 | 0 | idx = FindNextTag(document, idx, &op, out); |
465 | 0 | idx = EvaluateTag(document, document_root, idx, stack, op, out); |
466 | 0 | } |
467 | |
|
468 | 0 | return idx != -1; |
469 | 0 | } |
470 | | |
471 | | bool RenderTemplate(const std::string& document, const std::string& document_root, |
472 | 0 | const Value& context, std::stringstream* out) { |
473 | 0 | ContextStack stack = {&context, nullptr}; |
474 | 0 | return RenderTemplate(document, document_root, &stack, out); |
475 | 0 | } |
476 | | |
477 | | } // namespace mustache |