Coverage Report

Created: 2024-11-21 20:24

/root/doris/be/src/util/mustache/mustache.cc
Line
Count
Source (jump to first uncovered line)
1
// Licensed under the Apache License, Version 2.0 (the "License");
2
// you may not use this file except in compliance with the License.
3
// You may obtain a copy of the License at
4
//
5
// http://www.apache.org/licenses/LICENSE-2.0
6
//
7
// Unless required by applicable law or agreed to in writing, software
8
// distributed under the License is distributed on an "AS IS" BASIS,
9
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
// See the License for the specific language governing permissions and
11
// limitations under the License.
12
13
#include "mustache.h"
14
15
#include <rapidjson/allocators.h>
16
#include <rapidjson/document.h>
17
#include <rapidjson/encodings.h>
18
#include <rapidjson/prettywriter.h>
19
#include <rapidjson/rapidjson.h>
20
#include <strings.h>
21
22
#include <algorithm>
23
#include <boost/algorithm/string/classification.hpp>
24
#include <boost/algorithm/string/detail/classification.hpp>
25
#include <boost/algorithm/string/join.hpp>
26
#include <boost/algorithm/string/predicate_facade.hpp>
27
#include <boost/algorithm/string/split.hpp>
28
#include <boost/algorithm/string/trim.hpp>
29
#include <boost/iterator/iterator_facade.hpp>
30
#include <boost/type_index/type_index_facade.hpp>
31
#include <fstream> // IWYU pragma: keep
32
#include <iostream>
33
#include <stack>
34
#include <vector>
35
36
#include "rapidjson/stringbuffer.h"
37
#include "rapidjson/writer.h"
38
39
using namespace rapidjson;
40
using namespace boost::algorithm;
41
42
namespace mustache {
43
44
// TODO:
45
// # Handle malformed templates better
46
// # Better support for reading templates from files
47
48
// The kind of a mustache tag, derived from the first character(s) of the tag's contents
// (see GetOperator()). Enumerator order is significant to anything relying on the numeric
// values, so new entries must only be appended before NONE with care.
enum TagOperator {
    SUBSTITUTION,            // no operator prefix: {{name}}
    SECTION_START,           // '#'
    NEGATED_SECTION_START,   // '^'
    PREDICATE_SECTION_START, // '?'
    SECTION_END,             // '/'
    PARTIAL,                 // '>'
    COMMENT,                 // '!' (when not followed by '=')
    LENGTH,                  // '%'
    EQUALITY,                // '='
    INEQUALITY,              // "!="
    LITERAL,                 // '~'
    NONE                     // no tag was found
};
62
63
struct OpCtx {
64
    TagOperator op;
65
    std::string tag_name;
66
    std::string tag_arg;
67
    bool escaped = false;
68
};
69
70
struct ContextStack {
71
    const Value* value;
72
    const ContextStack* parent;
73
};
74
75
0
// Classifies a tag by its leading character(s). 'tag' is the trimmed text found between
// the braces. An empty tag, or one without a recognised prefix, is a SUBSTITUTION.
TagOperator GetOperator(const std::string& tag) {
    if (tag.empty()) return SUBSTITUTION;

    const char sigil = tag.front();
    if (sigil == '#') return SECTION_START;
    if (sigil == '^') return NEGATED_SECTION_START;
    if (sigil == '?') return PREDICATE_SECTION_START;
    if (sigil == '/') return SECTION_END;
    if (sigil == '>') return PARTIAL;
    if (sigil == '!') {
        // "!=" introduces an inequality test; a lone '!' introduces a comment.
        const bool is_inequality = tag.size() > 1 && tag[1] == '=';
        return is_inequality ? INEQUALITY : COMMENT;
    }
    if (sigil == '%') return LENGTH;
    if (sigil == '~') return LITERAL;
    if (sigil == '=') return EQUALITY;
    return SUBSTITUTION;
}
101
102
int EvaluateTag(const std::string& document, const std::string& document_root, int idx,
103
                const ContextStack* context, const OpCtx& op_ctx, std::stringstream* out);
104
105
static bool RenderTemplate(const std::string& document, const std::string& document_root,
106
                           const ContextStack* stack, std::stringstream* out);
107
108
0
// Writes 'in' to 'out', replacing the five characters & " ' < > with their named
// character references. All other characters are copied through unchanged.
void EscapeHtml(const std::string& in, std::stringstream* out) {
    std::stringstream& sink = *out;
    for (std::string::size_type pos = 0; pos < in.size(); ++pos) {
        const char ch = in[pos];
        if (ch == '&') {
            sink << "&amp;";
        } else if (ch == '"') {
            sink << "&quot;";
        } else if (ch == '\'') {
            sink << "&apos;";
        } else if (ch == '<') {
            sink << "&lt;";
        } else if (ch == '>') {
            sink << "&gt;";
        } else {
            sink << ch;
        }
    }
}
132
133
0
// Debugging aid: serialises 'v' as compact JSON and prints it, followed by a newline, to
// standard output.
void Dump(const rapidjson::Value& v) {
    rapidjson::StringBuffer json_text;
    rapidjson::Writer<rapidjson::StringBuffer> serializer(json_text);
    v.Accept(serializer);
    std::cout << json_text.GetString() << std::endl;
}
139
140
// Appends the path component occupying [begin, end) of 'path' to 'components'.
// Empty components are ignored. A component wrapped in double quotes is stored without
// the quotes; a quoted component with nothing between the quotes is ignored.
static void AppendJsonPathComponent(const std::string& path, std::size_t begin, std::size_t end,
                                    std::vector<std::string>* components) {
    const std::size_t length = end - begin;
    if (length == 0) return;
    const bool quoted = path[begin] == '"' && path[end - 1] == '"';
    if (!quoted) {
        components->push_back(path.substr(begin, length));
    } else if (length > 2) {
        components->push_back(path.substr(begin + 1, length - 2));
    }
}

// Breaks a dotted path into individual components and appends them to 'components'.
// This is not a plain split() because path components may be quoted, e.g. "foo.bar".baz:
// '.' characters inside a quoted section are not treated as delimiters, which allows
// Json keys that contain periods. A character preceded by a backslash is never treated
// as a delimiter or as a quote; the backslash itself is kept in the component.
// Empty components (leading, trailing or doubled '.') are skipped.
void FindJsonPathComponents(const std::string& path, std::vector<std::string>* components) {
    bool in_quote = false;
    bool escape_this_char = false;
    std::size_t start = 0;
    for (std::size_t i = 0; i < path.size(); ++i) {
        const char c = path[i];
        if (c == '"' && !escape_this_char) in_quote = !in_quote;
        if (c == '.' && !escape_this_char && !in_quote) {
            // Unescaped delimiter outside of a quote pair => end of a component. The next
            // component always starts after this delimiter, even when this one is empty.
            AppendJsonPathComponent(path, start, i, components);
            start = i + 1;
        }
        escape_this_char = (c == '\\' && !escape_this_char);
    }
    // Whatever follows the last delimiter is the final component.
    AppendJsonPathComponent(path, start, path.size(), components);
}
178
179
// Looks up the json entity at 'path' in 'parent_context', and places it in 'resolved'. If
180
// the entity does not exist (i.e. the path is invalid), 'resolved' will be set to nullptr.
181
void ResolveJsonContext(const std::string& path, const ContextStack* stack,
182
0
                        const Value** resolved) {
183
0
    if (path == ".") {
184
0
        *resolved = stack->value;
185
0
        return;
186
0
    }
187
0
    std::vector<std::string> components;
188
0
    FindJsonPathComponents(path, &components);
189
190
    // At each enclosing level of context, try to resolve the path.
191
0
    for (; stack != nullptr; stack = stack->parent) {
192
0
        const Value* cur = stack->value;
193
0
        bool match = true;
194
0
        for (const std::string& c : components) {
195
0
            if (cur->IsObject() && cur->HasMember(c.c_str())) {
196
0
                cur = &(*cur)[c.c_str()];
197
0
            } else {
198
0
                match = false;
199
0
                break;
200
0
            }
201
0
        }
202
0
        if (match) {
203
0
            *resolved = cur;
204
0
            return;
205
0
        }
206
0
    }
207
0
    *resolved = nullptr;
208
0
}
209
210
0
int FindNextTag(const std::string& document, int idx, OpCtx* op, std::stringstream* out) {
211
0
    op->op = NONE;
212
0
    while (idx < document.size()) {
213
0
        if (document[idx] == '{' && idx < (document.size() - 3) && document[idx + 1] == '{') {
214
0
            if (document[idx + 2] == '{') {
215
0
                idx += 3;
216
0
                op->escaped = true;
217
0
            } else {
218
0
                op->escaped = false;
219
0
                idx += 2; // Now at start of template expression
220
0
            }
221
0
            std::stringstream expr;
222
0
            while (idx < document.size()) {
223
0
                if (document[idx] != '}') {
224
0
                    expr << document[idx];
225
0
                    ++idx;
226
0
                } else {
227
0
                    if (!op->escaped && idx < document.size() - 1 && document[idx + 1] == '}') {
228
0
                        ++idx;
229
0
                        break;
230
0
                    } else if (op->escaped && idx < document.size() - 2 &&
231
0
                               document[idx + 1] == '}' && document[idx + 2] == '}') {
232
0
                        idx += 2;
233
0
                        break;
234
0
                    } else {
235
0
                        expr << '}';
236
0
                    }
237
0
                }
238
0
            }
239
240
0
            std::string key = expr.str();
241
0
            trim(key);
242
0
            if (key != ".") trim_if(key, is_any_of("."));
243
0
            if (key.size() == 0) continue;
244
0
            op->op = GetOperator(key);
245
0
            if (op->op != SUBSTITUTION) {
246
0
                int len = op->op == INEQUALITY ? 2 : 1;
247
0
                key = key.substr(len);
248
0
                trim(key);
249
0
            }
250
0
            if (key.size() == 0) continue;
251
252
0
            if (op->op == EQUALITY || op->op == INEQUALITY) {
253
                // Find an argument
254
0
                std::vector<std::string> components;
255
0
                split(components, key, is_any_of(" "));
256
0
                key = components[0];
257
0
                components.erase(components.begin());
258
0
                op->tag_arg = join(components, " ");
259
0
            }
260
261
0
            op->tag_name = key;
262
0
            return ++idx;
263
0
        } else {
264
0
            if (out != nullptr) (*out) << document[idx];
265
0
        }
266
0
        ++idx;
267
0
    }
268
0
    return idx;
269
0
}
270
271
// Evaluates a [PREDICATE_|NEGATED_]SECTION_START / SECTION_END pair by evaluating the tag
272
// in 'parent_context'. False or non-existant values cause the entire section to be
273
// skipped. True values cause the section to be evaluated as though it were a normal
274
// section, but with the parent context being the root context for that section.
275
//
276
// If 'is_negation' is true, the behaviour is the opposite of the above: false values
277
// cause the section to be normally evaluated etc.
278
int EvaluateSection(const std::string& document, const std::string& document_root, int idx,
279
                    const ContextStack* context_stack, const OpCtx& op_ctx,
280
0
                    std::stringstream* out) {
281
    // Precondition: idx is the immediate next character after an opening {{ #tag_name }}
282
0
    const Value* context;
283
0
    ResolveJsonContext(op_ctx.tag_name, context_stack, &context);
284
285
    // If we a) cannot resolve the context from the tag name or b) the context evaluates to
286
    // false, we should skip the contents of the template until a closing {{/tag_name}}.
287
0
    bool skip_contents = false;
288
289
0
    if (op_ctx.op == NEGATED_SECTION_START || op_ctx.op == PREDICATE_SECTION_START ||
290
0
        op_ctx.op == SECTION_START) {
291
0
        skip_contents = (context == nullptr || context->IsFalse());
292
293
        // If the tag is a negative block (i.e. {{^tag_name}}), do the opposite: if the
294
        // context exists and is true, skip the contents, else echo them.
295
0
        if (op_ctx.op == NEGATED_SECTION_START) {
296
0
            context = context_stack->value;
297
0
            skip_contents = !skip_contents;
298
0
        } else if (op_ctx.op == PREDICATE_SECTION_START) {
299
0
            context = context_stack->value;
300
0
        }
301
0
    } else if (op_ctx.op == INEQUALITY || op_ctx.op == EQUALITY) {
302
0
        skip_contents = (context == nullptr || !context->IsString() ||
303
0
                         strcasecmp(context->GetString(), op_ctx.tag_arg.c_str()) != 0);
304
0
        if (op_ctx.op == INEQUALITY) skip_contents = !skip_contents;
305
0
        context = context_stack->value;
306
0
    }
307
308
0
    std::vector<const Value*> values;
309
0
    if (!skip_contents && context != nullptr && context->IsArray()) {
310
0
        for (int i = 0; i < context->Size(); ++i) {
311
0
            values.push_back(&(*context)[i]);
312
0
        }
313
0
    } else {
314
0
        values.push_back(skip_contents ? nullptr : context);
315
0
    }
316
0
    if (values.size() == 0) {
317
0
        skip_contents = true;
318
0
        values.push_back(nullptr);
319
0
    }
320
321
0
    int start_idx = idx;
322
0
    for (const Value* v : values) {
323
0
        idx = start_idx;
324
0
        std::stack<OpCtx> section_starts;
325
0
        section_starts.push(op_ctx);
326
0
        while (idx < document.size()) {
327
0
            OpCtx next_ctx;
328
0
            idx = FindNextTag(document, idx, &next_ctx, skip_contents ? nullptr : out);
329
0
            if (skip_contents &&
330
0
                (next_ctx.op == SECTION_START || next_ctx.op == PREDICATE_SECTION_START ||
331
0
                 next_ctx.op == NEGATED_SECTION_START)) {
332
0
                section_starts.push(next_ctx);
333
0
            } else if (next_ctx.op == SECTION_END) {
334
0
                if (next_ctx.tag_name != section_starts.top().tag_name) return -1;
335
0
                section_starts.pop();
336
0
            }
337
0
            if (section_starts.empty()) break;
338
339
            // Don't need to evaluate any templates if we're skipping the contents
340
0
            if (!skip_contents) {
341
0
                ContextStack new_context = {v, context_stack};
342
0
                idx = EvaluateTag(document, document_root, idx, &new_context, next_ctx, out);
343
0
            }
344
0
        }
345
0
    }
346
0
    return idx;
347
0
}
348
349
// Evaluates a SUBSTITUTION tag, by replacing its contents with the value of the tag's
350
// name in 'parent_context'.
351
int EvaluateSubstitution(const std::string& document, const int idx,
352
                         const ContextStack* context_stack, const OpCtx& op_ctx,
353
0
                         std::stringstream* out) {
354
0
    const Value* val;
355
0
    ResolveJsonContext(op_ctx.tag_name, context_stack, &val);
356
0
    if (val == nullptr) return idx;
357
0
    if (val->IsString()) {
358
0
        if (!op_ctx.escaped) {
359
0
            EscapeHtml(val->GetString(), out);
360
0
        } else {
361
            // TODO: Triple {{{ means don't escape
362
0
            (*out) << val->GetString();
363
0
        }
364
0
    } else if (val->IsInt64()) {
365
0
        (*out) << val->GetInt64();
366
0
    } else if (val->IsInt()) {
367
0
        (*out) << val->GetInt();
368
0
    } else if (val->IsDouble()) {
369
0
        (*out) << val->GetDouble();
370
0
    } else if (val->IsBool()) {
371
0
        (*out) << std::boolalpha << val->GetBool();
372
0
    }
373
0
    return idx;
374
0
}
375
376
// Evaluates a LENGTH tag by replacing its contents with the type-dependent 'size' of the
377
// value.
378
int EvaluateLength(const std::string& document, const int idx, const ContextStack* context_stack,
379
0
                   const std::string& tag_name, std::stringstream* out) {
380
0
    const Value* val;
381
0
    ResolveJsonContext(tag_name, context_stack, &val);
382
0
    if (val == nullptr) return idx;
383
0
    if (val->IsArray()) {
384
0
        (*out) << val->Size();
385
0
    } else if (val->IsString()) {
386
0
        (*out) << val->GetStringLength();
387
0
    };
388
389
0
    return idx;
390
0
}
391
392
int EvaluateLiteral(const std::string& document, const int idx, const ContextStack* context_stack,
393
0
                    const std::string& tag_name, std::stringstream* out) {
394
0
    const Value* val;
395
0
    ResolveJsonContext(tag_name, context_stack, &val);
396
0
    if (val == nullptr) return idx;
397
0
    if (!val->IsArray() && !val->IsObject()) return idx;
398
0
    StringBuffer strbuf;
399
0
    PrettyWriter<StringBuffer> writer(strbuf);
400
0
    val->Accept(writer);
401
0
    (*out) << strbuf.GetString();
402
0
    return idx;
403
0
}
404
405
// Evaluates a 'partial' template by reading it fully from disk, then rendering it
406
// directly into the current output with the current context.
407
//
408
// TODO: This could obviously be more efficient (and there are lots of file accesses in a
409
// long list context).
410
void EvaluatePartial(const std::string& tag_name, const std::string& document_root,
411
0
                     const ContextStack* stack, std::stringstream* out) {
412
0
    std::stringstream ss;
413
0
    ss << document_root << tag_name;
414
0
    std::ifstream tmpl(ss.str().c_str());
415
0
    if (!tmpl.is_open()) {
416
0
        ss << ".mustache";
417
0
        tmpl.open(ss.str().c_str());
418
0
        if (!tmpl.is_open()) return;
419
0
    }
420
0
    std::stringstream file_ss;
421
0
    file_ss << tmpl.rdbuf();
422
0
    RenderTemplate(file_ss.str(), document_root, stack, out);
423
0
}
424
425
// Given a tag name, and its operator, evaluate the tag in the given context and write the
426
// output to 'out'. The heavy-lifting is delegated to specific Evaluate*()
427
// methods. Returns the new cursor position within 'document', or -1 on error.
428
int EvaluateTag(const std::string& document, const std::string& document_root, int idx,
429
0
                const ContextStack* context, const OpCtx& op_ctx, std::stringstream* out) {
430
0
    if (idx == -1) return idx;
431
0
    switch (op_ctx.op) {
432
0
    case SECTION_START:
433
0
    case PREDICATE_SECTION_START:
434
0
    case NEGATED_SECTION_START:
435
0
    case EQUALITY:
436
0
    case INEQUALITY:
437
0
        return EvaluateSection(document, document_root, idx, context, op_ctx, out);
438
0
    case SUBSTITUTION:
439
0
        return EvaluateSubstitution(document, idx, context, op_ctx, out);
440
0
    case COMMENT:
441
0
        return idx; // Ignored
442
0
    case PARTIAL:
443
0
        EvaluatePartial(op_ctx.tag_name, document_root, context, out);
444
0
        return idx;
445
0
    case LENGTH:
446
0
        return EvaluateLength(document, idx, context, op_ctx.tag_name, out);
447
0
    case LITERAL:
448
0
        return EvaluateLiteral(document, idx, context, op_ctx.tag_name, out);
449
0
    case NONE:
450
0
        return idx; // No tag was found
451
0
    case SECTION_END:
452
0
        return idx;
453
0
    default:
454
0
        std::cout << "Unknown tag: " << op_ctx.op << std::endl;
455
0
        return -1;
456
0
    }
457
0
}
458
459
static bool RenderTemplate(const std::string& document, const std::string& document_root,
460
0
                           const ContextStack* stack, std::stringstream* out) {
461
0
    int idx = 0;
462
0
    while (idx < document.size() && idx != -1) {
463
0
        OpCtx op;
464
0
        idx = FindNextTag(document, idx, &op, out);
465
0
        idx = EvaluateTag(document, document_root, idx, stack, op, out);
466
0
    }
467
468
0
    return idx != -1;
469
0
}
470
471
bool RenderTemplate(const std::string& document, const std::string& document_root,
472
0
                    const Value& context, std::stringstream* out) {
473
0
    ContextStack stack = {&context, nullptr};
474
0
    return RenderTemplate(document, document_root, &stack, out);
475
0
}
476
477
} // namespace mustache