Coverage Report

Created: 2026-03-14 18:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/mustache/mustache.cc
Line
Count
Source
1
// Licensed under the Apache License, Version 2.0 (the "License");
2
// you may not use this file except in compliance with the License.
3
// You may obtain a copy of the License at
4
//
5
// http://www.apache.org/licenses/LICENSE-2.0
6
//
7
// Unless required by applicable law or agreed to in writing, software
8
// distributed under the License is distributed on an "AS IS" BASIS,
9
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
// See the License for the specific language governing permissions and
11
// limitations under the License.
12
13
#include "util/mustache/mustache.h"
14
15
#include <rapidjson/allocators.h>
16
#include <rapidjson/document.h>
17
#include <rapidjson/encodings.h>
18
#include <rapidjson/prettywriter.h>
19
#include <rapidjson/rapidjson.h>
20
#include <strings.h>
21
22
#include <algorithm>
23
#include <boost/algorithm/string/classification.hpp>
24
#include <boost/algorithm/string/detail/classification.hpp>
25
#include <boost/algorithm/string/join.hpp>
26
#include <boost/algorithm/string/predicate_facade.hpp>
27
#include <boost/algorithm/string/split.hpp>
28
#include <boost/algorithm/string/trim.hpp>
29
#include <boost/iterator/iterator_facade.hpp>
30
#include <boost/type_index/type_index_facade.hpp>
31
#include <fstream> // IWYU pragma: keep
32
#include <iostream>
33
#include <stack>
34
#include <vector>
35
36
#include "rapidjson/stringbuffer.h"
37
#include "rapidjson/writer.h"
38
39
using namespace rapidjson;
40
using namespace boost::algorithm;
41
42
namespace mustache {
43
44
// TODO:
45
// # Handle malformed templates better
46
// # Better support for reading templates from files
47
48
// The kinds of tag the template scanner distinguishes. GetOperator() maps the leading
// character(s) of a tag body onto one of these values.
enum TagOperator {
    SUBSTITUTION,            // no operator character: plain value substitution
    SECTION_START,           // '#'
    NEGATED_SECTION_START,   // '^'
    PREDICATE_SECTION_START, // '?'
    SECTION_END,             // '/'
    PARTIAL,                 // '>'
    COMMENT,                 // '!' that is not followed by '='
    LENGTH,                  // '%'
    EQUALITY,                // '='
    INEQUALITY,              // "!="
    LITERAL,                 // '~'
    NONE                     // placeholder used by FindNextTag() when no tag was found
};
62
63
struct OpCtx {
64
    TagOperator op;
65
    std::string tag_name;
66
    std::string tag_arg;
67
    bool escaped = false;
68
};
69
70
struct ContextStack {
71
    const Value* value;
72
    const ContextStack* parent;
73
};
74
75
34
// Classifies a tag body by its leading operator character. An empty tag, or one whose
// first character is not a recognised operator, is a SUBSTITUTION.
TagOperator GetOperator(const std::string& tag) {
    if (tag.empty()) {
        return SUBSTITUTION;
    }

    const char lead = tag[0];
    if (lead == '#') return SECTION_START;
    if (lead == '^') return NEGATED_SECTION_START;
    if (lead == '?') return PREDICATE_SECTION_START;
    if (lead == '/') return SECTION_END;
    if (lead == '>') return PARTIAL;
    if (lead == '!') {
        // "!=" opens an inequality section; any other '!' tag is a comment.
        const bool is_inequality = tag.size() > 1 && tag[1] == '=';
        return is_inequality ? INEQUALITY : COMMENT;
    }
    if (lead == '%') return LENGTH;
    if (lead == '~') return LITERAL;
    if (lead == '=') return EQUALITY;
    return SUBSTITUTION;
}
101
102
int EvaluateTag(const std::string& document, const std::string& document_root, int idx,
103
                const ContextStack* context, const OpCtx& op_ctx, std::stringstream* out);
104
105
static bool RenderTemplate(const std::string& document, const std::string& document_root,
106
                           const ContextStack* stack, std::stringstream* out);
107
108
18
// Writes 'in' to 'out', replacing the characters & " ' < > with their HTML entities.
// Every other character is copied through unchanged.
void EscapeHtml(const std::string& in, std::stringstream* out) {
    std::stringstream& sink = *out;
    for (const char ch : in) {
        const char* entity = nullptr;
        if (ch == '&') {
            entity = "&amp;";
        } else if (ch == '"') {
            entity = "&quot;";
        } else if (ch == '\'') {
            entity = "&apos;";
        } else if (ch == '<') {
            entity = "&lt;";
        } else if (ch == '>') {
            entity = "&gt;";
        }

        if (entity != nullptr) {
            sink << entity;
        } else {
            sink << ch;
        }
    }
}
132
133
0
// Debugging aid: serialises 'v' as compact JSON and prints it, followed by a newline
// and a flush, to standard output.
void Dump(const rapidjson::Value& v) {
    StringBuffer json_text;
    Writer<StringBuffer> serializer(json_text);
    v.Accept(serializer);
    std::cout << json_text.GetString() << std::endl;
}
139
140
// Breaks a dotted path into individual components and appends them to 'components'.
// This is not a plain split() because a component may be quoted, e.g. "foo.bar".baz:
// '.' characters inside a quoted section are not delimiters, which allows JSON keys
// that contain periods. A '.' or '"' preceded by an unescaped backslash is treated as
// an ordinary character; the backslash itself is kept in the component.
//
// A component that is entirely wrapped in double quotes is appended without the
// quotes; a quoted component with nothing between the quotes is dropped. An empty
// component (two adjacent delimiters) does not advance the component start, so the
// following delimiter character becomes part of the next component.
void FindJsonPathComponents(const std::string& path, std::vector<std::string>* components) {
    // Appends path[begin, end), stripping one pair of enclosing quotes if present.
    const auto add_component = [&path, components](size_t begin, size_t end) {
        const size_t len = end - begin;
        if (len == 0) return;
        // The closing quote must be checked at the component's own last character
        // (end - 1), not at an offset relative to the start of the whole path.
        const bool quoted = path[begin] == '"' && path[end - 1] == '"';
        if (!quoted) {
            components->push_back(path.substr(begin, len));
        } else if (len > 2) {
            // At least one character between the quotes (so "a" yields a).
            components->push_back(path.substr(begin + 1, len - 2));
        }
    };

    bool in_quote = false;
    bool escape_this_char = false;
    size_t start = 0;
    for (size_t i = 0; i < path.size(); ++i) {
        const char c = path[i];
        if (!escape_this_char) {
            if (c == '"') in_quote = !in_quote;
            // Unescaped, unquoted delimiter after a non-empty run => found a component.
            if (c == '.' && !in_quote && i > start) {
                add_component(start, i);
                start = i + 1;
            }
        }
        // A backslash escapes the next character unless it was itself escaped.
        escape_this_char = (c == '\\' && !escape_this_char);
    }

    // Whatever follows the last delimiter is the final component.
    add_component(start, path.size());
}
178
179
// Looks up the json entity at 'path' in 'parent_context', and places it in 'resolved'. If
180
// the entity does not exist (i.e. the path is invalid), 'resolved' will be set to nullptr.
181
void ResolveJsonContext(const std::string& path, const ContextStack* stack,
182
23
                        const Value** resolved) {
183
23
    if (path == ".") {
184
1
        *resolved = stack->value;
185
1
        return;
186
1
    }
187
22
    std::vector<std::string> components;
188
22
    FindJsonPathComponents(path, &components);
189
190
    // At each enclosing level of context, try to resolve the path.
191
22
    for (; stack != nullptr; stack = stack->parent) {
192
22
        const Value* cur = stack->value;
193
22
        bool match = true;
194
22
        for (const std::string& c : components) {
195
22
            if (cur->IsObject() && cur->HasMember(c.c_str())) {
196
22
                cur = &(*cur)[c.c_str()];
197
22
            } else {
198
0
                match = false;
199
0
                break;
200
0
            }
201
22
        }
202
22
        if (match) {
203
22
            *resolved = cur;
204
22
            return;
205
22
        }
206
22
    }
207
0
    *resolved = nullptr;
208
0
}
209
210
36
int FindNextTag(const std::string& document, int idx, OpCtx* op, std::stringstream* out) {
211
36
    op->op = NONE;
212
2.04k
    while (idx < document.size()) {
213
2.04k
        if (document[idx] == '{' && idx < (document.size() - 3) && document[idx + 1] == '{') {
214
34
            if (document[idx + 2] == '{') {
215
2
                idx += 3;
216
2
                op->escaped = true;
217
32
            } else {
218
32
                op->escaped = false;
219
32
                idx += 2; // Now at start of template expression
220
32
            }
221
34
            std::stringstream expr;
222
1.08k
            while (idx < document.size()) {
223
1.08k
                if (document[idx] != '}') {
224
1.05k
                    expr << document[idx];
225
1.05k
                    ++idx;
226
1.05k
                } else {
227
34
                    if (!op->escaped && idx < document.size() - 1 && document[idx + 1] == '}') {
228
32
                        ++idx;
229
32
                        break;
230
32
                    } else if (op->escaped && idx < document.size() - 2 &&
231
2
                               document[idx + 1] == '}' && document[idx + 2] == '}') {
232
2
                        idx += 2;
233
2
                        break;
234
2
                    } else {
235
0
                        expr << '}';
236
0
                    }
237
34
                }
238
1.08k
            }
239
240
34
            std::string key = expr.str();
241
34
            trim(key);
242
34
            if (key != ".") trim_if(key, is_any_of("."));
243
34
            if (key.size() == 0) continue;
244
34
            op->op = GetOperator(key);
245
34
            if (op->op != SUBSTITUTION) {
246
14
                int len = op->op == INEQUALITY ? 2 : 1;
247
14
                key = key.substr(len);
248
14
                trim(key);
249
14
            }
250
34
            if (key.size() == 0) continue;
251
252
34
            if (op->op == EQUALITY || op->op == INEQUALITY) {
253
                // Find an argument
254
0
                std::vector<std::string> components;
255
0
                split(components, key, is_any_of(" "));
256
0
                key = components[0];
257
0
                components.erase(components.begin());
258
0
                op->tag_arg = join(components, " ");
259
0
            }
260
261
34
            op->tag_name = key;
262
34
            return ++idx;
263
2.01k
        } else {
264
2.01k
            if (out != nullptr) (*out) << document[idx];
265
2.01k
        }
266
2.01k
        ++idx;
267
2.01k
    }
268
2
    return idx;
269
36
}
270
271
// Evaluates a [PREDICATE_|NEGATED_]SECTION_START / SECTION_END pair by evaluating the tag
272
// in 'parent_context'. False or non-existant values cause the entire section to be
273
// skipped. True values cause the section to be evaluated as though it were a normal
274
// section, but with the parent context being the root context for that section.
275
//
276
// If 'is_negation' is true, the behaviour is the opposite of the above: false values
277
// cause the section to be normally evaluated etc.
278
int EvaluateSection(const std::string& document, const std::string& document_root, int idx,
279
                    const ContextStack* context_stack, const OpCtx& op_ctx,
280
3
                    std::stringstream* out) {
281
    // Precondition: idx is the immediate next character after an opening {{ #tag_name }}
282
3
    const Value* context;
283
3
    ResolveJsonContext(op_ctx.tag_name, context_stack, &context);
284
285
    // If we a) cannot resolve the context from the tag name or b) the context evaluates to
286
    // false, we should skip the contents of the template until a closing {{/tag_name}}.
287
3
    bool skip_contents = false;
288
289
3
    if (op_ctx.op == NEGATED_SECTION_START || op_ctx.op == PREDICATE_SECTION_START ||
290
3
        op_ctx.op == SECTION_START) {
291
3
        skip_contents = (context == nullptr || context->IsFalse());
292
293
        // If the tag is a negative block (i.e. {{^tag_name}}), do the opposite: if the
294
        // context exists and is true, skip the contents, else echo them.
295
3
        if (op_ctx.op == NEGATED_SECTION_START) {
296
1
            context = context_stack->value;
297
1
            skip_contents = !skip_contents;
298
2
        } else if (op_ctx.op == PREDICATE_SECTION_START) {
299
0
            context = context_stack->value;
300
0
        }
301
3
    } else if (op_ctx.op == INEQUALITY || op_ctx.op == EQUALITY) {
302
0
        skip_contents = (context == nullptr || !context->IsString() ||
303
0
                         strcasecmp(context->GetString(), op_ctx.tag_arg.c_str()) != 0);
304
0
        if (op_ctx.op == INEQUALITY) skip_contents = !skip_contents;
305
0
        context = context_stack->value;
306
0
    }
307
308
3
    std::vector<const Value*> values;
309
3
    if (!skip_contents && context != nullptr && context->IsArray()) {
310
8
        for (int i = 0; i < context->Size(); ++i) {
311
7
            values.push_back(&(*context)[i]);
312
7
        }
313
2
    } else {
314
2
        values.push_back(skip_contents ? nullptr : context);
315
2
    }
316
3
    if (values.size() == 0) {
317
0
        skip_contents = true;
318
0
        values.push_back(nullptr);
319
0
    }
320
321
3
    int start_idx = idx;
322
9
    for (const Value* v : values) {
323
9
        idx = start_idx;
324
9
        std::stack<OpCtx> section_starts;
325
9
        section_starts.push(op_ctx);
326
24
        while (idx < document.size()) {
327
24
            OpCtx next_ctx;
328
24
            idx = FindNextTag(document, idx, &next_ctx, skip_contents ? nullptr : out);
329
24
            if (skip_contents &&
330
24
                (next_ctx.op == SECTION_START || next_ctx.op == PREDICATE_SECTION_START ||
331
1
                 next_ctx.op == NEGATED_SECTION_START)) {
332
0
                section_starts.push(next_ctx);
333
24
            } else if (next_ctx.op == SECTION_END) {
334
9
                if (next_ctx.tag_name != section_starts.top().tag_name) return -1;
335
9
                section_starts.pop();
336
9
            }
337
24
            if (section_starts.empty()) break;
338
339
            // Don't need to evaluate any templates if we're skipping the contents
340
15
            if (!skip_contents) {
341
15
                ContextStack new_context = {v, context_stack};
342
15
                idx = EvaluateTag(document, document_root, idx, &new_context, next_ctx, out);
343
15
            }
344
15
        }
345
9
    }
346
3
    return idx;
347
3
}
348
349
// Evaluates a SUBSTITUTION tag, by replacing its contents with the value of the tag's
350
// name in 'parent_context'.
351
int EvaluateSubstitution(const std::string& document, const int idx,
352
                         const ContextStack* context_stack, const OpCtx& op_ctx,
353
20
                         std::stringstream* out) {
354
20
    const Value* val;
355
20
    ResolveJsonContext(op_ctx.tag_name, context_stack, &val);
356
20
    if (val == nullptr) return idx;
357
20
    if (val->IsString()) {
358
20
        if (!op_ctx.escaped) {
359
18
            EscapeHtml(val->GetString(), out);
360
18
        } else {
361
            // TODO: Triple {{{ means don't escape
362
2
            (*out) << val->GetString();
363
2
        }
364
20
    } else if (val->IsInt64()) {
365
0
        (*out) << val->GetInt64();
366
0
    } else if (val->IsInt()) {
367
0
        (*out) << val->GetInt();
368
0
    } else if (val->IsDouble()) {
369
0
        (*out) << val->GetDouble();
370
0
    } else if (val->IsBool()) {
371
0
        (*out) << std::boolalpha << val->GetBool();
372
0
    }
373
20
    return idx;
374
20
}
375
376
// Evaluates a LENGTH tag by replacing its contents with the type-dependent 'size' of the
377
// value.
378
int EvaluateLength(const std::string& document, const int idx, const ContextStack* context_stack,
379
0
                   const std::string& tag_name, std::stringstream* out) {
380
0
    const Value* val;
381
0
    ResolveJsonContext(tag_name, context_stack, &val);
382
0
    if (val == nullptr) return idx;
383
0
    if (val->IsArray()) {
384
0
        (*out) << val->Size();
385
0
    } else if (val->IsString()) {
386
0
        (*out) << val->GetStringLength();
387
0
    };
388
389
0
    return idx;
390
0
}
391
392
int EvaluateLiteral(const std::string& document, const int idx, const ContextStack* context_stack,
393
0
                    const std::string& tag_name, std::stringstream* out) {
394
0
    const Value* val;
395
0
    ResolveJsonContext(tag_name, context_stack, &val);
396
0
    if (val == nullptr) return idx;
397
0
    if (!val->IsArray() && !val->IsObject()) return idx;
398
0
    StringBuffer strbuf;
399
0
    PrettyWriter<StringBuffer> writer(strbuf);
400
0
    val->Accept(writer);
401
0
    (*out) << strbuf.GetString();
402
0
    return idx;
403
0
}
404
405
// Evaluates a 'partial' template by reading it fully from disk, then rendering it
406
// directly into the current output with the current context.
407
//
408
// TODO: This could obviously be more efficient (and there are lots of file accesses in a
409
// long list context).
410
void EvaluatePartial(const std::string& tag_name, const std::string& document_root,
411
1
                     const ContextStack* stack, std::stringstream* out) {
412
1
    std::stringstream ss;
413
1
    ss << document_root << tag_name;
414
1
    std::ifstream tmpl(ss.str().c_str());
415
1
    if (!tmpl.is_open()) {
416
0
        ss << ".mustache";
417
0
        tmpl.open(ss.str().c_str());
418
0
        if (!tmpl.is_open()) return;
419
0
    }
420
1
    std::stringstream file_ss;
421
1
    file_ss << tmpl.rdbuf();
422
1
    RenderTemplate(file_ss.str(), document_root, stack, out);
423
1
}
424
425
// Given a tag name, and its operator, evaluate the tag in the given context and write the
426
// output to 'out'. The heavy-lifting is delegated to specific Evaluate*()
427
// methods. Returns the new cursor position within 'document', or -1 on error.
428
int EvaluateTag(const std::string& document, const std::string& document_root, int idx,
429
27
                const ContextStack* context, const OpCtx& op_ctx, std::stringstream* out) {
430
27
    if (idx == -1) return idx;
431
27
    switch (op_ctx.op) {
432
2
    case SECTION_START:
433
2
    case PREDICATE_SECTION_START:
434
3
    case NEGATED_SECTION_START:
435
3
    case EQUALITY:
436
3
    case INEQUALITY:
437
3
        return EvaluateSection(document, document_root, idx, context, op_ctx, out);
438
20
    case SUBSTITUTION:
439
20
        return EvaluateSubstitution(document, idx, context, op_ctx, out);
440
1
    case COMMENT:
441
1
        return idx; // Ignored
442
1
    case PARTIAL:
443
1
        EvaluatePartial(op_ctx.tag_name, document_root, context, out);
444
1
        return idx;
445
0
    case LENGTH:
446
0
        return EvaluateLength(document, idx, context, op_ctx.tag_name, out);
447
0
    case LITERAL:
448
0
        return EvaluateLiteral(document, idx, context, op_ctx.tag_name, out);
449
2
    case NONE:
450
2
        return idx; // No tag was found
451
0
    case SECTION_END:
452
0
        return idx;
453
0
    default:
454
0
        std::cout << "Unknown tag: " << op_ctx.op << std::endl;
455
0
        return -1;
456
27
    }
457
27
}
458
459
static bool RenderTemplate(const std::string& document, const std::string& document_root,
460
3
                           const ContextStack* stack, std::stringstream* out) {
461
3
    int idx = 0;
462
15
    while (idx < document.size() && idx != -1) {
463
12
        OpCtx op;
464
12
        idx = FindNextTag(document, idx, &op, out);
465
12
        idx = EvaluateTag(document, document_root, idx, stack, op, out);
466
12
    }
467
468
3
    return idx != -1;
469
3
}
470
471
bool RenderTemplate(const std::string& document, const std::string& document_root,
472
2
                    const Value& context, std::stringstream* out) {
473
2
    ContextStack stack = {&context, nullptr};
474
2
    return RenderTemplate(document, document_root, &stack, out);
475
2
}
476
477
} // namespace mustache