Coverage Report

Created: 2026-01-04 11:29

/root/doris/be/src/exprs/string_functions.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/string-functions.cpp
19
// and modified by Doris
20
21
#include "exprs/string_functions.h"
22
23
#include <re2/re2.h>
24
#include <re2/stringpiece.h>
25
26
#include <sstream>
27
28
#include "util/string_util.h"
29
30
// NOTE: be careful not to use string::append.  It is not performant.
31
namespace doris {
32
33
// This function sets options in the RE2 library before pattern matching.
34
bool StringFunctions::set_re2_options(const StringRef& match_parameter, std::string* error_str,
35
0
                                      re2::RE2::Options* opts) {
36
0
    for (int i = 0; i < match_parameter.size; i++) {
  Branch (36:21): [True: 0, False: 0]
37
0
        char match = match_parameter.data[i];
38
0
        switch (match) {
39
0
        case 'i':
  Branch (39:9): [True: 0, False: 0]
40
0
            opts->set_case_sensitive(false);
41
0
            break;
42
0
        case 'c':
  Branch (42:9): [True: 0, False: 0]
43
0
            opts->set_case_sensitive(true);
44
0
            break;
45
0
        case 'm':
  Branch (45:9): [True: 0, False: 0]
46
0
            opts->set_posix_syntax(true);
47
0
            opts->set_one_line(false);
48
0
            break;
49
0
        case 'n':
  Branch (49:9): [True: 0, False: 0]
50
0
            opts->set_never_nl(false);
51
0
            opts->set_dot_nl(true);
52
0
            break;
53
0
        default:
  Branch (53:9): [True: 0, False: 0]
54
0
            std::stringstream error;
55
0
            error << "Illegal match parameter " << match;
56
0
            *error_str = error.str();
57
0
            return false;
58
0
        }
59
0
    }
60
0
    return true;
61
0
}
62
63
// The caller owns the returned regex. Returns nullptr if the pattern could not be compiled.
64
bool StringFunctions::compile_regex(const StringRef& pattern, std::string* error_str,
65
                                    const StringRef& match_parameter,
66
42
                                    const StringRef& options_value, std::unique_ptr<re2::RE2>& re) {
67
42
    re2::StringPiece pattern_sp(pattern.data, pattern.size);
68
42
    re2::RE2::Options options;
69
    // Disable error logging in case e.g. every row causes an error
70
42
    options.set_log_errors(false);
71
    // ATTN(cmy): no set it, or the lazy mode of regex won't work. See Doris #6587
72
    // Return the leftmost longest match (rather than the first match).
73
    // options.set_longest_match(true);
74
42
    options.set_dot_nl(true);
75
76
42
    if ((options_value.data != nullptr) && (options_value.size > 0)) {
  Branch (76:9): [True: 0, False: 42]
  Branch (76:44): [True: 0, False: 0]
77
0
        auto options_split = split(options_value.to_string(), ",");
78
0
        for (const auto& option : options_split) {
  Branch (78:33): [True: 0, False: 0]
79
0
            if (iequal("ignore_invalid_escape", option)) {
  Branch (79:17): [True: 0, False: 0]
80
0
                options.set_ignore_replace_escape(true);
81
0
            } else {
82
                // "none" do nothing, and could add more options for future extensibility.
83
0
            }
84
0
        }
85
0
    }
86
87
42
    if (match_parameter.size > 0 &&
  Branch (87:9): [True: 0, False: 42]
88
42
        !StringFunctions::set_re2_options(match_parameter, error_str, &options)) {
  Branch (88:9): [True: 0, False: 0]
89
0
        return false;
90
0
    }
91
42
    re.reset(new re2::RE2(pattern_sp, options));
92
42
    if (!re->ok()) {
  Branch (92:9): [True: 0, False: 42]
93
0
        std::stringstream ss;
94
0
        ss << "Could not compile regexp pattern: " << std::string(pattern.data, pattern.size)
95
0
           << std::endl
96
0
           << "Error: " << re->error();
97
0
        *error_str = ss.str();
98
0
        re.reset();
99
0
        return false;
100
0
    }
101
42
    return true;
102
42
}
103
104
} // namespace doris