/root/doris/be/src/exprs/string_functions.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/string-functions.cpp |
19 | | // and modified by Doris |
20 | | |
21 | | #include "exprs/string_functions.h" |
22 | | |
23 | | #include <re2/re2.h> |
24 | | #include <re2/stringpiece.h> |
25 | | |
26 | | #include <sstream> |
27 | | |
28 | | #include "util/string_util.h" |
29 | | |
30 | | // NOTE: be careful not to use string::append. It is not performant. |
31 | | namespace doris { |
32 | | |
33 | | // This function sets options in the RE2 library before pattern matching. |
34 | | bool StringFunctions::set_re2_options(const StringRef& match_parameter, std::string* error_str, |
35 | 10 | re2::RE2::Options* opts) { |
36 | 22 | for (int i = 0; i < match_parameter.size; i++) { |
37 | 14 | char match = match_parameter.data[i]; |
38 | 14 | switch (match) { |
39 | 4 | case 'i': |
40 | 4 | opts->set_case_sensitive(false); |
41 | 4 | break; |
42 | 2 | case 'c': |
43 | 2 | opts->set_case_sensitive(true); |
44 | 2 | break; |
45 | 3 | case 'm': |
46 | 3 | opts->set_posix_syntax(true); |
47 | 3 | opts->set_one_line(false); |
48 | 3 | break; |
49 | 3 | case 'n': |
50 | 3 | opts->set_never_nl(false); |
51 | 3 | opts->set_dot_nl(true); |
52 | 3 | break; |
53 | 2 | default: |
54 | 2 | std::stringstream error; |
55 | 2 | error << "Illegal match parameter " << match; |
56 | 2 | *error_str = error.str(); |
57 | 2 | return false; |
58 | 14 | } |
59 | 14 | } |
60 | 8 | return true; |
61 | 10 | } |
62 | | |
63 | | // The caller owns the returned regex. Returns nullptr if the pattern could not be compiled. |
64 | | bool StringFunctions::compile_regex(const StringRef& pattern, std::string* error_str, |
65 | | const StringRef& match_parameter, |
66 | 48 | const StringRef& options_value, std::unique_ptr<re2::RE2>& re) { |
67 | 48 | re2::StringPiece pattern_sp(pattern.data, pattern.size); |
68 | 48 | re2::RE2::Options options; |
69 | | // Disable error logging in case e.g. every row causes an error |
70 | 48 | options.set_log_errors(false); |
71 | | // ATTN(cmy): no set it, or the lazy mode of regex won't work. See Doris #6587 |
72 | | // Return the leftmost longest match (rather than the first match). |
73 | | // options.set_longest_match(true); |
74 | 48 | options.set_dot_nl(true); |
75 | | |
76 | 48 | if ((options_value.data != nullptr) && (options_value.size > 0)) { |
77 | 0 | auto options_split = split(options_value.to_string(), ","); |
78 | 0 | for (const auto& option : options_split) { |
79 | 0 | if (iequal("ignore_invalid_escape", option)) { |
80 | 0 | options.set_ignore_replace_escape(true); |
81 | 0 | } else { |
82 | | // "none" do nothing, and could add more options for future extensibility. |
83 | 0 | } |
84 | 0 | } |
85 | 0 | } |
86 | | |
87 | 48 | if (match_parameter.size > 0 && |
88 | 48 | !StringFunctions::set_re2_options(match_parameter, error_str, &options)) { |
89 | 1 | return false; |
90 | 1 | } |
91 | 47 | re.reset(new re2::RE2(pattern_sp, options)); |
92 | 47 | if (!re->ok()) { |
93 | 1 | std::stringstream ss; |
94 | 1 | ss << "Could not compile regexp pattern: " << std::string(pattern.data, pattern.size) |
95 | 1 | << std::endl |
96 | 1 | << "Error: " << re->error(); |
97 | 1 | *error_str = ss.str(); |
98 | 1 | re.reset(); |
99 | 1 | return false; |
100 | 1 | } |
101 | 46 | return true; |
102 | 47 | } |
103 | | |
104 | | } // namespace doris |