/root/doris/be/src/exprs/string_functions.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/string-functions.cpp |
19 | | // and modified by Doris |
20 | | |
21 | | #include "exprs/string_functions.h" |
22 | | |
23 | | #include <re2/re2.h> |
24 | | #include <re2/stringpiece.h> |
25 | | |
26 | | #include <sstream> |
27 | | |
28 | | // NOTE: be careful not to use string::append. It is not performant. |
29 | | namespace doris { |
30 | | |
31 | | // This function sets options in the RE2 library before pattern matching. |
32 | | bool StringFunctions::set_re2_options(const StringRef& match_parameter, std::string* error_str, |
33 | 0 | re2::RE2::Options* opts) { |
34 | 0 | for (int i = 0; i < match_parameter.size; i++) { |
35 | 0 | char match = match_parameter.data[i]; |
36 | 0 | switch (match) { |
37 | 0 | case 'i': |
38 | 0 | opts->set_case_sensitive(false); |
39 | 0 | break; |
40 | 0 | case 'c': |
41 | 0 | opts->set_case_sensitive(true); |
42 | 0 | break; |
43 | 0 | case 'm': |
44 | 0 | opts->set_posix_syntax(true); |
45 | 0 | opts->set_one_line(false); |
46 | 0 | break; |
47 | 0 | case 'n': |
48 | 0 | opts->set_never_nl(false); |
49 | 0 | opts->set_dot_nl(true); |
50 | 0 | break; |
51 | 0 | default: |
52 | 0 | std::stringstream error; |
53 | 0 | error << "Illegal match parameter " << match; |
54 | 0 | *error_str = error.str(); |
55 | 0 | return false; |
56 | 0 | } |
57 | 0 | } |
58 | 0 | return true; |
59 | 0 | } |
60 | | |
61 | | // The caller owns the returned regex. Returns nullptr if the pattern could not be compiled. |
62 | | bool StringFunctions::compile_regex(const StringRef& pattern, std::string* error_str, |
63 | | const StringRef& match_parameter, |
64 | 42 | std::unique_ptr<re2::RE2>& re) { |
65 | 42 | re2::StringPiece pattern_sp(pattern.data, pattern.size); |
66 | 42 | re2::RE2::Options options; |
67 | | // Disable error logging in case e.g. every row causes an error |
68 | 42 | options.set_log_errors(false); |
69 | | // ATTN(cmy): no set it, or the lazy mode of regex won't work. See Doris #6587 |
70 | | // Return the leftmost longest match (rather than the first match). |
71 | | // options.set_longest_match(true); |
72 | 42 | options.set_dot_nl(true); |
73 | 42 | if (match_parameter.size > 0 && |
74 | 42 | !StringFunctions::set_re2_options(match_parameter, error_str, &options)) { |
75 | 0 | return false; |
76 | 0 | } |
77 | 42 | re.reset(new re2::RE2(pattern_sp, options)); |
78 | 42 | if (!re->ok()) { |
79 | 0 | std::stringstream ss; |
80 | 0 | ss << "Could not compile regexp pattern: " << std::string(pattern.data, pattern.size) |
81 | 0 | << std::endl |
82 | 0 | << "Error: " << re->error(); |
83 | 0 | *error_str = ss.str(); |
84 | 0 | re.reset(); |
85 | 0 | return false; |
86 | 0 | } |
87 | 42 | return true; |
88 | 42 | } |
89 | | |
90 | | } // namespace doris |