/root/doris/be/src/util/string_util.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <strings.h> |
21 | | |
22 | | #include <algorithm> |
23 | | #include <boost/token_functions.hpp> |
24 | | #include <boost/tokenizer.hpp> |
25 | | #include <cctype> |
26 | | #include <cstddef> |
27 | | #include <map> |
28 | | #include <set> |
29 | | #include <sstream> |
30 | | #include <string> |
31 | | #include <unordered_map> |
32 | | #include <unordered_set> |
33 | | #include <vector> |
34 | | |
35 | | #include "common/exception.h" |
36 | | #include "common/status.h" |
37 | | |
38 | | namespace doris { |
39 | | |
// Returns a copy of `input` in which every byte has been mapped through
// std::tolower. Each byte is read as unsigned char before the call so that
// bytes with the high bit set are never passed to std::tolower as negative
// values.
inline std::string to_lower(const std::string& input) {
    std::string lowered(input.size(), '\0');
    auto dst = lowered.begin();
    for (unsigned char ch : input) {
        *dst++ = static_cast<char>(std::tolower(ch));
    }
    return lowered;
}
47 | | |
// Returns a copy of `input` in which every byte has been mapped through
// std::toupper. Each byte is read as unsigned char before the call so that
// bytes with the high bit set are never passed to std::toupper as negative
// values.
inline std::string to_upper(const std::string& input) {
    std::string raised(input.size(), '\0');
    auto dst = raised.begin();
    for (unsigned char ch : input) {
        *dst++ = static_cast<char>(std::toupper(ch));
    }
    return raised;
}
55 | | |
// Case-insensitive equality of two strings.
//
// Returns true when `lhs` and `rhs` have the same length and every pair of
// bytes at the same index is equal after std::tolower. Embedded NUL bytes are
// compared like any other byte.
//
// The comparison is done in place; no lowercase temporaries are allocated.
inline bool iequal(const std::string& lhs, const std::string& rhs) {
    if (lhs.size() != rhs.size()) {
        return false;
    }
    // Parameters are unsigned char so std::tolower never sees a negative value.
    return std::equal(lhs.begin(), lhs.end(), rhs.begin(),
                      [](unsigned char a, unsigned char b) {
                          return std::tolower(a) == std::tolower(b);
                      });
}
62 | | |
// Returns true when `value` begins with `beginning`.
//
// An empty `beginning` is a prefix of every string, including the empty one.
// The comparison is anchored at offset 0, so a mismatch is detected after
// looking at no more than `beginning.size()` bytes instead of searching the
// whole of `value` for an occurrence elsewhere.
inline bool starts_with(const std::string& value, const std::string& beginning) {
    // compare() clamps the length to value.size(), so a prefix longer than
    // `value` simply compares unequal.
    return value.compare(0, beginning.size(), beginning) == 0;
}
66 | | |
// Returns true when `value` finishes with `ending`.
//
// A suffix longer than `value` never matches; an empty `ending` always does.
inline bool ends_with(std::string const& value, std::string const& ending) {
    const size_t tail_len = ending.size();
    if (value.size() < tail_len) {
        return false;
    }
    // Compare only the last `tail_len` bytes of `value` against `ending`.
    return value.compare(value.size() - tail_len, tail_len, ending) == 0;
}
73 | | |
// Splits `s` on every occurrence of `delim` and returns the pieces in order.
//
// Adjacent delimiters, and delimiters at the very start or end of `s`, produce
// empty pieces, so the result always holds (number of delimiters + 1) entries.
// An empty `s` yields a single empty piece.
//
// An empty `delim` never separates anything: the whole of `s` is returned as
// the only piece. (Searching for an empty delimiter matches at the current
// position without advancing it, which would otherwise loop forever.)
inline std::vector<std::string> split(const std::string& s, const std::string& delim) {
    std::vector<std::string> out;
    if (delim.empty()) {
        out.push_back(s);
        return out;
    }

    size_t pos = 0;
    for (size_t found = s.find(delim, pos); found != std::string::npos;
         found = s.find(delim, pos)) {
        out.emplace_back(s, pos, found - pos);
        pos = found + delim.size();
    }

    // Whatever follows the last delimiter (possibly nothing) is the final piece.
    out.emplace_back(s, pos, std::string::npos);
    return out;
}
86 | | |
// Concatenates the elements of `elems`, writing `delim` between consecutive
// elements (never before the first or after the last).
//
// Each element is formatted with operator<< on a std::stringstream, so T must
// be streamable. An empty vector produces an empty string.
//
// The delimiter is streamed as a std::string, so its full length is written
// even when it contains embedded NUL bytes.
template <typename T>
std::string join(const std::vector<T>& elems, const std::string& delim) {
    std::stringstream ss;
    for (size_t i = 0; i < elems.size(); ++i) {
        if (i != 0) {
            ss << delim;
        }
        ss << elems[i];
    }
    return ss.str();
}
98 | | |
99 | | struct StringCaseHasher { |
100 | | public: |
101 | 1.59k | std::size_t operator()(const std::string& value) const { |
102 | 1.59k | std::string lower_value = to_lower(value); |
103 | 1.59k | return std::hash<std::string>()(lower_value); |
104 | 1.59k | } |
105 | | }; |
106 | | |
// Equality functor for case-insensitive string keys.
//
// Two strings are equal when they have the same length and every pair of
// bytes at the same index matches after std::tolower. All bytes take part in
// the comparison, including any that follow an embedded NUL, so the result
// agrees with a hash computed over the lowercased bytes of the whole string.
struct StringCaseEqual {
public:
    bool operator()(const std::string& lhs, const std::string& rhs) const {
        if (lhs.size() != rhs.size()) {
            return false;
        }
        // Parameters are unsigned char so std::tolower never sees a negative value.
        return std::equal(lhs.begin(), lhs.end(), rhs.begin(),
                          [](unsigned char a, unsigned char b) {
                              return std::tolower(a) == std::tolower(b);
                          });
    }
};
116 | | |
// Strict "less than" functor for case-insensitive string keys.
//
// The shared leading portion of both strings is compared with strncasecmp;
// when that portion compares equal, the shorter string orders first.
struct StringCaseLess {
public:
    bool operator()(const std::string& lhs, const std::string& rhs) const {
        const size_t shared_len = std::min(lhs.size(), rhs.size());
        const int order = strncasecmp(lhs.c_str(), rhs.c_str(), shared_len);
        return order != 0 ? order < 0 : lhs.size() < rhs.size();
    }
};
128 | | |
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably derives a hash value from `identifier` and `path`
// together — confirm against the implementation.
size_t hash_of_path(const std::string& identifier, const std::string& path);
130 | | |
// Containers keyed by std::string that treat keys case-insensitively.
// The ordered variants (set / map) order keys with StringCaseLess; the
// unordered variants hash with StringCaseHasher and compare with
// StringCaseEqual.
using StringCaseSet = std::set<std::string, StringCaseLess>;
using StringCaseUnorderedSet = std::unordered_set<std::string, StringCaseHasher, StringCaseEqual>;
template <class T>
using StringCaseMap = std::map<std::string, T, StringCaseLess>;
template <class T>
using StringCaseUnorderedMap =
        std::unordered_map<std::string, T, StringCaseHasher, StringCaseEqual>;
138 | | |
139 | | template <typename T> |
140 | 14 | auto get_json_token(T& path_string) { |
141 | 14 | try { |
142 | 14 | return boost::tokenizer<boost::escaped_list_separator<char>>( |
143 | 14 | path_string, boost::escaped_list_separator<char>("\\", ".", "\"")); |
144 | 14 | } catch (const boost::escaped_list_error& err) { |
145 | 0 | throw doris::Exception(ErrorCode::INVALID_JSON_PATH, "meet error {}", err.what()); |
146 | 0 | } |
147 | 14 | } |
148 | | |
// When USE_LIBCPP is defined, the std::string_view& specialization of
// get_json_token is deleted, so calling it with a std::string_view lvalue is
// a compile-time error.
// NOTE(review): the reason is not visible in this header — presumably the
// tokenizer does not work with std::string_view in that configuration; confirm.
#ifdef USE_LIBCPP
template <>
auto get_json_token(std::string_view& path_string) = delete;
#endif
153 | | |
154 | | } // namespace doris |