/root/doris/be/src/gutil/strings/strip.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2011 Google Inc. All Rights Reserved. |
2 | | // based on contributions of various authors in strings/strutil_unittest.cc |
3 | | // |
4 | | // This file contains functions that remove a defined part from the string, |
5 | | // i.e., strip the string. |
6 | | |
7 | | #include "gutil/strings/strip.h" |
8 | | |
9 | | // IWYU pragma: no_include <pstl/glue_algorithm_defs.h> |
10 | | |
11 | | #include <assert.h> |
12 | | #include <string.h> |
13 | | #include <algorithm> |
14 | | #include <iterator> |
15 | | #include <mutex> |
16 | | |
17 | | using std::copy; |
18 | | using std::max; |
19 | | using std::min; |
20 | | using std::reverse; |
21 | | using std::sort; |
22 | | using std::swap; |
23 | | #include <string> |
24 | | |
25 | | using std::string; |
26 | | |
27 | | #include "gutil/strings/ascii_ctype.h" |
28 | | #include "gutil/strings/stringpiece.h" |
29 | | |
30 | 368 | string StripPrefixString(StringPiece str, const StringPiece& prefix) { |
31 | 368 | if (str.starts_with(prefix)) str.remove_prefix(prefix.length()); |
32 | 368 | return str.as_string(); |
33 | 368 | } |
34 | | |
35 | 0 | bool TryStripPrefixString(StringPiece str, const StringPiece& prefix, string* result) { |
36 | 0 | const bool has_prefix = str.starts_with(prefix); |
37 | 0 | if (has_prefix) str.remove_prefix(prefix.length()); |
38 | 0 | str.as_string().swap(*result); |
39 | 0 | return has_prefix; |
40 | 0 | } |
41 | | |
42 | 368 | string StripSuffixString(StringPiece str, const StringPiece& suffix) { |
43 | 368 | if (str.ends_with(suffix)) str.remove_suffix(suffix.length()); |
44 | 368 | return str.as_string(); |
45 | 368 | } |
46 | | |
47 | 0 | bool TryStripSuffixString(StringPiece str, const StringPiece& suffix, string* result) { |
48 | 0 | const bool has_suffix = str.ends_with(suffix); |
49 | 0 | if (has_suffix) str.remove_suffix(suffix.length()); |
50 | 0 | str.as_string().swap(*result); |
51 | 0 | return has_suffix; |
52 | 0 | } |
53 | | |
54 | | // ---------------------------------------------------------------------- |
55 | | // StripString |
56 | | // Replaces any occurrence of the character 'remove' (or the characters |
57 | | // in 'remove') with the character 'replacewith'. |
58 | | // ---------------------------------------------------------------------- |
59 | 0 | void StripString(char* str, StringPiece remove, char replacewith) { |
60 | 0 | for (; *str != '\0'; ++str) { |
61 | 0 | if (remove.find(*str) != StringPiece::npos) { |
62 | 0 | *str = replacewith; |
63 | 0 | } |
64 | 0 | } |
65 | 0 | } |
66 | | |
67 | 0 | void StripString(char* str, int len, StringPiece remove, char replacewith) { |
68 | 0 | char* end = str + len; |
69 | 0 | for (; str < end; ++str) { |
70 | 0 | if (remove.find(*str) != StringPiece::npos) { |
71 | 0 | *str = replacewith; |
72 | 0 | } |
73 | 0 | } |
74 | 0 | } |
75 | | |
76 | 0 | void StripString(string* s, StringPiece remove, char replacewith) { |
77 | 0 | for (char& c : *s) { |
78 | 0 | if (remove.find(c) != StringPiece::npos) { |
79 | 0 | c = replacewith; |
80 | 0 | } |
81 | 0 | } |
82 | 0 | } |
83 | | |
84 | | // ---------------------------------------------------------------------- |
85 | | // StripWhiteSpace |
86 | | // ---------------------------------------------------------------------- |
87 | 1.86k | void StripWhiteSpace(const char** str, int* len) { |
88 | | // strip off trailing whitespace |
89 | 1.86k | while ((*len) > 0 && ascii_isspace((*str)[(*len) - 1])) { |
90 | 1 | (*len)--; |
91 | 1 | } |
92 | | |
93 | | // strip off leading whitespace |
94 | 1.87k | while ((*len) > 0 && ascii_isspace((*str)[0])) { |
95 | 14 | (*len)--; |
96 | 14 | (*str)++; |
97 | 14 | } |
98 | 1.86k | } |
99 | | |
// Removes one trailing line terminator from '*s': either "\n" or "\r\n".
// Returns true if a terminator was removed, false if '*s' does not end in
// '\n' (in which case '*s' is left untouched).
bool StripTrailingNewline(std::string* s) {
    const std::string::size_type size = s->size();
    if (size == 0 || (*s)[size - 1] != '\n') {
        return false;
    }
    // A '\r' immediately before the '\n' is part of the same terminator.
    const bool preceded_by_cr = size >= 2 && (*s)[size - 2] == '\r';
    s->resize(size - (preceded_by_cr ? 2 : 1));
    return true;
}
110 | | |
111 | 99 | void StripWhiteSpace(string* str) { |
112 | 99 | int str_length = str->length(); |
113 | | |
114 | | // Strip off leading whitespace. |
115 | 99 | int first = 0; |
116 | 121 | while (first < str_length && ascii_isspace(str->at(first))) { |
117 | 22 | ++first; |
118 | 22 | } |
119 | | // If entire string is white space. |
120 | 99 | if (first == str_length) { |
121 | 24 | str->clear(); |
122 | 24 | return; |
123 | 24 | } |
124 | 75 | if (first > 0) { |
125 | 22 | str->erase(0, first); |
126 | 22 | str_length -= first; |
127 | 22 | } |
128 | | |
129 | | // Strip off trailing whitespace. |
130 | 75 | int last = str_length - 1; |
131 | 76 | while (last >= 0 && ascii_isspace(str->at(last))) { |
132 | 1 | --last; |
133 | 1 | } |
134 | 75 | if (last != (str_length - 1) && last >= 0) { |
135 | 1 | str->erase(last + 1, string::npos); |
136 | 1 | } |
137 | 75 | } |
138 | | |
139 | | // ---------------------------------------------------------------------- |
140 | | // Misc. stripping routines |
141 | | // ---------------------------------------------------------------------- |
142 | 0 | void StripCurlyBraces(string* s) { |
143 | 0 | return StripBrackets('{', '}', s); |
144 | 0 | } |
145 | | |
// Erases from '*s' every span that starts at a 'left' character and runs
// through the next 'right' character (both delimiters included). Matching is
// not nesting-aware: a span always ends at the first 'right' found. If a
// 'left' has no following 'right', it and the rest of the string are kept.
void StripBrackets(char left, char right, std::string* s) {
    std::string::size_type open = s->find(left);
    while (open != std::string::npos) {
        const std::string::size_type close = s->find(right, open);
        if (close == std::string::npos) {
            return;  // Unterminated span: leave the remainder untouched.
        }
        s->erase(open, close - open + 1);
        // Text after the erased span now begins at 'open'; resume there.
        open = s->find(left, open);
    }
}
155 | | |
// Erases from '*s' every "<...>" span (angle brackets included). A '<' with
// no closing '>' causes everything from that '<' to the end of the string to
// be erased.
void StripMarkupTags(std::string* s) {
    std::string::size_type open = s->find('<');
    while (open != std::string::npos) {
        const std::string::size_type close = s->find('>', open);
        if (close == std::string::npos) {
            // Unterminated tag: drop it together with the rest of the text.
            s->erase(open);
            return;
        }
        s->erase(open, close - open + 1);
        // Text after the erased tag now begins at 'open'; resume there.
        open = s->find('<', open);
    }
}
169 | | |
170 | 0 | string OutputWithMarkupTagsStripped(const string& s) { |
171 | 0 | string result(s); |
172 | 0 | StripMarkupTags(&result); |
173 | 0 | return result; |
174 | 0 | } |
175 | | |
176 | 0 | int TrimStringLeft(string* s, const StringPiece& remove) { |
177 | 0 | int i = 0; |
178 | 0 | while (i < s->size() && memchr(remove.data(), (*s)[i], remove.size())) { |
179 | 0 | ++i; |
180 | 0 | } |
181 | 0 | if (i > 0) s->erase(0, i); |
182 | 0 | return i; |
183 | 0 | } |
184 | | |
185 | 0 | int TrimStringRight(string* s, const StringPiece& remove) { |
186 | 0 | int i = s->size(), trimmed = 0; |
187 | 0 | while (i > 0 && memchr(remove.data(), (*s)[i - 1], remove.size())) { |
188 | 0 | --i; |
189 | 0 | } |
190 | 0 | if (i < s->size()) { |
191 | 0 | trimmed = s->size() - i; |
192 | 0 | s->erase(i); |
193 | 0 | } |
194 | 0 | return trimmed; |
195 | 0 | } |
196 | | |
197 | | // ---------------------------------------------------------------------- |
198 | | // Various removal routines |
199 | | // ---------------------------------------------------------------------- |
// Deletes every occurrence of 'c' from the NUL-terminated buffer 'str',
// compacting the remaining characters in place and re-terminating the string.
// Returns the length of the resulting string.
int strrm(char* str, char c) {
    char* const terminator = str + strlen(str);
    // std::remove shifts the kept characters forward and reports the new end.
    char* const new_end = std::remove(str, terminator, c);
    *new_end = '\0';
    return static_cast<int>(new_end - str);
}
207 | | |
// Deletes every occurrence of 'c' from the first 'strlen' bytes of 'str',
// compacting the kept bytes toward the front. No terminator is written.
// Returns the number of bytes kept; a non-positive 'strlen' yields 0.
int memrm(char* str, int strlen, char c) {
    if (strlen <= 0) {
        return 0;
    }
    char* const new_end = std::remove(str, str + strlen, c);
    return static_cast<int>(new_end - str);
}
214 | | |
// Deletes from the NUL-terminated buffer 'str' every character that occurs in
// the NUL-terminated set 'chars', compacting in place and re-terminating.
// Returns the length of the resulting string.
int strrmm(char* str, const char* chars) {
    char* write = str;
    for (const char* read = str; *read != '\0'; ++read) {
        // *read is never NUL inside the loop, so strchr cannot match the
        // terminator of 'chars' here.
        if (strchr(chars, *read) != nullptr) {
            continue;
        }
        *write = *read;
        ++write;
    }
    *write = '\0';
    return static_cast<int>(write - str);
}
230 | | |
// Deletes from '*str' every character that occurs in 'chars', compacting the
// string in place. Returns the length of the resulting string.
int strrmm(std::string* str, const std::string& chars) {
    const std::string::size_type total = str->length();
    const std::string::size_type first_hit = str->find_first_of(chars);
    if (first_hit == std::string::npos) {
        return total;  // Nothing to remove; the string is left as-is.
    }

    // Everything before 'first_hit' is already in place. Compact the rest by
    // copying each surviving character down to the write position.
    std::string::size_type write = first_hit;
    for (std::string::size_type read = first_hit + 1; read < total; ++read) {
        const char c = (*str)[read];
        if (chars.find(c) == std::string::npos) {
            (*str)[write] = c;
            ++write;
        }
    }

    str->resize(write);
    return write;
}
246 | | |
247 | | // ---------------------------------------------------------------------- |
248 | | // StripDupCharacters |
249 | | // Replaces any repeated occurrence of the character 'dup_char' |
250 | | // with single occurrence. e.g., |
251 | | // StripDupCharacters("a//b/c//d", '/', 0) => "a/b/c/d" |
252 | | // Return the number of characters removed |
253 | | // ---------------------------------------------------------------------- |
// Collapses every run of consecutive 'dup_char' characters in '*s' into a
// single 'dup_char', considering only positions at or after 'start_pos'
// (a negative 'start_pos' is treated as 0). The string is compacted in place.
// Returns the number of characters removed.
int StripDupCharacters(std::string* s, char dup_char, int start_pos) {
    const int length = s->size();
    const int begin = start_pos < 0 ? 0 : start_pos;

    int read = begin;   // next character to examine
    int write = begin;  // next slot to fill
    while (read < length) {
        // Every character reached here is kept.
        const char current = (*s)[read];
        if (write != read) {
            (*s)[write] = current;
        }
        ++read;
        ++write;

        if (current != dup_char) {
            continue;
        }
        // Just kept one 'dup_char'; swallow the rest of its run.
        while (read < length && (*s)[read] == dup_char) {
            ++read;
        }
    }

    const int removed = read - write;
    s->resize(s->size() - removed);
    return removed;
}
277 | | |
278 | | // ---------------------------------------------------------------------- |
279 | | // RemoveExtraWhitespace() |
280 | | // Remove leading, trailing, and duplicate internal whitespace. |
281 | | // ---------------------------------------------------------------------- |
282 | 0 | void RemoveExtraWhitespace(string* s) { |
283 | 0 | assert(s != nullptr); |
284 | | // Empty strings clearly have no whitespace, and this code assumes that |
285 | | // string length is greater than 0 |
286 | 0 | if (s->empty()) return; |
287 | | |
288 | 0 | int input_pos = 0; // current reader position |
289 | 0 | int output_pos = 0; // current writer position |
290 | 0 | const int input_end = s->size(); |
291 | | // Strip off leading space |
292 | 0 | while (input_pos < input_end && ascii_isspace((*s)[input_pos])) input_pos++; |
293 | |
|
294 | 0 | while (input_pos < input_end - 1) { |
295 | 0 | char c = (*s)[input_pos]; |
296 | 0 | char next = (*s)[input_pos + 1]; |
297 | | // Copy each non-whitespace character to the right position. |
298 | | // For a block of whitespace, print the last one. |
299 | 0 | if (!ascii_isspace(c) || !ascii_isspace(next)) { |
300 | 0 | if (output_pos != input_pos) { // only copy if needed |
301 | 0 | (*s)[output_pos] = c; |
302 | 0 | } |
303 | 0 | output_pos++; |
304 | 0 | } |
305 | 0 | input_pos++; |
306 | 0 | } |
307 | | // Pick up the last character if needed. |
308 | 0 | char c = (*s)[input_end - 1]; |
309 | 0 | if (!ascii_isspace(c)) (*s)[output_pos++] = c; |
310 | |
|
311 | 0 | s->resize(output_pos); |
312 | 0 | } |
313 | | |
314 | | //------------------------------------------------------------------------ |
315 | | // See comment in header file for a complete description. |
316 | | //------------------------------------------------------------------------ |
317 | 0 | void StripLeadingWhiteSpace(string* str) { |
318 | 0 | char const* const leading = StripLeadingWhiteSpace(const_cast<char*>(str->c_str())); |
319 | 0 | if (leading != nullptr) { |
320 | 0 | string const tmp(leading); |
321 | 0 | str->assign(tmp); |
322 | 0 | } else { |
323 | 0 | str->assign(""); |
324 | 0 | } |
325 | 0 | } |
326 | | |
327 | 0 | void StripTrailingWhitespace(string* const s) { |
328 | 0 | string::size_type i; |
329 | 0 | for (i = s->size(); i > 0 && ascii_isspace((*s)[i - 1]); --i) { |
330 | 0 | } |
331 | |
|
332 | 0 | s->resize(i); |
333 | 0 | } |
334 | | |
335 | | // ---------------------------------------------------------------------- |
336 | | // TrimRunsInString |
337 | | // Removes leading and trailing runs, and collapses middle |
338 | | // runs of a set of characters into a single character (the |
339 | | // first one specified in 'remove'). Useful for collapsing |
340 | | // runs of repeated delimiters, whitespace, etc. E.g., |
341 | | // TrimRunsInString(&s, " :,()") removes leading and trailing |
342 | | // delimiter chars and collapses and converts internal runs |
343 | | // of delimiters to single ' ' characters, so, for example, |
344 | | // " a:(b):c " -> "a b c" |
345 | | // "first,last::(area)phone, ::zip" -> "first last area phone zip" |
346 | | // ---------------------------------------------------------------------- |
347 | 0 | void TrimRunsInString(string* s, StringPiece remove) { |
348 | 0 | string::iterator dest = s->begin(); |
349 | 0 | string::iterator src_end = s->end(); |
350 | 0 | for (string::iterator src = s->begin(); src != src_end;) { |
351 | 0 | if (remove.find(*src) == StringPiece::npos) { |
352 | 0 | *(dest++) = *(src++); |
353 | 0 | } else { |
354 | | // Skip to the end of this run of chars that are in 'remove'. |
355 | 0 | for (++src; src != src_end; ++src) { |
356 | 0 | if (remove.find(*src) == StringPiece::npos) { |
357 | 0 | if (dest != s->begin()) { |
358 | | // This is an internal run; collapse it. |
359 | 0 | *(dest++) = remove[0]; |
360 | 0 | } |
361 | 0 | *(dest++) = *(src++); |
362 | 0 | break; |
363 | 0 | } |
364 | 0 | } |
365 | 0 | } |
366 | 0 | } |
367 | 0 | s->erase(dest, src_end); |
368 | 0 | } |
369 | | |
370 | | // ---------------------------------------------------------------------- |
371 | | // RemoveNullsInString |
372 | | // Removes any internal \0 characters from the string. |
373 | | // ---------------------------------------------------------------------- |
// Deletes every '\0' character contained in '*s', keeping all other
// characters in their original order.
void RemoveNullsInString(std::string* s) {
    // std::remove compacts the kept characters and returns the new logical
    // end; the erase then drops the leftover tail.
    const std::string::iterator new_end = std::remove(s->begin(), s->end(), '\0');
    s->erase(new_end, s->end());
}