Coverage Report

Created: 2024-11-21 23:45

/root/doris/be/src/gutil/strings/strip.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2011 Google Inc. All Rights Reserved.
2
// based on contributions of various authors in strings/strutil_unittest.cc
3
//
4
// This file contains functions that remove a defined part from the string,
5
// i.e., strip the string.
6
7
#include "gutil/strings/strip.h"
8
9
// IWYU pragma: no_include <pstl/glue_algorithm_defs.h>
10
11
#include <assert.h>
12
#include <string.h>
13
#include <algorithm>
14
#include <iterator>
15
#include <mutex>
16
17
using std::copy;
18
using std::max;
19
using std::min;
20
using std::reverse;
21
using std::sort;
22
using std::swap;
23
#include <string>
24
25
using std::string;
26
27
#include "gutil/strings/ascii_ctype.h"
28
#include "gutil/strings/stringpiece.h"
29
30
368
// Returns a copy of 'str' with 'prefix' removed from its front when 'str'
// starts with 'prefix'; otherwise returns a copy of 'str' unchanged.
string StripPrefixString(StringPiece str, const StringPiece& prefix) {
    if (!str.starts_with(prefix)) {
        return str.as_string();
    }
    str.remove_prefix(prefix.length());
    return str.as_string();
}
34
35
0
// Writes 'str' into '*result', dropping 'prefix' from the front when 'str'
// starts with it. Returns true iff the prefix was present (and removed).
// '*result' is always overwritten, even when nothing was stripped.
bool TryStripPrefixString(StringPiece str, const StringPiece& prefix, string* result) {
    bool stripped = false;
    if (str.starts_with(prefix)) {
        str.remove_prefix(prefix.length());
        stripped = true;
    }
    // as_string() materialises an independent copy before '*result' is touched.
    *result = str.as_string();
    return stripped;
}
41
42
368
// Returns a copy of 'str' with 'suffix' removed from its end when 'str'
// ends with 'suffix'; otherwise returns a copy of 'str' unchanged.
string StripSuffixString(StringPiece str, const StringPiece& suffix) {
    if (!str.ends_with(suffix)) {
        return str.as_string();
    }
    str.remove_suffix(suffix.length());
    return str.as_string();
}
46
47
0
// Writes 'str' into '*result', dropping 'suffix' from the end when 'str'
// ends with it. Returns true iff the suffix was present (and removed).
// '*result' is always overwritten, even when nothing was stripped.
bool TryStripSuffixString(StringPiece str, const StringPiece& suffix, string* result) {
    bool stripped = false;
    if (str.ends_with(suffix)) {
        str.remove_suffix(suffix.length());
        stripped = true;
    }
    // as_string() materialises an independent copy before '*result' is touched.
    *result = str.as_string();
    return stripped;
}
53
54
// ----------------------------------------------------------------------
55
// StripString
56
//    Replaces any occurrence of the character 'remove' (or the characters
57
//    in 'remove') with the character 'replacewith'.
58
// ----------------------------------------------------------------------
59
0
void StripString(char* str, StringPiece remove, char replacewith) {
60
0
    for (; *str != '\0'; ++str) {
61
0
        if (remove.find(*str) != StringPiece::npos) {
62
0
            *str = replacewith;
63
0
        }
64
0
    }
65
0
}
66
67
0
void StripString(char* str, int len, StringPiece remove, char replacewith) {
68
0
    char* end = str + len;
69
0
    for (; str < end; ++str) {
70
0
        if (remove.find(*str) != StringPiece::npos) {
71
0
            *str = replacewith;
72
0
        }
73
0
    }
74
0
}
75
76
0
void StripString(string* s, StringPiece remove, char replacewith) {
77
0
    for (char& c : *s) {
78
0
        if (remove.find(c) != StringPiece::npos) {
79
0
            c = replacewith;
80
0
        }
81
0
    }
82
0
}
83
84
// ----------------------------------------------------------------------
85
// StripWhiteSpace
86
// ----------------------------------------------------------------------
87
2.69k
void StripWhiteSpace(const char** str, int* len) {
88
    // strip off trailing whitespace
89
2.69k
    while ((*len) > 0 && ascii_isspace((*str)[(*len) - 1])) {
90
1
        (*len)--;
91
1
    }
92
93
    // strip off leading whitespace
94
2.71k
    while ((*len) > 0 && ascii_isspace((*str)[0])) {
95
14
        (*len)--;
96
14
        (*str)++;
97
14
    }
98
2.69k
}
99
100
0
// Removes one trailing line terminator from '*s': either "\n" or "\r\n".
// Returns true if something was removed, false if '*s' does not end in '\n'.
bool StripTrailingNewline(string* s) {
    if (s->empty() || s->back() != '\n') {
        return false;
    }
    const string::size_type length = s->size();
    // A '\r' directly before the '\n' belongs to the same terminator.
    const bool has_carriage_return = length > 1 && (*s)[length - 2] == '\r';
    s->resize(length - (has_carriage_return ? 2 : 1));
    return true;
}
110
111
99
void StripWhiteSpace(string* str) {
112
99
    int str_length = str->length();
113
114
    // Strip off leading whitespace.
115
99
    int first = 0;
116
121
    while (first < str_length && ascii_isspace(str->at(first))) {
117
22
        ++first;
118
22
    }
119
    // If entire string is white space.
120
99
    if (first == str_length) {
121
24
        str->clear();
122
24
        return;
123
24
    }
124
75
    if (first > 0) {
125
22
        str->erase(0, first);
126
22
        str_length -= first;
127
22
    }
128
129
    // Strip off trailing whitespace.
130
75
    int last = str_length - 1;
131
76
    while (last >= 0 && ascii_isspace(str->at(last))) {
132
1
        --last;
133
1
    }
134
75
    if (last != (str_length - 1) && last >= 0) {
135
1
        str->erase(last + 1, string::npos);
136
1
    }
137
75
}
138
139
// ----------------------------------------------------------------------
140
// Misc. stripping routines
141
// ----------------------------------------------------------------------
142
0
void StripCurlyBraces(string* s) {
143
0
    return StripBrackets('{', '}', s);
144
0
}
145
146
0
// Removes from '*s' every span that starts at an occurrence of 'left' and
// runs through the next occurrence of 'right' (both delimiters included).
// Spans are matched left to right without nesting. A 'left' that has no
// following 'right' is kept, and processing stops there.
void StripBrackets(char left, char right, string* s) {
    string::size_type open = s->find(left);
    while (open != string::npos) {
        const string::size_type close = s->find(right, open);
        if (close == string::npos) {
            return; // unterminated span: leave the remainder untouched
        }
        s->erase(open, close - open + 1);
        // Text after the erased span has shifted down to 'open'; resume there.
        open = s->find(left, open);
    }
}
155
156
0
// Removes every "<...>" span from '*s' in place. If a '<' has no matching
// '>' after it, everything from that '<' to the end of the string is removed.
void StripMarkupTags(string* s) {
    string::size_type open = s->find('<');
    while (open != string::npos) {
        const string::size_type close = s->find('>', open);
        if (close == string::npos) {
            // Unterminated tag: drop the rest of the string.
            s->erase(open);
            return;
        }
        s->erase(open, close - open + 1);
        // Text after the erased tag has shifted down to 'open'; resume there.
        open = s->find('<', open);
    }
}
169
170
0
string OutputWithMarkupTagsStripped(const string& s) {
171
0
    string result(s);
172
0
    StripMarkupTags(&result);
173
0
    return result;
174
0
}
175
176
0
int TrimStringLeft(string* s, const StringPiece& remove) {
177
0
    int i = 0;
178
0
    while (i < s->size() && memchr(remove.data(), (*s)[i], remove.size())) {
179
0
        ++i;
180
0
    }
181
0
    if (i > 0) s->erase(0, i);
182
0
    return i;
183
0
}
184
185
0
int TrimStringRight(string* s, const StringPiece& remove) {
186
0
    int i = s->size(), trimmed = 0;
187
0
    while (i > 0 && memchr(remove.data(), (*s)[i - 1], remove.size())) {
188
0
        --i;
189
0
    }
190
0
    if (i < s->size()) {
191
0
        trimmed = s->size() - i;
192
0
        s->erase(i);
193
0
    }
194
0
    return trimmed;
195
0
}
196
197
// ----------------------------------------------------------------------
198
// Various removal routines
199
// ----------------------------------------------------------------------
200
0
// Removes every occurrence of 'c' from the NUL-terminated string 'str' by
// compacting it in place, re-terminates it, and returns the new length.
int strrm(char* str, char c) {
    char* out = str;
    const char* in = str;
    while (*in != '\0') {
        const char ch = *in++;
        if (ch == c) continue; // dropped: the write cursor stays put
        *out++ = ch;
    }
    *out = '\0';
    return out - str;
}
207
208
0
// Removes every occurrence of 'c' from the first 'strlen' bytes of 'str' by
// compacting the kept bytes to the front. Returns the number of bytes kept.
// No terminator is written, and bytes past the returned length are left as
// they were. Does nothing when 'strlen' is not positive.
int memrm(char* str, int strlen, char c) {
    int kept = 0;
    for (int i = 0; i < strlen; ++i) {
        const char ch = str[i];
        if (ch != c) {
            str[kept++] = ch;
        }
    }
    return kept;
}
214
215
0
// Removes from the NUL-terminated string 'str' every character that occurs in
// the NUL-terminated set 'chars', compacting in place. Re-terminates 'str'
// and returns its new length.
int strrmm(char* str, const char* chars) {
    char* out = str;
    for (const char* in = str; *in != '\0'; ++in) {
        // '*in' is never '\0' here, so strchr cannot match the terminator of
        // 'chars'; a non-null result means the character is in the set.
        if (strchr(chars, *in) != nullptr) continue;
        *out++ = *in;
    }
    *out = '\0';
    return out - str;
}
230
231
0
// Removes from '*str' every character that occurs in 'chars' and returns the
// resulting length of '*str'.
int strrmm(string* str, const string& chars) {
    const string::iterator kept_end =
            std::remove_if(str->begin(), str->end(),
                           [&chars](char ch) { return chars.find(ch) != string::npos; });
    str->erase(kept_end, str->end());
    return str->size();
}
246
247
// ----------------------------------------------------------------------
248
// StripDupCharacters
249
//    Replaces any repeated occurrence of the character 'repeat_char'
250
//    with single occurrence.  e.g.,
251
//       StripDupCharacters("a//b/c//d", '/', 0) => "a/b/c/d"
252
//    Return the number of characters removed
253
// ----------------------------------------------------------------------
254
0
// Collapses each run of consecutive 'dup_char' characters found at or after
// 'start_pos' into a single 'dup_char', compacting '*s' in place. A negative
// 'start_pos' is treated as 0. Characters before 'start_pos' are not examined,
// so a 'dup_char' at 'start_pos' is kept even if one precedes it.
// Returns the number of characters removed.
int StripDupCharacters(string* s, char dup_char, int start_pos) {
    const int length = s->size();
    int read = start_pos < 0 ? 0 : start_pos;

    // 'removed' is how far the write position trails the read position.
    int removed = 0;
    while (read < length) {
        const char ch = (*s)[read];
        if (removed > 0) {
            (*s)[read - removed] = ch; // shift the kept character down
        }
        ++read;

        if (ch != dup_char) continue;
        // One 'dup_char' was just kept; swallow the rest of the run.
        while (read < length && (*s)[read] == dup_char) {
            ++read;
            ++removed;
        }
    }

    s->resize(s->size() - removed);
    return removed;
}
277
278
// ----------------------------------------------------------------------
279
// RemoveExtraWhitespace()
280
//   Remove leading, trailing, and duplicate internal whitespace.
281
// ----------------------------------------------------------------------
282
0
void RemoveExtraWhitespace(string* s) {
283
0
    assert(s != nullptr);
284
    // Empty strings clearly have no whitespace, and this code assumes that
285
    // string length is greater than 0
286
0
    if (s->empty()) return;
287
288
0
    int input_pos = 0;  // current reader position
289
0
    int output_pos = 0; // current writer position
290
0
    const int input_end = s->size();
291
    // Strip off leading space
292
0
    while (input_pos < input_end && ascii_isspace((*s)[input_pos])) input_pos++;
293
294
0
    while (input_pos < input_end - 1) {
295
0
        char c = (*s)[input_pos];
296
0
        char next = (*s)[input_pos + 1];
297
        // Copy each non-whitespace character to the right position.
298
        // For a block of whitespace, print the last one.
299
0
        if (!ascii_isspace(c) || !ascii_isspace(next)) {
300
0
            if (output_pos != input_pos) { // only copy if needed
301
0
                (*s)[output_pos] = c;
302
0
            }
303
0
            output_pos++;
304
0
        }
305
0
        input_pos++;
306
0
    }
307
    // Pick up the last character if needed.
308
0
    char c = (*s)[input_end - 1];
309
0
    if (!ascii_isspace(c)) (*s)[output_pos++] = c;
310
311
0
    s->resize(output_pos);
312
0
}
313
314
//------------------------------------------------------------------------
315
// See comment in header file for a complete description.
316
//------------------------------------------------------------------------
317
0
void StripLeadingWhiteSpace(string* str) {
318
0
    char const* const leading = StripLeadingWhiteSpace(const_cast<char*>(str->c_str()));
319
0
    if (leading != nullptr) {
320
0
        string const tmp(leading);
321
0
        str->assign(tmp);
322
0
    } else {
323
0
        str->assign("");
324
0
    }
325
0
}
326
327
0
// Truncates '*s' so that it no longer ends with ASCII whitespace.
void StripTrailingWhitespace(string* const s) {
    // 'keep' is the length of the prefix that survives.
    string::size_type keep = s->size();
    while (keep > 0 && ascii_isspace((*s)[keep - 1])) {
        --keep;
    }
    s->resize(keep);
}
334
335
// ----------------------------------------------------------------------
336
// TrimRunsInString
337
//    Removes leading and trailing runs, and collapses middle
338
//    runs of a set of characters into a single character (the
339
//    first one specified in 'remove').  Useful for collapsing
340
//    runs of repeated delimiters, whitespace, etc.  E.g.,
341
//    TrimRunsInString(&s, " :,()") removes leading and trailing
342
//    delimiter chars and collapses and converts internal runs
343
//    of delimiters to single ' ' characters, so, for example,
344
//    "  a:(b):c  " -> "a b c"
345
//    "first,last::(area)phone, ::zip" -> "first last area phone zip"
346
// ----------------------------------------------------------------------
347
0
void TrimRunsInString(string* s, StringPiece remove) {
348
0
    string::iterator dest = s->begin();
349
0
    string::iterator src_end = s->end();
350
0
    for (string::iterator src = s->begin(); src != src_end;) {
351
0
        if (remove.find(*src) == StringPiece::npos) {
352
0
            *(dest++) = *(src++);
353
0
        } else {
354
            // Skip to the end of this run of chars that are in 'remove'.
355
0
            for (++src; src != src_end; ++src) {
356
0
                if (remove.find(*src) == StringPiece::npos) {
357
0
                    if (dest != s->begin()) {
358
                        // This is an internal run; collapse it.
359
0
                        *(dest++) = remove[0];
360
0
                    }
361
0
                    *(dest++) = *(src++);
362
0
                    break;
363
0
                }
364
0
            }
365
0
        }
366
0
    }
367
0
    s->erase(dest, src_end);
368
0
}
369
370
// ----------------------------------------------------------------------
371
// RemoveNullsInString
372
//    Removes any internal \0 characters from the string.
373
// ----------------------------------------------------------------------
374
0
// Erases every '\0' character from '*s'; all other characters keep their
// relative order.
void RemoveNullsInString(string* s) {
    const string::iterator kept_end = std::remove(s->begin(), s->end(), '\0');
    s->erase(kept_end, s->end());
}