Coverage Report

Created: 2024-11-18 11:49

/root/doris/be/src/gutil/strings/strip.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2011 Google Inc. All Rights Reserved.
2
// Refactored from contributions of various authors in strings/strutil.h
3
//
4
// This file contains functions that remove a defined part from the string,
5
// i.e., strip the string.
6
7
#pragma once
8
9
#include <stddef.h>
10
11
#include <string>
12
using std::string;
13
14
#include "gutil/strings/ascii_ctype.h"
15
#include "gutil/strings/stringpiece.h"
16
17
// Given a string and a putative prefix, returns the string minus the
18
// prefix string if the prefix matches, otherwise the original
19
// string.
20
string StripPrefixString(StringPiece str, const StringPiece& prefix);
21
22
// Like StripPrefixString, but return true if the prefix was
23
// successfully matched.  Write the output to *result.
24
// It is safe for result to point back to the input string.
25
bool TryStripPrefixString(StringPiece str, const StringPiece& prefix, string* result);
26
27
// Given a string and a putative suffix, returns the string minus the
28
// suffix string if the suffix matches, otherwise the original
29
// string.
30
string StripSuffixString(StringPiece str, const StringPiece& suffix);
31
32
// Like StripSuffixString, but return true if the suffix was
33
// successfully matched.  Write the output to *result.
34
// It is safe for result to point back to the input string.
35
bool TryStripSuffixString(StringPiece str, const StringPiece& suffix, string* result);
36
37
// ----------------------------------------------------------------------
38
// StripString
39
//    Replaces any occurrence of the character 'remove' (or the characters
40
//    in 'remove') with the character 'replacewith'.
41
//    Good for keeping html characters or protocol characters (\t) out
42
//    of places where they might cause a problem.
43
// ----------------------------------------------------------------------
44
0
// Single-character overload: walks the NUL-terminated buffer 'str' in place
// and overwrites every character equal to 'remove' with 'replacewith'.
// The terminating NUL itself is never examined for replacement.
inline void StripString(char* str, char remove, char replacewith) {
    char* cursor = str;
    while (*cursor != '\0') {
        if (*cursor == remove) {
            *cursor = replacewith;
        }
        ++cursor;
    }
}
49
50
void StripString(char* str, StringPiece remove, char replacewith);
51
void StripString(char* str, int len, StringPiece remove, char replacewith);
52
void StripString(string* s, StringPiece remove, char replacewith);
53
54
// ----------------------------------------------------------------------
55
// StripDupCharacters
56
//    Replaces any repeated occurrence of the character 'dup_char'
57
//    with single occurrence.  e.g.,
58
//       StripDupCharacters("a//b/c//d", '/', 0) => "a/b/c/d"
59
//    Return the number of characters removed
60
// ----------------------------------------------------------------------
61
int StripDupCharacters(string* s, char dup_char, int start_pos);
62
63
// ----------------------------------------------------------------------
64
// StripWhiteSpace
65
//    "Removes" whitespace from both sides of string.  Pass in a pointer to an
66
//    array of characters, and its length.  The function changes the pointer
67
//    and length to refer to a substring that does not contain leading or
68
//    trailing spaces; it does not modify the string itself.  If the caller is
69
//    using NUL-terminated strings, it is the caller's responsibility to insert
70
//    the NUL character at the end of the substring.
71
//
72
//    Note: to be completely type safe, this function should be
73
//    parameterized as a template: template<typename anyChar> void
74
//    StripWhiteSpace(anyChar** str, int* len), where the expectation
75
//    is that anyChar could be char, const char, w_char, const w_char,
76
//    unicode_char, or any other character type we want.  However, we
77
//    just provided a version for char and const char.  C++ is
78
//    inconvenient, but correct, here.  Ask Amit if you want to know
79
//    the type safety details.
80
// ----------------------------------------------------------------------
81
void StripWhiteSpace(const char** str, int* len);
82
83
//------------------------------------------------------------------------
84
// StripTrailingWhitespace()
85
//   Removes whitespace at the end of the string *s.
86
//------------------------------------------------------------------------
87
void StripTrailingWhitespace(string* s);
88
89
//------------------------------------------------------------------------
90
// StripTrailingNewline(string*)
91
//   Strips the very last trailing newline or CR+newline from its
92
//   input, if one exists.  Useful for dealing with MapReduce's text
93
//   input mode, which appends '\n' to each map input.  Returns true
94
//   if a newline was stripped.
95
//------------------------------------------------------------------------
96
bool StripTrailingNewline(string* s);
97
98
0
inline void StripWhiteSpace(char** str, int* len) {
99
0
    // The "real" type for StripWhiteSpace is ForAll char types C, take
100
0
    // (C, int) as input and return (C, int) as output.  We're using the
101
0
    // cast here to assert that we can take a char*, even though the
102
0
    // function thinks it's assigning to const char*.
103
0
    StripWhiteSpace(const_cast<const char**>(str), len);
104
0
}
105
106
1.96k
// StringPiece overload of StripWhiteSpace.  Reads the piece's current data
// pointer and size, lets the (const char**, int*) overload adjust them, and
// then re-points *str at the adjusted range.
inline void StripWhiteSpace(StringPiece* str) {
    const char* begin = str->data();
    int length = str->size();

    // The callee updates 'begin' and 'length' in place.
    StripWhiteSpace(&begin, &length);

    str->set(begin, length);
}
112
113
void StripWhiteSpace(string* str);
114
115
namespace strings {

// Calls StripWhiteSpace(&element) on every element of *collection, visiting
// the elements in iteration order from begin() to end().  Collection must
// expose a nested 'iterator' type, and a StripWhiteSpace overload accepting
// a pointer to the element type must be available.
template <typename Collection>
void StripWhiteSpaceInCollection(Collection* collection) {
    typename Collection::iterator cursor = collection->begin();
    while (cursor != collection->end()) {
        StripWhiteSpace(&(*cursor));
        ++cursor;
    }
}

} // namespace strings
124
125
// ----------------------------------------------------------------------
126
// StripLeadingWhiteSpace
127
//    "Removes" whitespace from beginning of string. Returns ptr to first
128
//    non-whitespace character if one is present, NULL otherwise. Assumes
129
//    "line" is null-terminated.
130
// ----------------------------------------------------------------------
131
132
0
inline const char* StripLeadingWhiteSpace(const char* line) {
133
    // skip leading whitespace
134
0
    while (ascii_isspace(*line)) ++line;
135
136
0
    if ('\0' == *line) // end of line, no non-whitespace
137
0
        return NULL;
138
139
0
    return line;
140
0
}
141
142
// StripLeadingWhiteSpace for non-const strings.
143
0
inline char* StripLeadingWhiteSpace(char* line) {
144
0
    return const_cast<char*>(StripLeadingWhiteSpace(const_cast<const char*>(line)));
145
0
}
146
147
void StripLeadingWhiteSpace(string* str);
148
149
// Remove leading, trailing, and duplicate internal whitespace.
150
void RemoveExtraWhitespace(string* s);
151
152
// ----------------------------------------------------------------------
153
// SkipLeadingWhiteSpace
154
//    Returns str advanced past white space characters, if any.
155
//    Never returns NULL.  "str" must be terminated by a null character.
156
// ----------------------------------------------------------------------
157
0
inline const char* SkipLeadingWhiteSpace(const char* str) {
158
0
    while (ascii_isspace(*str)) ++str;
159
0
    return str;
160
0
}
161
162
0
inline char* SkipLeadingWhiteSpace(char* str) {
163
0
    while (ascii_isspace(*str)) ++str;
164
0
    return str;
165
0
}
166
167
// ----------------------------------------------------------------------
168
// StripCurlyBraces
169
//    Strips everything enclosed in pairs of curly braces and the curly
170
//    braces. Doesn't touch open braces. It doesn't handle nested curly
171
//    braces. This is used for removing things like {:stopword} from
172
//    queries.
173
// StripBrackets does the same, but allows the caller to specify different
174
//    left and right bracket characters, such as '(' and ')'.
175
// ----------------------------------------------------------------------
176
177
void StripCurlyBraces(string* s);
178
void StripBrackets(char left, char right, string* s);
179
180
// ----------------------------------------------------------------------
181
// StripMarkupTags
182
//    Strips everything enclosed in pairs of angle brackets and the angle
183
//    brackets.
184
//    This is used for stripping strings of markup; e.g. going from
185
//    "the quick <b>brown</b> fox" to "the quick brown fox."
186
//    If you want to skip entire sections of markup (e.g. the word "brown"
187
//    too in that example), see webutil/pageutil/pageutil.h .
188
//    This function was designed for stripping the bold tags (inserted by the
189
//    docservers) from the titles of news stories being returned by RSS.
190
//    This implementation DOES NOT cover all cases in html documents
191
//    like tags that contain quoted angle-brackets, or HTML comment.
192
//    For example <IMG SRC = "foo.gif" ALT = "A > B">
193
//    or <!-- <A comment> -->
194
//    See "perldoc -q html"
195
// ----------------------------------------------------------------------
196
197
void StripMarkupTags(string* s);
198
string OutputWithMarkupTagsStripped(const string& s);
199
200
// ----------------------------------------------------------------------
201
// TrimStringLeft
202
//    Removes any occurrences of the characters in 'remove' from the start
203
//    of the string.  Returns the number of chars trimmed.
204
// ----------------------------------------------------------------------
205
int TrimStringLeft(string* s, const StringPiece& remove);
206
207
// ----------------------------------------------------------------------
208
// TrimStringRight
209
//    Removes any occurrences of the characters in 'remove' from the end
210
//    of the string.  Returns the number of chars trimmed.
211
// ----------------------------------------------------------------------
212
int TrimStringRight(string* s, const StringPiece& remove);
213
214
// ----------------------------------------------------------------------
215
// TrimString
216
//    Removes any occurrences of the characters in 'remove' from either
217
//    end of the string.
218
// ----------------------------------------------------------------------
219
0
// Trims characters contained in 'remove' from both ends of *s by calling
// TrimStringRight and then TrimStringLeft.  Returns the sum of the two
// counts those calls report.
inline int TrimString(string* s, const StringPiece& remove) {
    // Both helpers mutate *s, so they are called as separate statements
    // rather than as operands of a single '+', whose operand evaluation order
    // is unspecified.  The right side is trimmed first.
    const int trimmed_right = TrimStringRight(s, remove);
    const int trimmed_left = TrimStringLeft(s, remove);
    return trimmed_right + trimmed_left;
}
222
223
// ----------------------------------------------------------------------
224
// TrimRunsInString
225
//    Removes leading and trailing runs, and collapses middle
226
//    runs of a set of characters into a single character (the
227
//    first one specified in 'remove').  Useful for collapsing
228
//    runs of repeated delimiters, whitespace, etc.  E.g.,
229
//    TrimRunsInString(&s, " :,()") removes leading and trailing
230
//    delimiter chars and collapses and converts internal runs
231
//    of delimiters to single ' ' characters, so, for example,
232
//    "  a:(b):c  " -> "a b c"
233
//    "first,last::(area)phone, ::zip" -> "first last area phone zip"
234
// ----------------------------------------------------------------------
235
void TrimRunsInString(string* s, StringPiece remove);
236
237
// ----------------------------------------------------------------------
238
// RemoveNullsInString
239
//    Removes any internal \0 characters from the string.
240
// ----------------------------------------------------------------------
241
void RemoveNullsInString(string* s);
242
243
// ----------------------------------------------------------------------
244
// strrm()
245
// memrm()
246
//    Remove all occurrences of a given character from a string.
247
//    Returns the new length.
248
// ----------------------------------------------------------------------
249
250
int strrm(char* str, char c);
251
int memrm(char* str, int strlen, char c);
252
253
// ----------------------------------------------------------------------
254
// strrmm()
255
//    Remove all occurrences of a given set of characters from a string.
256
//    Returns the new length.
257
// ----------------------------------------------------------------------
258
int strrmm(char* str, const char* chars);
259
int strrmm(string* str, const string& chars);