Coverage Report

Created: 2024-11-20 19:28

/root/doris/be/src/gutil/strings/escaping.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2006 Google Inc. All Rights Reserved.
2
// Authors: Numerous. Principal maintainers are csilvers and zunger.
3
//
4
// This is a grab-bag file for string utilities involved in escaping and
5
// unescaping strings in various ways. Who knew there were so many?
6
//
7
// NOTE: Although the functions declared here have been imported into
8
// the global namespace, the using statements are slated for removal.
9
// Do not refer to these symbols without properly namespace-qualifying
10
// them with "strings::". Of course you may also use "using" statements
11
// within a .cc file.
12
//
13
// There are more escaping functions in:
14
//   webutil/html/tagutils.h (Escaping strings for HTML, PRE, JavaScript, etc.)
15
//   webutil/url/url.h (Escaping for URL's, both RFC-2396 and other methods)
16
//   template/template_modifiers.h (All sorts of stuff)
17
//   util/regex/re2/re2.h (Escaping for literals within regular expressions
18
//                         - see RE2::QuoteMeta).
19
// And probably many more places, as well.
20
21
#pragma once
22
23
#include <stddef.h>
24
25
#include <string>
26
using std::string;
27
#include <vector>
28
using std::vector;
29
30
#include "common/logging.h"
31
32
#include "gutil/strings/ascii_ctype.h"
33
#include "gutil/strings/charset.h"
34
#include "gutil/strings/stringpiece.h"
35
36
namespace strings {
37
38
// ----------------------------------------------------------------------
39
// EscapeStrForCSV()
40
//    Escapes the quotes in 'src' by doubling them. This is necessary
41
//    for generating CSV files (see SplitCSVLine).
42
//    Returns the number of characters written into dest (not counting
43
//    the \0) or -1 if there was insufficient space.
44
//
45
//    Example: [some "string" to test] --> [some ""string"" to test]
46
// ----------------------------------------------------------------------
47
int EscapeStrForCSV(const char* src, char* dest, int dest_len);
48
49
// ----------------------------------------------------------------------
50
// UnescapeCEscapeSequences()
51
//    Copies "source" to "dest", rewriting C-style escape sequences
52
//    -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII
53
//    equivalents.  "dest" must be sufficiently large to hold all
54
//    the characters in the rewritten string (i.e. at least as large
55
//    as strlen(source) + 1 should be safe, since the replacements
56
//    are always shorter than the original escaped sequences).  It's
57
//    safe for source and dest to be the same.  RETURNS the length
58
//    of dest.
59
//
60
//    It allows hex sequences \xhh, or generally \xhhhhh with an
61
//    arbitrary number of hex digits, but all of them together must
62
//    specify a value of a single byte (e.g. \x0045 is equivalent
63
//    to \x45, and \x1234 is erroneous). If the value is too large,
64
//    it is truncated to 8 bits and an error is set. This is also
65
//    true of octal values that exceed 0xff.
66
//
67
//    It also allows escape sequences of the form \uhhhh (exactly four
68
//    hex digits, upper or lower case) or \Uhhhhhhhh (exactly eight
69
//    hex digits, upper or lower case) to specify a Unicode code
70
//    point. The dest array will contain the UTF8-encoded version of
71
//    that code-point (e.g., if source contains \u2019, then dest will
72
//    contain the three bytes 0xE2, 0x80, and 0x99). For the inverse
73
//    transformation, use UniLib::UTF8EscapeString
74
//    (util/utf8/public/unilib.h), not CEscapeString.
75
//
76
//    Errors: In the first form of the call, errors are reported with
77
//    LOG(ERROR). The same is true for the second form of the call if
78
//    the pointer to the string vector is NULL; otherwise, error
79
//    messages are stored in the vector. In either case, the effect on
80
//    the dest array is not defined, but rest of the source will be
81
//    processed.
82
//
83
//    *** DEPRECATED: Use CUnescape() in new code ***
84
//    ----------------------------------------------------------------------
85
int UnescapeCEscapeSequences(const char* source, char* dest);
86
int UnescapeCEscapeSequences(const char* source, char* dest, vector<string>* errors);
87
88
// ----------------------------------------------------------------------
89
// UnescapeCEscapeString()
90
//    This does the same thing as UnescapeCEscapeSequences, but creates
91
//    a new string. The caller does not need to worry about allocating
92
//    a dest buffer. This should be used for non performance critical
93
//    tasks such as printing debug messages. It is safe for src and dest
94
//    to be the same.
95
//
96
//    The second call stores its errors in a supplied string vector.
97
//    If the string vector pointer is NULL, it reports the errors with LOG().
98
//
99
//    In the first and second calls, the length of dest is returned. In the
100
//    third call, the new string is returned.
101
//
102
//    *** DEPRECATED: Use CUnescape() in new code ***
103
// ----------------------------------------------------------------------
104
int UnescapeCEscapeString(const string& src, string* dest);
105
int UnescapeCEscapeString(const string& src, string* dest, vector<string>* errors);
106
string UnescapeCEscapeString(const string& src);
107
108
// ----------------------------------------------------------------------
109
// CUnescape()
110
//    Copies "source" to "dest", rewriting C-style escape sequences
111
//    -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII
112
//    equivalents.  "dest" must be sufficiently large to hold all
113
//    the characters in the rewritten string (i.e. at least as large
114
//    as source.size() should be safe, since the replacements
115
//    are never longer than the original escaped sequences).  It's
116
//    safe for source and dest to be the same.  RETURNS true if
117
//    conversion was successful, false otherwise. Stores the size of
118
//    the result in 'dest_len'.
119
//
120
//    It allows hex sequences \xhh, or generally \xhhhhh with an
121
//    arbitrary number of hex digits, but all of them together must
122
//    specify a value of a single byte (e.g. \x0045 is equivalent
123
//    to \x45, and \x1234 is erroneous). If the value is too large,
124
//    an error is set. This is also true of octal values that exceed 0xff.
125
//
126
//    It also allows escape sequences of the form \uhhhh (exactly four
127
//    hex digits, upper or lower case) or \Uhhhhhhhh (exactly eight
128
//    hex digits, upper or lower case) to specify a Unicode code
129
//    point. The dest array will contain the UTF8-encoded version of
130
//    that code-point (e.g., if source contains \u2019, then dest will
131
//    contain the three bytes 0xE2, 0x80, and 0x99). For the inverse
132
//    transformation, use UniLib::UTF8EscapeString
133
//    (util/utf8/public/unilib.h), not CEscapeString.
134
//
135
//    Errors: Sets the description of the first encountered error in
136
//    'error'. To disable error reporting, set 'error' to NULL.
137
// ----------------------------------------------------------------------
138
bool CUnescape(const StringPiece& source, char* dest, int* dest_len, string* error);
139
140
bool CUnescape(const StringPiece& source, string* dest, string* error);
141
142
// A version with no error reporting.
143
0
inline bool CUnescape(const StringPiece& source, string* dest) {
144
0
    return CUnescape(source, dest, NULL);
145
0
}
146
147
// ----------------------------------------------------------------------
148
// CUnescapeForNullTerminatedString()
149
//
150
// This has the same behavior as CUnescape, except that each octal, hex,
151
// or Unicode escape sequence that resolves to a null character ('\0')
152
// is left in its original escaped form.  The result is a
153
// display-formatted string that can be interpreted as a null-terminated
154
// const char* and will not be cut short if it contains embedded null
155
// characters.
156
//
157
// ----------------------------------------------------------------------
158
159
bool CUnescapeForNullTerminatedString(const StringPiece& source, char* dest, int* dest_len,
160
                                      string* error);
161
162
bool CUnescapeForNullTerminatedString(const StringPiece& source, string* dest, string* error);
163
164
// A version with no error reporting.
165
0
inline bool CUnescapeForNullTerminatedString(const StringPiece& source, string* dest) {
166
0
    return CUnescapeForNullTerminatedString(source, dest, NULL);
167
0
}
168
169
// ----------------------------------------------------------------------
170
// CEscapeString()
171
// CHexEscapeString()
172
// Utf8SafeCEscapeString()
173
// Utf8SafeCHexEscapeString()
174
//    Copies 'src' to 'dest', escaping dangerous characters using
175
//    C-style escape sequences. This is very useful for preparing query
176
//    flags. 'src' and 'dest' should not overlap. The 'Hex' version uses
177
//    hexadecimal rather than octal sequences. The 'Utf8Safe' version
178
//    doesn't touch UTF-8 bytes.
179
//    Returns the number of bytes written to 'dest' (not including the \0)
180
//    or -1 if there was insufficient space.
181
//
182
//    Currently only \n, \r, \t, ", ', \ and !ascii_isprint() chars are escaped.
183
// ----------------------------------------------------------------------
184
int CEscapeString(const char* src, int src_len, char* dest, int dest_len);
185
int CHexEscapeString(const char* src, int src_len, char* dest, int dest_len);
186
int Utf8SafeCEscapeString(const char* src, int src_len, char* dest, int dest_len);
187
int Utf8SafeCHexEscapeString(const char* src, int src_len, char* dest, int dest_len);
188
189
// ----------------------------------------------------------------------
190
// CEscape()
191
// CHexEscape()
192
// Utf8SafeCEscape()
193
// Utf8SafeCHexEscape()
194
//    More convenient form of CEscapeString: returns result as a "string".
195
//    This version is slower than CEscapeString() because it does more
196
//    allocation.  However, it is much more convenient to use in
197
//    non-speed-critical code like logging messages etc.
198
// ----------------------------------------------------------------------
199
string CEscape(const StringPiece& src);
200
string CHexEscape(const StringPiece& src);
201
string Utf8SafeCEscape(const StringPiece& src);
202
string Utf8SafeCHexEscape(const StringPiece& src);
203
204
// ----------------------------------------------------------------------
205
// BackslashEscape()
206
//    Given a string and a list of characters to escape, replace any
207
//    instance of one of those characters with \ + that character. For
208
//    example, when exporting maps to /varz, label values need to have
209
//    all dots escaped. Appends the result to dest.
210
// BackslashUnescape()
211
//    Replace \ + any of the indicated "unescape me" characters with just
212
//    that character. Appends the result to dest.
213
//
214
//    IMPORTANT:
215
//    This function does not escape \ by default, so if you do not include
216
//    it in the chars to escape you will most certainly get an undesirable
217
//    result. That is, it won't be a reversible operation:
218
//      string src = "foo\\:bar";
219
//      BackslashUnescape(BackslashEscape(src, ":"), ":") == "foo\\\\:bar"
220
//    On the other hand, for all strings "src", the following is true:
221
//      BackslashUnescape(BackslashEscape(src, ":\\"), ":\\") == src
222
// ----------------------------------------------------------------------
223
void BackslashEscape(const StringPiece& src, const strings::CharSet& to_escape, string* dest);
224
void BackslashUnescape(const StringPiece& src, const strings::CharSet& to_unescape, string* dest);
225
226
0
// Value-returning form of BackslashEscape(): runs the appending overload
// against a fresh, empty string and returns that string.
inline string BackslashEscape(const StringPiece& src, const strings::CharSet& to_escape) {
    string escaped;
    BackslashEscape(src, to_escape, &escaped);
    return escaped;
}
231
232
0
// Value-returning form of BackslashUnescape(): runs the appending overload
// against a fresh, empty string and returns that string.
inline string BackslashUnescape(const StringPiece& src, const strings::CharSet& to_unescape) {
    string unescaped;
    BackslashUnescape(src, to_unescape, &unescaped);
    return unescaped;
}
237
238
// ----------------------------------------------------------------------
239
// QuotedPrintableUnescape()
240
//    Check out http://www.cis.ohio-state.edu/htbin/rfc/rfc2045.html for
241
//    more details, only briefly implemented. But from the web...
242
//    Quoted-printable is an encoding method defined in the MIME
243
//    standard. It is used primarily to encode 8-bit text (such as text
244
//    that includes foreign characters) into 7-bit US ASCII, creating a
245
//    document that is mostly readable by humans, even in its encoded
246
//    form. All MIME compliant applications can decode quoted-printable
247
//    text, though they may not necessarily be able to properly display the
248
//    document as it was originally intended. As quoted-printable encoding
249
//    is implemented most commonly, printable ASCII characters (values 33
250
//    through 126, excluding 61), tabs and spaces that do not appear at the
251
//    end of lines, and end-of-line characters are not encoded. Other
252
//    characters are represented by an equal sign (=) immediately followed
253
//    by that character's hexadecimal value. Lines that are longer than 76
254
//    characters are shortened by line breaks, with the equal sign marking
255
//    where the breaks occurred.
256
//
257
//    Note that QuotedPrintableUnescape is different from 'Q'-encoding as
258
//    defined in rfc2047. In particular, this does not treat '_'s as spaces.
259
//
260
//    See QEncodingUnescape().
261
//
262
//    Copies "src" to "dest", rewriting quoted printable escape sequences
263
//    =XX to their ASCII equivalents. src is not null terminated, instead
264
//    specify len. I recommend that slen<szdest, but we honor szdest
265
//    anyway.
266
//    RETURNS the length of dest.
267
// ----------------------------------------------------------------------
268
int QuotedPrintableUnescape(const char* src, int slen, char* dest, int szdest);
269
270
// ----------------------------------------------------------------------
271
// QEncodingUnescape()
272
//    This is very similar to QuotedPrintableUnescape except that we convert
273
//    '_'s into spaces. (See RFC 2047)
274
//    http://www.faqs.org/rfcs/rfc2047.html.
275
//
276
//    Copies "src" to "dest", rewriting q-encoding escape sequences
277
//    =XX to their ASCII equivalents. src is not null terminated, instead
278
//    specify len. I recommend that slen<szdest, but we honour szdest
279
//    anyway.
280
//    RETURNS the length of dest.
281
// ----------------------------------------------------------------------
282
int QEncodingUnescape(const char* src, int slen, char* dest, int szdest);
283
284
// ----------------------------------------------------------------------
285
// Base64Unescape()
286
// WebSafeBase64Unescape()
287
//    Copies "src" to "dest", where src is in base64 and is written to its
288
//    ASCII equivalents. src is not null terminated, instead specify len.
289
//    I recommend that slen<szdest, but we honor szdest anyway.
290
//    RETURNS the length of dest, or -1 if src contains invalid chars.
291
//    The WebSafe variation uses '-' instead of '+' and '_' instead of '/'.
292
//    The variations that store into a string clear the string first, and
293
//    return false (with dest empty) if src contains invalid chars; for
294
//    these versions src and dest must be different strings.
295
// ----------------------------------------------------------------------
296
int Base64Unescape(const char* src, int slen, char* dest, int szdest);
297
bool Base64Unescape(const char* src, int slen, string* dest);
298
0
inline bool Base64Unescape(const string& src, string* dest) {
299
0
    return Base64Unescape(src.data(), src.size(), dest);
300
0
}
301
302
int WebSafeBase64Unescape(const char* src, int slen, char* dest, int szdest);
303
bool WebSafeBase64Unescape(const char* src, int slen, string* dest);
304
0
inline bool WebSafeBase64Unescape(const string& src, string* dest) {
305
0
    return WebSafeBase64Unescape(src.data(), src.size(), dest);
306
0
}
307
308
// Return the length to use for the output buffer given to the base64 escape
309
// routines. Make sure to use the same value for do_padding in both.
310
// This function may return incorrect results if given input_len values that
311
// are extremely high, which should happen rarely.
312
int CalculateBase64EscapedLen(int input_len, bool do_padding);
313
// Use this version when calling Base64Escape without a do_padding arg.
314
int CalculateBase64EscapedLen(int input_len);
315
316
// ----------------------------------------------------------------------
317
// Base64Escape()
318
// WebSafeBase64Escape()
319
//    Encode "src" to "dest" using base64 encoding.
320
//    src is not null terminated, instead specify len.
321
//    'dest' should have at least CalculateBase64EscapedLen() length.
322
//    RETURNS the length of dest.
323
//    The WebSafe variation uses '-' instead of '+' and '_' instead of '/'
324
//    so that we can place the output in the URL or cookies without having
325
//    to escape them.  It also has an extra parameter "do_padding",
326
//    which when set to false will prevent padding with "=".
327
// ----------------------------------------------------------------------
328
int Base64Escape(const unsigned char* src, int slen, char* dest, int szdest);
329
int WebSafeBase64Escape(const unsigned char* src, int slen, char* dest, int szdest,
330
                        bool do_padding);
331
// Encode src into dest with padding.
332
void Base64Escape(const string& src, string* dest);
333
// Encode src into dest web-safely without padding.
334
void WebSafeBase64Escape(const string& src, string* dest);
335
// Encode src into dest web-safely with padding.
336
void WebSafeBase64EscapeWithPadding(const string& src, string* dest);
337
338
void Base64Escape(const unsigned char* src, int szsrc, string* dest, bool do_padding);
339
void WebSafeBase64Escape(const unsigned char* src, int szsrc, string* dest, bool do_padding);
340
341
// ----------------------------------------------------------------------
342
// Base32Unescape()
343
//    Copies "src" to "dest", where src is in base32 and is written to its
344
//    ASCII equivalents. src is not null terminated, instead specify len.
345
//    RETURNS the length of dest, or -1 if src contains invalid chars.
346
// ----------------------------------------------------------------------
347
int Base32Unescape(const char* src, int slen, char* dest, int szdest);
348
bool Base32Unescape(const char* src, int slen, string* dest);
349
0
inline bool Base32Unescape(const string& src, string* dest) {
350
0
    return Base32Unescape(src.data(), src.size(), dest);
351
0
}
352
353
// ----------------------------------------------------------------------
354
// Base32Escape()
355
//    Encode "src" to "dest" using base32 encoding.
356
//    src is not null terminated, instead specify len.
357
//    'dest' should have at least CalculateBase32EscapedLen() length.
358
//    RETURNS the length of dest. RETURNS 0 if szsrc is zero, or szdest is
359
//    too small to fit the fully encoded result.  'dest' is padded with '='.
360
//
361
//    Note that this is "Base 32 Encoding" from RFC 4648 section 6.
362
// ----------------------------------------------------------------------
363
int Base32Escape(const unsigned char* src, size_t szsrc, char* dest, size_t szdest);
364
bool Base32Escape(const string& src, string* dest);
365
366
// ----------------------------------------------------------------------
367
// Base32HexEscape()
368
//    Encode "src" to "dest" using base32hex encoding.
369
//    src is not null terminated, instead specify len.
370
//    'dest' should have at least CalculateBase32EscapedLen() length.
371
//    RETURNS the length of dest. RETURNS 0 if szsrc is zero, or szdest is
372
//    too small to fit the fully encoded result.  'dest' is padded with '='.
373
//
374
//    Note that this is "Base 32 Encoding with Extended Hex Alphabet"
375
//    from RFC 4648 section 7.
376
// ----------------------------------------------------------------------
377
int Base32HexEscape(const unsigned char* src, size_t szsrc, char* dest, size_t szdest);
378
bool Base32HexEscape(const string& src, string* dest);
379
380
// Return the length to use for the output buffer given to the base32 escape
381
// routines.  This function may return incorrect results if given input_len
382
// values that are extremely high, which should happen rarely.
383
int CalculateBase32EscapedLen(size_t input_len);
384
385
// ----------------------------------------------------------------------
386
// EightBase32DigitsToTenHexDigits()
387
// TenHexDigitsToEightBase32Digits()
388
//    Convert base32 to and from hex.
389
//
390
//   for EightBase32DigitsToTenHexDigits():
391
//     *in must point to 8 base32 digits.
392
//     *out must point to 10 bytes.
393
//
394
//   for TenHexDigitsToEightBase32Digits():
395
//     *in must point to 10 hex digits.
396
//     *out must point to 8 bytes.
397
//
398
//   Note that the Base64 functions above are different. They convert base64
399
//   to and from binary data. We convert to and from string representations
400
//   of hex. They deal with arbitrary lengths and we deal with single,
401
//   whole base32 quanta.
402
//
403
//   See RFC3548 at http://www.ietf.org/rfc/rfc3548.txt
404
//   for details on base32.
405
// ----------------------------------------------------------------------
406
void EightBase32DigitsToTenHexDigits(const char* in, char* out);
407
void TenHexDigitsToEightBase32Digits(const char* in, char* out);
408
409
// ----------------------------------------------------------------------
410
// EightBase32DigitsToFiveBytes()
411
// FiveBytesToEightBase32Digits()
412
//   Convert base32 to and from binary
413
//
414
//   for EightBase32DigitsToFiveBytes():
415
//     *in must point to 8 base32 digits.
416
//     *out must point to 5 bytes.
417
//
418
//   for FiveBytesToEightBase32Digits():
419
//     *in must point to 5 bytes.
420
//     *out must point to 8 bytes.
421
//
422
//   Note that the Base64 functions above are different.  They deal with
423
//   arbitrary lengths and we deal with single, whole base32 quanta.
424
// ----------------------------------------------------------------------
425
void EightBase32DigitsToFiveBytes(const char* in, unsigned char* bytes_out);
426
void FiveBytesToEightBase32Digits(const unsigned char* in_bytes, char* out);
427
428
// ----------------------------------------------------------------------
429
// EscapeFileName()
430
// UnescapeFileName()
431
//   Utility functions to (un)escape strings to make them suitable for use in
432
//   filenames. Characters not in [a-zA-Z0-9-_.] will be escaped into %XX.
433
//   E.g: "Hello, world!" will be escaped as "Hello%2c%20world%21"
434
//
435
//   NB that this function escapes slashes, so the output will be a flat
436
//   filename and will not keep the directory structure. Slashes are replaced
437
//   with '~', instead of a %XX sequence to make it easier for people to
438
//   understand the escaped form when the original string is a file path.
439
//
440
//   WARNING: filenames produced by these functions may not be compatible with
441
//   Colossus FS. In particular, the '%' character has a special meaning in
442
//   CFS.
443
//
444
//   The versions that receive a string for the output will append to it.
445
// ----------------------------------------------------------------------
446
void EscapeFileName(const StringPiece& src, string* dst);
447
void UnescapeFileName(const StringPiece& src, string* dst);
448
0
inline string EscapeFileName(const StringPiece& src) {
449
0
    string r;
450
0
    EscapeFileName(src, &r);
451
0
    return r;
452
0
}
453
0
inline string UnescapeFileName(const StringPiece& src) {
454
0
    string r;
455
0
    UnescapeFileName(src, &r);
456
0
    return r;
457
0
}
458
459
// ----------------------------------------------------------------------
460
// Here are a couple utility methods to change ints to hex chars & back
461
// ----------------------------------------------------------------------
462
463
0
inline int int_to_hex_digit(int i) {
464
0
    DCHECK((i >= 0) && (i <= 15));
465
0
    return ((i < 10) ? (i + '0') : ((i - 10) + 'A'));
466
0
}
467
468
0
inline int int_to_lower_hex_digit(int i) {
469
0
    DCHECK((i >= 0) && (i <= 15));
470
0
    return (i < 10) ? (i + '0') : ((i - 10) + 'a');
471
0
}
472
473
0
inline int hex_digit_to_int(char c) {
474
    /* Assume ASCII. */
475
0
    DCHECK('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61);
476
0
    DCHECK(ascii_isxdigit(c));
477
0
    int x = static_cast<unsigned char>(c);
478
0
    if (x > '9') {
479
0
        x += 9;
480
0
    }
481
0
    return x & 0xf;
482
0
}
483
484
// ----------------------------------------------------------------------
485
// a2b_hex()
486
//  Description: Ascii-to-Binary hex conversion.  This converts
487
//         2*'num' hexadecimal characters to 'num' binary data.
488
//        Return value: 'num' bytes of binary data (via the 'to' argument)
489
// ----------------------------------------------------------------------
490
void a2b_hex(const char* from, unsigned char* to, int num);
491
void a2b_hex(const char* from, char* to, int num);
492
void a2b_hex(const char* from, string* to, int num);
493
string a2b_hex(const string& a);
494
495
// ----------------------------------------------------------------------
496
// a2b_bin()
497
//  Description: Ascii-to-Binary binary conversion.  This converts
498
//        a.size() binary characters (ascii '0' or '1') to
499
//        ceil(a.size()/8) bytes of binary data.  The first character is
500
//        considered the most significant if byte_order_msb is set.  a is
501
//        considered to be padded with trailing 0s if its size is not a
502
//        multiple of 8.
503
//        Return value: ceil(a.size()/8) bytes of binary data
504
// ----------------------------------------------------------------------
505
string a2b_bin(const string& a, bool byte_order_msb);
506
507
// ----------------------------------------------------------------------
508
// b2a_hex()
509
//  Description: Binary-to-Ascii hex conversion.  This converts
510
//   'num' bytes of binary to a 2*'num'-character hexadecimal representation
511
//    Return value: 2*'num' characters of ascii text (via the 'to' argument)
512
// ----------------------------------------------------------------------
513
void b2a_hex(const unsigned char* from, char* to, int num);
514
void b2a_hex(const unsigned char* from, string* to, int num);
515
516
// ----------------------------------------------------------------------
517
// b2a_hex()
518
//  Description: Binary-to-Ascii hex conversion.  This converts
519
//   'num' bytes of binary to a 2*'num'-character hexadecimal representation
520
//    Return value: 2*'num' characters of ascii string
521
// ----------------------------------------------------------------------
522
string b2a_hex(const char* from, int num);
523
string b2a_hex(const StringPiece& b);
524
525
// ----------------------------------------------------------------------
526
// b2a_bin()
527
//  Description: Binary-to-Ascii binary conversion.  This converts
528
//   b.size() bytes of binary to a 8*b.size() character representation
529
//   (ascii '0' or '1').  The highest order bit in each byte is returned
530
//   first in the string if byte_order_msb is set.
531
//   Return value: 8*b.size() characters of ascii text
532
// ----------------------------------------------------------------------
533
string b2a_bin(const string& b, bool byte_order_msb);
534
535
// ----------------------------------------------------------------------
536
// ShellEscape
537
//   Make a shell command argument from a string.
538
//   Returns a Bourne shell string literal such that, once the shell finishes
539
//   expanding the argument, the argument passed on to the program being
540
//   run will be the same as whatever you passed in.
541
//   NOTE: This is "ported" from python2.2's commands.mkarg(); it should be
542
//         safe for Bourne shell syntax (i.e. sh, bash), but mileage may vary
543
//         with other shells.
544
// ----------------------------------------------------------------------
545
string ShellEscape(StringPiece src);
546
547
// Runs ShellEscape() on the arguments, concatenates them with a space, and
548
// returns the resulting string.
549
// Builds a single command-line string from the range [begin, end): each
// element is passed through ShellEscape() and the escaped pieces are appended
// in order. A single space is inserted before a piece whenever the string
// accumulated so far is non-empty.
template <class InputIterator>
string ShellEscapeCommandLine(InputIterator begin, const InputIterator& end) {
    string command_line;
    while (begin != end) {
        if (!command_line.empty()) {
            command_line.append(" ");
        }
        command_line.append(ShellEscape(*begin));
        ++begin;
    }
    return command_line;
}
558
559
// Reads at most bytes_to_read from binary_string and writes it to
560
// ascii_string in lower case hex.
561
void ByteStringToAscii(const string& binary_string, int bytes_to_read, string* ascii_string);
562
563
0
inline string ByteStringToAscii(const string& binary_string, int bytes_to_read) {
564
0
    string result;
565
0
    ByteStringToAscii(binary_string, bytes_to_read, &result);
566
0
    return result;
567
0
}
568
569
// Converts the hex from ascii_string into binary data and
570
// writes the binary data into binary_string.
571
// Empty input successfully converts to empty output.
572
// Returns false and may modify output if it is
573
// unable to parse the hex string.
574
bool ByteStringFromAscii(const string& ascii_string, string* binary_string);
575
576
// Clean up a multi-line string to conform to Unix line endings.
577
// Reads from src and appends to dst, so usually dst should be empty.
578
// If there is no line ending at the end of a non-empty string, it can
579
// be added automatically.
580
//
581
// Four different types of input are correctly handled:
582
//
583
//   - Unix/Linux files: line ending is LF, pass through unchanged
584
//
585
//   - DOS/Windows files: line ending is CRLF: convert to LF
586
//
587
//   - Legacy Mac files: line ending is CR: convert to LF
588
//
589
//   - Garbled files: random line endings, convert gracefully
590
//                    lonely CR, lonely LF, CRLF: convert to LF
591
//
592
//   @param src The multi-line string to convert
593
//   @param dst The converted string is appended to this string
594
//   @param auto_end_last_line Automatically terminate the last line
595
//
596
//   Limitations:
597
//
598
//     This does not do the right thing for CRCRLF files created by
599
//     broken programs that do another Unix->DOS conversion on files
600
//     that are already in CRLF format.
601
void CleanStringLineEndings(const string& src, string* dst, bool auto_end_last_line);
602
603
// Same as above, but transforms the argument in place.
604
void CleanStringLineEndings(string* str, bool auto_end_last_line);
605
606
} // namespace strings
607
608
// The following functions used to be defined in strutil.h in the top-level
609
// namespace, so we alias them here. Do not add new functions here.
610
//
611
//             Talk to him if you want to help.
612
//
613
// DEPRECATED(mec): Using these names in the global namespace is deprecated.
614
// Use the strings:: names.
615
616
using strings::EscapeStrForCSV;
617
using strings::UnescapeCEscapeSequences;
618
using strings::UnescapeCEscapeString;
619
using strings::CEscapeString;
620
using strings::CHexEscapeString;
621
using strings::CEscape;
622
using strings::CHexEscape;
623
using strings::BackslashEscape;
624
using strings::BackslashUnescape;
625
using strings::QuotedPrintableUnescape;
626
using strings::QEncodingUnescape;
627
using strings::Base64Unescape;
628
using strings::WebSafeBase64Unescape;
629
using strings::CalculateBase64EscapedLen;
630
using strings::Base64Escape;
631
using strings::WebSafeBase64Escape;
632
using strings::WebSafeBase64EscapeWithPadding;
633
using strings::Base32Escape;
634
using strings::Base32HexEscape;
635
using strings::CalculateBase32EscapedLen;
636
using strings::EightBase32DigitsToTenHexDigits;
637
using strings::TenHexDigitsToEightBase32Digits;
638
using strings::EightBase32DigitsToFiveBytes;
639
using strings::FiveBytesToEightBase32Digits;
640
using strings::int_to_hex_digit;
641
using strings::int_to_lower_hex_digit;
642
using strings::hex_digit_to_int;
643
using strings::a2b_hex;
644
using strings::a2b_bin;
645
using strings::b2a_hex;
646
using strings::b2a_bin;
647
using strings::ShellEscape;
648
using strings::ShellEscapeCommandLine;
649
using strings::ByteStringFromAscii;
650
using strings::ByteStringToAscii;
651
using strings::CleanStringLineEndings;