Coverage Report

Created: 2026-03-16 19:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/string_utils/string_utils.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/StringUtils/StringUtils.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <cstddef>
24
#include <cstring>
25
#include <string>
26
27
#include "common/compiler_util.h"
28
29
/// More efficient than libc because these functions do not respect the locale. For some functions, however, a table-based implementation could be better.
30
31
254
/// Returns true when `c`, viewed as an unsigned byte, is below 0x80
/// (i.e. it fits in 7 bits).
inline bool is_ascii(char c) {
    // Go through unsigned char so the comparison does not depend on
    // whether plain `char` is signed on the target platform.
    const auto byte = static_cast<unsigned char>(c);
    return byte < 0x80;
}
34
35
34.2k
/// Returns true when `c` is an ASCII letter: 'a'..'z' or 'A'..'Z'.
inline bool is_alpha_ascii(char c) {
    const bool lower_case = 'a' <= c && c <= 'z';
    const bool upper_case = 'A' <= c && c <= 'Z';
    return lower_case || upper_case;
}
38
39
6.61M
/// Returns true when `c` is an ASCII decimal digit, '0'..'9'.
inline bool is_numeric_ascii(char c) {
    /// Two plain range comparisons are used on purpose. This is faster than
    /// return UInt8(UInt8(c) - UInt8('0')) < UInt8(10);
    /// on Intel CPUs when compiled by gcc 8.
    const bool not_below_zero = c >= '0';
    const bool not_above_nine = c <= '9';
    return not_below_zero && not_above_nine;
}
45
46
33.7k
inline bool is_alpha_numeric_ascii(char c) {
47
33.7k
    return is_alpha_ascii(c) || is_numeric_ascii(c);
48
33.7k
}
49
50
250
inline bool is_word_char_ascii(char c) {
51
250
    return is_alpha_numeric_ascii(c) || c == '_';
52
250
}
53
54
250
inline bool is_valid_identifier_begin(char c) {
55
250
    return is_alpha_ascii(c) || c == '_';
56
250
}
57
58
33.0k
inline bool is_non_alnum(char c) {
59
33.0k
    return !is_alpha_numeric_ascii(c);
60
33.0k
}
61
62
0
inline bool is_tz_name_part_ascii(char c) {
63
0
    return is_alpha_ascii(c) || c == '_';
64
0
}
65
66
0
/// Returns true when `c` is the forward slash character '/'.
inline bool is_slash_ascii(char c) {
    constexpr char forward_slash = '/';
    return c == forward_slash;
}
69
70
// Our own definition of "isspace" that optimizes for the ' ' branch.
71
2.67M
inline bool is_whitespace_ascii(char c) {
72
2.67M
    return LIKELY(c == ' ') ||
73
2.67M
           UNLIKELY(c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r');
74
2.67M
}
75
76
306
inline bool is_not_whitespace_ascii(char c) {
77
306
    return !is_whitespace_ascii(c);
78
306
}
79
80
424
inline bool is_hex_ascii(char c) {
81
424
    return (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || is_numeric_ascii(c);
82
424
}