Coverage Report

Created: 2026-03-20 05:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/format_ip.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/formatIPv6.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <algorithm>
24
#include <array>
25
#include <bit>
26
#include <cstdint>
27
#include <cstring>
28
#include <utility>
29
30
#include "core/types.h"
31
#include "exec/common/hex.h"
32
#include "exec/common/string_utils/string_utils.h"
33
34
constexpr size_t IPV4_BINARY_LENGTH = 4;
35
constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte.
36
constexpr size_t IPV6_MAX_TEXT_LENGTH = 39;
37
constexpr size_t IPV4_MIN_NUM_VALUE = 0;          //num value of '0.0.0.0'
38
constexpr size_t IPV4_MAX_NUM_VALUE = 4294967295; //num value of '255.255.255.255'
39
constexpr int IPV4_MAX_OCTET_VALUE = 255;         //max value of octet
40
constexpr size_t IPV4_OCTET_BITS = 8;
41
constexpr size_t DECIMAL_BASE = 10;
42
constexpr size_t IPV6_BINARY_LENGTH = 16;
43
44
namespace doris {
45
#include "common/compile_check_begin.h"
46
47
extern const std::array<std::pair<const char*, size_t>, 256> one_byte_to_string_lookup_table;
48
49
/** Format 4-byte binary sequesnce as IPv4 text: 'aaa.bbb.ccc.ddd',
50
  * expects in out to be in BE-format, that is 0x7f000001 => "127.0.0.1".
51
  *
52
  * Any number of the tail bytes can be masked with given mask string.
53
  *
54
  * Assumptions:
55
  *     src is IPV4_BINARY_LENGTH long,
56
  *     dst is IPV4_MAX_TEXT_LENGTH long,
57
  *     mask_tail_octets <= IPV4_BINARY_LENGTH
58
  *     mask_string is NON-NULL, if mask_tail_octets > 0.
59
  *
60
  * Examples:
61
  *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 0, nullptr);
62
  *         > dst == "127.0.0.1"
63
  *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "xxx");
64
  *         > dst == "127.0.0.xxx"
65
  *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "0");
66
  *         > dst == "127.0.0.0"
67
  */
68
inline void format_ipv4(const unsigned char* src, size_t src_size, char*& dst,
69
1.50M
                        uint8_t mask_tail_octets = 0, const char* mask_string = "xxx") {
70
1.50M
    const size_t mask_length = mask_string ? strlen(mask_string) : 0;
71
1.50M
    const size_t limit = std::min(IPV4_BINARY_LENGTH, IPV4_BINARY_LENGTH - mask_tail_octets);
72
1.50M
    const size_t padding = std::min(4 - src_size, limit);
73
1.50M
    for (size_t octet = 0; octet < padding; ++octet) {
74
0
        *dst++ = '0';
75
0
        *dst++ = '.';
76
0
    }
77
78
7.54M
    for (size_t octet = 4 - src_size; octet < limit; ++octet) {
79
6.03M
        uint8_t value = 0;
80
        if constexpr (std::endian::native == std::endian::little)
81
6.03M
            value = static_cast<uint8_t>(src[IPV4_BINARY_LENGTH - octet - 1]);
82
        else
83
            value = static_cast<uint8_t>(src[octet]);
84
6.03M
        const uint8_t len = static_cast<uint8_t>(one_byte_to_string_lookup_table[value].second);
85
6.03M
        const char* str = one_byte_to_string_lookup_table[value].first;
86
87
6.03M
        memcpy(dst, str, len);
88
6.03M
        dst += len;
89
90
6.03M
        *dst++ = '.';
91
6.03M
    }
92
93
1.50M
    for (size_t mask = 0; mask < mask_tail_octets; ++mask) {
94
12
        memcpy(dst, mask_string, mask_length);
95
12
        dst += mask_length;
96
97
12
        *dst++ = '.';
98
12
    }
99
100
1.50M
    dst--;
101
1.50M
}
102
103
inline void format_ipv4(const unsigned char* src, char*& dst, uint8_t mask_tail_octets = 0,
104
1.50M
                        const char* mask_string = "xxx") {
105
1.50M
    format_ipv4(src, 4, dst, mask_tail_octets, mask_string);
106
1.50M
}
107
108
/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string.
109
 *
110
 * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`,
111
 * which should be long enough.
112
 * That is "127.0.0.1" becomes 0x7f000001.
113
 *
114
 * In case of failure doesn't modify buffer pointed by `dst`.
115
 *
116
 * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position())
117
 *           and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity.
118
 *           To parse strings use overloads below.
119
 *
120
 * @param src         - iterator (reference to pointer) over input string - warning - continuity is not guaranteed.
121
 * @param eof         - function returning true if iterator riched the end - warning - can break iterator's continuity.
122
 * @param dst         - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long.
123
 * @param first_octet - preparsed first octet
124
 * @return            - true if parsed successfully, false otherwise.
125
 */
126
template <typename T, typename EOFfunction>
127
    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
128
182k
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
129
182k
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
130
0
        return false;
131
0
    }
132
133
182k
    UInt32 result = 0;
134
182k
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
135
182k
    if (first_octet >= 0) {
136
388
        result |= first_octet << offset;
137
388
        offset -= IPV4_OCTET_BITS;
138
388
    }
139
140
710k
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
141
710k
        if (eof()) {
142
65
            return false;
143
65
        }
144
145
710k
        UInt32 value = 0;
146
710k
        size_t len = 0;
147
2.07M
        while (is_numeric_ascii(*src) && len <= 3) {
148
1.53M
            value = value * DECIMAL_BASE + (*src - '0');
149
1.53M
            ++len;
150
1.53M
            ++src;
151
1.53M
            if (eof()) {
152
174k
                break;
153
174k
            }
154
1.53M
        }
155
710k
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
156
6.29k
            return false;
157
6.29k
        }
158
704k
        result |= value << offset;
159
160
704k
        if (offset == 0) {
161
175k
            break;
162
175k
        }
163
704k
    }
164
165
175k
    memcpy(dst, &result, sizeof(result));
166
175k
    return true;
167
182k
}
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv4EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
128
178k
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
129
178k
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
130
0
        return false;
131
0
    }
132
133
178k
    UInt32 result = 0;
134
178k
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
135
178k
    if (first_octet >= 0) {
136
0
        result |= first_octet << offset;
137
0
        offset -= IPV4_OCTET_BITS;
138
0
    }
139
140
702k
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
141
702k
        if (eof()) {
142
65
            return false;
143
65
        }
144
145
702k
        UInt32 value = 0;
146
702k
        size_t len = 0;
147
2.05M
        while (is_numeric_ascii(*src) && len <= 3) {
148
1.52M
            value = value * DECIMAL_BASE + (*src - '0');
149
1.52M
            ++len;
150
1.52M
            ++src;
151
1.52M
            if (eof()) {
152
174k
                break;
153
174k
            }
154
1.52M
        }
155
702k
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
156
3.93k
            return false;
157
3.93k
        }
158
698k
        result |= value << offset;
159
160
698k
        if (offset == 0) {
161
174k
            break;
162
174k
        }
163
698k
    }
164
165
174k
    memcpy(dst, &result, sizeof(result));
166
174k
    return true;
167
178k
}
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv4EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
128
3.45k
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
129
3.45k
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
130
0
        return false;
131
0
    }
132
133
3.45k
    UInt32 result = 0;
134
3.45k
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
135
3.45k
    if (first_octet >= 0) {
136
0
        result |= first_octet << offset;
137
0
        offset -= IPV4_OCTET_BITS;
138
0
    }
139
140
6.78k
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
141
6.78k
        if (eof()) {
142
0
            return false;
143
0
        }
144
145
6.78k
        UInt32 value = 0;
146
6.78k
        size_t len = 0;
147
17.6k
        while (is_numeric_ascii(*src) && len <= 3) {
148
10.9k
            value = value * DECIMAL_BASE + (*src - '0');
149
10.9k
            ++len;
150
10.9k
            ++src;
151
10.9k
            if (eof()) {
152
0
                break;
153
0
            }
154
10.9k
        }
155
6.78k
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
156
2.35k
            return false;
157
2.35k
        }
158
4.43k
        result |= value << offset;
159
160
4.43k
        if (offset == 0) {
161
1.10k
            break;
162
1.10k
        }
163
4.43k
    }
164
165
1.10k
    memcpy(dst, &result, sizeof(result));
166
1.10k
    return true;
167
3.45k
}
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
128
320
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
129
320
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
130
0
        return false;
131
0
    }
132
133
320
    UInt32 result = 0;
134
320
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
135
320
    if (first_octet >= 0) {
136
320
        result |= first_octet << offset;
137
320
        offset -= IPV4_OCTET_BITS;
138
320
    }
139
140
959
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
141
959
        if (eof()) {
142
0
            return false;
143
0
        }
144
145
959
        UInt32 value = 0;
146
959
        size_t len = 0;
147
2.43k
        while (is_numeric_ascii(*src) && len <= 3) {
148
1.79k
            value = value * DECIMAL_BASE + (*src - '0');
149
1.79k
            ++len;
150
1.79k
            ++src;
151
1.79k
            if (eof()) {
152
319
                break;
153
319
            }
154
1.79k
        }
155
959
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
156
2
            return false;
157
2
        }
158
957
        result |= value << offset;
159
160
957
        if (offset == 0) {
161
318
            break;
162
318
        }
163
957
    }
164
165
318
    memcpy(dst, &result, sizeof(result));
166
318
    return true;
167
320
}
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
128
68
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
129
68
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
130
0
        return false;
131
0
    }
132
133
68
    UInt32 result = 0;
134
68
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
135
68
    if (first_octet >= 0) {
136
68
        result |= first_octet << offset;
137
68
        offset -= IPV4_OCTET_BITS;
138
68
    }
139
140
204
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
141
204
        if (eof()) {
142
0
            return false;
143
0
        }
144
145
204
        UInt32 value = 0;
146
204
        size_t len = 0;
147
642
        while (is_numeric_ascii(*src) && len <= 3) {
148
438
            value = value * DECIMAL_BASE + (*src - '0');
149
438
            ++len;
150
438
            ++src;
151
438
            if (eof()) {
152
0
                break;
153
0
            }
154
438
        }
155
204
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
156
0
            return false;
157
0
        }
158
204
        result |= value << offset;
159
160
204
        if (offset == 0) {
161
68
            break;
162
68
        }
163
204
    }
164
165
68
    memcpy(dst, &result, sizeof(result));
166
68
    return true;
167
68
}
168
169
/// returns pointer to the right after parsed sequence or null on failed parsing
170
178k
inline const char* parse_ipv4(const char* src, const char* end, unsigned char* dst) {
171
178k
    if (parse_ipv4(
172
2.75M
                src, [&src, end]() { return src == end; }, dst)) {
173
174k
        return src;
174
174k
    }
175
4.00k
    return nullptr;
176
178k
}
177
178
/// returns true if whole buffer was parsed successfully
179
178k
inline bool parse_ipv4_whole(const char* src, const char* end, unsigned char* dst) {
180
178k
    return parse_ipv4(src, end, dst) == end;
181
178k
}
182
183
/// returns pointer to the right after parsed sequence or null on failed parsing
184
3.45k
inline const char* parse_ipv4(const char* src, unsigned char* dst) {
185
3.45k
    if (parse_ipv4(
186
3.45k
                src, []() { return false; }, dst)) {
187
1.10k
        return src;
188
1.10k
    }
189
2.35k
    return nullptr;
190
3.45k
}
191
192
/// returns true if whole null-terminated string was parsed successfully
193
3.45k
inline bool parse_ipv4_whole(const char* src, unsigned char* dst) {
194
3.45k
    const char* end = parse_ipv4(src, dst);
195
3.45k
    return end != nullptr && *end == '\0';
196
3.45k
}
197
198
/// integer logarithm, return ceil(log(value, base)) (the smallest integer greater or equal than log(value, base)
199
0
inline constexpr UInt32 int_log(const UInt32 value, const UInt32 base, const bool carry) {
200
0
    return value >= base ? 1 + int_log(value / base, base, value % base || carry)
201
0
                         : value % base > 1 || carry;
202
0
}
203
204
/// Print integer in desired base, faster than sprintf.
205
/// NOTE This is not the best way. See https://github.com/miloyip/itoa-benchmark
206
/// But it doesn't matter here.
207
template <UInt32 base, typename T>
208
134k
inline void print_integer(char*& out, T value) {
209
134k
    if (value == 0) {
210
44
        *out++ = '0';
211
134k
    } else {
212
134k
        constexpr size_t buffer_size = sizeof(T) * int_log(256, base, false);
213
214
134k
        char buf[buffer_size];
215
134k
        auto ptr = buf;
216
217
560k
        while (value > 0) {
218
426k
            *ptr = hex_digit_lowercase(value % base);
219
426k
            ++ptr;
220
426k
            value /= base;
221
426k
        }
222
223
        /// Copy to out reversed.
224
560k
        while (ptr != buf) {
225
426k
            --ptr;
226
426k
            *out = *ptr;
227
426k
            ++out;
228
426k
        }
229
134k
    }
230
134k
}
231
232
/** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
233
  * performs significantly faster than the reference implementation due to the absence of sprintf calls,
234
  * bounds checking, unnecessary string copying and length calculation.
235
  * @param src         - pointer to IPv6 (16 bytes) stored in little-endian byte order
236
  * @param dst         - where to put format result bytes
237
  * @param zeroed_tail_bytes_count - the parameter is currently not being used
238
  */
239
1.49M
inline void format_ipv6(unsigned char* src, char*& dst, uint8_t zeroed_tail_bytes_count = 0) {
240
1.49M
    struct {
241
1.49M
        Int64 base, len;
242
1.49M
    } best {-1, 0}, cur {-1, 0};
243
1.49M
    std::array<UInt16, IPV6_BINARY_LENGTH / sizeof(UInt16)> words {};
244
245
    // the current function logic is processed in big endian manner
246
    // but ipv6 in doris is stored in little-endian byte order
247
    // so transfer to big-endian byte order first
248
    // compatible with parse_ipv6 function in format_ip.h
249
1.49M
    std::reverse(src, src + IPV6_BINARY_LENGTH);
250
251
    /** Preprocess:
252
        *    Copy the input (bytewise) array into a wordwise array.
253
        *    Find the longest run of 0x00's in src[] for :: shorthanding. */
254
13.4M
    for (size_t i = 0; i < (IPV6_BINARY_LENGTH - zeroed_tail_bytes_count); i += 2) {
255
11.9M
        words[i / 2] = (uint16_t)(src[i] << 8) | src[i + 1];
256
11.9M
    }
257
258
13.4M
    for (size_t i = 0; i < words.size(); i++) {
259
11.9M
        if (words[i] == 0) {
260
11.8M
            if (cur.base == -1) {
261
1.49M
                cur.base = i;
262
1.49M
                cur.len = 1;
263
10.3M
            } else {
264
10.3M
                cur.len++;
265
10.3M
            }
266
11.8M
        } else {
267
134k
            if (cur.base != -1) {
268
35.4k
                if (best.base == -1 || cur.len > best.len) {
269
35.4k
                    best = cur;
270
35.4k
                }
271
35.4k
                cur.base = -1;
272
35.4k
            }
273
134k
        }
274
11.9M
    }
275
276
1.49M
    if (cur.base != -1) {
277
1.45M
        if (best.base == -1 || cur.len > best.len) {
278
1.45M
            best = cur;
279
1.45M
        }
280
1.45M
    }
281
1.49M
    if (best.base != -1 && best.len < 2) {
282
32
        best.base = -1;
283
32
    }
284
285
    /// Format the result.
286
13.4M
    for (size_t i = 0; i < words.size(); i++) {
287
        /// Are we inside the best run of 0x00's?
288
11.9M
        if (best.base != -1) {
289
11.9M
            auto best_base = static_cast<size_t>(best.base);
290
11.9M
            if (i >= best_base && i < (best_base + best.len)) {
291
11.8M
                if (i == best_base) {
292
1.49M
                    *dst++ = ':';
293
1.49M
                }
294
11.8M
                continue;
295
11.8M
            }
296
11.9M
        }
297
        /// Are we following an initial run of 0x00s or any real hex?
298
134k
        if (i != 0) {
299
97.0k
            *dst++ = ':';
300
97.0k
        }
301
        /// Is this address an encapsulated IPv4?
302
134k
        if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu))) {
303
34
            uint8_t ipv4_buffer[IPV4_BINARY_LENGTH] = {0};
304
34
            memcpy(ipv4_buffer, src + 12, IPV4_BINARY_LENGTH);
305
            // Due to historical reasons format_ipv4() takes ipv4 in BE format, but inside ipv6 we store it in LE-format.
306
34
            if constexpr (std::endian::native == std::endian::little) {
307
34
                std::reverse(std::begin(ipv4_buffer), std::end(ipv4_buffer));
308
34
            }
309
34
            format_ipv4(ipv4_buffer, dst,
310
34
                        std::min(zeroed_tail_bytes_count, static_cast<uint8_t>(IPV4_BINARY_LENGTH)),
311
34
                        "0");
312
            // format_ipv4 has already added a null-terminator for us.
313
34
            return;
314
34
        }
315
134k
        print_integer<16>(dst, words[i]);
316
134k
    }
317
318
    /// Was it a trailing run of 0x00's?
319
1.49M
    if (best.base != -1 &&
320
1.49M
        static_cast<size_t>(best.base) + static_cast<size_t>(best.len) == words.size()) {
321
1.45M
        *dst++ = ':';
322
1.45M
    }
323
1.49M
}
324
325
/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv6 string.
326
*
327
* Parses the input string `src` and stores binary little-endian value into buffer pointed by `dst`,
328
* which should be long enough. In case of failure zeroes IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`.
329
*
330
* WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position())
331
*           and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity.
332
*           To parse strings use overloads below.
333
*
334
* @param src         - iterator (reference to pointer) over input string - warning - continuity is not guaranteed.
335
* @param eof         - function returning true if iterator riched the end - warning - can break iterator's continuity.
336
* @param dst         - where to put output bytes in little-endian byte order, expected to be non-null and at IPV6_BINARY_LENGTH-long.
337
* @param first_block - preparsed first block
338
* @return            - true if parsed successfully, false otherwise.
339
*/
340
template <typename T, typename EOFfunction>
341
    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
342
338k
inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) {
343
338k
    const auto clear_dst = [dst]() {
344
8.06k
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
345
8.06k
        return false;
346
8.06k
    };
_ZZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_iENKUlvE_clEv
Line
Count
Source
343
823
    const auto clear_dst = [dst]() {
344
823
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
345
823
        return false;
346
823
    };
_ZZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_iENKUlvE_clEv
Line
Count
Source
343
7.23k
    const auto clear_dst = [dst]() {
344
7.23k
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
345
7.23k
        return false;
346
7.23k
    };
347
348
338k
    if (src == nullptr || eof()) return clear_dst();
349
350
338k
    int groups = 0;            /// number of parsed groups
351
338k
    unsigned char* iter = dst; /// iterator over dst buffer
352
338k
    unsigned char* zptr =
353
338k
            nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started
354
355
338k
    std::memset(dst, '\0', IPV6_BINARY_LENGTH);
356
357
338k
    if (first_block >= 0) {
358
0
        *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu);
359
0
        *iter++ = static_cast<unsigned char>(first_block & 0xffu);
360
0
        if (*src == ':') {
361
0
            zptr = iter;
362
0
            ++src;
363
0
        }
364
0
        ++groups;
365
0
    }
366
367
338k
    bool group_start = true;
368
369
2.82M
    while (!eof() && groups < 8) {
370
2.49M
        if (*src == ':') {
371
2.12M
            ++src;
372
2.12M
            if (eof()) /// trailing colon is not allowed
373
32
                return clear_dst();
374
375
2.12M
            group_start = true;
376
377
2.12M
            if (*src == ':') {
378
61.1k
                if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed
379
157
                    return clear_dst();
380
60.9k
                zptr = iter;
381
60.9k
                ++src;
382
60.9k
                if (!eof() && *src == ':') {
383
                    /// more than one all-zeroes block is not allowed
384
5
                    return clear_dst();
385
5
                }
386
60.9k
                continue;
387
60.9k
            }
388
2.06M
            if (groups == 0) /// leading colon is not allowed
389
0
                return clear_dst();
390
2.06M
        }
391
392
        /// mixed IPv4 parsing
393
2.43M
        if (*src == '.') {
394
984
            if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first
395
596
                return clear_dst();
396
397
388
            if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group
398
0
                return clear_dst();
399
400
388
            ++src;
401
388
            if (eof()) return clear_dst();
402
403
            /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4
404
388
            --groups;
405
388
            iter -= 2;
406
407
388
            UInt16 num = 0;
408
1.16k
            for (int i = 0; i < 2; ++i) {
409
776
                unsigned char first = (iter[i] >> 4) & 0x0fu;
410
776
                unsigned char second = iter[i] & 0x0fu;
411
776
                if (first > 9 || second > 9) return clear_dst();
412
776
                (num *= 100) += first * 10 + second;
413
776
            }
414
388
            if (num > 255) return clear_dst();
415
416
            /// parse IPv4 with known first octet
417
388
            if (!parse_ipv4(src, eof, iter, num)) return clear_dst();
418
419
            if constexpr (std::endian::native == std::endian::little)
420
386
                std::reverse(iter, iter + IPV4_BINARY_LENGTH);
421
422
386
            iter += 4;
423
386
            groups += 2;
424
386
            break; /// IPv4 block is the last - end of parsing
425
388
        }
426
427
2.43M
        if (!group_start) /// end of parsing
428
1.62k
            break;
429
2.42M
        group_start = false;
430
431
2.42M
        UInt16 val = 0;  /// current decoded group
432
2.42M
        int xdigits = 0; /// number of decoded hex digits in current group
433
434
11.1M
        for (; !eof() && xdigits < 4; ++src, ++xdigits) {
435
9.05M
            UInt8 num = unhex(*src);
436
9.05M
            if (num == 0xFF) break;
437
8.69M
            (val <<= 4) |= num;
438
8.69M
        }
439
440
2.42M
        if (xdigits == 0) /// end of parsing
441
3.04k
            break;
442
443
2.42M
        *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu);
444
2.42M
        *iter++ = static_cast<unsigned char>(val & 0xffu);
445
2.42M
        ++groups;
446
2.42M
    }
447
448
    /// either all 8 groups or all-zeroes block should be present
449
338k
    if (groups < 8 && zptr == nullptr) return clear_dst();
450
451
    /// process all-zeroes block
452
330k
    if (zptr != nullptr) {
453
60.7k
        if (groups == 8) {
454
            /// all-zeroes block at least should be one
455
            /// 2001:0db8:86a3::08d3:1319:8a2e:0370:7344 not valid
456
2
            return clear_dst();
457
2
        }
458
60.7k
        size_t msize = iter - zptr;
459
60.7k
        std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize);
460
60.7k
        std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst));
461
60.7k
    }
462
463
    /// the current function logic is processed in big endian manner
464
    /// but ipv6 in doris is stored in little-endian byte order
465
    /// so transfer to little-endian
466
330k
    std::reverse(dst, dst + IPV6_BINARY_LENGTH);
467
468
330k
    return true;
469
330k
}
_ZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
342
337k
inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) {
343
337k
    const auto clear_dst = [dst]() {
344
337k
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
345
337k
        return false;
346
337k
    };
347
348
337k
    if (src == nullptr || eof()) return clear_dst();
349
350
337k
    int groups = 0;            /// number of parsed groups
351
337k
    unsigned char* iter = dst; /// iterator over dst buffer
352
337k
    unsigned char* zptr =
353
337k
            nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started
354
355
337k
    std::memset(dst, '\0', IPV6_BINARY_LENGTH);
356
357
337k
    if (first_block >= 0) {
358
0
        *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu);
359
0
        *iter++ = static_cast<unsigned char>(first_block & 0xffu);
360
0
        if (*src == ':') {
361
0
            zptr = iter;
362
0
            ++src;
363
0
        }
364
0
        ++groups;
365
0
    }
366
367
337k
    bool group_start = true;
368
369
2.82M
    while (!eof() && groups < 8) {
370
2.48M
        if (*src == ':') {
371
2.11M
            ++src;
372
2.11M
            if (eof()) /// trailing colon is not allowed
373
32
                return clear_dst();
374
375
2.11M
            group_start = true;
376
377
2.11M
            if (*src == ':') {
378
60.1k
                if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed
379
157
                    return clear_dst();
380
59.9k
                zptr = iter;
381
59.9k
                ++src;
382
59.9k
                if (!eof() && *src == ':') {
383
                    /// more than one all-zeroes block is not allowed
384
5
                    return clear_dst();
385
5
                }
386
59.9k
                continue;
387
59.9k
            }
388
2.05M
            if (groups == 0) /// leading colon is not allowed
389
0
                return clear_dst();
390
2.05M
        }
391
392
        /// mixed IPv4 parsing
393
2.42M
        if (*src == '.') {
394
916
            if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first
395
596
                return clear_dst();
396
397
320
            if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group
398
0
                return clear_dst();
399
400
320
            ++src;
401
320
            if (eof()) return clear_dst();
402
403
            /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4
404
320
            --groups;
405
320
            iter -= 2;
406
407
320
            UInt16 num = 0;
408
960
            for (int i = 0; i < 2; ++i) {
409
640
                unsigned char first = (iter[i] >> 4) & 0x0fu;
410
640
                unsigned char second = iter[i] & 0x0fu;
411
640
                if (first > 9 || second > 9) return clear_dst();
412
640
                (num *= 100) += first * 10 + second;
413
640
            }
414
320
            if (num > 255) return clear_dst();
415
416
            /// parse IPv4 with known first octet
417
320
            if (!parse_ipv4(src, eof, iter, num)) return clear_dst();
418
419
            if constexpr (std::endian::native == std::endian::little)
420
318
                std::reverse(iter, iter + IPV4_BINARY_LENGTH);
421
422
318
            iter += 4;
423
318
            groups += 2;
424
318
            break; /// IPv4 block is the last - end of parsing
425
320
        }
426
427
2.42M
        if (!group_start) /// end of parsing
428
821
            break;
429
2.42M
        group_start = false;
430
431
2.42M
        UInt16 val = 0;  /// current decoded group
432
2.42M
        int xdigits = 0; /// number of decoded hex digits in current group
433
434
11.1M
        for (; !eof() && xdigits < 4; ++src, ++xdigits) {
435
9.04M
            UInt8 num = unhex(*src);
436
9.04M
            if (num == 0xFF) break;
437
8.68M
            (val <<= 4) |= num;
438
8.68M
        }
439
440
2.42M
        if (xdigits == 0) /// end of parsing
441
2.13k
            break;
442
443
2.42M
        *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu);
444
2.42M
        *iter++ = static_cast<unsigned char>(val & 0xffu);
445
2.42M
        ++groups;
446
2.42M
    }
447
448
    /// either all 8 groups or all-zeroes block should be present
449
336k
    if (groups < 8 && zptr == nullptr) return clear_dst();
450
451
    /// process all-zeroes block
452
329k
    if (zptr != nullptr) {
453
59.7k
        if (groups == 8) {
454
            /// all-zeroes block at least should be one
455
            /// 2001:0db8:86a3::08d3:1319:8a2e:0370:7344 not valid
456
2
            return clear_dst();
457
2
        }
458
59.7k
        size_t msize = iter - zptr;
459
59.7k
        std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize);
460
59.7k
        std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst));
461
59.7k
    }
462
463
    /// the current function logic is processed in big endian manner
464
    /// but ipv6 in doris is stored in little-endian byte order
465
    /// so transfer to little-endian
466
329k
    std::reverse(dst, dst + IPV6_BINARY_LENGTH);
467
468
329k
    return true;
469
329k
}
_ZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
342
1.88k
/// Generic IPv6 text parser.
///
/// src          cursor over the input text; taken by reference and advanced as characters
///              are consumed
/// eof          callable returning true when src has no more input; it is consulted before
///              most reads of *src
/// dst          output buffer; IPV6_BINARY_LENGTH bytes of it are always written
/// first_block  when non-negative, its low 16 bits are emitted as the first group instead
///              of being read from src
///
/// Returns true on success, with dst holding the address in reversed (little-endian) byte
/// order. Every failure path zero-fills dst and returns false.
inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) {
343
1.88k
    const auto clear_dst = [dst]() {
344
1.88k
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
345
1.88k
        return false;
346
1.88k
    };
347
348
1.88k
    if (src == nullptr || eof()) return clear_dst();
349
350
1.88k
    int groups = 0;            /// number of parsed groups
351
1.88k
    unsigned char* iter = dst; /// iterator over dst buffer
352
1.88k
    unsigned char* zptr =
353
1.88k
            nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started
354
355
1.88k
    std::memset(dst, '\0', IPV6_BINARY_LENGTH);
356
357
1.88k
    /// a non-negative first_block is emitted as the first group without reading it from src;
    /// if the next character is ':' it is consumed and the all-zeroes block is recorded here
    /// NOTE(review): presumably the caller already consumed the group's text and one ':' - confirm
    if (first_block >= 0) {
358
0
        *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu);
359
0
        *iter++ = static_cast<unsigned char>(first_block & 0xffu);
360
0
        if (*src == ':') {
361
0
            zptr = iter;
362
0
            ++src;
363
0
        }
364
0
        ++groups;
365
0
    }
366
367
1.88k
    bool group_start = true;
368
369
6.16k
    while (!eof() && groups < 8) {
370
6.05k
        if (*src == ':') {
371
2.44k
            ++src;
372
2.44k
            if (eof()) /// trailing colon is not allowed
373
0
                return clear_dst();
374
375
2.44k
            group_start = true;
376
377
2.44k
            if (*src == ':') {
378
957
                if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed
379
0
                    return clear_dst();
380
957
                zptr = iter;
381
957
                ++src;
382
957
                if (!eof() && *src == ':') {
383
                    /// more than one all-zeroes block is not allowed
384
0
                    return clear_dst();
385
0
                }
386
957
                continue;
387
957
            }
388
1.49k
            if (groups == 0) /// leading colon is not allowed
389
0
                return clear_dst();
390
1.49k
        }
391
392
        /// mixed IPv4 parsing
393
5.09k
        if (*src == '.') {
394
68
            if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first
395
0
                return clear_dst();
396
397
68
            if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group
398
0
                return clear_dst();
399
400
68
            ++src;
401
68
            if (eof()) return clear_dst();
402
403
            /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4
404
68
            --groups;
405
68
            iter -= 2;
406
407
68
            UInt16 num = 0;
408
204
            /// each stored byte holds two hex nibbles; re-read them as decimal digits,
            /// rejecting any nibble above 9
            for (int i = 0; i < 2; ++i) {
409
136
                unsigned char first = (iter[i] >> 4) & 0x0fu;
410
136
                unsigned char second = iter[i] & 0x0fu;
411
136
                if (first > 9 || second > 9) return clear_dst();
412
136
                (num *= 100) += first * 10 + second;
413
136
            }
414
68
            if (num > 255) return clear_dst();
415
416
            /// parse IPv4 with known first octet
417
68
            if (!parse_ipv4(src, eof, iter, num)) return clear_dst();
418
419
            if constexpr (std::endian::native == std::endian::little)
420
68
                std::reverse(iter, iter + IPV4_BINARY_LENGTH);
421
422
68
            iter += 4;
423
68
            groups += 2;
424
68
            break; /// IPv4 block is the last - end of parsing
425
68
        }
426
427
5.02k
        if (!group_start) /// end of parsing
428
799
            break;
429
4.22k
        group_start = false;
430
431
4.22k
        UInt16 val = 0;  /// current decoded group
432
4.22k
        int xdigits = 0; /// number of decoded hex digits in current group
433
434
14.9k
        /// consume at most four hex digits; unhex() yielding 0xFF stops the group
        for (; !eof() && xdigits < 4; ++src, ++xdigits) {
435
13.0k
            UInt8 num = unhex(*src);
436
13.0k
            if (num == 0xFF) break;
437
10.7k
            (val <<= 4) |= num;
438
10.7k
        }
439
440
4.22k
        if (xdigits == 0) /// end of parsing
441
913
            break;
442
443
3.31k
        *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu);
444
3.31k
        *iter++ = static_cast<unsigned char>(val & 0xffu);
445
3.31k
        ++groups;
446
3.31k
    }
447
448
    /// either all 8 groups or all-zeroes block should be present
449
1.88k
    if (groups < 8 && zptr == nullptr) return clear_dst();
450
451
    /// process all-zeroes block
452
1.06k
    if (zptr != nullptr) {
453
957
        if (groups == 8) {
454
            /// all-zeroes block at least should be one
455
            /// 2001:0db8:86a3::08d3:1319:8a2e:0370:7344 not valid
456
0
            return clear_dst();
457
0
        }
458
957
        /// move the groups written after "::" to the tail of dst, then zero the bytes
        /// between the "::" position and the moved tail
        size_t msize = iter - zptr;
459
957
        std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize);
460
957
        std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst));
461
957
    }
462
463
    /// the current function logic is processed in big endian manner
464
    /// but ipv6 in doris is stored in little-endian byte order
465
    /// so transfer to little-endian
466
1.06k
    std::reverse(dst, dst + IPV6_BINARY_LENGTH);
467
468
1.06k
    return true;
469
1.06k
}
470
471
/// returns pointer to the right after parsed sequence or null on failed parsing
472
337k
inline const char* parse_ipv6(const char* src, const char* end, unsigned char* dst) {
473
337k
    if (parse_ipv6(
474
16.4M
                src, [&src, end]() { return src == end; }, dst))
475
329k
        return src;
476
7.23k
    return nullptr;
477
337k
}
478
479
/// returns true if whole buffer was parsed successfully
480
337k
inline bool parse_ipv6_whole(const char* src, const char* end, unsigned char* dst) {
481
337k
    return parse_ipv6(src, end, dst) == end;
482
337k
}
483
484
/// returns pointer to the right after parsed sequence or null on failed parsing
485
1.88k
inline const char* parse_ipv6(const char* src, unsigned char* dst) {
486
1.88k
    if (parse_ipv6(
487
1.88k
                src, []() { return false; }, dst))
488
1.06k
        return src;
489
823
    return nullptr;
490
1.88k
}
491
492
/// returns true if whole null-terminated string was parsed successfully
493
1.88k
inline bool parse_ipv6_whole(const char* src, unsigned char* dst) {
494
1.88k
    const char* end = parse_ipv6(src, dst);
495
1.88k
    return end != nullptr && *end == '\0';
496
1.88k
}
497
498
#include "common/compile_check_end.h"
499
} // namespace doris