Coverage Report

Created: 2026-04-10 12:12

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/format_ip.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/formatIPv6.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <algorithm>
24
#include <array>
25
#include <bit>
26
#include <cstdint>
27
#include <cstring>
28
#include <utility>
29
30
#include "core/types.h"
31
#include "exec/common/hex.h"
32
#include "exec/common/string_utils/string_utils.h"
33
34
// Sizes and limits shared by the IPv4/IPv6 text formatters and parsers below.
// They are declared `inline constexpr` so that every translation unit including this header
// refers to the same objects: the inline functions in this file bind some of them to
// `const&` parameters (e.g. via std::min), which odr-uses them.
inline constexpr size_t IPV4_BINARY_LENGTH = 4;
inline constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte.
inline constexpr size_t IPV6_MAX_TEXT_LENGTH = 39;
inline constexpr size_t IPV4_MIN_NUM_VALUE = 0;          // num value of '0.0.0.0'
inline constexpr size_t IPV4_MAX_NUM_VALUE = 4294967295; // num value of '255.255.255.255'
inline constexpr int IPV4_MAX_OCTET_VALUE = 255;         // max value of octet
inline constexpr size_t IPV4_OCTET_BITS = 8;
inline constexpr size_t DECIMAL_BASE = 10;
inline constexpr size_t IPV6_BINARY_LENGTH = 16;
43
44
namespace doris {
45
46
extern const std::array<std::pair<const char*, size_t>, 256> one_byte_to_string_lookup_table;
47
48
/** Format 4-byte binary sequesnce as IPv4 text: 'aaa.bbb.ccc.ddd',
49
  * expects in out to be in BE-format, that is 0x7f000001 => "127.0.0.1".
50
  *
51
  * Any number of the tail bytes can be masked with given mask string.
52
  *
53
  * Assumptions:
54
  *     src is IPV4_BINARY_LENGTH long,
55
  *     dst is IPV4_MAX_TEXT_LENGTH long,
56
  *     mask_tail_octets <= IPV4_BINARY_LENGTH
57
  *     mask_string is NON-NULL, if mask_tail_octets > 0.
58
  *
59
  * Examples:
60
  *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 0, nullptr);
61
  *         > dst == "127.0.0.1"
62
  *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "xxx");
63
  *         > dst == "127.0.0.xxx"
64
  *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "0");
65
  *         > dst == "127.0.0.0"
66
  */
67
inline void format_ipv4(const unsigned char* src, size_t src_size, char*& dst,
68
1.51M
                        uint8_t mask_tail_octets = 0, const char* mask_string = "xxx") {
69
1.51M
    const size_t mask_length = mask_string ? strlen(mask_string) : 0;
70
1.51M
    const size_t limit = std::min(IPV4_BINARY_LENGTH, IPV4_BINARY_LENGTH - mask_tail_octets);
71
1.51M
    const size_t padding = std::min(4 - src_size, limit);
72
1.51M
    for (size_t octet = 0; octet < padding; ++octet) {
73
6
        *dst++ = '0';
74
6
        *dst++ = '.';
75
6
    }
76
77
7.55M
    for (size_t octet = 4 - src_size; octet < limit; ++octet) {
78
6.04M
        uint8_t value = 0;
79
        if constexpr (std::endian::native == std::endian::little)
80
6.04M
            value = static_cast<uint8_t>(src[IPV4_BINARY_LENGTH - octet - 1]);
81
        else
82
            value = static_cast<uint8_t>(src[octet]);
83
6.04M
        const uint8_t len = static_cast<uint8_t>(one_byte_to_string_lookup_table[value].second);
84
6.04M
        const char* str = one_byte_to_string_lookup_table[value].first;
85
86
6.04M
        memcpy(dst, str, len);
87
6.04M
        dst += len;
88
89
6.04M
        *dst++ = '.';
90
6.04M
    }
91
92
1.51M
    for (size_t mask = 0; mask < mask_tail_octets; ++mask) {
93
12
        memcpy(dst, mask_string, mask_length);
94
12
        dst += mask_length;
95
96
12
        *dst++ = '.';
97
12
    }
98
99
1.51M
    dst--;
100
1.51M
}
101
102
inline void format_ipv4(const unsigned char* src, char*& dst, uint8_t mask_tail_octets = 0,
103
1.51M
                        const char* mask_string = "xxx") {
104
1.51M
    format_ipv4(src, 4, dst, mask_tail_octets, mask_string);
105
1.51M
}
106
107
/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string.
108
 *
109
 * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`,
110
 * which should be long enough.
111
 * That is "127.0.0.1" becomes 0x7f000001.
112
 *
113
 * In case of failure doesn't modify buffer pointed by `dst`.
114
 *
115
 * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position())
116
 *           and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity.
117
 *           To parse strings use overloads below.
118
 *
119
 * @param src         - iterator (reference to pointer) over input string - warning - continuity is not guaranteed.
120
 * @param eof         - function returning true if iterator riched the end - warning - can break iterator's continuity.
121
 * @param dst         - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long.
122
 * @param first_octet - preparsed first octet
123
 * @return            - true if parsed successfully, false otherwise.
124
 */
125
template <typename T, typename EOFfunction>
126
    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
127
183k
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
128
183k
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
129
0
        return false;
130
0
    }
131
132
183k
    UInt32 result = 0;
133
183k
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
134
183k
    if (first_octet >= 0) {
135
461
        result |= first_octet << offset;
136
461
        offset -= IPV4_OCTET_BITS;
137
461
    }
138
139
713k
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
140
713k
        if (eof()) {
141
66
            return false;
142
66
        }
143
144
713k
        UInt32 value = 0;
145
713k
        size_t len = 0;
146
2.08M
        while (is_numeric_ascii(*src) && len <= 3) {
147
1.54M
            value = value * DECIMAL_BASE + (*src - '0');
148
1.54M
            ++len;
149
1.54M
            ++src;
150
1.54M
            if (eof()) {
151
175k
                break;
152
175k
            }
153
1.54M
        }
154
713k
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
155
6.31k
            return false;
156
6.31k
        }
157
707k
        result |= value << offset;
158
159
707k
        if (offset == 0) {
160
176k
            break;
161
176k
        }
162
707k
    }
163
164
176k
    memcpy(dst, &result, sizeof(result));
165
176k
    return true;
166
183k
}
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv4EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
127
179k
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
128
179k
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
129
0
        return false;
130
0
    }
131
132
179k
    UInt32 result = 0;
133
179k
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
134
179k
    if (first_octet >= 0) {
135
0
        result |= first_octet << offset;
136
0
        offset -= IPV4_OCTET_BITS;
137
0
    }
138
139
705k
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
140
705k
        if (eof()) {
141
66
            return false;
142
66
        }
143
144
705k
        UInt32 value = 0;
145
705k
        size_t len = 0;
146
2.06M
        while (is_numeric_ascii(*src) && len <= 3) {
147
1.53M
            value = value * DECIMAL_BASE + (*src - '0');
148
1.53M
            ++len;
149
1.53M
            ++src;
150
1.53M
            if (eof()) {
151
175k
                break;
152
175k
            }
153
1.53M
        }
154
705k
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
155
3.97k
            return false;
156
3.97k
        }
157
701k
        result |= value << offset;
158
159
701k
        if (offset == 0) {
160
175k
            break;
161
175k
        }
162
701k
    }
163
164
175k
    memcpy(dst, &result, sizeof(result));
165
175k
    return true;
166
179k
}
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv4EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
127
3.45k
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
128
3.45k
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
129
0
        return false;
130
0
    }
131
132
3.45k
    UInt32 result = 0;
133
3.45k
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
134
3.45k
    if (first_octet >= 0) {
135
0
        result |= first_octet << offset;
136
0
        offset -= IPV4_OCTET_BITS;
137
0
    }
138
139
6.77k
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
140
6.77k
        if (eof()) {
141
0
            return false;
142
0
        }
143
144
6.77k
        UInt32 value = 0;
145
6.77k
        size_t len = 0;
146
17.6k
        while (is_numeric_ascii(*src) && len <= 3) {
147
10.9k
            value = value * DECIMAL_BASE + (*src - '0');
148
10.9k
            ++len;
149
10.9k
            ++src;
150
10.9k
            if (eof()) {
151
0
                break;
152
0
            }
153
10.9k
        }
154
6.77k
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
155
2.34k
            return false;
156
2.34k
        }
157
4.43k
        result |= value << offset;
158
159
4.43k
        if (offset == 0) {
160
1.10k
            break;
161
1.10k
        }
162
4.43k
    }
163
164
1.10k
    memcpy(dst, &result, sizeof(result));
165
1.10k
    return true;
166
3.45k
}
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
127
393
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
128
393
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
129
0
        return false;
130
0
    }
131
132
393
    UInt32 result = 0;
133
393
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
134
393
    if (first_octet >= 0) {
135
393
        result |= first_octet << offset;
136
393
        offset -= IPV4_OCTET_BITS;
137
393
    }
138
139
1.17k
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
140
1.17k
        if (eof()) {
141
0
            return false;
142
0
        }
143
144
1.17k
        UInt32 value = 0;
145
1.17k
        size_t len = 0;
146
3.13k
        while (is_numeric_ascii(*src) && len <= 3) {
147
2.34k
            value = value * DECIMAL_BASE + (*src - '0');
148
2.34k
            ++len;
149
2.34k
            ++src;
150
2.34k
            if (eof()) {
151
391
                break;
152
391
            }
153
2.34k
        }
154
1.17k
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
155
4
            return false;
156
4
        }
157
1.17k
        result |= value << offset;
158
159
1.17k
        if (offset == 0) {
160
389
            break;
161
389
        }
162
1.17k
    }
163
164
389
    memcpy(dst, &result, sizeof(result));
165
389
    return true;
166
393
}
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
127
68
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
128
68
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
129
0
        return false;
130
0
    }
131
132
68
    UInt32 result = 0;
133
68
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
134
68
    if (first_octet >= 0) {
135
68
        result |= first_octet << offset;
136
68
        offset -= IPV4_OCTET_BITS;
137
68
    }
138
139
204
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
140
204
        if (eof()) {
141
0
            return false;
142
0
        }
143
144
204
        UInt32 value = 0;
145
204
        size_t len = 0;
146
642
        while (is_numeric_ascii(*src) && len <= 3) {
147
438
            value = value * DECIMAL_BASE + (*src - '0');
148
438
            ++len;
149
438
            ++src;
150
438
            if (eof()) {
151
0
                break;
152
0
            }
153
438
        }
154
204
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
155
0
            return false;
156
0
        }
157
204
        result |= value << offset;
158
159
204
        if (offset == 0) {
160
68
            break;
161
68
        }
162
204
    }
163
164
68
    memcpy(dst, &result, sizeof(result));
165
68
    return true;
166
68
}
167
168
/// returns pointer to the right after parsed sequence or null on failed parsing
169
179k
inline const char* parse_ipv4(const char* src, const char* end, unsigned char* dst) {
170
179k
    if (parse_ipv4(
171
2.76M
                src, [&src, end]() { return src == end; }, dst)) {
172
175k
        return src;
173
175k
    }
174
4.04k
    return nullptr;
175
179k
}
176
177
/// returns true if whole buffer was parsed successfully
178
179k
inline bool parse_ipv4_whole(const char* src, const char* end, unsigned char* dst) {
179
179k
    return parse_ipv4(src, end, dst) == end;
180
179k
}
181
182
/// returns pointer to the right after parsed sequence or null on failed parsing
183
3.45k
inline const char* parse_ipv4(const char* src, unsigned char* dst) {
184
3.45k
    if (parse_ipv4(
185
3.45k
                src, []() { return false; }, dst)) {
186
1.10k
        return src;
187
1.10k
    }
188
2.34k
    return nullptr;
189
3.45k
}
190
191
/// returns true if whole null-terminated string was parsed successfully
192
3.45k
inline bool parse_ipv4_whole(const char* src, unsigned char* dst) {
193
3.45k
    const char* end = parse_ipv4(src, dst);
194
3.45k
    return end != nullptr && *end == '\0';
195
3.45k
}
196
197
/// integer logarithm, return ceil(log(value, base)) (the smallest integer greater or equal than log(value, base)
198
0
inline constexpr UInt32 int_log(const UInt32 value, const UInt32 base, const bool carry) {
199
0
    return value >= base ? 1 + int_log(value / base, base, value % base || carry)
200
0
                         : value % base > 1 || carry;
201
0
}
202
203
/// Print integer in desired base, faster than sprintf.
204
/// NOTE This is not the best way. See https://github.com/miloyip/itoa-benchmark
205
/// But it doesn't matter here.
206
template <UInt32 base, typename T>
207
140k
inline void print_integer(char*& out, T value) {
208
140k
    if (value == 0) {
209
177
        *out++ = '0';
210
140k
    } else {
211
140k
        constexpr size_t buffer_size = sizeof(T) * int_log(256, base, false);
212
213
140k
        char buf[buffer_size];
214
140k
        auto ptr = buf;
215
216
584k
        while (value > 0) {
217
444k
            *ptr = hex_digit_lowercase(value % base);
218
444k
            ++ptr;
219
444k
            value /= base;
220
444k
        }
221
222
        /// Copy to out reversed.
223
584k
        while (ptr != buf) {
224
444k
            --ptr;
225
444k
            *out = *ptr;
226
444k
            ++out;
227
444k
        }
228
140k
    }
229
140k
}
230
231
/** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
232
  * performs significantly faster than the reference implementation due to the absence of sprintf calls,
233
  * bounds checking, unnecessary string copying and length calculation.
234
  * @param src         - pointer to IPv6 (16 bytes) stored in little-endian byte order
235
  * @param dst         - where to put format result bytes
236
  * @param zeroed_tail_bytes_count - the parameter is currently not being used
237
  */
238
1.49M
inline void format_ipv6(unsigned char* src, char*& dst, uint8_t zeroed_tail_bytes_count = 0) {
239
1.49M
    struct {
240
1.49M
        Int64 base, len;
241
1.49M
    } best {-1, 0}, cur {-1, 0};
242
1.49M
    std::array<UInt16, IPV6_BINARY_LENGTH / sizeof(UInt16)> words {};
243
244
    // the current function logic is processed in big endian manner
245
    // but ipv6 in doris is stored in little-endian byte order
246
    // so transfer to big-endian byte order first
247
    // compatible with parse_ipv6 function in format_ip.h
248
1.49M
    std::reverse(src, src + IPV6_BINARY_LENGTH);
249
250
    /** Preprocess:
251
        *    Copy the input (bytewise) array into a wordwise array.
252
        *    Find the longest run of 0x00's in src[] for :: shorthanding. */
253
13.4M
    for (size_t i = 0; i < (IPV6_BINARY_LENGTH - zeroed_tail_bytes_count); i += 2) {
254
11.9M
        words[i / 2] = (uint16_t)(src[i] << 8) | src[i + 1];
255
11.9M
    }
256
257
13.4M
    for (size_t i = 0; i < words.size(); i++) {
258
11.9M
        if (words[i] == 0) {
259
11.8M
            if (cur.base == -1) {
260
1.49M
                cur.base = i;
261
1.49M
                cur.len = 1;
262
10.3M
            } else {
263
10.3M
                cur.len++;
264
10.3M
            }
265
11.8M
        } else {
266
140k
            if (cur.base != -1) {
267
36.7k
                if (best.base == -1 || cur.len > best.len) {
268
36.7k
                    best = cur;
269
36.7k
                }
270
36.7k
                cur.base = -1;
271
36.7k
            }
272
140k
        }
273
11.9M
    }
274
275
1.49M
    if (cur.base != -1) {
276
1.45M
        if (best.base == -1 || cur.len > best.len) {
277
1.45M
            best = cur;
278
1.45M
        }
279
1.45M
    }
280
1.49M
    if (best.base != -1 && best.len < 2) {
281
60
        best.base = -1;
282
60
    }
283
284
    /// Format the result.
285
13.4M
    for (size_t i = 0; i < words.size(); i++) {
286
        /// Are we inside the best run of 0x00's?
287
11.9M
        if (best.base != -1) {
288
11.9M
            auto best_base = static_cast<size_t>(best.base);
289
11.9M
            if (i >= best_base && i < (best_base + best.len)) {
290
11.8M
                if (i == best_base) {
291
1.49M
                    *dst++ = ':';
292
1.49M
                }
293
11.8M
                continue;
294
11.8M
            }
295
11.9M
        }
296
        /// Are we following an initial run of 0x00s or any real hex?
297
140k
        if (i != 0) {
298
101k
            *dst++ = ':';
299
101k
        }
300
        /// Is this address an encapsulated IPv4?
301
140k
        if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu))) {
302
74
            uint8_t ipv4_buffer[IPV4_BINARY_LENGTH] = {0};
303
74
            memcpy(ipv4_buffer, src + 12, IPV4_BINARY_LENGTH);
304
            // Due to historical reasons format_ipv4() takes ipv4 in BE format, but inside ipv6 we store it in LE-format.
305
74
            if constexpr (std::endian::native == std::endian::little) {
306
74
                std::reverse(std::begin(ipv4_buffer), std::end(ipv4_buffer));
307
74
            }
308
74
            format_ipv4(ipv4_buffer, dst,
309
74
                        std::min(zeroed_tail_bytes_count, static_cast<uint8_t>(IPV4_BINARY_LENGTH)),
310
74
                        "0");
311
            // format_ipv4 has already added a null-terminator for us.
312
74
            return;
313
74
        }
314
140k
        print_integer<16>(dst, words[i]);
315
140k
    }
316
317
    /// Was it a trailing run of 0x00's?
318
1.49M
    if (best.base != -1 &&
319
1.49M
        static_cast<size_t>(best.base) + static_cast<size_t>(best.len) == words.size()) {
320
1.45M
        *dst++ = ':';
321
1.45M
    }
322
1.49M
}
323
324
/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv6 string.
325
*
326
* Parses the input string `src` and stores binary little-endian value into buffer pointed by `dst`,
327
* which should be long enough. In case of failure zeroes IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`.
328
*
329
* WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position())
330
*           and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity.
331
*           To parse strings use overloads below.
332
*
333
* @param src         - iterator (reference to pointer) over input string - warning - continuity is not guaranteed.
334
* @param eof         - function returning true if iterator riched the end - warning - can break iterator's continuity.
335
* @param dst         - where to put output bytes in little-endian byte order, expected to be non-null and at IPV6_BINARY_LENGTH-long.
336
* @param first_block - preparsed first block
337
* @return            - true if parsed successfully, false otherwise.
338
*/
339
template <typename T, typename EOFfunction>
340
    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
341
340k
inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) {
342
340k
    const auto clear_dst = [dst]() {
343
8.08k
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
344
8.08k
        return false;
345
8.08k
    };
_ZZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_iENKUlvE_clEv
Line
Count
Source
342
822
    const auto clear_dst = [dst]() {
343
822
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
344
822
        return false;
345
822
    };
_ZZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_iENKUlvE_clEv
Line
Count
Source
342
7.26k
    const auto clear_dst = [dst]() {
343
7.26k
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
344
7.26k
        return false;
345
7.26k
    };
346
347
340k
    if (src == nullptr || eof()) return clear_dst();
348
349
340k
    int groups = 0;            /// number of parsed groups
350
340k
    unsigned char* iter = dst; /// iterator over dst buffer
351
340k
    unsigned char* zptr =
352
340k
            nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started
353
354
340k
    std::memset(dst, '\0', IPV6_BINARY_LENGTH);
355
356
340k
    if (first_block >= 0) {
357
0
        *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu);
358
0
        *iter++ = static_cast<unsigned char>(first_block & 0xffu);
359
0
        if (*src == ':') {
360
0
            zptr = iter;
361
0
            ++src;
362
0
        }
363
0
        ++groups;
364
0
    }
365
366
340k
    bool group_start = true;
367
368
2.83M
    while (!eof() && groups < 8) {
369
2.49M
        if (*src == ':') {
370
2.12M
            ++src;
371
2.12M
            if (eof()) /// trailing colon is not allowed
372
32
                return clear_dst();
373
374
2.12M
            group_start = true;
375
376
2.12M
            if (*src == ':') {
377
62.1k
                if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed
378
159
                    return clear_dst();
379
61.9k
                zptr = iter;
380
61.9k
                ++src;
381
61.9k
                if (!eof() && *src == ':') {
382
                    /// more than one all-zeroes block is not allowed
383
10
                    return clear_dst();
384
10
                }
385
61.9k
                continue;
386
61.9k
            }
387
2.06M
            if (groups == 0) /// leading colon is not allowed
388
0
                return clear_dst();
389
2.06M
        }
390
391
        /// mixed IPv4 parsing
392
2.43M
        if (*src == '.') {
393
1.05k
            if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first
394
598
                return clear_dst();
395
396
461
            if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group
397
0
                return clear_dst();
398
399
461
            ++src;
400
461
            if (eof()) return clear_dst();
401
402
            /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4
403
461
            --groups;
404
461
            iter -= 2;
405
406
461
            UInt16 num = 0;
407
1.38k
            for (int i = 0; i < 2; ++i) {
408
922
                unsigned char first = (iter[i] >> 4) & 0x0fu;
409
922
                unsigned char second = iter[i] & 0x0fu;
410
922
                if (first > 9 || second > 9) return clear_dst();
411
922
                (num *= 100) += first * 10 + second;
412
922
            }
413
461
            if (num > 255) return clear_dst();
414
415
            /// parse IPv4 with known first octet
416
461
            if (!parse_ipv4(src, eof, iter, num)) return clear_dst();
417
418
            if constexpr (std::endian::native == std::endian::little)
419
457
                std::reverse(iter, iter + IPV4_BINARY_LENGTH);
420
421
457
            iter += 4;
422
457
            groups += 2;
423
457
            break; /// IPv4 block is the last - end of parsing
424
461
        }
425
426
2.43M
        if (!group_start) /// end of parsing
427
1.61k
            break;
428
2.43M
        group_start = false;
429
430
2.43M
        UInt16 val = 0;  /// current decoded group
431
2.43M
        int xdigits = 0; /// number of decoded hex digits in current group
432
433
11.1M
        for (; !eof() && xdigits < 4; ++src, ++xdigits) {
434
9.07M
            UInt8 num = unhex(*src);
435
9.07M
            if (num == 0xFF) break;
436
8.70M
            (val <<= 4) |= num;
437
8.70M
        }
438
439
2.43M
        if (xdigits == 0) /// end of parsing
440
3.04k
            break;
441
442
2.43M
        *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu);
443
2.43M
        *iter++ = static_cast<unsigned char>(val & 0xffu);
444
2.43M
        ++groups;
445
2.43M
    }
446
447
    /// either all 8 groups or all-zeroes block should be present
448
339k
    if (groups < 8 && zptr == nullptr) return clear_dst();
449
450
    /// process all-zeroes block
451
332k
    if (zptr != nullptr) {
452
61.7k
        if (groups == 8) {
453
            /// all-zeroes block at least should be one
454
            /// 2001:0db8:86a3::08d3:1319:8a2e:0370:7344 not valid
455
4
            return clear_dst();
456
4
        }
457
61.7k
        size_t msize = iter - zptr;
458
61.7k
        std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize);
459
61.7k
        std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst));
460
61.7k
    }
461
462
    /// the current function logic is processed in big endian manner
463
    /// but ipv6 in doris is stored in little-endian byte order
464
    /// so transfer to little-endian
465
332k
    std::reverse(dst, dst + IPV6_BINARY_LENGTH);
466
467
332k
    return true;
468
332k
}
_ZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
341
338k
inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) {
342
338k
    const auto clear_dst = [dst]() {
343
338k
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
344
338k
        return false;
345
338k
    };
346
347
338k
    if (src == nullptr || eof()) return clear_dst();
348
349
338k
    int groups = 0;            /// number of parsed groups
350
338k
    unsigned char* iter = dst; /// iterator over dst buffer
351
338k
    unsigned char* zptr =
352
338k
            nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started
353
354
338k
    std::memset(dst, '\0', IPV6_BINARY_LENGTH);
355
356
338k
    if (first_block >= 0) {
357
0
        *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu);
358
0
        *iter++ = static_cast<unsigned char>(first_block & 0xffu);
359
0
        if (*src == ':') {
360
0
            zptr = iter;
361
0
            ++src;
362
0
        }
363
0
        ++groups;
364
0
    }
365
366
338k
    bool group_start = true;
367
368
2.82M
    while (!eof() && groups < 8) {
369
2.49M
        if (*src == ':') {
370
2.12M
            ++src;
371
2.12M
            if (eof()) /// trailing colon is not allowed
372
32
                return clear_dst();
373
374
2.12M
            group_start = true;
375
376
2.12M
            if (*src == ':') {
377
61.1k
                if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed
378
159
                    return clear_dst();
379
61.0k
                zptr = iter;
380
61.0k
                ++src;
381
61.0k
                if (!eof() && *src == ':') {
382
                    /// more than one all-zeroes block is not allowed
383
10
                    return clear_dst();
384
10
                }
385
61.0k
                continue;
386
61.0k
            }
387
2.06M
            if (groups == 0) /// leading colon is not allowed
388
0
                return clear_dst();
389
2.06M
        }
390
391
        /// mixed IPv4 parsing
392
2.43M
        if (*src == '.') {
393
991
            if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first
394
598
                return clear_dst();
395
396
393
            if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group
397
0
                return clear_dst();
398
399
393
            ++src;
400
393
            if (eof()) return clear_dst();
401
402
            /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4
403
393
            --groups;
404
393
            iter -= 2;
405
406
393
            UInt16 num = 0;
407
1.17k
            for (int i = 0; i < 2; ++i) {
408
786
                unsigned char first = (iter[i] >> 4) & 0x0fu;
409
786
                unsigned char second = iter[i] & 0x0fu;
410
786
                if (first > 9 || second > 9) return clear_dst();
411
786
                (num *= 100) += first * 10 + second;
412
786
            }
413
393
            if (num > 255) return clear_dst();
414
415
            /// parse IPv4 with known first octet
416
393
            if (!parse_ipv4(src, eof, iter, num)) return clear_dst();
417
418
            if constexpr (std::endian::native == std::endian::little)
419
389
                std::reverse(iter, iter + IPV4_BINARY_LENGTH);
420
421
389
            iter += 4;
422
389
            groups += 2;
423
389
            break; /// IPv4 block is the last - end of parsing
424
393
        }
425
426
2.43M
        if (!group_start) /// end of parsing
427
823
            break;
428
2.43M
        group_start = false;
429
430
2.43M
        UInt16 val = 0;  /// current decoded group
431
2.43M
        int xdigits = 0; /// number of decoded hex digits in current group
432
433
11.1M
        for (; !eof() && xdigits < 4; ++src, ++xdigits) {
434
9.06M
            UInt8 num = unhex(*src);
435
9.06M
            if (num == 0xFF) break;
436
8.69M
            (val <<= 4) |= num;
437
8.69M
        }
438
439
2.43M
        if (xdigits == 0) /// end of parsing
440
2.13k
            break;
441
442
2.42M
        *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu);
443
2.42M
        *iter++ = static_cast<unsigned char>(val & 0xffu);
444
2.42M
        ++groups;
445
2.42M
    }
446
447
    /// either all 8 groups or all-zeroes block should be present
448
337k
    if (groups < 8 && zptr == nullptr) return clear_dst();
449
450
    /// process all-zeroes block
451
331k
    if (zptr != nullptr) {
452
60.8k
        if (groups == 8) {
453
            /// all-zeroes block at least should be one
454
            /// 2001:0db8:86a3::08d3:1319:8a2e:0370:7344 not valid
455
4
            return clear_dst();
456
4
        }
457
60.8k
        size_t msize = iter - zptr;
458
60.8k
        std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize);
459
60.8k
        std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst));
460
60.8k
    }
461
462
    /// the current function logic is processed in big endian manner
463
    /// but ipv6 in doris is stored in little-endian byte order
464
    /// so transfer to little-endian
465
331k
    std::reverse(dst, dst + IPV6_BINARY_LENGTH);
466
467
331k
    return true;
468
331k
}
_ZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
341
1.87k
/// Parses the textual IPv6 address starting at `src` into the 16-byte buffer `dst`.
/// (The template/requires header of this definition is not visible in this listing.)
///
/// src         - cursor into the text; taken by reference and advanced as characters are consumed
/// eof         - callable returning true once `src` has reached the end of the input
/// dst         - output buffer, IPV6_BINARY_LENGTH bytes are written
/// first_block - when >= 0, an already decoded first 16-bit group; parsing continues after it
///
/// Returns true on success, with `dst` holding the address in reversed (little-endian) byte order.
/// Returns false on failure, in which case `dst` is zero-filled.
inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) {
342
1.87k
    /// failure helper: zero the whole output buffer and yield false, used as `return clear_dst();`
    const auto clear_dst = [dst]() {
343
1.87k
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
344
1.87k
        return false;
345
1.87k
    };
346
347
1.87k
    if (src == nullptr || eof()) return clear_dst();
348
349
1.87k
    int groups = 0;            /// number of parsed groups
350
1.87k
    unsigned char* iter = dst; /// iterator over dst buffer
351
1.87k
    unsigned char* zptr =
352
1.87k
            nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started
353
354
1.87k
    std::memset(dst, '\0', IPV6_BINARY_LENGTH);
355
356
1.87k
    /// a caller-supplied first group is stored high byte first; a ':' directly after it
    /// is taken as the start of the all-zeroes block
    if (first_block >= 0) {
357
0
        *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu);
358
0
        *iter++ = static_cast<unsigned char>(first_block & 0xffu);
359
0
        if (*src == ':') {
360
0
            zptr = iter;
361
0
            ++src;
362
0
        }
363
0
        ++groups;
364
0
    }
365
366
1.87k
    /// true while a new group is expected (at the start and right after a ':' separator)
    bool group_start = true;
367
368
6.10k
    while (!eof() && groups < 8) {
369
6.00k
        if (*src == ':') {
370
2.42k
            ++src;
371
2.42k
            if (eof()) /// trailing colon is not allowed
372
0
                return clear_dst();
373
374
2.42k
            group_start = true;
375
376
2.42k
            if (*src == ':') {
377
947
                if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed
378
0
                    return clear_dst();
379
947
                zptr = iter;
380
947
                ++src;
381
947
                if (!eof() && *src == ':') {
382
                    /// more than one all-zeroes block is not allowed
383
0
                    return clear_dst();
384
0
                }
385
947
                continue;
386
947
            }
387
1.47k
            if (groups == 0) /// leading colon is not allowed
388
0
                return clear_dst();
389
1.47k
        }
390
391
        /// mixed IPv4 parsing
392
5.05k
        if (*src == '.') {
393
68
            if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first
394
0
                return clear_dst();
395
396
68
            if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group
397
0
                return clear_dst();
398
399
68
            ++src;
400
68
            if (eof()) return clear_dst();
401
402
            /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4
403
68
            --groups;
404
68
            iter -= 2;
405
406
68
            UInt16 num = 0;
407
204
            /// each nibble of the two bytes just rewound over is read back as one decimal digit;
            /// a nibble above 9 means the group was not a decimal number
            for (int i = 0; i < 2; ++i) {
408
136
                unsigned char first = (iter[i] >> 4) & 0x0fu;
409
136
                unsigned char second = iter[i] & 0x0fu;
410
136
                if (first > 9 || second > 9) return clear_dst();
411
136
                (num *= 100) += first * 10 + second;
412
136
            }
413
68
            if (num > 255) return clear_dst();
414
415
            /// parse IPv4 with known first octet
416
68
            if (!parse_ipv4(src, eof, iter, num)) return clear_dst();
417
418
            /// on little-endian hosts the four bytes written at iter are reversed in place
            /// (presumably parse_ipv4 stores them in host order - confirm against parse_ipv4)
            if constexpr (std::endian::native == std::endian::little)
419
68
                std::reverse(iter, iter + IPV4_BINARY_LENGTH);
420
421
68
            iter += 4;
422
68
            groups += 2;
423
68
            break; /// IPv4 block is the last - end of parsing
424
68
        }
425
426
4.98k
        if (!group_start) /// end of parsing
427
793
            break;
428
4.19k
        group_start = false;
429
430
4.19k
        UInt16 val = 0;  /// current decoded group
431
4.19k
        int xdigits = 0; /// number of decoded hex digits in current group
432
433
14.8k
        /// consume at most 4 hex digits; an unhex() result of 0xFF ends the group
        /// without consuming that character
        for (; !eof() && xdigits < 4; ++src, ++xdigits) {
434
12.9k
            UInt8 num = unhex(*src);
435
12.9k
            if (num == 0xFF) break;
436
10.6k
            (val <<= 4) |= num;
437
10.6k
        }
438
439
4.19k
        if (xdigits == 0) /// end of parsing
440
908
            break;
441
442
3.28k
        /// store the group high byte first
        *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu);
443
3.28k
        *iter++ = static_cast<unsigned char>(val & 0xffu);
444
3.28k
        ++groups;
445
3.28k
    }
446
447
    /// either all 8 groups or all-zeroes block should be present
448
1.87k
    if (groups < 8 && zptr == nullptr) return clear_dst();
449
450
    /// process all-zeroes block
451
1.05k
    if (zptr != nullptr) {
452
947
        if (groups == 8) {
453
            /// all-zeroes block at least should be one
454
            /// 2001:0db8:86a3::08d3:1319:8a2e:0370:7344 not valid
455
0
            return clear_dst();
456
0
        }
457
947
        /// bytes parsed after "::" are moved to the tail of dst and the gap they leave is zeroed
        size_t msize = iter - zptr;
458
947
        std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize);
459
947
        std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst));
460
947
    }
461
462
    /// the current function logic is processed in big endian manner
463
    /// but ipv6 in doris is stored in little-endian byte order
464
    /// so transfer to little-endian
465
1.05k
    std::reverse(dst, dst + IPV6_BINARY_LENGTH);
466
467
1.05k
    return true;
468
1.05k
}
469
470
/// returns pointer to the right after parsed sequence or null on failed parsing
471
338k
inline const char* parse_ipv6(const char* src, const char* end, unsigned char* dst) {
472
338k
    if (parse_ipv6(
473
16.4M
                src, [&src, end]() { return src == end; }, dst))
474
331k
        return src;
475
7.26k
    return nullptr;
476
338k
}
477
478
/// returns true if whole buffer was parsed successfully
479
338k
inline bool parse_ipv6_whole(const char* src, const char* end, unsigned char* dst) {
480
338k
    return parse_ipv6(src, end, dst) == end;
481
338k
}
482
483
/// returns pointer to the right after parsed sequence or null on failed parsing
484
1.87k
inline const char* parse_ipv6(const char* src, unsigned char* dst) {
485
1.87k
    if (parse_ipv6(
486
1.87k
                src, []() { return false; }, dst))
487
1.05k
        return src;
488
822
    return nullptr;
489
1.87k
}
490
491
/// returns true if whole null-terminated string was parsed successfully
492
1.87k
inline bool parse_ipv6_whole(const char* src, unsigned char* dst) {
493
1.87k
    const char* end = parse_ipv6(src, dst);
494
1.87k
    return end != nullptr && *end == '\0';
495
1.87k
}
496
497
} // namespace doris