Coverage Report

Created: 2026-04-10 04:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/format_ip.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/formatIPv6.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <algorithm>
24
#include <array>
25
#include <bit>
26
#include <cstdint>
27
#include <cstring>
28
#include <utility>
29
30
#include "core/types.h"
31
#include "exec/common/hex.h"
32
#include "exec/common/string_utils/string_utils.h"
33
34
/// Sizes and limits shared by the IPv4/IPv6 text <-> binary helpers below.

/// IPv4 --------------------------------------------------------------------
/// Width of a binary IPv4 address in bytes.
constexpr size_t IPV4_BINARY_LENGTH = 4;
/// Length of the longest dotted-decimal form ("255.255.255.255"); the trailing
/// zero byte is not counted.
constexpr size_t IPV4_MAX_TEXT_LENGTH = 15;
/// Numeric value of '0.0.0.0'.
constexpr size_t IPV4_MIN_NUM_VALUE = 0;
/// Numeric value of '255.255.255.255'.
constexpr size_t IPV4_MAX_NUM_VALUE = 4294967295;
/// Largest value a single octet may hold.
constexpr int IPV4_MAX_OCTET_VALUE = 255;
/// Number of bits per octet; used as the shift step while assembling an address.
constexpr size_t IPV4_OCTET_BITS = 8;
/// Radix of the digits in a dotted-decimal octet.
constexpr size_t DECIMAL_BASE = 10;

/// IPv6 --------------------------------------------------------------------
/// Width of a binary IPv6 address in bytes.
constexpr size_t IPV6_BINARY_LENGTH = 16;
/// Eight groups of four hex digits plus seven ':' separators.
constexpr size_t IPV6_MAX_TEXT_LENGTH = 39;
43
44
namespace doris {
45
46
extern const std::array<std::pair<const char*, size_t>, 256> one_byte_to_string_lookup_table;
47
48
/** Format 4-byte binary sequesnce as IPv4 text: 'aaa.bbb.ccc.ddd',
49
  * expects in out to be in BE-format, that is 0x7f000001 => "127.0.0.1".
50
  *
51
  * Any number of the tail bytes can be masked with given mask string.
52
  *
53
  * Assumptions:
54
  *     src is IPV4_BINARY_LENGTH long,
55
  *     dst is IPV4_MAX_TEXT_LENGTH long,
56
  *     mask_tail_octets <= IPV4_BINARY_LENGTH
57
  *     mask_string is NON-NULL, if mask_tail_octets > 0.
58
  *
59
  * Examples:
60
  *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 0, nullptr);
61
  *         > dst == "127.0.0.1"
62
  *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "xxx");
63
  *         > dst == "127.0.0.xxx"
64
  *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "0");
65
  *         > dst == "127.0.0.0"
66
  */
67
inline void format_ipv4(const unsigned char* src, size_t src_size, char*& dst,
68
1.49M
                        uint8_t mask_tail_octets = 0, const char* mask_string = "xxx") {
69
1.49M
    const size_t mask_length = mask_string ? strlen(mask_string) : 0;
70
1.49M
    const size_t limit = std::min(IPV4_BINARY_LENGTH, IPV4_BINARY_LENGTH - mask_tail_octets);
71
1.49M
    const size_t padding = std::min(4 - src_size, limit);
72
1.49M
    for (size_t octet = 0; octet < padding; ++octet) {
73
0
        *dst++ = '0';
74
0
        *dst++ = '.';
75
0
    }
76
77
7.49M
    for (size_t octet = 4 - src_size; octet < limit; ++octet) {
78
5.99M
        uint8_t value = 0;
79
        if constexpr (std::endian::native == std::endian::little)
80
5.99M
            value = static_cast<uint8_t>(src[IPV4_BINARY_LENGTH - octet - 1]);
81
        else
82
            value = static_cast<uint8_t>(src[octet]);
83
5.99M
        const uint8_t len = static_cast<uint8_t>(one_byte_to_string_lookup_table[value].second);
84
5.99M
        const char* str = one_byte_to_string_lookup_table[value].first;
85
86
5.99M
        memcpy(dst, str, len);
87
5.99M
        dst += len;
88
89
5.99M
        *dst++ = '.';
90
5.99M
    }
91
92
1.49M
    for (size_t mask = 0; mask < mask_tail_octets; ++mask) {
93
6
        memcpy(dst, mask_string, mask_length);
94
6
        dst += mask_length;
95
96
6
        *dst++ = '.';
97
6
    }
98
99
1.49M
    dst--;
100
1.49M
}
101
102
inline void format_ipv4(const unsigned char* src, char*& dst, uint8_t mask_tail_octets = 0,
103
1.49M
                        const char* mask_string = "xxx") {
104
1.49M
    format_ipv4(src, 4, dst, mask_tail_octets, mask_string);
105
1.49M
}
106
107
/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string.
108
 *
109
 * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`,
110
 * which should be long enough.
111
 * That is "127.0.0.1" becomes 0x7f000001.
112
 *
113
 * In case of failure doesn't modify buffer pointed by `dst`.
114
 *
115
 * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position())
116
 *           and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity.
117
 *           To parse strings use overloads below.
118
 *
119
 * @param src         - iterator (reference to pointer) over input string - warning - continuity is not guaranteed.
120
 * @param eof         - function returning true if iterator riched the end - warning - can break iterator's continuity.
121
 * @param dst         - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long.
122
 * @param first_octet - preparsed first octet
123
 * @return            - true if parsed successfully, false otherwise.
124
 */
125
template <typename T, typename EOFfunction>
126
    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
127
83.4k
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
128
83.4k
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
129
0
        return false;
130
0
    }
131
132
83.4k
    UInt32 result = 0;
133
83.4k
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
134
83.4k
    if (first_octet >= 0) {
135
78
        result |= first_octet << offset;
136
78
        offset -= IPV4_OCTET_BITS;
137
78
    }
138
139
329k
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
140
329k
        if (eof()) {
141
61
            return false;
142
61
        }
143
144
329k
        UInt32 value = 0;
145
329k
        size_t len = 0;
146
824k
        while (is_numeric_ascii(*src) && len <= 3) {
147
576k
            value = value * DECIMAL_BASE + (*src - '0');
148
576k
            ++len;
149
576k
            ++src;
150
576k
            if (eof()) {
151
81.8k
                break;
152
81.8k
            }
153
576k
        }
154
329k
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
155
1.53k
            return false;
156
1.53k
        }
157
328k
        result |= value << offset;
158
159
328k
        if (offset == 0) {
160
81.8k
            break;
161
81.8k
        }
162
328k
    }
163
164
81.8k
    memcpy(dst, &result, sizeof(result));
165
81.8k
    return true;
166
83.4k
}
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv4EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
127
83.3k
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
128
83.3k
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
129
0
        return false;
130
0
    }
131
132
83.3k
    UInt32 result = 0;
133
83.3k
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
134
83.3k
    if (first_octet >= 0) {
135
0
        result |= first_octet << offset;
136
0
        offset -= IPV4_OCTET_BITS;
137
0
    }
138
139
329k
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
140
329k
        if (eof()) {
141
61
            return false;
142
61
        }
143
144
329k
        UInt32 value = 0;
145
329k
        size_t len = 0;
146
823k
        while (is_numeric_ascii(*src) && len <= 3) {
147
576k
            value = value * DECIMAL_BASE + (*src - '0');
148
576k
            ++len;
149
576k
            ++src;
150
576k
            if (eof()) {
151
81.7k
                break;
152
81.7k
            }
153
576k
        }
154
329k
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
155
1.53k
            return false;
156
1.53k
        }
157
327k
        result |= value << offset;
158
159
327k
        if (offset == 0) {
160
81.7k
            break;
161
81.7k
        }
162
327k
    }
163
164
81.7k
    memcpy(dst, &result, sizeof(result));
165
81.7k
    return true;
166
83.3k
}
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv4EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
127
1
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
128
1
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
129
0
        return false;
130
0
    }
131
132
1
    UInt32 result = 0;
133
1
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
134
1
    if (first_octet >= 0) {
135
0
        result |= first_octet << offset;
136
0
        offset -= IPV4_OCTET_BITS;
137
0
    }
138
139
4
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
140
4
        if (eof()) {
141
0
            return false;
142
0
        }
143
144
4
        UInt32 value = 0;
145
4
        size_t len = 0;
146
10
        while (is_numeric_ascii(*src) && len <= 3) {
147
6
            value = value * DECIMAL_BASE + (*src - '0');
148
6
            ++len;
149
6
            ++src;
150
6
            if (eof()) {
151
0
                break;
152
0
            }
153
6
        }
154
4
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
155
0
            return false;
156
0
        }
157
4
        result |= value << offset;
158
159
4
        if (offset == 0) {
160
1
            break;
161
1
        }
162
4
    }
163
164
1
    memcpy(dst, &result, sizeof(result));
165
1
    return true;
166
1
}
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
127
78
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
128
78
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
129
0
        return false;
130
0
    }
131
132
78
    UInt32 result = 0;
133
78
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
134
78
    if (first_octet >= 0) {
135
78
        result |= first_octet << offset;
136
78
        offset -= IPV4_OCTET_BITS;
137
78
    }
138
139
233
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
140
233
        if (eof()) {
141
0
            return false;
142
0
        }
143
144
233
        UInt32 value = 0;
145
233
        size_t len = 0;
146
659
        while (is_numeric_ascii(*src) && len <= 3) {
147
503
            value = value * DECIMAL_BASE + (*src - '0');
148
503
            ++len;
149
503
            ++src;
150
503
            if (eof()) {
151
77
                break;
152
77
            }
153
503
        }
154
233
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
155
2
            return false;
156
2
        }
157
231
        result |= value << offset;
158
159
231
        if (offset == 0) {
160
76
            break;
161
76
        }
162
231
    }
163
164
76
    memcpy(dst, &result, sizeof(result));
165
76
    return true;
166
78
}
Unexecuted instantiation: _ZN5doris10parse_ipv4IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
167
168
/// returns pointer to the right after parsed sequence or null on failed parsing
169
83.3k
inline const char* parse_ipv4(const char* src, const char* end, unsigned char* dst) {
170
83.3k
    if (parse_ipv4(
171
1.15M
                src, [&src, end]() { return src == end; }, dst)) {
172
81.7k
        return src;
173
81.7k
    }
174
1.59k
    return nullptr;
175
83.3k
}
176
177
/// returns true if whole buffer was parsed successfully
178
83.3k
inline bool parse_ipv4_whole(const char* src, const char* end, unsigned char* dst) {
179
83.3k
    return parse_ipv4(src, end, dst) == end;
180
83.3k
}
181
182
/// returns pointer to the right after parsed sequence or null on failed parsing
183
1
inline const char* parse_ipv4(const char* src, unsigned char* dst) {
184
1
    if (parse_ipv4(
185
1
                src, []() { return false; }, dst)) {
186
1
        return src;
187
1
    }
188
0
    return nullptr;
189
1
}
190
191
/// returns true if whole null-terminated string was parsed successfully
192
1
inline bool parse_ipv4_whole(const char* src, unsigned char* dst) {
193
1
    const char* end = parse_ipv4(src, dst);
194
1
    return end != nullptr && *end == '\0';
195
1
}
196
197
/// integer logarithm, return ceil(log(value, base)) (the smallest integer greater or equal than log(value, base)
198
0
inline constexpr UInt32 int_log(const UInt32 value, const UInt32 base, const bool carry) {
199
0
    return value >= base ? 1 + int_log(value / base, base, value % base || carry)
200
0
                         : value % base > 1 || carry;
201
0
}
202
203
/// Print integer in desired base, faster than sprintf.
204
/// NOTE This is not the best way. See https://github.com/miloyip/itoa-benchmark
205
/// But it doesn't matter here.
206
template <UInt32 base, typename T>
207
101k
inline void print_integer(char*& out, T value) {
208
101k
    if (value == 0) {
209
1
        *out++ = '0';
210
101k
    } else {
211
101k
        constexpr size_t buffer_size = sizeof(T) * int_log(256, base, false);
212
213
101k
        char buf[buffer_size];
214
101k
        auto ptr = buf;
215
216
417k
        while (value > 0) {
217
315k
            *ptr = hex_digit_lowercase(value % base);
218
315k
            ++ptr;
219
315k
            value /= base;
220
315k
        }
221
222
        /// Copy to out reversed.
223
417k
        while (ptr != buf) {
224
315k
            --ptr;
225
315k
            *out = *ptr;
226
315k
            ++out;
227
315k
        }
228
101k
    }
229
101k
}
230
231
/** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
232
  * performs significantly faster than the reference implementation due to the absence of sprintf calls,
233
  * bounds checking, unnecessary string copying and length calculation.
234
  * @param src         - pointer to IPv6 (16 bytes) stored in little-endian byte order
235
  * @param dst         - where to put format result bytes
236
  * @param zeroed_tail_bytes_count - the parameter is currently not being used
237
  */
238
1.48M
inline void format_ipv6(unsigned char* src, char*& dst, uint8_t zeroed_tail_bytes_count = 0) {
239
1.48M
    struct {
240
1.48M
        Int64 base, len;
241
1.48M
    } best {-1, 0}, cur {-1, 0};
242
1.48M
    std::array<UInt16, IPV6_BINARY_LENGTH / sizeof(UInt16)> words {};
243
244
    // the current function logic is processed in big endian manner
245
    // but ipv6 in doris is stored in little-endian byte order
246
    // so transfer to big-endian byte order first
247
    // compatible with parse_ipv6 function in format_ip.h
248
1.48M
    std::reverse(src, src + IPV6_BINARY_LENGTH);
249
250
    /** Preprocess:
251
        *    Copy the input (bytewise) array into a wordwise array.
252
        *    Find the longest run of 0x00's in src[] for :: shorthanding. */
253
13.3M
    for (size_t i = 0; i < (IPV6_BINARY_LENGTH - zeroed_tail_bytes_count); i += 2) {
254
11.8M
        words[i / 2] = (uint16_t)(src[i] << 8) | src[i + 1];
255
11.8M
    }
256
257
13.3M
    for (size_t i = 0; i < words.size(); i++) {
258
11.8M
        if (words[i] == 0) {
259
11.7M
            if (cur.base == -1) {
260
1.48M
                cur.base = i;
261
1.48M
                cur.len = 1;
262
10.3M
            } else {
263
10.3M
                cur.len++;
264
10.3M
            }
265
11.7M
        } else {
266
101k
            if (cur.base != -1) {
267
28.2k
                if (best.base == -1 || cur.len > best.len) {
268
28.2k
                    best = cur;
269
28.2k
                }
270
28.2k
                cur.base = -1;
271
28.2k
            }
272
101k
        }
273
11.8M
    }
274
275
1.48M
    if (cur.base != -1) {
276
1.45M
        if (best.base == -1 || cur.len > best.len) {
277
1.45M
            best = cur;
278
1.45M
        }
279
1.45M
    }
280
1.48M
    if (best.base != -1 && best.len < 2) {
281
0
        best.base = -1;
282
0
    }
283
284
    /// Format the result.
285
13.3M
    for (size_t i = 0; i < words.size(); i++) {
286
        /// Are we inside the best run of 0x00's?
287
11.8M
        if (best.base != -1) {
288
11.8M
            auto best_base = static_cast<size_t>(best.base);
289
11.8M
            if (i >= best_base && i < (best_base + best.len)) {
290
11.7M
                if (i == best_base) {
291
1.48M
                    *dst++ = ':';
292
1.48M
                }
293
11.7M
                continue;
294
11.7M
            }
295
11.8M
        }
296
        /// Are we following an initial run of 0x00s or any real hex?
297
101k
        if (i != 0) {
298
71.8k
            *dst++ = ':';
299
71.8k
        }
300
        /// Is this address an encapsulated IPv4?
301
101k
        if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu))) {
302
9
            uint8_t ipv4_buffer[IPV4_BINARY_LENGTH] = {0};
303
9
            memcpy(ipv4_buffer, src + 12, IPV4_BINARY_LENGTH);
304
            // Due to historical reasons format_ipv4() takes ipv4 in BE format, but inside ipv6 we store it in LE-format.
305
9
            if constexpr (std::endian::native == std::endian::little) {
306
9
                std::reverse(std::begin(ipv4_buffer), std::end(ipv4_buffer));
307
9
            }
308
9
            format_ipv4(ipv4_buffer, dst,
309
9
                        std::min(zeroed_tail_bytes_count, static_cast<uint8_t>(IPV4_BINARY_LENGTH)),
310
9
                        "0");
311
            // format_ipv4 has already added a null-terminator for us.
312
9
            return;
313
9
        }
314
101k
        print_integer<16>(dst, words[i]);
315
101k
    }
316
317
    /// Was it a trailing run of 0x00's?
318
1.48M
    if (best.base != -1 &&
319
1.48M
        static_cast<size_t>(best.base) + static_cast<size_t>(best.len) == words.size()) {
320
1.45M
        *dst++ = ':';
321
1.45M
    }
322
1.48M
}
323
324
/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv6 string.
325
*
326
* Parses the input string `src` and stores binary little-endian value into buffer pointed by `dst`,
327
* which should be long enough. In case of failure zeroes IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`.
328
*
329
* WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position())
330
*           and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity.
331
*           To parse strings use overloads below.
332
*
333
* @param src         - iterator (reference to pointer) over input string - warning - continuity is not guaranteed.
334
* @param eof         - function returning true if iterator riched the end - warning - can break iterator's continuity.
335
* @param dst         - where to put output bytes in little-endian byte order, expected to be non-null and at IPV6_BINARY_LENGTH-long.
336
* @param first_block - preparsed first block
337
* @return            - true if parsed successfully, false otherwise.
338
*/
339
template <typename T, typename EOFfunction>
340
    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
341
60.8k
inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) {
342
60.8k
    const auto clear_dst = [dst]() {
343
5.42k
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
344
5.42k
        return false;
345
5.42k
    };
_ZZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_iENKUlvE_clEv
Line
Count
Source
342
5.42k
    const auto clear_dst = [dst]() {
343
5.42k
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
344
5.42k
        return false;
345
5.42k
    };
Unexecuted instantiation: _ZZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_iENKUlvE_clEv
346
347
60.8k
    if (src == nullptr || eof()) return clear_dst();
348
349
60.8k
    int groups = 0;            /// number of parsed groups
350
60.8k
    unsigned char* iter = dst; /// iterator over dst buffer
351
60.8k
    unsigned char* zptr =
352
60.8k
            nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started
353
354
60.8k
    std::memset(dst, '\0', IPV6_BINARY_LENGTH);
355
356
60.8k
    if (first_block >= 0) {
357
0
        *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu);
358
0
        *iter++ = static_cast<unsigned char>(first_block & 0xffu);
359
0
        if (*src == ':') {
360
0
            zptr = iter;
361
0
            ++src;
362
0
        }
363
0
        ++groups;
364
0
    }
365
366
60.8k
    bool group_start = true;
367
368
495k
    while (!eof() && groups < 8) {
369
436k
        if (*src == ':') {
370
372k
            ++src;
371
372k
            if (eof()) /// trailing colon is not allowed
372
31
                return clear_dst();
373
374
372k
            group_start = true;
375
376
372k
            if (*src == ':') {
377
3.59k
                if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed
378
152
                    return clear_dst();
379
3.43k
                zptr = iter;
380
3.43k
                ++src;
381
3.43k
                if (!eof() && *src == ':') {
382
                    /// more than one all-zeroes block is not allowed
383
5
                    return clear_dst();
384
5
                }
385
3.43k
                continue;
386
3.43k
            }
387
368k
            if (groups == 0) /// leading colon is not allowed
388
0
                return clear_dst();
389
368k
        }
390
391
        /// mixed IPv4 parsing
392
433k
        if (*src == '.') {
393
78
            if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first
394
0
                return clear_dst();
395
396
78
            if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group
397
0
                return clear_dst();
398
399
78
            ++src;
400
78
            if (eof()) return clear_dst();
401
402
            /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4
403
78
            --groups;
404
78
            iter -= 2;
405
406
78
            UInt16 num = 0;
407
234
            for (int i = 0; i < 2; ++i) {
408
156
                unsigned char first = (iter[i] >> 4) & 0x0fu;
409
156
                unsigned char second = iter[i] & 0x0fu;
410
156
                if (first > 9 || second > 9) return clear_dst();
411
156
                (num *= 100) += first * 10 + second;
412
156
            }
413
78
            if (num > 255) return clear_dst();
414
415
            /// parse IPv4 with known first octet
416
78
            if (!parse_ipv4(src, eof, iter, num)) return clear_dst();
417
418
            if constexpr (std::endian::native == std::endian::little)
419
76
                std::reverse(iter, iter + IPV4_BINARY_LENGTH);
420
421
76
            iter += 4;
422
76
            groups += 2;
423
76
            break; /// IPv4 block is the last - end of parsing
424
78
        }
425
426
433k
        if (!group_start) /// end of parsing
427
815
            break;
428
432k
        group_start = false;
429
430
432k
        UInt16 val = 0;  /// current decoded group
431
432k
        int xdigits = 0; /// number of decoded hex digits in current group
432
433
1.28M
        for (; !eof() && xdigits < 4; ++src, ++xdigits) {
434
1.10M
            UInt8 num = unhex(*src);
435
1.10M
            if (num == 0xFF) break;
436
857k
            (val <<= 4) |= num;
437
857k
        }
438
439
432k
        if (xdigits == 0) /// end of parsing
440
920
            break;
441
442
431k
        *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu);
443
431k
        *iter++ = static_cast<unsigned char>(val & 0xffu);
444
431k
        ++groups;
445
431k
    }
446
447
    /// either all 8 groups or all-zeroes block should be present
448
60.6k
    if (groups < 8 && zptr == nullptr) return clear_dst();
449
450
    /// process all-zeroes block
451
55.4k
    if (zptr != nullptr) {
452
3.24k
        if (groups == 8) {
453
            /// all-zeroes block at least should be one
454
            /// 2001:0db8:86a3::08d3:1319:8a2e:0370:7344 not valid
455
2
            return clear_dst();
456
2
        }
457
3.24k
        size_t msize = iter - zptr;
458
3.24k
        std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize);
459
3.24k
        std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst));
460
3.24k
    }
461
462
    /// the current function logic is processed in big endian manner
463
    /// but ipv6 in doris is stored in little-endian byte order
464
    /// so transfer to little-endian
465
55.4k
    std::reverse(dst, dst + IPV6_BINARY_LENGTH);
466
467
55.4k
    return true;
468
55.4k
}
_ZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
341
60.8k
inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) {
342
60.8k
    const auto clear_dst = [dst]() {
343
60.8k
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
344
60.8k
        return false;
345
60.8k
    };
346
347
60.8k
    if (src == nullptr || eof()) return clear_dst();
348
349
60.8k
    int groups = 0;            /// number of parsed groups
350
60.8k
    unsigned char* iter = dst; /// iterator over dst buffer
351
60.8k
    unsigned char* zptr =
352
60.8k
            nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started
353
354
60.8k
    std::memset(dst, '\0', IPV6_BINARY_LENGTH);
355
356
60.8k
    if (first_block >= 0) {
357
0
        *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu);
358
0
        *iter++ = static_cast<unsigned char>(first_block & 0xffu);
359
0
        if (*src == ':') {
360
0
            zptr = iter;
361
0
            ++src;
362
0
        }
363
0
        ++groups;
364
0
    }
365
366
60.8k
    bool group_start = true;
367
368
495k
    while (!eof() && groups < 8) {
369
436k
        if (*src == ':') {
370
372k
            ++src;
371
372k
            if (eof()) /// trailing colon is not allowed
372
31
                return clear_dst();
373
374
372k
            group_start = true;
375
376
372k
            if (*src == ':') {
377
3.59k
                if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed
378
152
                    return clear_dst();
379
3.43k
                zptr = iter;
380
3.43k
                ++src;
381
3.43k
                if (!eof() && *src == ':') {
382
                    /// more than one all-zeroes block is not allowed
383
5
                    return clear_dst();
384
5
                }
385
3.43k
                continue;
386
3.43k
            }
387
368k
            if (groups == 0) /// leading colon is not allowed
388
0
                return clear_dst();
389
368k
        }
390
391
        /// mixed IPv4 parsing
392
433k
        if (*src == '.') {
393
78
            if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first
394
0
                return clear_dst();
395
396
78
            if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group
397
0
                return clear_dst();
398
399
78
            ++src;
400
78
            if (eof()) return clear_dst();
401
402
            /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4
403
78
            --groups;
404
78
            iter -= 2;
405
406
78
            UInt16 num = 0;
407
234
            for (int i = 0; i < 2; ++i) {
408
156
                unsigned char first = (iter[i] >> 4) & 0x0fu;
409
156
                unsigned char second = iter[i] & 0x0fu;
410
156
                if (first > 9 || second > 9) return clear_dst();
411
156
                (num *= 100) += first * 10 + second;
412
156
            }
413
78
            if (num > 255) return clear_dst();
414
415
            /// parse IPv4 with known first octet
416
78
            if (!parse_ipv4(src, eof, iter, num)) return clear_dst();
417
418
            if constexpr (std::endian::native == std::endian::little)
419
76
                std::reverse(iter, iter + IPV4_BINARY_LENGTH);
420
421
76
            iter += 4;
422
76
            groups += 2;
423
76
            break; /// IPv4 block is the last - end of parsing
424
78
        }
425
426
433k
        if (!group_start) /// end of parsing
427
815
            break;
428
432k
        group_start = false;
429
430
432k
        UInt16 val = 0;  /// current decoded group
431
432k
        int xdigits = 0; /// number of decoded hex digits in current group
432
433
1.28M
        for (; !eof() && xdigits < 4; ++src, ++xdigits) {
434
1.10M
            UInt8 num = unhex(*src);
435
1.10M
            if (num == 0xFF) break;
436
857k
            (val <<= 4) |= num;
437
857k
        }
438
439
432k
        if (xdigits == 0) /// end of parsing
440
920
            break;
441
442
431k
        *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu);
443
431k
        *iter++ = static_cast<unsigned char>(val & 0xffu);
444
431k
        ++groups;
445
431k
    }
446
447
    /// either all 8 groups or all-zeroes block should be present
448
60.6k
    if (groups < 8 && zptr == nullptr) return clear_dst();
449
450
    /// process all-zeroes block
451
55.4k
    if (zptr != nullptr) {
452
3.24k
        if (groups == 8) {
453
            /// all-zeroes block at least should be one
454
            /// 2001:0db8:86a3::08d3:1319:8a2e:0370:7344 not valid
455
2
            return clear_dst();
456
2
        }
457
3.24k
        size_t msize = iter - zptr;
458
3.24k
        std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize);
459
3.24k
        std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst));
460
3.24k
    }
461
462
    /// the current function logic is processed in big endian manner
463
    /// but ipv6 in doris is stored in little-endian byte order
464
    /// so transfer to little-endian
465
55.4k
    std::reverse(dst, dst + IPV6_BINARY_LENGTH);
466
467
55.4k
    return true;
468
55.4k
}
Unexecuted instantiation: _ZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
469
470
/// returns pointer to the right after parsed sequence or null on failed parsing
471
60.8k
inline const char* parse_ipv6(const char* src, const char* end, unsigned char* dst) {
472
60.8k
    if (parse_ipv6(
473
2.22M
                src, [&src, end]() { return src == end; }, dst))
474
55.4k
        return src;
475
5.42k
    return nullptr;
476
60.8k
}
477
478
/// returns true if whole buffer was parsed successfully
479
60.8k
inline bool parse_ipv6_whole(const char* src, const char* end, unsigned char* dst) {
480
60.8k
    return parse_ipv6(src, end, dst) == end;
481
60.8k
}
482
483
/// returns pointer to the right after parsed sequence or null on failed parsing
484
0
inline const char* parse_ipv6(const char* src, unsigned char* dst) {
485
0
    if (parse_ipv6(
486
0
                src, []() { return false; }, dst))
487
0
        return src;
488
0
    return nullptr;
489
0
}
490
491
/// returns true if whole null-terminated string was parsed successfully
492
0
inline bool parse_ipv6_whole(const char* src, unsigned char* dst) {
493
0
    const char* end = parse_ipv6(src, dst);
494
0
    return end != nullptr && *end == '\0';
495
0
}
496
497
} // namespace doris