Coverage Report

Created: 2026-03-15 01:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/format_ip.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/formatIPv6.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <algorithm>
24
#include <array>
25
#include <bit>
26
#include <cstdint>
27
#include <cstring>
28
#include <utility>
29
30
#include "core/types.h"
31
#include "exec/common/hex.h"
32
#include "exec/common/string_utils/string_utils.h"
33
34
constexpr size_t IPV4_BINARY_LENGTH = 4;
35
constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte.
36
constexpr size_t IPV6_MAX_TEXT_LENGTH = 39;
37
constexpr size_t IPV4_MIN_NUM_VALUE = 0;          //num value of '0.0.0.0'
38
constexpr size_t IPV4_MAX_NUM_VALUE = 4294967295; //num value of '255.255.255.255'
39
constexpr int IPV4_MAX_OCTET_VALUE = 255;         //max value of octet
40
constexpr size_t IPV4_OCTET_BITS = 8;
41
constexpr size_t DECIMAL_BASE = 10;
42
constexpr size_t IPV6_BINARY_LENGTH = 16;
43
44
namespace doris {
45
#include "common/compile_check_begin.h"
46
47
extern const std::array<std::pair<const char*, size_t>, 256> one_byte_to_string_lookup_table;
48
49
/** Format 4-byte binary sequence as IPv4 text: 'aaa.bbb.ccc.ddd',
50
  * expects the input to be in BE-format, that is 0x7f000001 => "127.0.0.1".
51
  *
52
  * Any number of the tail bytes can be masked with given mask string.
53
  *
54
  * Assumptions:
55
  *     src is IPV4_BINARY_LENGTH long,
56
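  *     dst has room for IPV4_MAX_TEXT_LENGTH + 1 bytes (a trailing '.' is written, then dropped; more if mask_string is longer than 3 chars),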
57
  *     mask_tail_octets <= IPV4_BINARY_LENGTH
58
  *     mask_string is NON-NULL, if mask_tail_octets > 0.
59
  *
60
  * Examples:
61
  *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 0, nullptr);
62
  *         > dst == "127.0.0.1"
63
  *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "xxx");
64
  *         > dst == "127.0.0.xxx"
65
  *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "0");
66
  *         > dst == "127.0.0.0"
67
  */
68
inline void format_ipv4(const unsigned char* src, size_t src_size, char*& dst,
69
1.49M
                        uint8_t mask_tail_octets = 0, const char* mask_string = "xxx") {
70
1.49M
    const size_t mask_length = mask_string ? strlen(mask_string) : 0;
71
1.49M
    const size_t limit = std::min(IPV4_BINARY_LENGTH, IPV4_BINARY_LENGTH - mask_tail_octets);
72
1.49M
    const size_t padding = std::min(4 - src_size, limit);
73
1.49M
    for (size_t octet = 0; octet < padding; ++octet) {
74
0
        *dst++ = '0';
75
0
        *dst++ = '.';
76
0
    }
77
78
7.49M
    for (size_t octet = 4 - src_size; octet < limit; ++octet) {
79
5.99M
        uint8_t value = 0;
80
        if constexpr (std::endian::native == std::endian::little)
81
5.99M
            value = static_cast<uint8_t>(src[IPV4_BINARY_LENGTH - octet - 1]);
82
        else
83
            value = static_cast<uint8_t>(src[octet]);
84
5.99M
        const uint8_t len = static_cast<uint8_t>(one_byte_to_string_lookup_table[value].second);
85
5.99M
        const char* str = one_byte_to_string_lookup_table[value].first;
86
87
5.99M
        memcpy(dst, str, len);
88
5.99M
        dst += len;
89
90
5.99M
        *dst++ = '.';
91
5.99M
    }
92
93
1.49M
    for (size_t mask = 0; mask < mask_tail_octets; ++mask) {
94
6
        memcpy(dst, mask_string, mask_length);
95
6
        dst += mask_length;
96
97
6
        *dst++ = '.';
98
6
    }
99
100
1.49M
    dst--;
101
1.49M
}
102
103
inline void format_ipv4(const unsigned char* src, char*& dst, uint8_t mask_tail_octets = 0,
104
1.49M
                        const char* mask_string = "xxx") {
105
1.49M
    format_ipv4(src, 4, dst, mask_tail_octets, mask_string);
106
1.49M
}
107
108
/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string.
109
 *
110
 * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`,
111
 * which should be long enough.
112
 * That is "127.0.0.1" becomes 0x7f000001.
113
 *
114
 * In case of failure doesn't modify buffer pointed by `dst`.
115
 *
116
 * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position())
117
 *           and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity.
118
 *           To parse strings use overloads below.
119
 *
120
 * @param src         - iterator (reference to pointer) over input string - warning - continuity is not guaranteed.
121
 * @param eof         - function returning true if iterator reached the end - warning - can break iterator's continuity.
122
 * @param dst         - where to put output bytes, expected to be non-null and at least IPV4_BINARY_LENGTH bytes long.
123
 * @param first_octet - preparsed first octet
124
 * @return            - true if parsed successfully, false otherwise.
125
 */
126
template <typename T, typename EOFfunction>
127
    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
128
83.4k
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
129
83.4k
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
130
0
        return false;
131
0
    }
132
133
83.4k
    UInt32 result = 0;
134
83.4k
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
135
83.4k
    if (first_octet >= 0) {
136
78
        result |= first_octet << offset;
137
78
        offset -= IPV4_OCTET_BITS;
138
78
    }
139
140
329k
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
141
329k
        if (eof()) {
142
61
            return false;
143
61
        }
144
145
329k
        UInt32 value = 0;
146
329k
        size_t len = 0;
147
824k
        while (is_numeric_ascii(*src) && len <= 3) {
148
576k
            value = value * DECIMAL_BASE + (*src - '0');
149
576k
            ++len;
150
576k
            ++src;
151
576k
            if (eof()) {
152
81.8k
                break;
153
81.8k
            }
154
576k
        }
155
329k
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
156
1.53k
            return false;
157
1.53k
        }
158
328k
        result |= value << offset;
159
160
328k
        if (offset == 0) {
161
81.8k
            break;
162
81.8k
        }
163
328k
    }
164
165
81.8k
    memcpy(dst, &result, sizeof(result));
166
81.8k
    return true;
167
83.4k
}
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv4EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
128
83.3k
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
129
83.3k
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
130
0
        return false;
131
0
    }
132
133
83.3k
    UInt32 result = 0;
134
83.3k
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
135
83.3k
    if (first_octet >= 0) {
136
0
        result |= first_octet << offset;
137
0
        offset -= IPV4_OCTET_BITS;
138
0
    }
139
140
329k
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
141
329k
        if (eof()) {
142
61
            return false;
143
61
        }
144
145
329k
        UInt32 value = 0;
146
329k
        size_t len = 0;
147
823k
        while (is_numeric_ascii(*src) && len <= 3) {
148
576k
            value = value * DECIMAL_BASE + (*src - '0');
149
576k
            ++len;
150
576k
            ++src;
151
576k
            if (eof()) {
152
81.7k
                break;
153
81.7k
            }
154
576k
        }
155
329k
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
156
1.53k
            return false;
157
1.53k
        }
158
327k
        result |= value << offset;
159
160
327k
        if (offset == 0) {
161
81.7k
            break;
162
81.7k
        }
163
327k
    }
164
165
81.7k
    memcpy(dst, &result, sizeof(result));
166
81.7k
    return true;
167
83.3k
}
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv4EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
128
1
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
129
1
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
130
0
        return false;
131
0
    }
132
133
1
    UInt32 result = 0;
134
1
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
135
1
    if (first_octet >= 0) {
136
0
        result |= first_octet << offset;
137
0
        offset -= IPV4_OCTET_BITS;
138
0
    }
139
140
4
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
141
4
        if (eof()) {
142
0
            return false;
143
0
        }
144
145
4
        UInt32 value = 0;
146
4
        size_t len = 0;
147
10
        while (is_numeric_ascii(*src) && len <= 3) {
148
6
            value = value * DECIMAL_BASE + (*src - '0');
149
6
            ++len;
150
6
            ++src;
151
6
            if (eof()) {
152
0
                break;
153
0
            }
154
6
        }
155
4
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
156
0
            return false;
157
0
        }
158
4
        result |= value << offset;
159
160
4
        if (offset == 0) {
161
1
            break;
162
1
        }
163
4
    }
164
165
1
    memcpy(dst, &result, sizeof(result));
166
1
    return true;
167
1
}
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
128
78
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
129
78
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
130
0
        return false;
131
0
    }
132
133
78
    UInt32 result = 0;
134
78
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
135
78
    if (first_octet >= 0) {
136
78
        result |= first_octet << offset;
137
78
        offset -= IPV4_OCTET_BITS;
138
78
    }
139
140
233
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
141
233
        if (eof()) {
142
0
            return false;
143
0
        }
144
145
233
        UInt32 value = 0;
146
233
        size_t len = 0;
147
659
        while (is_numeric_ascii(*src) && len <= 3) {
148
503
            value = value * DECIMAL_BASE + (*src - '0');
149
503
            ++len;
150
503
            ++src;
151
503
            if (eof()) {
152
77
                break;
153
77
            }
154
503
        }
155
233
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
156
2
            return false;
157
2
        }
158
231
        result |= value << offset;
159
160
231
        if (offset == 0) {
161
76
            break;
162
76
        }
163
231
    }
164
165
76
    memcpy(dst, &result, sizeof(result));
166
76
    return true;
167
78
}
Unexecuted instantiation: _ZN5doris10parse_ipv4IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
168
169
/// returns pointer to the right after parsed sequence or null on failed parsing
170
83.3k
inline const char* parse_ipv4(const char* src, const char* end, unsigned char* dst) {
171
83.3k
    if (parse_ipv4(
172
1.15M
                src, [&src, end]() { return src == end; }, dst)) {
173
81.7k
        return src;
174
81.7k
    }
175
1.59k
    return nullptr;
176
83.3k
}
177
178
/// returns true if whole buffer was parsed successfully
179
83.3k
inline bool parse_ipv4_whole(const char* src, const char* end, unsigned char* dst) {
180
83.3k
    return parse_ipv4(src, end, dst) == end;
181
83.3k
}
182
183
/// returns pointer to the right after parsed sequence or null on failed parsing
184
1
inline const char* parse_ipv4(const char* src, unsigned char* dst) {
185
1
    if (parse_ipv4(
186
1
                src, []() { return false; }, dst)) {
187
1
        return src;
188
1
    }
189
0
    return nullptr;
190
1
}
191
192
/// returns true if whole null-terminated string was parsed successfully
193
1
inline bool parse_ipv4_whole(const char* src, unsigned char* dst) {
194
1
    const char* end = parse_ipv4(src, dst);
195
1
    return end != nullptr && *end == '\0';
196
1
}
197
198
/// integer logarithm, returns ceil(log(value, base)) (the smallest integer greater than or equal to log(value, base))
199
0
inline constexpr UInt32 int_log(const UInt32 value, const UInt32 base, const bool carry) {
200
0
    return value >= base ? 1 + int_log(value / base, base, value % base || carry)
201
0
                         : value % base > 1 || carry;
202
0
}
203
204
/// Print integer in desired base, faster than sprintf.
205
/// NOTE This is not the best way. See https://github.com/miloyip/itoa-benchmark
206
/// But it doesn't matter here.
207
template <UInt32 base, typename T>
208
101k
inline void print_integer(char*& out, T value) {
209
101k
    if (value == 0) {
210
1
        *out++ = '0';
211
101k
    } else {
212
101k
        constexpr size_t buffer_size = sizeof(T) * int_log(256, base, false);
213
214
101k
        char buf[buffer_size];
215
101k
        auto ptr = buf;
216
217
417k
        while (value > 0) {
218
315k
            *ptr = hex_digit_lowercase(value % base);
219
315k
            ++ptr;
220
315k
            value /= base;
221
315k
        }
222
223
        /// Copy to out reversed.
224
417k
        while (ptr != buf) {
225
315k
            --ptr;
226
315k
            *out = *ptr;
227
315k
            ++out;
228
315k
        }
229
101k
    }
230
101k
}
231
232
/** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
233
  * performs significantly faster than the reference implementation due to the absence of sprintf calls,
234
  * bounds checking, unnecessary string copying and length calculation.
235
  * @param src         - pointer to IPv6 (16 bytes) stored in little-endian byte order; NOTE: the buffer is byte-reversed in place by this function
236
  * @param dst         - where to put format result bytes
237
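  * @param zeroed_tail_bytes_count - limits how many leading bytes of the (reversed) src are read, the remaining tail is formatted as zeroes; also forwarded (capped at 4) as the tail mask of an embedded IPv4 address. Defaults to 0.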
238
  */
239
1.48M
inline void format_ipv6(unsigned char* src, char*& dst, uint8_t zeroed_tail_bytes_count = 0) {
240
1.48M
    struct {
241
1.48M
        Int64 base, len;
242
1.48M
    } best {-1, 0}, cur {-1, 0};
243
1.48M
    std::array<UInt16, IPV6_BINARY_LENGTH / sizeof(UInt16)> words {};
244
245
    // the current function logic is processed in big endian manner
246
    // but ipv6 in doris is stored in little-endian byte order
247
    // so transfer to big-endian byte order first
248
    // compatible with parse_ipv6 function in format_ip.h
249
1.48M
    std::reverse(src, src + IPV6_BINARY_LENGTH);
250
251
    /** Preprocess:
252
        *    Copy the input (bytewise) array into a wordwise array.
253
        *    Find the longest run of 0x00's in src[] for :: shorthanding. */
254
13.3M
    for (size_t i = 0; i < (IPV6_BINARY_LENGTH - zeroed_tail_bytes_count); i += 2) {
255
11.8M
        words[i / 2] = (uint16_t)(src[i] << 8) | src[i + 1];
256
11.8M
    }
257
258
13.3M
    for (size_t i = 0; i < words.size(); i++) {
259
11.8M
        if (words[i] == 0) {
260
11.7M
            if (cur.base == -1) {
261
1.48M
                cur.base = i;
262
1.48M
                cur.len = 1;
263
10.3M
            } else {
264
10.3M
                cur.len++;
265
10.3M
            }
266
11.7M
        } else {
267
101k
            if (cur.base != -1) {
268
28.2k
                if (best.base == -1 || cur.len > best.len) {
269
28.2k
                    best = cur;
270
28.2k
                }
271
28.2k
                cur.base = -1;
272
28.2k
            }
273
101k
        }
274
11.8M
    }
275
276
1.48M
    if (cur.base != -1) {
277
1.45M
        if (best.base == -1 || cur.len > best.len) {
278
1.45M
            best = cur;
279
1.45M
        }
280
1.45M
    }
281
1.48M
    if (best.base != -1 && best.len < 2) {
282
0
        best.base = -1;
283
0
    }
284
285
    /// Format the result.
286
13.3M
    for (size_t i = 0; i < words.size(); i++) {
287
        /// Are we inside the best run of 0x00's?
288
11.8M
        if (best.base != -1) {
289
11.8M
            auto best_base = static_cast<size_t>(best.base);
290
11.8M
            if (i >= best_base && i < (best_base + best.len)) {
291
11.7M
                if (i == best_base) {
292
1.48M
                    *dst++ = ':';
293
1.48M
                }
294
11.7M
                continue;
295
11.7M
            }
296
11.8M
        }
297
        /// Are we following an initial run of 0x00s or any real hex?
298
101k
        if (i != 0) {
299
71.8k
            *dst++ = ':';
300
71.8k
        }
301
        /// Is this address an encapsulated IPv4?
302
101k
        if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu))) {
303
9
            uint8_t ipv4_buffer[IPV4_BINARY_LENGTH] = {0};
304
9
            memcpy(ipv4_buffer, src + 12, IPV4_BINARY_LENGTH);
305
            // Due to historical reasons format_ipv4() takes ipv4 in BE format, but inside ipv6 we store it in LE-format.
306
9
            if constexpr (std::endian::native == std::endian::little) {
307
9
                std::reverse(std::begin(ipv4_buffer), std::end(ipv4_buffer));
308
9
            }
309
9
            format_ipv4(ipv4_buffer, dst,
310
9
                        std::min(zeroed_tail_bytes_count, static_cast<uint8_t>(IPV4_BINARY_LENGTH)),
311
9
                        "0");
312
            // format_ipv4 has already advanced dst past the formatted IPv4 text (no null-terminator is written).
313
9
            return;
314
9
        }
315
101k
        print_integer<16>(dst, words[i]);
316
101k
    }
317
318
    /// Was it a trailing run of 0x00's?
319
1.48M
    if (best.base != -1 &&
320
1.48M
        static_cast<size_t>(best.base) + static_cast<size_t>(best.len) == words.size()) {
321
1.45M
        *dst++ = ':';
322
1.45M
    }
323
1.48M
}
324
325
/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv6 string.
326
*
327
* Parses the input string `src` and stores binary little-endian value into buffer pointed by `dst`,
328
* which should be long enough. In case of failure zeroes IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`.
329
*
330
* WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position())
331
*           and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity.
332
*           To parse strings use overloads below.
333
*
334
* @param src         - iterator (reference to pointer) over input string - warning - continuity is not guaranteed.
335
* @param eof         - function returning true if iterator reached the end - warning - can break iterator's continuity.
336
* @param dst         - where to put output bytes in little-endian byte order, expected to be non-null and at least IPV6_BINARY_LENGTH bytes long.
337
* @param first_block - preparsed first block
338
* @return            - true if parsed successfully, false otherwise.
339
*/
340
template <typename T, typename EOFfunction>
341
    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
342
60.8k
inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) {
343
60.8k
    const auto clear_dst = [dst]() {
344
5.42k
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
345
5.42k
        return false;
346
5.42k
    };
_ZZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_iENKUlvE_clEv
Line
Count
Source
343
5.42k
    const auto clear_dst = [dst]() {
344
5.42k
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
345
5.42k
        return false;
346
5.42k
    };
Unexecuted instantiation: _ZZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_iENKUlvE_clEv
347
348
60.8k
    if (src == nullptr || eof()) return clear_dst();
349
350
60.8k
    int groups = 0;            /// number of parsed groups
351
60.8k
    unsigned char* iter = dst; /// iterator over dst buffer
352
60.8k
    unsigned char* zptr =
353
60.8k
            nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started
354
355
60.8k
    std::memset(dst, '\0', IPV6_BINARY_LENGTH);
356
357
60.8k
    if (first_block >= 0) {
358
0
        *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu);
359
0
        *iter++ = static_cast<unsigned char>(first_block & 0xffu);
360
0
        if (*src == ':') {
361
0
            zptr = iter;
362
0
            ++src;
363
0
        }
364
0
        ++groups;
365
0
    }
366
367
60.8k
    bool group_start = true;
368
369
495k
    while (!eof() && groups < 8) {
370
436k
        if (*src == ':') {
371
372k
            ++src;
372
372k
            if (eof()) /// trailing colon is not allowed
373
31
                return clear_dst();
374
375
372k
            group_start = true;
376
377
372k
            if (*src == ':') {
378
3.59k
                if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed
379
152
                    return clear_dst();
380
3.43k
                zptr = iter;
381
3.43k
                ++src;
382
3.43k
                if (!eof() && *src == ':') {
383
                    /// more than one all-zeroes block is not allowed
384
5
                    return clear_dst();
385
5
                }
386
3.43k
                continue;
387
3.43k
            }
388
368k
            if (groups == 0) /// leading colon is not allowed
389
0
                return clear_dst();
390
368k
        }
391
392
        /// mixed IPv4 parsing
393
433k
        if (*src == '.') {
394
78
            if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first
395
0
                return clear_dst();
396
397
78
            if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group
398
0
                return clear_dst();
399
400
78
            ++src;
401
78
            if (eof()) return clear_dst();
402
403
            /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4
404
78
            --groups;
405
78
            iter -= 2;
406
407
78
            UInt16 num = 0;
408
234
            for (int i = 0; i < 2; ++i) {
409
156
                unsigned char first = (iter[i] >> 4) & 0x0fu;
410
156
                unsigned char second = iter[i] & 0x0fu;
411
156
                if (first > 9 || second > 9) return clear_dst();
412
156
                (num *= 100) += first * 10 + second;
413
156
            }
414
78
            if (num > 255) return clear_dst();
415
416
            /// parse IPv4 with known first octet
417
78
            if (!parse_ipv4(src, eof, iter, num)) return clear_dst();
418
419
            if constexpr (std::endian::native == std::endian::little)
420
76
                std::reverse(iter, iter + IPV4_BINARY_LENGTH);
421
422
76
            iter += 4;
423
76
            groups += 2;
424
76
            break; /// IPv4 block is the last - end of parsing
425
78
        }
426
427
433k
        if (!group_start) /// end of parsing
428
815
            break;
429
432k
        group_start = false;
430
431
432k
        UInt16 val = 0;  /// current decoded group
432
432k
        int xdigits = 0; /// number of decoded hex digits in current group
433
434
1.28M
        for (; !eof() && xdigits < 4; ++src, ++xdigits) {
435
1.10M
            UInt8 num = unhex(*src);
436
1.10M
            if (num == 0xFF) break;
437
857k
            (val <<= 4) |= num;
438
857k
        }
439
440
432k
        if (xdigits == 0) /// end of parsing
441
920
            break;
442
443
431k
        *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu);
444
431k
        *iter++ = static_cast<unsigned char>(val & 0xffu);
445
431k
        ++groups;
446
431k
    }
447
448
    /// either all 8 groups or all-zeroes block should be present
449
60.6k
    if (groups < 8 && zptr == nullptr) return clear_dst();
450
451
    /// process all-zeroes block
452
55.4k
    if (zptr != nullptr) {
453
3.24k
        if (groups == 8) {
454
            /// the all-zeroes block ("::") must stand for at least one group
455
            /// 2001:0db8:86a3::08d3:1319:8a2e:0370:7344 not valid
456
2
            return clear_dst();
457
2
        }
458
3.24k
        size_t msize = iter - zptr;
459
3.24k
        std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize);
460
3.24k
        std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst));
461
3.24k
    }
462
463
    /// the current function logic is processed in big endian manner
464
    /// but ipv6 in doris is stored in little-endian byte order
465
    /// so transfer to little-endian
466
55.4k
    std::reverse(dst, dst + IPV6_BINARY_LENGTH);
467
468
55.4k
    return true;
469
55.4k
}
_ZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
342
60.8k
inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) {
343
60.8k
    const auto clear_dst = [dst]() {
344
60.8k
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
345
60.8k
        return false;
346
60.8k
    };
347
348
60.8k
    if (src == nullptr || eof()) return clear_dst();
349
350
60.8k
    int groups = 0;            /// number of parsed groups
351
60.8k
    unsigned char* iter = dst; /// iterator over dst buffer
352
60.8k
    unsigned char* zptr =
353
60.8k
            nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started
354
355
60.8k
    std::memset(dst, '\0', IPV6_BINARY_LENGTH);
356
357
60.8k
    if (first_block >= 0) {
358
0
        *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu);
359
0
        *iter++ = static_cast<unsigned char>(first_block & 0xffu);
360
0
        if (*src == ':') {
361
0
            zptr = iter;
362
0
            ++src;
363
0
        }
364
0
        ++groups;
365
0
    }
366
367
60.8k
    bool group_start = true;
368
369
495k
    while (!eof() && groups < 8) {
370
436k
        if (*src == ':') {
371
372k
            ++src;
372
372k
            if (eof()) /// trailing colon is not allowed
373
31
                return clear_dst();
374
375
372k
            group_start = true;
376
377
372k
            if (*src == ':') {
378
3.59k
                if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed
379
152
                    return clear_dst();
380
3.43k
                zptr = iter;
381
3.43k
                ++src;
382
3.43k
                if (!eof() && *src == ':') {
383
                    /// more than one all-zeroes block is not allowed
384
5
                    return clear_dst();
385
5
                }
386
3.43k
                continue;
387
3.43k
            }
388
368k
            if (groups == 0) /// leading colon is not allowed
389
0
                return clear_dst();
390
368k
        }
391
392
        /// mixed IPv4 parsing
393
433k
        if (*src == '.') {
394
78
            if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first
395
0
                return clear_dst();
396
397
78
            if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group
398
0
                return clear_dst();
399
400
78
            ++src;
401
78
            if (eof()) return clear_dst();
402
403
            /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4
404
78
            --groups;
405
78
            iter -= 2;
406
407
78
            UInt16 num = 0;
408
234
            for (int i = 0; i < 2; ++i) {
409
156
                unsigned char first = (iter[i] >> 4) & 0x0fu;
410
156
                unsigned char second = iter[i] & 0x0fu;
411
156
                if (first > 9 || second > 9) return clear_dst();
412
156
                (num *= 100) += first * 10 + second;
413
156
            }
414
78
            if (num > 255) return clear_dst();
415
416
            /// parse IPv4 with known first octet
417
78
            if (!parse_ipv4(src, eof, iter, num)) return clear_dst();
418
419
            if constexpr (std::endian::native == std::endian::little)
420
76
                std::reverse(iter, iter + IPV4_BINARY_LENGTH);
421
422
76
            iter += 4;
423
76
            groups += 2;
424
76
            break; /// IPv4 block is the last - end of parsing
425
78
        }
426
427
433k
        if (!group_start) /// end of parsing
428
815
            break;
429
432k
        group_start = false;
430
431
432k
        UInt16 val = 0;  /// current decoded group
432
432k
        int xdigits = 0; /// number of decoded hex digits in current group
433
434
1.28M
        for (; !eof() && xdigits < 4; ++src, ++xdigits) {
435
1.10M
            UInt8 num = unhex(*src);
436
1.10M
            if (num == 0xFF) break;
437
857k
            (val <<= 4) |= num;
438
857k
        }
439
440
432k
        if (xdigits == 0) /// end of parsing
441
920
            break;
442
443
431k
        *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu);
444
431k
        *iter++ = static_cast<unsigned char>(val & 0xffu);
445
431k
        ++groups;
446
431k
    }
447
448
    /// either all 8 groups or all-zeroes block should be present
449
60.6k
    if (groups < 8 && zptr == nullptr) return clear_dst();
450
451
    /// process all-zeroes block
452
55.4k
    if (zptr != nullptr) {
453
3.24k
        if (groups == 8) {
454
            /// the all-zeroes block ("::") must stand for at least one group
455
            /// 2001:0db8:86a3::08d3:1319:8a2e:0370:7344 not valid
456
2
            return clear_dst();
457
2
        }
458
3.24k
        size_t msize = iter - zptr;
459
3.24k
        std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize);
460
3.24k
        std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst));
461
3.24k
    }
462
463
    /// the current function logic is processed in big endian manner
464
    /// but ipv6 in doris is stored in little-endian byte order
465
    /// so transfer to little-endian
466
55.4k
    std::reverse(dst, dst + IPV6_BINARY_LENGTH);
467
468
55.4k
    return true;
469
55.4k
}
Unexecuted instantiation: _ZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
470
471
/// returns pointer to the right after parsed sequence or null on failed parsing
472
60.8k
inline const char* parse_ipv6(const char* src, const char* end, unsigned char* dst) {
473
60.8k
    if (parse_ipv6(
474
2.22M
                src, [&src, end]() { return src == end; }, dst))
475
55.4k
        return src;
476
5.42k
    return nullptr;
477
60.8k
}
478
479
/// returns true if whole buffer was parsed successfully
480
60.8k
inline bool parse_ipv6_whole(const char* src, const char* end, unsigned char* dst) {
481
60.8k
    return parse_ipv6(src, end, dst) == end;
482
60.8k
}
483
484
/// returns pointer to the right after parsed sequence or null on failed parsing
485
0
inline const char* parse_ipv6(const char* src, unsigned char* dst) {
486
0
    if (parse_ipv6(
487
0
                src, []() { return false; }, dst))
488
0
        return src;
489
0
    return nullptr;
490
0
}
491
492
/// returns true if whole null-terminated string was parsed successfully
493
0
inline bool parse_ipv6_whole(const char* src, unsigned char* dst) {
494
0
    const char* end = parse_ipv6(src, dst);
495
0
    return end != nullptr && *end == '\0';
496
0
}
497
498
#include "common/compile_check_end.h"
499
} // namespace doris