Coverage Report

Created: 2026-03-14 18:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/format_ip.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/formatIPv6.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <algorithm>
24
#include <array>
25
#include <bit>
26
#include <cstdint>
27
#include <cstring>
28
#include <utility>
29
30
#include "core/types.h"
31
#include "exec/common/hex.h"
32
#include "exec/common/string_utils/string_utils.h"
33
34
constexpr size_t IPV4_BINARY_LENGTH = 4;
35
constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte.
36
constexpr size_t IPV6_MAX_TEXT_LENGTH = 39;
37
constexpr size_t IPV4_MIN_NUM_VALUE = 0;          //num value of '0.0.0.0'
38
constexpr size_t IPV4_MAX_NUM_VALUE = 4294967295; //num value of '255.255.255.255'
39
constexpr int IPV4_MAX_OCTET_VALUE = 255;         //max value of octet
40
constexpr size_t IPV4_OCTET_BITS = 8;
41
constexpr size_t DECIMAL_BASE = 10;
42
constexpr size_t IPV6_BINARY_LENGTH = 16;
43
44
namespace doris {
45
#include "common/compile_check_begin.h"
46
47
extern const std::array<std::pair<const char*, size_t>, 256> one_byte_to_string_lookup_table;
48
49
/** Format 4-byte binary sequesnce as IPv4 text: 'aaa.bbb.ccc.ddd',
50
  * expects in out to be in BE-format, that is 0x7f000001 => "127.0.0.1".
51
  *
52
  * Any number of the tail bytes can be masked with given mask string.
53
  *
54
  * Assumptions:
55
  *     src is IPV4_BINARY_LENGTH long,
56
  *     dst is IPV4_MAX_TEXT_LENGTH long,
57
  *     mask_tail_octets <= IPV4_BINARY_LENGTH
58
  *     mask_string is NON-NULL, if mask_tail_octets > 0.
59
  *
60
  * Examples:
61
  *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 0, nullptr);
62
  *         > dst == "127.0.0.1"
63
  *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "xxx");
64
  *         > dst == "127.0.0.xxx"
65
  *     format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "0");
66
  *         > dst == "127.0.0.0"
67
  */
68
inline void format_ipv4(const unsigned char* src, size_t src_size, char*& dst,
69
1.51M
                        uint8_t mask_tail_octets = 0, const char* mask_string = "xxx") {
70
1.51M
    const size_t mask_length = mask_string ? strlen(mask_string) : 0;
71
1.51M
    const size_t limit = std::min(IPV4_BINARY_LENGTH, IPV4_BINARY_LENGTH - mask_tail_octets);
72
1.51M
    const size_t padding = std::min(4 - src_size, limit);
73
1.51M
    for (size_t octet = 0; octet < padding; ++octet) {
74
6
        *dst++ = '0';
75
6
        *dst++ = '.';
76
6
    }
77
78
7.55M
    for (size_t octet = 4 - src_size; octet < limit; ++octet) {
79
6.04M
        uint8_t value = 0;
80
        if constexpr (std::endian::native == std::endian::little)
81
6.04M
            value = static_cast<uint8_t>(src[IPV4_BINARY_LENGTH - octet - 1]);
82
        else
83
            value = static_cast<uint8_t>(src[octet]);
84
6.04M
        const uint8_t len = static_cast<uint8_t>(one_byte_to_string_lookup_table[value].second);
85
6.04M
        const char* str = one_byte_to_string_lookup_table[value].first;
86
87
6.04M
        memcpy(dst, str, len);
88
6.04M
        dst += len;
89
90
6.04M
        *dst++ = '.';
91
6.04M
    }
92
93
1.51M
    for (size_t mask = 0; mask < mask_tail_octets; ++mask) {
94
12
        memcpy(dst, mask_string, mask_length);
95
12
        dst += mask_length;
96
97
12
        *dst++ = '.';
98
12
    }
99
100
1.51M
    dst--;
101
1.51M
}
102
103
inline void format_ipv4(const unsigned char* src, char*& dst, uint8_t mask_tail_octets = 0,
104
1.50M
                        const char* mask_string = "xxx") {
105
1.50M
    format_ipv4(src, 4, dst, mask_tail_octets, mask_string);
106
1.50M
}
107
108
/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string.
109
 *
110
 * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`,
111
 * which should be long enough.
112
 * That is "127.0.0.1" becomes 0x7f000001.
113
 *
114
 * In case of failure doesn't modify buffer pointed by `dst`.
115
 *
116
 * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position())
117
 *           and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity.
118
 *           To parse strings use overloads below.
119
 *
120
 * @param src         - iterator (reference to pointer) over input string - warning - continuity is not guaranteed.
121
 * @param eof         - function returning true if iterator riched the end - warning - can break iterator's continuity.
122
 * @param dst         - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long.
123
 * @param first_octet - preparsed first octet
124
 * @return            - true if parsed successfully, false otherwise.
125
 */
126
template <typename T, typename EOFfunction>
127
    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
128
182k
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
129
182k
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
130
0
        return false;
131
0
    }
132
133
182k
    UInt32 result = 0;
134
182k
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
135
182k
    if (first_octet >= 0) {
136
461
        result |= first_octet << offset;
137
461
        offset -= IPV4_OCTET_BITS;
138
461
    }
139
140
712k
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
141
712k
        if (eof()) {
142
66
            return false;
143
66
        }
144
145
712k
        UInt32 value = 0;
146
712k
        size_t len = 0;
147
2.08M
        while (is_numeric_ascii(*src) && len <= 3) {
148
1.54M
            value = value * DECIMAL_BASE + (*src - '0');
149
1.54M
            ++len;
150
1.54M
            ++src;
151
1.54M
            if (eof()) {
152
175k
                break;
153
175k
            }
154
1.54M
        }
155
712k
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
156
6.31k
            return false;
157
6.31k
        }
158
706k
        result |= value << offset;
159
160
706k
        if (offset == 0) {
161
176k
            break;
162
176k
        }
163
706k
    }
164
165
176k
    memcpy(dst, &result, sizeof(result));
166
176k
    return true;
167
182k
}
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv4EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
128
179k
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
129
179k
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
130
0
        return false;
131
0
    }
132
133
179k
    UInt32 result = 0;
134
179k
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
135
179k
    if (first_octet >= 0) {
136
0
        result |= first_octet << offset;
137
0
        offset -= IPV4_OCTET_BITS;
138
0
    }
139
140
704k
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
141
704k
        if (eof()) {
142
66
            return false;
143
66
        }
144
145
704k
        UInt32 value = 0;
146
704k
        size_t len = 0;
147
2.06M
        while (is_numeric_ascii(*src) && len <= 3) {
148
1.53M
            value = value * DECIMAL_BASE + (*src - '0');
149
1.53M
            ++len;
150
1.53M
            ++src;
151
1.53M
            if (eof()) {
152
175k
                break;
153
175k
            }
154
1.53M
        }
155
704k
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
156
3.97k
            return false;
157
3.97k
        }
158
700k
        result |= value << offset;
159
160
700k
        if (offset == 0) {
161
175k
            break;
162
175k
        }
163
700k
    }
164
165
175k
    memcpy(dst, &result, sizeof(result));
166
175k
    return true;
167
179k
}
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv4EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
128
3.39k
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
129
3.39k
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
130
0
        return false;
131
0
    }
132
133
3.39k
    UInt32 result = 0;
134
3.39k
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
135
3.39k
    if (first_octet >= 0) {
136
0
        result |= first_octet << offset;
137
0
        offset -= IPV4_OCTET_BITS;
138
0
    }
139
140
6.55k
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
141
6.55k
        if (eof()) {
142
0
            return false;
143
0
        }
144
145
6.55k
        UInt32 value = 0;
146
6.55k
        size_t len = 0;
147
17.0k
        while (is_numeric_ascii(*src) && len <= 3) {
148
10.5k
            value = value * DECIMAL_BASE + (*src - '0');
149
10.5k
            ++len;
150
10.5k
            ++src;
151
10.5k
            if (eof()) {
152
0
                break;
153
0
            }
154
10.5k
        }
155
6.55k
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
156
2.33k
            return false;
157
2.33k
        }
158
4.21k
        result |= value << offset;
159
160
4.21k
        if (offset == 0) {
161
1.05k
            break;
162
1.05k
        }
163
4.21k
    }
164
165
1.05k
    memcpy(dst, &result, sizeof(result));
166
1.05k
    return true;
167
3.39k
}
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
128
393
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
129
393
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
130
0
        return false;
131
0
    }
132
133
393
    UInt32 result = 0;
134
393
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
135
393
    if (first_octet >= 0) {
136
393
        result |= first_octet << offset;
137
393
        offset -= IPV4_OCTET_BITS;
138
393
    }
139
140
1.17k
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
141
1.17k
        if (eof()) {
142
0
            return false;
143
0
        }
144
145
1.17k
        UInt32 value = 0;
146
1.17k
        size_t len = 0;
147
3.13k
        while (is_numeric_ascii(*src) && len <= 3) {
148
2.34k
            value = value * DECIMAL_BASE + (*src - '0');
149
2.34k
            ++len;
150
2.34k
            ++src;
151
2.34k
            if (eof()) {
152
391
                break;
153
391
            }
154
2.34k
        }
155
1.17k
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
156
4
            return false;
157
4
        }
158
1.17k
        result |= value << offset;
159
160
1.17k
        if (offset == 0) {
161
389
            break;
162
389
        }
163
1.17k
    }
164
165
389
    memcpy(dst, &result, sizeof(result));
166
389
    return true;
167
393
}
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
128
68
inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) {
129
68
    if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) {
130
0
        return false;
131
0
    }
132
133
68
    UInt32 result = 0;
134
68
    int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS;
135
68
    if (first_octet >= 0) {
136
68
        result |= first_octet << offset;
137
68
        offset -= IPV4_OCTET_BITS;
138
68
    }
139
140
204
    for (; true; offset -= IPV4_OCTET_BITS, ++src) {
141
204
        if (eof()) {
142
0
            return false;
143
0
        }
144
145
204
        UInt32 value = 0;
146
204
        size_t len = 0;
147
642
        while (is_numeric_ascii(*src) && len <= 3) {
148
438
            value = value * DECIMAL_BASE + (*src - '0');
149
438
            ++len;
150
438
            ++src;
151
438
            if (eof()) {
152
0
                break;
153
0
            }
154
438
        }
155
204
        if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) {
156
0
            return false;
157
0
        }
158
204
        result |= value << offset;
159
160
204
        if (offset == 0) {
161
68
            break;
162
68
        }
163
204
    }
164
165
68
    memcpy(dst, &result, sizeof(result));
166
68
    return true;
167
68
}
168
169
/// returns pointer to the right after parsed sequence or null on failed parsing
170
179k
inline const char* parse_ipv4(const char* src, const char* end, unsigned char* dst) {
171
179k
    if (parse_ipv4(
172
2.76M
                src, [&src, end]() { return src == end; }, dst)) {
173
175k
        return src;
174
175k
    }
175
4.03k
    return nullptr;
176
179k
}
177
178
/// returns true if whole buffer was parsed successfully
179
179k
inline bool parse_ipv4_whole(const char* src, const char* end, unsigned char* dst) {
180
179k
    return parse_ipv4(src, end, dst) == end;
181
179k
}
182
183
/// returns pointer to the right after parsed sequence or null on failed parsing
184
3.39k
inline const char* parse_ipv4(const char* src, unsigned char* dst) {
185
3.39k
    if (parse_ipv4(
186
3.39k
                src, []() { return false; }, dst)) {
187
1.05k
        return src;
188
1.05k
    }
189
2.33k
    return nullptr;
190
3.39k
}
191
192
/// returns true if whole null-terminated string was parsed successfully
193
3.39k
inline bool parse_ipv4_whole(const char* src, unsigned char* dst) {
194
3.39k
    const char* end = parse_ipv4(src, dst);
195
3.39k
    return end != nullptr && *end == '\0';
196
3.39k
}
197
198
/// integer logarithm, return ceil(log(value, base)) (the smallest integer greater or equal than log(value, base)
199
0
inline constexpr UInt32 int_log(const UInt32 value, const UInt32 base, const bool carry) {
200
0
    return value >= base ? 1 + int_log(value / base, base, value % base || carry)
201
0
                         : value % base > 1 || carry;
202
0
}
203
204
/// Print integer in desired base, faster than sprintf.
205
/// NOTE This is not the best way. See https://github.com/miloyip/itoa-benchmark
206
/// But it doesn't matter here.
207
template <UInt32 base, typename T>
208
140k
inline void print_integer(char*& out, T value) {
209
140k
    if (value == 0) {
210
177
        *out++ = '0';
211
139k
    } else {
212
139k
        constexpr size_t buffer_size = sizeof(T) * int_log(256, base, false);
213
214
139k
        char buf[buffer_size];
215
139k
        auto ptr = buf;
216
217
584k
        while (value > 0) {
218
444k
            *ptr = hex_digit_lowercase(value % base);
219
444k
            ++ptr;
220
444k
            value /= base;
221
444k
        }
222
223
        /// Copy to out reversed.
224
584k
        while (ptr != buf) {
225
444k
            --ptr;
226
444k
            *out = *ptr;
227
444k
            ++out;
228
444k
        }
229
139k
    }
230
140k
}
231
232
/** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
233
  * performs significantly faster than the reference implementation due to the absence of sprintf calls,
234
  * bounds checking, unnecessary string copying and length calculation.
235
  * @param src         - pointer to IPv6 (16 bytes) stored in little-endian byte order
236
  * @param dst         - where to put format result bytes
237
  * @param zeroed_tail_bytes_count - the parameter is currently not being used
238
  */
239
1.49M
inline void format_ipv6(unsigned char* src, char*& dst, uint8_t zeroed_tail_bytes_count = 0) {
240
1.49M
    struct {
241
1.49M
        Int64 base, len;
242
1.49M
    } best {-1, 0}, cur {-1, 0};
243
1.49M
    std::array<UInt16, IPV6_BINARY_LENGTH / sizeof(UInt16)> words {};
244
245
    // the current function logic is processed in big endian manner
246
    // but ipv6 in doris is stored in little-endian byte order
247
    // so transfer to big-endian byte order first
248
    // compatible with parse_ipv6 function in format_ip.h
249
1.49M
    std::reverse(src, src + IPV6_BINARY_LENGTH);
250
251
    /** Preprocess:
252
        *    Copy the input (bytewise) array into a wordwise array.
253
        *    Find the longest run of 0x00's in src[] for :: shorthanding. */
254
13.4M
    for (size_t i = 0; i < (IPV6_BINARY_LENGTH - zeroed_tail_bytes_count); i += 2) {
255
11.9M
        words[i / 2] = (uint16_t)(src[i] << 8) | src[i + 1];
256
11.9M
    }
257
258
13.4M
    for (size_t i = 0; i < words.size(); i++) {
259
11.9M
        if (words[i] == 0) {
260
11.8M
            if (cur.base == -1) {
261
1.49M
                cur.base = i;
262
1.49M
                cur.len = 1;
263
10.3M
            } else {
264
10.3M
                cur.len++;
265
10.3M
            }
266
11.8M
        } else {
267
140k
            if (cur.base != -1) {
268
36.7k
                if (best.base == -1 || cur.len > best.len) {
269
36.7k
                    best = cur;
270
36.7k
                }
271
36.7k
                cur.base = -1;
272
36.7k
            }
273
140k
        }
274
11.9M
    }
275
276
1.49M
    if (cur.base != -1) {
277
1.45M
        if (best.base == -1 || cur.len > best.len) {
278
1.45M
            best = cur;
279
1.45M
        }
280
1.45M
    }
281
1.49M
    if (best.base != -1 && best.len < 2) {
282
60
        best.base = -1;
283
60
    }
284
285
    /// Format the result.
286
13.4M
    for (size_t i = 0; i < words.size(); i++) {
287
        /// Are we inside the best run of 0x00's?
288
11.9M
        if (best.base != -1) {
289
11.9M
            auto best_base = static_cast<size_t>(best.base);
290
11.9M
            if (i >= best_base && i < (best_base + best.len)) {
291
11.8M
                if (i == best_base) {
292
1.49M
                    *dst++ = ':';
293
1.49M
                }
294
11.8M
                continue;
295
11.8M
            }
296
11.9M
        }
297
        /// Are we following an initial run of 0x00s or any real hex?
298
140k
        if (i != 0) {
299
101k
            *dst++ = ':';
300
101k
        }
301
        /// Is this address an encapsulated IPv4?
302
140k
        if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu))) {
303
84
            uint8_t ipv4_buffer[IPV4_BINARY_LENGTH] = {0};
304
84
            memcpy(ipv4_buffer, src + 12, IPV4_BINARY_LENGTH);
305
            // Due to historical reasons format_ipv4() takes ipv4 in BE format, but inside ipv6 we store it in LE-format.
306
84
            if constexpr (std::endian::native == std::endian::little) {
307
84
                std::reverse(std::begin(ipv4_buffer), std::end(ipv4_buffer));
308
84
            }
309
84
            format_ipv4(ipv4_buffer, dst,
310
84
                        std::min(zeroed_tail_bytes_count, static_cast<uint8_t>(IPV4_BINARY_LENGTH)),
311
84
                        "0");
312
            // format_ipv4 has already added a null-terminator for us.
313
84
            return;
314
84
        }
315
140k
        print_integer<16>(dst, words[i]);
316
140k
    }
317
318
    /// Was it a trailing run of 0x00's?
319
1.49M
    if (best.base != -1 &&
320
1.49M
        static_cast<size_t>(best.base) + static_cast<size_t>(best.len) == words.size()) {
321
1.45M
        *dst++ = ':';
322
1.45M
    }
323
1.49M
}
324
325
/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv6 string.
326
*
327
* Parses the input string `src` and stores binary little-endian value into buffer pointed by `dst`,
328
* which should be long enough. In case of failure zeroes IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`.
329
*
330
* WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position())
331
*           and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity.
332
*           To parse strings use overloads below.
333
*
334
* @param src         - iterator (reference to pointer) over input string - warning - continuity is not guaranteed.
335
* @param eof         - function returning true if iterator riched the end - warning - can break iterator's continuity.
336
* @param dst         - where to put output bytes in little-endian byte order, expected to be non-null and at IPV6_BINARY_LENGTH-long.
337
* @param first_block - preparsed first block
338
* @return            - true if parsed successfully, false otherwise.
339
*/
340
template <typename T, typename EOFfunction>
341
    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
342
340k
inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) {
343
340k
    const auto clear_dst = [dst]() {
344
8.08k
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
345
8.08k
        return false;
346
8.08k
    };
_ZZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_iENKUlvE_clEv
Line
Count
Source
343
7.26k
    const auto clear_dst = [dst]() {
344
7.26k
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
345
7.26k
        return false;
346
7.26k
    };
_ZZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_iENKUlvE_clEv
Line
Count
Source
343
820
    const auto clear_dst = [dst]() {
344
820
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
345
820
        return false;
346
820
    };
347
348
340k
    if (src == nullptr || eof()) return clear_dst();
349
350
340k
    int groups = 0;            /// number of parsed groups
351
340k
    unsigned char* iter = dst; /// iterator over dst buffer
352
340k
    unsigned char* zptr =
353
340k
            nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started
354
355
340k
    std::memset(dst, '\0', IPV6_BINARY_LENGTH);
356
357
340k
    if (first_block >= 0) {
358
0
        *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu);
359
0
        *iter++ = static_cast<unsigned char>(first_block & 0xffu);
360
0
        if (*src == ':') {
361
0
            zptr = iter;
362
0
            ++src;
363
0
        }
364
0
        ++groups;
365
0
    }
366
367
340k
    bool group_start = true;
368
369
2.83M
    while (!eof() && groups < 8) {
370
2.49M
        if (*src == ':') {
371
2.12M
            ++src;
372
2.12M
            if (eof()) /// trailing colon is not allowed
373
32
                return clear_dst();
374
375
2.12M
            group_start = true;
376
377
2.12M
            if (*src == ':') {
378
62.1k
                if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed
379
159
                    return clear_dst();
380
61.9k
                zptr = iter;
381
61.9k
                ++src;
382
61.9k
                if (!eof() && *src == ':') {
383
                    /// more than one all-zeroes block is not allowed
384
10
                    return clear_dst();
385
10
                }
386
61.9k
                continue;
387
61.9k
            }
388
2.06M
            if (groups == 0) /// leading colon is not allowed
389
0
                return clear_dst();
390
2.06M
        }
391
392
        /// mixed IPv4 parsing
393
2.43M
        if (*src == '.') {
394
1.05k
            if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first
395
598
                return clear_dst();
396
397
461
            if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group
398
0
                return clear_dst();
399
400
461
            ++src;
401
461
            if (eof()) return clear_dst();
402
403
            /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4
404
461
            --groups;
405
461
            iter -= 2;
406
407
461
            UInt16 num = 0;
408
1.38k
            for (int i = 0; i < 2; ++i) {
409
922
                unsigned char first = (iter[i] >> 4) & 0x0fu;
410
922
                unsigned char second = iter[i] & 0x0fu;
411
922
                if (first > 9 || second > 9) return clear_dst();
412
922
                (num *= 100) += first * 10 + second;
413
922
            }
414
461
            if (num > 255) return clear_dst();
415
416
            /// parse IPv4 with known first octet
417
461
            if (!parse_ipv4(src, eof, iter, num)) return clear_dst();
418
419
            if constexpr (std::endian::native == std::endian::little)
420
457
                std::reverse(iter, iter + IPV4_BINARY_LENGTH);
421
422
457
            iter += 4;
423
457
            groups += 2;
424
457
            break; /// IPv4 block is the last - end of parsing
425
461
        }
426
427
2.43M
        if (!group_start) /// end of parsing
428
1.61k
            break;
429
2.43M
        group_start = false;
430
431
2.43M
        UInt16 val = 0;  /// current decoded group
432
2.43M
        int xdigits = 0; /// number of decoded hex digits in current group
433
434
11.1M
        for (; !eof() && xdigits < 4; ++src, ++xdigits) {
435
9.07M
            UInt8 num = unhex(*src);
436
9.07M
            if (num == 0xFF) break;
437
8.70M
            (val <<= 4) |= num;
438
8.70M
        }
439
440
2.43M
        if (xdigits == 0) /// end of parsing
441
3.04k
            break;
442
443
2.43M
        *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu);
444
2.43M
        *iter++ = static_cast<unsigned char>(val & 0xffu);
445
2.43M
        ++groups;
446
2.43M
    }
447
448
    /// either all 8 groups or all-zeroes block should be present
449
339k
    if (groups < 8 && zptr == nullptr) return clear_dst();
450
451
    /// process all-zeroes block
452
332k
    if (zptr != nullptr) {
453
61.7k
        if (groups == 8) {
454
            /// all-zeroes block at least should be one
455
            /// 2001:0db8:86a3::08d3:1319:8a2e:0370:7344 not valid
456
4
            return clear_dst();
457
4
        }
458
61.7k
        size_t msize = iter - zptr;
459
61.7k
        std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize);
460
61.7k
        std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst));
461
61.7k
    }
462
463
    /// the current function logic is processed in big endian manner
464
    /// but ipv6 in doris is stored in little-endian byte order
465
    /// so transfer to little-endian
466
332k
    std::reverse(dst, dst + IPV6_BINARY_LENGTH);
467
468
332k
    return true;
469
332k
}
_ZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
342
338k
inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) {
343
338k
    const auto clear_dst = [dst]() {
344
338k
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
345
338k
        return false;
346
338k
    };
347
348
338k
    if (src == nullptr || eof()) return clear_dst();
349
350
338k
    int groups = 0;            /// number of parsed groups
351
338k
    unsigned char* iter = dst; /// iterator over dst buffer
352
338k
    unsigned char* zptr =
353
338k
            nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started
354
355
338k
    std::memset(dst, '\0', IPV6_BINARY_LENGTH);
356
357
338k
    if (first_block >= 0) {
358
0
        *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu);
359
0
        *iter++ = static_cast<unsigned char>(first_block & 0xffu);
360
0
        if (*src == ':') {
361
0
            zptr = iter;
362
0
            ++src;
363
0
        }
364
0
        ++groups;
365
0
    }
366
367
338k
    bool group_start = true;
368
369
2.82M
    while (!eof() && groups < 8) {
370
2.49M
        if (*src == ':') {
371
2.12M
            ++src;
372
2.12M
            if (eof()) /// trailing colon is not allowed
373
32
                return clear_dst();
374
375
2.12M
            group_start = true;
376
377
2.12M
            if (*src == ':') {
378
61.1k
                if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed
379
159
                    return clear_dst();
380
60.9k
                zptr = iter;
381
60.9k
                ++src;
382
60.9k
                if (!eof() && *src == ':') {
383
                    /// more than one all-zeroes block is not allowed
384
10
                    return clear_dst();
385
10
                }
386
60.9k
                continue;
387
60.9k
            }
388
2.06M
            if (groups == 0) /// leading colon is not allowed
389
0
                return clear_dst();
390
2.06M
        }
391
392
        /// mixed IPv4 parsing
393
2.43M
        if (*src == '.') {
394
991
            if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first
395
598
                return clear_dst();
396
397
393
            if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group
398
0
                return clear_dst();
399
400
393
            ++src;
401
393
            if (eof()) return clear_dst();
402
403
            /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4
404
393
            --groups;
405
393
            iter -= 2;
406
407
393
            UInt16 num = 0;
408
1.17k
            for (int i = 0; i < 2; ++i) {
409
786
                unsigned char first = (iter[i] >> 4) & 0x0fu;
410
786
                unsigned char second = iter[i] & 0x0fu;
411
786
                if (first > 9 || second > 9) return clear_dst();
412
786
                (num *= 100) += first * 10 + second;
413
786
            }
414
393
            if (num > 255) return clear_dst();
415
416
            /// parse IPv4 with known first octet
417
393
            if (!parse_ipv4(src, eof, iter, num)) return clear_dst();
418
419
            if constexpr (std::endian::native == std::endian::little)
420
389
                std::reverse(iter, iter + IPV4_BINARY_LENGTH);
421
422
389
            iter += 4;
423
389
            groups += 2;
424
389
            break; /// IPv4 block is the last - end of parsing
425
393
        }
426
427
2.43M
        if (!group_start) /// end of parsing
428
823
            break;
429
2.43M
        group_start = false;
430
431
2.43M
        UInt16 val = 0;  /// current decoded group
432
2.43M
        int xdigits = 0; /// number of decoded hex digits in current group
433
434
11.1M
        for (; !eof() && xdigits < 4; ++src, ++xdigits) {
435
9.06M
            UInt8 num = unhex(*src);
436
9.06M
            if (num == 0xFF) break;
437
8.69M
            (val <<= 4) |= num;
438
8.69M
        }
439
440
2.43M
        if (xdigits == 0) /// end of parsing
441
2.13k
            break;
442
443
2.42M
        *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu);
444
2.42M
        *iter++ = static_cast<unsigned char>(val & 0xffu);
445
2.42M
        ++groups;
446
2.42M
    }
447
448
    /// either all 8 groups or all-zeroes block should be present
449
337k
    if (groups < 8 && zptr == nullptr) return clear_dst();
450
451
    /// process all-zeroes block
452
330k
    if (zptr != nullptr) {
453
60.7k
        if (groups == 8) {
454
            /// all-zeroes block at least should be one
455
            /// 2001:0db8:86a3::08d3:1319:8a2e:0370:7344 not valid
456
4
            return clear_dst();
457
4
        }
458
60.7k
        size_t msize = iter - zptr;
459
60.7k
        std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize);
460
60.7k
        std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst));
461
60.7k
    }
462
463
    /// the current function logic is processed in big endian manner
464
    /// but ipv6 in doris is stored in little-endian byte order
465
    /// so transfer to little-endian
466
330k
    std::reverse(dst, dst + IPV6_BINARY_LENGTH);
467
468
330k
    return true;
469
330k
}
_ZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
Line
Count
Source
342
1.87k
/// Parses a textual IPv6 address starting at `src` into the 16-byte buffer `dst`.
///
/// @param src         Cursor into the input; taken by reference and advanced as characters
///                    are consumed. A null `src` is rejected.
/// @param eof         Callable returning true once `src` has reached the end of the input.
/// @param dst         Output buffer of IPV6_BINARY_LENGTH bytes. It is zeroed up front and
///                    zeroed again on every failure path (all failures go through clear_dst).
/// @param first_block When >= 0, it is stored as an already-decoded first 16-bit group;
///                    the default of -1 means no group has been decoded yet.
/// @return true on success, with `dst` holding the address in reversed (little-endian) byte
///         order; false on malformed input, with `dst` all zeroes.
///
/// Parsing stops at the first character that cannot continue the address and `src` is left
/// pointing there; leftover input is not rejected by this function itself.
inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) {
343
1.87k
    /// single failure exit: wipe the output buffer and report false
    const auto clear_dst = [dst]() {
344
1.87k
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
345
1.87k
        return false;
346
1.87k
    };
347
348
1.87k
    if (src == nullptr || eof()) return clear_dst();
349
350
1.87k
    int groups = 0;            /// number of parsed groups
351
1.87k
    unsigned char* iter = dst; /// iterator over dst buffer
352
1.87k
    unsigned char* zptr =
353
1.87k
            nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started
354
355
1.87k
    std::memset(dst, '\0', IPV6_BINARY_LENGTH);
356
357
1.87k
    /// a caller-supplied first group is stored high byte first, like every group parsed below
    if (first_block >= 0) {
358
0
        *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu);
359
0
        *iter++ = static_cast<unsigned char>(first_block & 0xffu);
360
0
        /// a ':' here is recorded as the start of the all-zeroes block and skipped
        if (*src == ':') {
361
0
            zptr = iter;
362
0
            ++src;
363
0
        }
364
0
        ++groups;
365
0
    }
366
367
1.87k
    /// true when the next thing to read is a fresh group (initially, and after each ':')
    bool group_start = true;
368
369
6.09k
    while (!eof() && groups < 8) {
370
5.99k
        if (*src == ':') {
371
2.41k
            ++src;
372
2.41k
            if (eof()) /// trailing colon is not allowed
373
0
                return clear_dst();
374
375
2.41k
            group_start = true;
376
377
2.41k
            if (*src == ':') {
378
948
                if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed
379
0
                    return clear_dst();
380
948
                /// remember where the "::" gap begins; it is expanded after the loop
                zptr = iter;
381
948
                ++src;
382
948
                if (!eof() && *src == ':') {
383
                    /// more than one all-zeroes block is not allowed
384
0
                    return clear_dst();
385
0
                }
386
948
                continue;
387
948
            }
388
1.46k
            if (groups == 0) /// leading colon is not allowed
389
0
                return clear_dst();
390
1.46k
        }
391
392
        /// mixed IPv4 parsing
393
5.04k
        if (*src == '.') {
394
68
            if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first
395
0
                return clear_dst();
396
397
68
            if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group
398
0
                return clear_dst();
399
400
68
            ++src;
401
68
            if (eof()) return clear_dst();
402
403
            /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4
404
68
            --groups;
405
68
            iter -= 2;
406
407
68
            UInt16 num = 0;
408
204
            /// the two stored bytes hold up to four hex nibbles; read each nibble as a decimal
            /// digit (any nibble above 9 means the text was not a decimal number)
            for (int i = 0; i < 2; ++i) {
409
136
                unsigned char first = (iter[i] >> 4) & 0x0fu;
410
136
                unsigned char second = iter[i] & 0x0fu;
411
136
                if (first > 9 || second > 9) return clear_dst();
412
136
                (num *= 100) += first * 10 + second;
413
136
            }
414
68
            /// an IPv4 octet cannot exceed 255
            if (num > 255) return clear_dst();
415
416
            /// parse IPv4 with known first octet
417
68
            if (!parse_ipv4(src, eof, iter, num)) return clear_dst();
418
419
            /// NOTE(review): presumably parse_ipv4 stores its 4 bytes in host order, so on
            /// little-endian hosts they are flipped here to match the high-byte-first layout
            /// used for the other groups - confirm against parse_ipv4
            if constexpr (std::endian::native == std::endian::little)
420
68
                std::reverse(iter, iter + IPV4_BINARY_LENGTH);
421
422
68
            /// the IPv4 tail occupies 4 bytes, i.e. two 16-bit groups
            iter += 4;
423
68
            groups += 2;
424
68
            break; /// IPv4 block is the last - end of parsing
425
68
        }
426
427
4.97k
        /// a group was already decoded and no ':' followed it, so the address text has ended
        if (!group_start) /// end of parsing
428
793
            break;
429
4.18k
        group_start = false;
430
431
4.18k
        UInt16 val = 0;  /// current decoded group
432
4.18k
        int xdigits = 0; /// number of decoded hex digits in current group
433
434
14.7k
        /// accumulate at most four hex digits; 0xFF from unhex is treated as "not a hex digit"
        for (; !eof() && xdigits < 4; ++src, ++xdigits) {
435
12.9k
            UInt8 num = unhex(*src);
436
12.9k
            if (num == 0xFF) break;
437
10.6k
            (val <<= 4) |= num;
438
10.6k
        }
439
440
4.18k
        if (xdigits == 0) /// end of parsing
441
907
            break;
442
443
3.27k
        /// store the group high byte first
        *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu);
444
3.27k
        *iter++ = static_cast<unsigned char>(val & 0xffu);
445
3.27k
        ++groups;
446
3.27k
    }
447
448
    /// either all 8 groups or all-zeroes block should be present
449
1.87k
    if (groups < 8 && zptr == nullptr) return clear_dst();
450
451
    /// process all-zeroes block
452
1.05k
    if (zptr != nullptr) {
453
948
        if (groups == 8) {
454
            /// the all-zeroes block must stand for at least one group
455
            /// 2001:0db8:86a3::08d3:1319:8a2e:0370:7344 not valid
456
0
            return clear_dst();
457
0
        }
458
948
        /// bytes written after "::" are moved to the tail of dst, then the gap they leave
        /// (as many bytes as were never written) is zero-filled starting at zptr
        size_t msize = iter - zptr;
459
948
        std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize);
460
948
        std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst));
461
948
    }
462
463
    /// the current function logic is processed in big endian manner
464
    /// but ipv6 in doris is stored in little-endian byte order
465
    /// so transfer to little-endian
466
1.05k
    std::reverse(dst, dst + IPV6_BINARY_LENGTH);
467
468
1.05k
    return true;
469
1.05k
}
470
471
/// returns a pointer to the position right after the parsed sequence, or null if parsing failed
472
338k
/// Bounded-range overload: parses an IPv6 address from [src, end) into `dst`.
/// `src` is a by-value copy, so the caller's pointer is untouched. The templated parser
/// advances this local copy through its reference parameter, and the lambda captures the
/// same variable by reference, so eof turns true when the cursor reaches `end`.
/// The advanced cursor is returned on success and nullptr on failure.
inline const char* parse_ipv6(const char* src, const char* end, unsigned char* dst) {
473
338k
    if (parse_ipv6(
474
16.4M
                src, [&src, end]() { return src == end; }, dst))
475
330k
        return src;
476
7.26k
    return nullptr;
477
338k
}
478
479
/// returns true if whole buffer was parsed successfully
480
338k
/// Returns true only if the bounded parser succeeded AND consumed the input right up to `end`.
/// parse_ipv6 signals failure with nullptr, so `end` must also be non-null: otherwise a null
/// empty range (src == end == nullptr) would make the failed parse compare equal to `end`
/// and be reported as a valid address. parse_ipv6 is evaluated first so that `dst` is still
/// zeroed by the parser on failure.
inline bool parse_ipv6_whole(const char* src, const char* end, unsigned char* dst) {
481
338k
    return parse_ipv6(src, end, dst) == end && end != nullptr;
482
338k
}
483
484
/// returns a pointer to the position right after the parsed sequence, or null if parsing failed
485
1.87k
/// Null-terminated overload: parses an IPv6 address from the C string `src` into `dst`.
/// The eof callable always returns false, so the templated parser never stops on a length
/// bound; it ends only where its own character checks stop matching.
/// NOTE(review): this presumably relies on unhex('\0') returning 0xFF so that the terminator
/// halts the hex-digit loop - confirm against unhex.
/// `src` is a by-value copy advanced through the parser's reference parameter; the advanced
/// cursor is returned on success and nullptr on failure.
inline const char* parse_ipv6(const char* src, unsigned char* dst) {
486
1.87k
    if (parse_ipv6(
487
1.87k
                src, []() { return false; }, dst))
488
1.05k
        return src;
489
820
    return nullptr;
490
1.87k
}
491
492
/// returns true if whole null-terminated string was parsed successfully
493
1.87k
/// Returns true only if parsing succeeded and stopped exactly on the terminating '\0'.
inline bool parse_ipv6_whole(const char* src, unsigned char* dst) {
494
1.87k
    /// position where the parser stopped, or nullptr if the address was malformed
    const char* end = parse_ipv6(src, dst);
495
1.87k
    /// the null check must come first: a failed parse yields nullptr, which must not be dereferenced
    return end != nullptr && *end == '\0';
496
1.87k
}
497
498
#include "common/compile_check_end.h"
499
} // namespace doris