be/src/exec/common/format_ip.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/formatIPv6.h |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #include <algorithm> |
24 | | #include <array> |
25 | | #include <bit> |
26 | | #include <cstdint> |
27 | | #include <cstring> |
28 | | #include <utility> |
29 | | |
30 | | #include "core/types.h" |
31 | | #include "exec/common/hex.h" |
32 | | #include "exec/common/string_utils/string_utils.h" |
33 | | |
/// --- IPv4 ---------------------------------------------------------------
/// Size in bytes of a binary IPv4 address.
constexpr size_t IPV4_BINARY_LENGTH = 4;
/// Longest textual form, "255.255.255.255"; the trailing zero byte is not counted.
constexpr size_t IPV4_MAX_TEXT_LENGTH = 15;
/// Numeric value of '0.0.0.0'.
constexpr size_t IPV4_MIN_NUM_VALUE = 0;
/// Numeric value of '255.255.255.255'.
constexpr size_t IPV4_MAX_NUM_VALUE = 4294967295;
/// Largest value a single octet may hold.
constexpr int IPV4_MAX_OCTET_VALUE = 255;
/// Number of bits in one octet.
constexpr size_t IPV4_OCTET_BITS = 8;
/// Radix used when parsing the decimal octets.
constexpr size_t DECIMAL_BASE = 10;

/// --- IPv6 ---------------------------------------------------------------
/// Size in bytes of a binary IPv6 address.
constexpr size_t IPV6_BINARY_LENGTH = 16;
/// Longest textual form of eight 4-digit hex groups separated by colons.
constexpr size_t IPV6_MAX_TEXT_LENGTH = 39;
43 | | |
44 | | namespace doris { |
45 | | |
46 | | extern const std::array<std::pair<const char*, size_t>, 256> one_byte_to_string_lookup_table; |
47 | | |
48 | | /** Format 4-byte binary sequesnce as IPv4 text: 'aaa.bbb.ccc.ddd', |
49 | | * expects in out to be in BE-format, that is 0x7f000001 => "127.0.0.1". |
50 | | * |
51 | | * Any number of the tail bytes can be masked with given mask string. |
52 | | * |
53 | | * Assumptions: |
54 | | * src is IPV4_BINARY_LENGTH long, |
55 | | * dst is IPV4_MAX_TEXT_LENGTH long, |
56 | | * mask_tail_octets <= IPV4_BINARY_LENGTH |
57 | | * mask_string is NON-NULL, if mask_tail_octets > 0. |
58 | | * |
59 | | * Examples: |
60 | | * format_ipv4(&0x7f000001, dst, mask_tail_octets = 0, nullptr); |
61 | | * > dst == "127.0.0.1" |
62 | | * format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "xxx"); |
63 | | * > dst == "127.0.0.xxx" |
64 | | * format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "0"); |
65 | | * > dst == "127.0.0.0" |
66 | | */ |
67 | | inline void format_ipv4(const unsigned char* src, size_t src_size, char*& dst, |
68 | 1.49M | uint8_t mask_tail_octets = 0, const char* mask_string = "xxx") { |
69 | 1.49M | const size_t mask_length = mask_string ? strlen(mask_string) : 0; |
70 | 1.49M | const size_t limit = std::min(IPV4_BINARY_LENGTH, IPV4_BINARY_LENGTH - mask_tail_octets); |
71 | 1.49M | const size_t padding = std::min(4 - src_size, limit); |
72 | 1.49M | for (size_t octet = 0; octet < padding; ++octet) { |
73 | 0 | *dst++ = '0'; |
74 | 0 | *dst++ = '.'; |
75 | 0 | } |
76 | | |
77 | 7.49M | for (size_t octet = 4 - src_size; octet < limit; ++octet) { |
78 | 5.99M | uint8_t value = 0; |
79 | | if constexpr (std::endian::native == std::endian::little) |
80 | 5.99M | value = static_cast<uint8_t>(src[IPV4_BINARY_LENGTH - octet - 1]); |
81 | | else |
82 | | value = static_cast<uint8_t>(src[octet]); |
83 | 5.99M | const uint8_t len = static_cast<uint8_t>(one_byte_to_string_lookup_table[value].second); |
84 | 5.99M | const char* str = one_byte_to_string_lookup_table[value].first; |
85 | | |
86 | 5.99M | memcpy(dst, str, len); |
87 | 5.99M | dst += len; |
88 | | |
89 | 5.99M | *dst++ = '.'; |
90 | 5.99M | } |
91 | | |
92 | 1.49M | for (size_t mask = 0; mask < mask_tail_octets; ++mask) { |
93 | 6 | memcpy(dst, mask_string, mask_length); |
94 | 6 | dst += mask_length; |
95 | | |
96 | 6 | *dst++ = '.'; |
97 | 6 | } |
98 | | |
99 | 1.49M | dst--; |
100 | 1.49M | } |
101 | | |
102 | | inline void format_ipv4(const unsigned char* src, char*& dst, uint8_t mask_tail_octets = 0, |
103 | 1.49M | const char* mask_string = "xxx") { |
104 | 1.49M | format_ipv4(src, 4, dst, mask_tail_octets, mask_string); |
105 | 1.49M | } |
106 | | |
107 | | /** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string. |
108 | | * |
109 | | * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`, |
110 | | * which should be long enough. |
111 | | * That is "127.0.0.1" becomes 0x7f000001. |
112 | | * |
113 | | * In case of failure doesn't modify buffer pointed by `dst`. |
114 | | * |
115 | | * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) |
116 | | * and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity. |
117 | | * To parse strings use overloads below. |
118 | | * |
119 | | * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. |
120 | | * @param eof - function returning true if iterator riched the end - warning - can break iterator's continuity. |
121 | | * @param dst - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long. |
122 | | * @param first_octet - preparsed first octet |
123 | | * @return - true if parsed successfully, false otherwise. |
124 | | */ |
125 | | template <typename T, typename EOFfunction> |
126 | | requires(std::is_same<typename std::remove_cv<T>::type, char>::value) |
127 | 83.4k | inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) { |
128 | 83.4k | if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) { |
129 | 0 | return false; |
130 | 0 | } |
131 | | |
132 | 83.4k | UInt32 result = 0; |
133 | 83.4k | int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS; |
134 | 83.4k | if (first_octet >= 0) { |
135 | 78 | result |= first_octet << offset; |
136 | 78 | offset -= IPV4_OCTET_BITS; |
137 | 78 | } |
138 | | |
139 | 329k | for (; true; offset -= IPV4_OCTET_BITS, ++src) { |
140 | 329k | if (eof()) { |
141 | 61 | return false; |
142 | 61 | } |
143 | | |
144 | 329k | UInt32 value = 0; |
145 | 329k | size_t len = 0; |
146 | 824k | while (is_numeric_ascii(*src) && len <= 3) { |
147 | 576k | value = value * DECIMAL_BASE + (*src - '0'); |
148 | 576k | ++len; |
149 | 576k | ++src; |
150 | 576k | if (eof()) { |
151 | 81.8k | break; |
152 | 81.8k | } |
153 | 576k | } |
154 | 329k | if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) { |
155 | 1.53k | return false; |
156 | 1.53k | } |
157 | 328k | result |= value << offset; |
158 | | |
159 | 328k | if (offset == 0) { |
160 | 81.8k | break; |
161 | 81.8k | } |
162 | 328k | } |
163 | | |
164 | 81.8k | memcpy(dst, &result, sizeof(result)); |
165 | 81.8k | return true; |
166 | 83.4k | } _ZN5doris10parse_ipv4IKcZNS_10parse_ipv4EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i Line | Count | Source | 127 | 83.3k | inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) { | 128 | 83.3k | if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) { | 129 | 0 | return false; | 130 | 0 | } | 131 | | | 132 | 83.3k | UInt32 result = 0; | 133 | 83.3k | int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS; | 134 | 83.3k | if (first_octet >= 0) { | 135 | 0 | result |= first_octet << offset; | 136 | 0 | offset -= IPV4_OCTET_BITS; | 137 | 0 | } | 138 | | | 139 | 329k | for (; true; offset -= IPV4_OCTET_BITS, ++src) { | 140 | 329k | if (eof()) { | 141 | 61 | return false; | 142 | 61 | } | 143 | | | 144 | 329k | UInt32 value = 0; | 145 | 329k | size_t len = 0; | 146 | 823k | while (is_numeric_ascii(*src) && len <= 3) { | 147 | 576k | value = value * DECIMAL_BASE + (*src - '0'); | 148 | 576k | ++len; | 149 | 576k | ++src; | 150 | 576k | if (eof()) { | 151 | 81.7k | break; | 152 | 81.7k | } | 153 | 576k | } | 154 | 329k | if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) { | 155 | 1.53k | return false; | 156 | 1.53k | } | 157 | 327k | result |= value << offset; | 158 | | | 159 | 327k | if (offset == 0) { | 160 | 81.7k | break; | 161 | 81.7k | } | 162 | 327k | } | 163 | | | 164 | 81.7k | memcpy(dst, &result, sizeof(result)); | 165 | 81.7k | return true; | 166 | 83.3k | } |
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv4EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i Line | Count | Source | 127 | 1 | inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) { | 128 | 1 | if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) { | 129 | 0 | return false; | 130 | 0 | } | 131 | | | 132 | 1 | UInt32 result = 0; | 133 | 1 | int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS; | 134 | 1 | if (first_octet >= 0) { | 135 | 0 | result |= first_octet << offset; | 136 | 0 | offset -= IPV4_OCTET_BITS; | 137 | 0 | } | 138 | | | 139 | 4 | for (; true; offset -= IPV4_OCTET_BITS, ++src) { | 140 | 4 | if (eof()) { | 141 | 0 | return false; | 142 | 0 | } | 143 | | | 144 | 4 | UInt32 value = 0; | 145 | 4 | size_t len = 0; | 146 | 10 | while (is_numeric_ascii(*src) && len <= 3) { | 147 | 6 | value = value * DECIMAL_BASE + (*src - '0'); | 148 | 6 | ++len; | 149 | 6 | ++src; | 150 | 6 | if (eof()) { | 151 | 0 | break; | 152 | 0 | } | 153 | 6 | } | 154 | 4 | if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) { | 155 | 0 | return false; | 156 | 0 | } | 157 | 4 | result |= value << offset; | 158 | | | 159 | 4 | if (offset == 0) { | 160 | 1 | break; | 161 | 1 | } | 162 | 4 | } | 163 | | | 164 | 1 | memcpy(dst, &result, sizeof(result)); | 165 | 1 | return true; | 166 | 1 | } |
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i Line | Count | Source | 127 | 78 | inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) { | 128 | 78 | if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) { | 129 | 0 | return false; | 130 | 0 | } | 131 | | | 132 | 78 | UInt32 result = 0; | 133 | 78 | int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS; | 134 | 78 | if (first_octet >= 0) { | 135 | 78 | result |= first_octet << offset; | 136 | 78 | offset -= IPV4_OCTET_BITS; | 137 | 78 | } | 138 | | | 139 | 233 | for (; true; offset -= IPV4_OCTET_BITS, ++src) { | 140 | 233 | if (eof()) { | 141 | 0 | return false; | 142 | 0 | } | 143 | | | 144 | 233 | UInt32 value = 0; | 145 | 233 | size_t len = 0; | 146 | 659 | while (is_numeric_ascii(*src) && len <= 3) { | 147 | 503 | value = value * DECIMAL_BASE + (*src - '0'); | 148 | 503 | ++len; | 149 | 503 | ++src; | 150 | 503 | if (eof()) { | 151 | 77 | break; | 152 | 77 | } | 153 | 503 | } | 154 | 233 | if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) { | 155 | 2 | return false; | 156 | 2 | } | 157 | 231 | result |= value << offset; | 158 | | | 159 | 231 | if (offset == 0) { | 160 | 76 | break; | 161 | 76 | } | 162 | 231 | } | 163 | | | 164 | 76 | memcpy(dst, &result, sizeof(result)); | 165 | 76 | return true; | 166 | 78 | } |
Unexecuted instantiation: _ZN5doris10parse_ipv4IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i |
167 | | |
168 | | /// returns pointer to the right after parsed sequence or null on failed parsing |
169 | 83.3k | inline const char* parse_ipv4(const char* src, const char* end, unsigned char* dst) { |
170 | 83.3k | if (parse_ipv4( |
171 | 1.15M | src, [&src, end]() { return src == end; }, dst)) { |
172 | 81.7k | return src; |
173 | 81.7k | } |
174 | 1.59k | return nullptr; |
175 | 83.3k | } |
176 | | |
177 | | /// returns true if whole buffer was parsed successfully |
178 | 83.3k | inline bool parse_ipv4_whole(const char* src, const char* end, unsigned char* dst) { |
179 | 83.3k | return parse_ipv4(src, end, dst) == end; |
180 | 83.3k | } |
181 | | |
182 | | /// returns pointer to the right after parsed sequence or null on failed parsing |
183 | 1 | inline const char* parse_ipv4(const char* src, unsigned char* dst) { |
184 | 1 | if (parse_ipv4( |
185 | 1 | src, []() { return false; }, dst)) { |
186 | 1 | return src; |
187 | 1 | } |
188 | 0 | return nullptr; |
189 | 1 | } |
190 | | |
191 | | /// returns true if whole null-terminated string was parsed successfully |
192 | 1 | inline bool parse_ipv4_whole(const char* src, unsigned char* dst) { |
193 | 1 | const char* end = parse_ipv4(src, dst); |
194 | 1 | return end != nullptr && *end == '\0'; |
195 | 1 | } |
196 | | |
197 | | /// integer logarithm, return ceil(log(value, base)) (the smallest integer greater or equal than log(value, base) |
198 | 0 | inline constexpr UInt32 int_log(const UInt32 value, const UInt32 base, const bool carry) { |
199 | 0 | return value >= base ? 1 + int_log(value / base, base, value % base || carry) |
200 | 0 | : value % base > 1 || carry; |
201 | 0 | } |
202 | | |
203 | | /// Print integer in desired base, faster than sprintf. |
204 | | /// NOTE This is not the best way. See https://github.com/miloyip/itoa-benchmark |
205 | | /// But it doesn't matter here. |
206 | | template <UInt32 base, typename T> |
207 | 101k | inline void print_integer(char*& out, T value) { |
208 | 101k | if (value == 0) { |
209 | 1 | *out++ = '0'; |
210 | 101k | } else { |
211 | 101k | constexpr size_t buffer_size = sizeof(T) * int_log(256, base, false); |
212 | | |
213 | 101k | char buf[buffer_size]; |
214 | 101k | auto ptr = buf; |
215 | | |
216 | 417k | while (value > 0) { |
217 | 315k | *ptr = hex_digit_lowercase(value % base); |
218 | 315k | ++ptr; |
219 | 315k | value /= base; |
220 | 315k | } |
221 | | |
222 | | /// Copy to out reversed. |
223 | 417k | while (ptr != buf) { |
224 | 315k | --ptr; |
225 | 315k | *out = *ptr; |
226 | 315k | ++out; |
227 | 315k | } |
228 | 101k | } |
229 | 101k | } |
230 | | |
231 | | /** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c |
232 | | * performs significantly faster than the reference implementation due to the absence of sprintf calls, |
233 | | * bounds checking, unnecessary string copying and length calculation. |
234 | | * @param src - pointer to IPv6 (16 bytes) stored in little-endian byte order |
235 | | * @param dst - where to put format result bytes |
236 | | * @param zeroed_tail_bytes_count - the parameter is currently not being used |
237 | | */ |
238 | 1.48M | inline void format_ipv6(unsigned char* src, char*& dst, uint8_t zeroed_tail_bytes_count = 0) { |
239 | 1.48M | struct { |
240 | 1.48M | Int64 base, len; |
241 | 1.48M | } best {-1, 0}, cur {-1, 0}; |
242 | 1.48M | std::array<UInt16, IPV6_BINARY_LENGTH / sizeof(UInt16)> words {}; |
243 | | |
244 | | // the current function logic is processed in big endian manner |
245 | | // but ipv6 in doris is stored in little-endian byte order |
246 | | // so transfer to big-endian byte order first |
247 | | // compatible with parse_ipv6 function in format_ip.h |
248 | 1.48M | std::reverse(src, src + IPV6_BINARY_LENGTH); |
249 | | |
250 | | /** Preprocess: |
251 | | * Copy the input (bytewise) array into a wordwise array. |
252 | | * Find the longest run of 0x00's in src[] for :: shorthanding. */ |
253 | 13.3M | for (size_t i = 0; i < (IPV6_BINARY_LENGTH - zeroed_tail_bytes_count); i += 2) { |
254 | 11.8M | words[i / 2] = (uint16_t)(src[i] << 8) | src[i + 1]; |
255 | 11.8M | } |
256 | | |
257 | 13.3M | for (size_t i = 0; i < words.size(); i++) { |
258 | 11.8M | if (words[i] == 0) { |
259 | 11.7M | if (cur.base == -1) { |
260 | 1.48M | cur.base = i; |
261 | 1.48M | cur.len = 1; |
262 | 10.3M | } else { |
263 | 10.3M | cur.len++; |
264 | 10.3M | } |
265 | 11.7M | } else { |
266 | 101k | if (cur.base != -1) { |
267 | 28.2k | if (best.base == -1 || cur.len > best.len) { |
268 | 28.2k | best = cur; |
269 | 28.2k | } |
270 | 28.2k | cur.base = -1; |
271 | 28.2k | } |
272 | 101k | } |
273 | 11.8M | } |
274 | | |
275 | 1.48M | if (cur.base != -1) { |
276 | 1.45M | if (best.base == -1 || cur.len > best.len) { |
277 | 1.45M | best = cur; |
278 | 1.45M | } |
279 | 1.45M | } |
280 | 1.48M | if (best.base != -1 && best.len < 2) { |
281 | 0 | best.base = -1; |
282 | 0 | } |
283 | | |
284 | | /// Format the result. |
285 | 13.3M | for (size_t i = 0; i < words.size(); i++) { |
286 | | /// Are we inside the best run of 0x00's? |
287 | 11.8M | if (best.base != -1) { |
288 | 11.8M | auto best_base = static_cast<size_t>(best.base); |
289 | 11.8M | if (i >= best_base && i < (best_base + best.len)) { |
290 | 11.7M | if (i == best_base) { |
291 | 1.48M | *dst++ = ':'; |
292 | 1.48M | } |
293 | 11.7M | continue; |
294 | 11.7M | } |
295 | 11.8M | } |
296 | | /// Are we following an initial run of 0x00s or any real hex? |
297 | 101k | if (i != 0) { |
298 | 71.8k | *dst++ = ':'; |
299 | 71.8k | } |
300 | | /// Is this address an encapsulated IPv4? |
301 | 101k | if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu))) { |
302 | 9 | uint8_t ipv4_buffer[IPV4_BINARY_LENGTH] = {0}; |
303 | 9 | memcpy(ipv4_buffer, src + 12, IPV4_BINARY_LENGTH); |
304 | | // Due to historical reasons format_ipv4() takes ipv4 in BE format, but inside ipv6 we store it in LE-format. |
305 | 9 | if constexpr (std::endian::native == std::endian::little) { |
306 | 9 | std::reverse(std::begin(ipv4_buffer), std::end(ipv4_buffer)); |
307 | 9 | } |
308 | 9 | format_ipv4(ipv4_buffer, dst, |
309 | 9 | std::min(zeroed_tail_bytes_count, static_cast<uint8_t>(IPV4_BINARY_LENGTH)), |
310 | 9 | "0"); |
311 | | // format_ipv4 has already added a null-terminator for us. |
312 | 9 | return; |
313 | 9 | } |
314 | 101k | print_integer<16>(dst, words[i]); |
315 | 101k | } |
316 | | |
317 | | /// Was it a trailing run of 0x00's? |
318 | 1.48M | if (best.base != -1 && |
319 | 1.48M | static_cast<size_t>(best.base) + static_cast<size_t>(best.len) == words.size()) { |
320 | 1.45M | *dst++ = ':'; |
321 | 1.45M | } |
322 | 1.48M | } |
323 | | |
324 | | /** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv6 string. |
325 | | * |
326 | | * Parses the input string `src` and stores binary little-endian value into buffer pointed by `dst`, |
327 | | * which should be long enough. In case of failure zeroes IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`. |
328 | | * |
329 | | * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) |
330 | | * and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity. |
331 | | * To parse strings use overloads below. |
332 | | * |
333 | | * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. |
334 | | * @param eof - function returning true if iterator riched the end - warning - can break iterator's continuity. |
335 | | * @param dst - where to put output bytes in little-endian byte order, expected to be non-null and at IPV6_BINARY_LENGTH-long. |
336 | | * @param first_block - preparsed first block |
337 | | * @return - true if parsed successfully, false otherwise. |
338 | | */ |
339 | | template <typename T, typename EOFfunction> |
340 | | requires(std::is_same<typename std::remove_cv<T>::type, char>::value) |
341 | 60.8k | inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) { |
342 | 60.8k | const auto clear_dst = [dst]() { |
343 | 5.42k | std::memset(dst, '\0', IPV6_BINARY_LENGTH); |
344 | 5.42k | return false; |
345 | 5.42k | }; _ZZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_iENKUlvE_clEv Line | Count | Source | 342 | 5.42k | const auto clear_dst = [dst]() { | 343 | 5.42k | std::memset(dst, '\0', IPV6_BINARY_LENGTH); | 344 | 5.42k | return false; | 345 | 5.42k | }; |
Unexecuted instantiation: _ZZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_iENKUlvE_clEv |
346 | | |
347 | 60.8k | if (src == nullptr || eof()) return clear_dst(); |
348 | | |
349 | 60.8k | int groups = 0; /// number of parsed groups |
350 | 60.8k | unsigned char* iter = dst; /// iterator over dst buffer |
351 | 60.8k | unsigned char* zptr = |
352 | 60.8k | nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started |
353 | | |
354 | 60.8k | std::memset(dst, '\0', IPV6_BINARY_LENGTH); |
355 | | |
356 | 60.8k | if (first_block >= 0) { |
357 | 0 | *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu); |
358 | 0 | *iter++ = static_cast<unsigned char>(first_block & 0xffu); |
359 | 0 | if (*src == ':') { |
360 | 0 | zptr = iter; |
361 | 0 | ++src; |
362 | 0 | } |
363 | 0 | ++groups; |
364 | 0 | } |
365 | | |
366 | 60.8k | bool group_start = true; |
367 | | |
368 | 495k | while (!eof() && groups < 8) { |
369 | 436k | if (*src == ':') { |
370 | 372k | ++src; |
371 | 372k | if (eof()) /// trailing colon is not allowed |
372 | 31 | return clear_dst(); |
373 | | |
374 | 372k | group_start = true; |
375 | | |
376 | 372k | if (*src == ':') { |
377 | 3.59k | if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed |
378 | 152 | return clear_dst(); |
379 | 3.43k | zptr = iter; |
380 | 3.43k | ++src; |
381 | 3.43k | if (!eof() && *src == ':') { |
382 | | /// more than one all-zeroes block is not allowed |
383 | 5 | return clear_dst(); |
384 | 5 | } |
385 | 3.43k | continue; |
386 | 3.43k | } |
387 | 368k | if (groups == 0) /// leading colon is not allowed |
388 | 0 | return clear_dst(); |
389 | 368k | } |
390 | | |
391 | | /// mixed IPv4 parsing |
392 | 433k | if (*src == '.') { |
393 | 78 | if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first |
394 | 0 | return clear_dst(); |
395 | | |
396 | 78 | if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group |
397 | 0 | return clear_dst(); |
398 | | |
399 | 78 | ++src; |
400 | 78 | if (eof()) return clear_dst(); |
401 | | |
402 | | /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4 |
403 | 78 | --groups; |
404 | 78 | iter -= 2; |
405 | | |
406 | 78 | UInt16 num = 0; |
407 | 234 | for (int i = 0; i < 2; ++i) { |
408 | 156 | unsigned char first = (iter[i] >> 4) & 0x0fu; |
409 | 156 | unsigned char second = iter[i] & 0x0fu; |
410 | 156 | if (first > 9 || second > 9) return clear_dst(); |
411 | 156 | (num *= 100) += first * 10 + second; |
412 | 156 | } |
413 | 78 | if (num > 255) return clear_dst(); |
414 | | |
415 | | /// parse IPv4 with known first octet |
416 | 78 | if (!parse_ipv4(src, eof, iter, num)) return clear_dst(); |
417 | | |
418 | | if constexpr (std::endian::native == std::endian::little) |
419 | 76 | std::reverse(iter, iter + IPV4_BINARY_LENGTH); |
420 | | |
421 | 76 | iter += 4; |
422 | 76 | groups += 2; |
423 | 76 | break; /// IPv4 block is the last - end of parsing |
424 | 78 | } |
425 | | |
426 | 433k | if (!group_start) /// end of parsing |
427 | 815 | break; |
428 | 432k | group_start = false; |
429 | | |
430 | 432k | UInt16 val = 0; /// current decoded group |
431 | 432k | int xdigits = 0; /// number of decoded hex digits in current group |
432 | | |
433 | 1.28M | for (; !eof() && xdigits < 4; ++src, ++xdigits) { |
434 | 1.10M | UInt8 num = unhex(*src); |
435 | 1.10M | if (num == 0xFF) break; |
436 | 857k | (val <<= 4) |= num; |
437 | 857k | } |
438 | | |
439 | 432k | if (xdigits == 0) /// end of parsing |
440 | 920 | break; |
441 | | |
442 | 431k | *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu); |
443 | 431k | *iter++ = static_cast<unsigned char>(val & 0xffu); |
444 | 431k | ++groups; |
445 | 431k | } |
446 | | |
447 | | /// either all 8 groups or all-zeroes block should be present |
448 | 60.6k | if (groups < 8 && zptr == nullptr) return clear_dst(); |
449 | | |
450 | | /// process all-zeroes block |
451 | 55.4k | if (zptr != nullptr) { |
452 | 3.24k | if (groups == 8) { |
453 | | /// all-zeroes block at least should be one |
454 | | /// 2001:0db8:86a3::08d3:1319:8a2e:0370:7344 not valid |
455 | 2 | return clear_dst(); |
456 | 2 | } |
457 | 3.24k | size_t msize = iter - zptr; |
458 | 3.24k | std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize); |
459 | 3.24k | std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst)); |
460 | 3.24k | } |
461 | | |
462 | | /// the current function logic is processed in big endian manner |
463 | | /// but ipv6 in doris is stored in little-endian byte order |
464 | | /// so transfer to little-endian |
465 | 55.4k | std::reverse(dst, dst + IPV6_BINARY_LENGTH); |
466 | | |
467 | 55.4k | return true; |
468 | 55.4k | } _ZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i Line | Count | Source | 341 | 60.8k | inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) { | 342 | 60.8k | const auto clear_dst = [dst]() { | 343 | 60.8k | std::memset(dst, '\0', IPV6_BINARY_LENGTH); | 344 | 60.8k | return false; | 345 | 60.8k | }; | 346 | | | 347 | 60.8k | if (src == nullptr || eof()) return clear_dst(); | 348 | | | 349 | 60.8k | int groups = 0; /// number of parsed groups | 350 | 60.8k | unsigned char* iter = dst; /// iterator over dst buffer | 351 | 60.8k | unsigned char* zptr = | 352 | 60.8k | nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started | 353 | | | 354 | 60.8k | std::memset(dst, '\0', IPV6_BINARY_LENGTH); | 355 | | | 356 | 60.8k | if (first_block >= 0) { | 357 | 0 | *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu); | 358 | 0 | *iter++ = static_cast<unsigned char>(first_block & 0xffu); | 359 | 0 | if (*src == ':') { | 360 | 0 | zptr = iter; | 361 | 0 | ++src; | 362 | 0 | } | 363 | 0 | ++groups; | 364 | 0 | } | 365 | | | 366 | 60.8k | bool group_start = true; | 367 | | | 368 | 495k | while (!eof() && groups < 8) { | 369 | 436k | if (*src == ':') { | 370 | 372k | ++src; | 371 | 372k | if (eof()) /// trailing colon is not allowed | 372 | 31 | return clear_dst(); | 373 | | | 374 | 372k | group_start = true; | 375 | | | 376 | 372k | if (*src == ':') { | 377 | 3.59k | if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed | 378 | 152 | return clear_dst(); | 379 | 3.43k | zptr = iter; | 380 | 3.43k | ++src; | 381 | 3.43k | if (!eof() && *src == ':') { | 382 | | /// more than one all-zeroes block is not allowed | 383 | 5 | return clear_dst(); | 384 | 5 | } | 385 | 3.43k | continue; | 386 | 3.43k | } | 387 | 368k | if (groups == 0) /// leading colon is not allowed | 388 | 0 | return clear_dst(); | 389 | 368k | 
} | 390 | | | 391 | | /// mixed IPv4 parsing | 392 | 433k | if (*src == '.') { | 393 | 78 | if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first | 394 | 0 | return clear_dst(); | 395 | | | 396 | 78 | if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group | 397 | 0 | return clear_dst(); | 398 | | | 399 | 78 | ++src; | 400 | 78 | if (eof()) return clear_dst(); | 401 | | | 402 | | /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4 | 403 | 78 | --groups; | 404 | 78 | iter -= 2; | 405 | | | 406 | 78 | UInt16 num = 0; | 407 | 234 | for (int i = 0; i < 2; ++i) { | 408 | 156 | unsigned char first = (iter[i] >> 4) & 0x0fu; | 409 | 156 | unsigned char second = iter[i] & 0x0fu; | 410 | 156 | if (first > 9 || second > 9) return clear_dst(); | 411 | 156 | (num *= 100) += first * 10 + second; | 412 | 156 | } | 413 | 78 | if (num > 255) return clear_dst(); | 414 | | | 415 | | /// parse IPv4 with known first octet | 416 | 78 | if (!parse_ipv4(src, eof, iter, num)) return clear_dst(); | 417 | | | 418 | | if constexpr (std::endian::native == std::endian::little) | 419 | 76 | std::reverse(iter, iter + IPV4_BINARY_LENGTH); | 420 | | | 421 | 76 | iter += 4; | 422 | 76 | groups += 2; | 423 | 76 | break; /// IPv4 block is the last - end of parsing | 424 | 78 | } | 425 | | | 426 | 433k | if (!group_start) /// end of parsing | 427 | 815 | break; | 428 | 432k | group_start = false; | 429 | | | 430 | 432k | UInt16 val = 0; /// current decoded group | 431 | 432k | int xdigits = 0; /// number of decoded hex digits in current group | 432 | | | 433 | 1.28M | for (; !eof() && xdigits < 4; ++src, ++xdigits) { | 434 | 1.10M | UInt8 num = unhex(*src); | 435 | 1.10M | if (num == 0xFF) break; | 436 | 857k | (val <<= 4) |= num; | 437 | 857k | } | 438 | | | 439 | 432k | if (xdigits == 0) /// end of parsing | 440 | 920 | break; | 441 | | | 442 | 431k | *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu); | 443 
| 431k | *iter++ = static_cast<unsigned char>(val & 0xffu); | 444 | 431k | ++groups; | 445 | 431k | } | 446 | | | 447 | | /// either all 8 groups or all-zeroes block should be present | 448 | 60.6k | if (groups < 8 && zptr == nullptr) return clear_dst(); | 449 | | | 450 | | /// process all-zeroes block | 451 | 55.4k | if (zptr != nullptr) { | 452 | 3.24k | if (groups == 8) { | 453 | | /// all-zeroes block at least should be one | 454 | | /// 2001:0db8:86a3::08d3:1319:8a2e:0370:7344 not valid | 455 | 2 | return clear_dst(); | 456 | 2 | } | 457 | 3.24k | size_t msize = iter - zptr; | 458 | 3.24k | std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize); | 459 | 3.24k | std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst)); | 460 | 3.24k | } | 461 | | | 462 | | /// the current function logic is processed in big endian manner | 463 | | /// but ipv6 in doris is stored in little-endian byte order | 464 | | /// so transfer to little-endian | 465 | 55.4k | std::reverse(dst, dst + IPV6_BINARY_LENGTH); | 466 | | | 467 | 55.4k | return true; | 468 | 55.4k | } |
Unexecuted instantiation: _ZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i |
469 | | |
470 | | /// returns pointer to the right after parsed sequence or null on failed parsing |
471 | 60.8k | inline const char* parse_ipv6(const char* src, const char* end, unsigned char* dst) { |
472 | 60.8k | if (parse_ipv6( |
473 | 2.22M | src, [&src, end]() { return src == end; }, dst)) |
474 | 55.4k | return src; |
475 | 5.42k | return nullptr; |
476 | 60.8k | } |
477 | | |
478 | | /// returns true if whole buffer was parsed successfully |
479 | 60.8k | inline bool parse_ipv6_whole(const char* src, const char* end, unsigned char* dst) { |
480 | 60.8k | return parse_ipv6(src, end, dst) == end; |
481 | 60.8k | } |
482 | | |
483 | | /// returns pointer to the right after parsed sequence or null on failed parsing |
484 | 0 | inline const char* parse_ipv6(const char* src, unsigned char* dst) { |
485 | 0 | if (parse_ipv6( |
486 | 0 | src, []() { return false; }, dst)) |
487 | 0 | return src; |
488 | 0 | return nullptr; |
489 | 0 | } |
490 | | |
491 | | /// returns true if whole null-terminated string was parsed successfully |
492 | 0 | inline bool parse_ipv6_whole(const char* src, unsigned char* dst) { |
493 | 0 | const char* end = parse_ipv6(src, dst); |
494 | 0 | return end != nullptr && *end == '\0'; |
495 | 0 | } |
496 | | |
497 | | } // namespace doris |