be/src/exec/common/format_ip.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/formatIPv6.h |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #include <algorithm> |
24 | | #include <array> |
25 | | #include <bit> |
26 | | #include <cstdint> |
27 | | #include <cstring> |
28 | | #include <utility> |
29 | | |
30 | | #include "core/types.h" |
31 | | #include "exec/common/hex.h" |
32 | | #include "exec/common/string_utils/string_utils.h" |
33 | | |
34 | | constexpr size_t IPV4_BINARY_LENGTH = 4; |
35 | | constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Length of "255.255.255.255"; does not count the terminating zero byte. |
36 | | constexpr size_t IPV6_MAX_TEXT_LENGTH = 39; |
37 | | constexpr size_t IPV4_MIN_NUM_VALUE = 0; // numeric value of '0.0.0.0' |
38 | | constexpr size_t IPV4_MAX_NUM_VALUE = 4294967295; // numeric value of '255.255.255.255' |
39 | | constexpr int IPV4_MAX_OCTET_VALUE = 255; // maximum value of a single octet |
40 | | constexpr size_t IPV4_OCTET_BITS = 8; |
41 | | constexpr size_t DECIMAL_BASE = 10; |
42 | | constexpr size_t IPV6_BINARY_LENGTH = 16; |
43 | | |
44 | | namespace doris { |
45 | | #include "common/compile_check_begin.h" |
46 | | |
47 | | extern const std::array<std::pair<const char*, size_t>, 256> one_byte_to_string_lookup_table; |
48 | | |
49 | | /** Format a binary IPv4 address as dotted-decimal text: 'aaa.bbb.ccc.ddd'. |
50 | | * The bytes of src are read as a host-endian integer, that is 0x7f000001 => "127.0.0.1". |
51 | | * If src_size < IPV4_BINARY_LENGTH, each missing leading octet is written as "0". |
52 | | * Any number of the tail octets can be masked with the given mask string. |
53 | | * dst is advanced past the last character written; no terminating zero byte is written. |
54 | | * Assumptions: |
55 | | * src is src_size bytes long and src_size <= IPV4_BINARY_LENGTH, |
56 | | * dst has room for IPV4_MAX_TEXT_LENGTH + 1 characters, because a '.' is written after the last octet before dst steps back over it (more room is needed if mask_string is longer than 3 characters), |
57 | | * mask_tail_octets <= IPV4_BINARY_LENGTH |
58 | | * mask_string is NON-NULL, if mask_tail_octets > 0. |
59 | | * |
60 | | * Examples (using the overload without src_size, which passes src_size = 4): |
61 | | * format_ipv4(&0x7f000001, dst, mask_tail_octets = 0, nullptr); |
62 | | * > dst == "127.0.0.1" |
63 | | * format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "xxx"); |
64 | | * > dst == "127.0.0.xxx" |
65 | | * format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "0"); |
66 | | * > dst == "127.0.0.0" |
67 | | */ |
68 | | inline void format_ipv4(const unsigned char* src, size_t src_size, char*& dst, |
69 | 1.49M | uint8_t mask_tail_octets = 0, const char* mask_string = "xxx") { |
70 | 1.49M | const size_t mask_length = mask_string ? strlen(mask_string) : 0; |
71 | 1.49M | const size_t limit = std::min(IPV4_BINARY_LENGTH, IPV4_BINARY_LENGTH - mask_tail_octets); |
72 | 1.49M | const size_t padding = std::min(4 - src_size, limit); |
73 | 1.49M | for (size_t octet = 0; octet < padding; ++octet) { |
74 | 0 | *dst++ = '0'; |
75 | 0 | *dst++ = '.'; |
76 | 0 | } |
77 | | |
78 | 7.49M | for (size_t octet = 4 - src_size; octet < limit; ++octet) { |
79 | 5.99M | uint8_t value = 0; |
80 | | if constexpr (std::endian::native == std::endian::little) |
81 | 5.99M | value = static_cast<uint8_t>(src[IPV4_BINARY_LENGTH - octet - 1]); |
82 | | else |
83 | | value = static_cast<uint8_t>(src[octet]); |
84 | 5.99M | const uint8_t len = static_cast<uint8_t>(one_byte_to_string_lookup_table[value].second); |
85 | 5.99M | const char* str = one_byte_to_string_lookup_table[value].first; |
86 | | |
87 | 5.99M | memcpy(dst, str, len); |
88 | 5.99M | dst += len; |
89 | | |
90 | 5.99M | *dst++ = '.'; |
91 | 5.99M | } |
92 | | |
93 | 1.49M | for (size_t mask = 0; mask < mask_tail_octets; ++mask) { |
94 | 6 | memcpy(dst, mask_string, mask_length); |
95 | 6 | dst += mask_length; |
96 | | |
97 | 6 | *dst++ = '.'; |
98 | 6 | } |
99 | | |
100 | 1.49M | dst--; |
101 | 1.49M | } |
102 | | |
103 | | inline void format_ipv4(const unsigned char* src, char*& dst, uint8_t mask_tail_octets = 0, |
104 | 1.49M | const char* mask_string = "xxx") { |
105 | 1.49M | format_ipv4(src, 4, dst, mask_tail_octets, mask_string); |
106 | 1.49M | } |
107 | | |
108 | | /** Unsafe (no bounds-checking for src nor dst), optimized version of parsing an IPv4 string. |
109 | | * |
110 | | * Parses the input string `src` and stores the binary host-endian value into the buffer pointed to by `dst`, |
111 | | * which should be long enough. |
112 | | * That is "127.0.0.1" becomes 0x7f000001. |
113 | | * Up to four digit characters are consumed per octet (so leading zeros are accepted) as long as the octet value is <= IPV4_MAX_OCTET_VALUE. |
114 | | * In case of failure doesn't modify the buffer pointed to by `dst`. |
115 | | * `src` is advanced while parsing, including when parsing fails. |
116 | | * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) |
117 | | * and eof is the ReadBuffer::eof() - therefore the algorithm below does not rely on the buffer's continuity. |
118 | | * To parse strings use the overloads below. |
119 | | * |
120 | | * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. |
121 | | * @param eof - function returning true if the iterator reached the end - warning - can break the iterator's continuity. |
122 | | * @param dst - where to put output bytes, expected to be non-null and at least IPV4_BINARY_LENGTH bytes long. |
123 | | * @param first_octet - preparsed first octet, or a negative value (the default) when all four octets are still in `src`; values above IPV4_MAX_OCTET_VALUE are rejected. |
124 | | * @return - true if parsed successfully, false otherwise. |
125 | | */ |
126 | | template <typename T, typename EOFfunction> |
127 | | requires(std::is_same<typename std::remove_cv<T>::type, char>::value) |
128 | 83.4k | inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) { |
129 | 83.4k | if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) { |
130 | 0 | return false; |
131 | 0 | } |
132 | | |
133 | 83.4k | UInt32 result = 0; |
134 | 83.4k | int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS; |
135 | 83.4k | if (first_octet >= 0) { |
136 | 78 | result |= first_octet << offset; |
137 | 78 | offset -= IPV4_OCTET_BITS; |
138 | 78 | } |
139 | | |
140 | 329k | for (; true; offset -= IPV4_OCTET_BITS, ++src) { |
141 | 329k | if (eof()) { |
142 | 61 | return false; |
143 | 61 | } |
144 | | |
145 | 329k | UInt32 value = 0; |
146 | 329k | size_t len = 0; |
147 | 824k | while (is_numeric_ascii(*src) && len <= 3) { |
148 | 576k | value = value * DECIMAL_BASE + (*src - '0'); |
149 | 576k | ++len; |
150 | 576k | ++src; |
151 | 576k | if (eof()) { |
152 | 81.8k | break; |
153 | 81.8k | } |
154 | 576k | } |
155 | 329k | if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) { |
156 | 1.53k | return false; |
157 | 1.53k | } |
158 | 328k | result |= value << offset; |
159 | | |
160 | 328k | if (offset == 0) { |
161 | 81.8k | break; |
162 | 81.8k | } |
163 | 328k | } |
164 | | |
165 | 81.8k | memcpy(dst, &result, sizeof(result)); |
166 | 81.8k | return true; |
167 | 83.4k | } _ZN5doris10parse_ipv4IKcZNS_10parse_ipv4EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i Line | Count | Source | 128 | 83.3k | inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) { | 129 | 83.3k | if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) { | 130 | 0 | return false; | 131 | 0 | } | 132 | | | 133 | 83.3k | UInt32 result = 0; | 134 | 83.3k | int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS; | 135 | 83.3k | if (first_octet >= 0) { | 136 | 0 | result |= first_octet << offset; | 137 | 0 | offset -= IPV4_OCTET_BITS; | 138 | 0 | } | 139 | | | 140 | 329k | for (; true; offset -= IPV4_OCTET_BITS, ++src) { | 141 | 329k | if (eof()) { | 142 | 61 | return false; | 143 | 61 | } | 144 | | | 145 | 329k | UInt32 value = 0; | 146 | 329k | size_t len = 0; | 147 | 823k | while (is_numeric_ascii(*src) && len <= 3) { | 148 | 576k | value = value * DECIMAL_BASE + (*src - '0'); | 149 | 576k | ++len; | 150 | 576k | ++src; | 151 | 576k | if (eof()) { | 152 | 81.7k | break; | 153 | 81.7k | } | 154 | 576k | } | 155 | 329k | if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) { | 156 | 1.53k | return false; | 157 | 1.53k | } | 158 | 327k | result |= value << offset; | 159 | | | 160 | 327k | if (offset == 0) { | 161 | 81.7k | break; | 162 | 81.7k | } | 163 | 327k | } | 164 | | | 165 | 81.7k | memcpy(dst, &result, sizeof(result)); | 166 | 81.7k | return true; | 167 | 83.3k | } |
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv4EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i Line | Count | Source | 128 | 1 | inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) { | 129 | 1 | if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) { | 130 | 0 | return false; | 131 | 0 | } | 132 | | | 133 | 1 | UInt32 result = 0; | 134 | 1 | int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS; | 135 | 1 | if (first_octet >= 0) { | 136 | 0 | result |= first_octet << offset; | 137 | 0 | offset -= IPV4_OCTET_BITS; | 138 | 0 | } | 139 | | | 140 | 4 | for (; true; offset -= IPV4_OCTET_BITS, ++src) { | 141 | 4 | if (eof()) { | 142 | 0 | return false; | 143 | 0 | } | 144 | | | 145 | 4 | UInt32 value = 0; | 146 | 4 | size_t len = 0; | 147 | 10 | while (is_numeric_ascii(*src) && len <= 3) { | 148 | 6 | value = value * DECIMAL_BASE + (*src - '0'); | 149 | 6 | ++len; | 150 | 6 | ++src; | 151 | 6 | if (eof()) { | 152 | 0 | break; | 153 | 0 | } | 154 | 6 | } | 155 | 4 | if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) { | 156 | 0 | return false; | 157 | 0 | } | 158 | 4 | result |= value << offset; | 159 | | | 160 | 4 | if (offset == 0) { | 161 | 1 | break; | 162 | 1 | } | 163 | 4 | } | 164 | | | 165 | 1 | memcpy(dst, &result, sizeof(result)); | 166 | 1 | return true; | 167 | 1 | } |
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i Line | Count | Source | 128 | 78 | inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) { | 129 | 78 | if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) { | 130 | 0 | return false; | 131 | 0 | } | 132 | | | 133 | 78 | UInt32 result = 0; | 134 | 78 | int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS; | 135 | 78 | if (first_octet >= 0) { | 136 | 78 | result |= first_octet << offset; | 137 | 78 | offset -= IPV4_OCTET_BITS; | 138 | 78 | } | 139 | | | 140 | 233 | for (; true; offset -= IPV4_OCTET_BITS, ++src) { | 141 | 233 | if (eof()) { | 142 | 0 | return false; | 143 | 0 | } | 144 | | | 145 | 233 | UInt32 value = 0; | 146 | 233 | size_t len = 0; | 147 | 659 | while (is_numeric_ascii(*src) && len <= 3) { | 148 | 503 | value = value * DECIMAL_BASE + (*src - '0'); | 149 | 503 | ++len; | 150 | 503 | ++src; | 151 | 503 | if (eof()) { | 152 | 77 | break; | 153 | 77 | } | 154 | 503 | } | 155 | 233 | if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) { | 156 | 2 | return false; | 157 | 2 | } | 158 | 231 | result |= value << offset; | 159 | | | 160 | 231 | if (offset == 0) { | 161 | 76 | break; | 162 | 76 | } | 163 | 231 | } | 164 | | | 165 | 76 | memcpy(dst, &result, sizeof(result)); | 166 | 76 | return true; | 167 | 78 | } |
Unexecuted instantiation: _ZN5doris10parse_ipv4IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i |
168 | | |
169 | | /// Parses IPv4 text from [src, end). Returns a pointer just past the parsed sequence, or nullptr if parsing failed. |
170 | 83.3k | inline const char* parse_ipv4(const char* src, const char* end, unsigned char* dst) { |
171 | 83.3k | if (parse_ipv4( |
172 | 1.15M | src, [&src, end]() { return src == end; }, dst)) { |
173 | 81.7k | return src; |
174 | 81.7k | } |
175 | 1.59k | return nullptr; |
176 | 83.3k | } |
177 | | |
178 | | /// Returns true only if parsing succeeded and stopped exactly at `end`, i.e. the whole buffer [src, end) was consumed. |
179 | 83.3k | inline bool parse_ipv4_whole(const char* src, const char* end, unsigned char* dst) { |
180 | 83.3k | return parse_ipv4(src, end, dst) == end; |
181 | 83.3k | } |
182 | | |
183 | | /// Parses IPv4 text without an explicit end (the eof callback always returns false). Returns a pointer just past the parsed sequence, or nullptr if parsing failed. |
184 | 1 | inline const char* parse_ipv4(const char* src, unsigned char* dst) { |
185 | 1 | if (parse_ipv4( |
186 | 1 | src, []() { return false; }, dst)) { |
187 | 1 | return src; |
188 | 1 | } |
189 | 0 | return nullptr; |
190 | 1 | } |
191 | | |
192 | | /// Returns true if the whole null-terminated string was parsed successfully, i.e. parsing stopped exactly at the terminating zero byte. |
193 | 1 | inline bool parse_ipv4_whole(const char* src, unsigned char* dst) { |
194 | 1 | const char* end = parse_ipv4(src, dst); |
195 | 1 | return end != nullptr && *end == '\0'; |
196 | 1 | } |
197 | | |
198 | | /// Integer logarithm: returns ceil(log(value, base)), the smallest integer greater than or equal to log(value, base). `carry` records whether an earlier division step dropped a non-zero remainder. |
199 | 0 | inline constexpr UInt32 int_log(const UInt32 value, const UInt32 base, const bool carry) { |
200 | 0 | return value >= base ? 1 + int_log(value / base, base, value % base || carry) |
201 | 0 | : value % base > 1 || carry; |
202 | 0 | } |
203 | | |
204 | | /// Write `value` in base `base` at `out` and advance `out` past the digits written (no terminating zero byte); faster than sprintf. |
205 | | /// NOTE This is not the best way. See https://github.com/miloyip/itoa-benchmark |
206 | | /// But it doesn't matter here. A zero value is written as '0'; the digit loop only runs while value > 0. |
207 | | template <UInt32 base, typename T> |
208 | 101k | inline void print_integer(char*& out, T value) { |
209 | 101k | if (value == 0) { |
210 | 1 | *out++ = '0'; |
211 | 101k | } else { |
212 | 101k | constexpr size_t buffer_size = sizeof(T) * int_log(256, base, false); |
213 | | |
214 | 101k | char buf[buffer_size]; |
215 | 101k | auto ptr = buf; |
216 | | |
217 | 417k | while (value > 0) { |
218 | 315k | *ptr = hex_digit_lowercase(value % base); |
219 | 315k | ++ptr; |
220 | 315k | value /= base; |
221 | 315k | } |
222 | | |
223 | | /// Digits were produced least-significant first; copy them to out in reverse order. |
224 | 417k | while (ptr != buf) { |
225 | 315k | --ptr; |
226 | 315k | *out = *ptr; |
227 | 315k | ++out; |
228 | 315k | } |
229 | 101k | } |
230 | 101k | } |
231 | | |
232 | | /** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c |
233 | | * performs significantly faster than the reference implementation due to the absence of sprintf calls, |
234 | | * bounds checking, unnecessary string copying and length calculation. |
235 | | * @param src - pointer to IPv6 (16 bytes) stored in little-endian byte order; NOTE: the 16 bytes are reversed in place and are not restored before returning |
236 | | * @param dst - where to put the formatted text; advanced past the last character written, no terminating zero byte is written |
237 | | * @param zeroed_tail_bytes_count - the word-building loop only covers the first IPV6_BINARY_LENGTH - zeroed_tail_bytes_count bytes of the reversed address (remaining words stay zero); also passed, capped at IPV4_BINARY_LENGTH, as mask_tail_octets when an embedded IPv4 tail is printed |
238 | | */ |
239 | 1.48M | inline void format_ipv6(unsigned char* src, char*& dst, uint8_t zeroed_tail_bytes_count = 0) { |
240 | 1.48M | struct { |
241 | 1.48M | Int64 base, len; |
242 | 1.48M | } best {-1, 0}, cur {-1, 0}; |
243 | 1.48M | std::array<UInt16, IPV6_BINARY_LENGTH / sizeof(UInt16)> words {}; |
244 | | |
245 | | // The logic below works on bytes in big-endian order, |
246 | | // but ipv6 in doris is stored in little-endian byte order, |
247 | | // so reverse src in place first (the caller's buffer stays reversed afterwards). |
248 | | // This mirrors the final reversal done by the parse_ipv6 function in format_ip.h. |
249 | 1.48M | std::reverse(src, src + IPV6_BINARY_LENGTH); |
250 | | |
251 | | /** Preprocess: |
252 | | * Copy the input (bytewise) array into a wordwise array. |
253 | | * Find the longest run of 0x00's in src[] for :: shorthanding. */ |
254 | 13.3M | for (size_t i = 0; i < (IPV6_BINARY_LENGTH - zeroed_tail_bytes_count); i += 2) { |
255 | 11.8M | words[i / 2] = (uint16_t)(src[i] << 8) | src[i + 1]; |
256 | 11.8M | } |
257 | | |
258 | 13.3M | for (size_t i = 0; i < words.size(); i++) { |
259 | 11.8M | if (words[i] == 0) { |
260 | 11.7M | if (cur.base == -1) { |
261 | 1.48M | cur.base = i; |
262 | 1.48M | cur.len = 1; |
263 | 10.3M | } else { |
264 | 10.3M | cur.len++; |
265 | 10.3M | } |
266 | 11.7M | } else { |
267 | 101k | if (cur.base != -1) { |
268 | 28.2k | if (best.base == -1 || cur.len > best.len) { |
269 | 28.2k | best = cur; |
270 | 28.2k | } |
271 | 28.2k | cur.base = -1; |
272 | 28.2k | } |
273 | 101k | } |
274 | 11.8M | } |
275 | | |
276 | 1.48M | if (cur.base != -1) { |
277 | 1.45M | if (best.base == -1 || cur.len > best.len) { |
278 | 1.45M | best = cur; |
279 | 1.45M | } |
280 | 1.45M | } |
281 | 1.48M | if (best.base != -1 && best.len < 2) { |
282 | 0 | best.base = -1; |
283 | 0 | } |
284 | | |
285 | | /// Format the result. |
286 | 13.3M | for (size_t i = 0; i < words.size(); i++) { |
287 | | /// Are we inside the best run of 0x00's? |
288 | 11.8M | if (best.base != -1) { |
289 | 11.8M | auto best_base = static_cast<size_t>(best.base); |
290 | 11.8M | if (i >= best_base && i < (best_base + best.len)) { |
291 | 11.7M | if (i == best_base) { |
292 | 1.48M | *dst++ = ':'; |
293 | 1.48M | } |
294 | 11.7M | continue; |
295 | 11.7M | } |
296 | 11.8M | } |
297 | | /// Are we following an initial run of 0x00s or any real hex? |
298 | 101k | if (i != 0) { |
299 | 71.8k | *dst++ = ':'; |
300 | 71.8k | } |
301 | | /// Is this address an encapsulated IPv4? |
302 | 101k | if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu))) { |
303 | 9 | uint8_t ipv4_buffer[IPV4_BINARY_LENGTH] = {0}; |
304 | 9 | memcpy(ipv4_buffer, src + 12, IPV4_BINARY_LENGTH); |
305 | | // After the reversal above, src + 12 holds the IPv4 part in big-endian order; format_ipv4() reads a host-endian value, so swap the bytes on little-endian hosts. |
306 | 9 | if constexpr (std::endian::native == std::endian::little) { |
307 | 9 | std::reverse(std::begin(ipv4_buffer), std::end(ipv4_buffer)); |
308 | 9 | } |
309 | 9 | format_ipv4(ipv4_buffer, dst, |
310 | 9 | std::min(zeroed_tail_bytes_count, static_cast<uint8_t>(IPV4_BINARY_LENGTH)), |
311 | 9 | "0"); |
312 | | // format_ipv4 has already advanced dst past the dotted quad (it writes no terminator); nothing more to emit. |
313 | 9 | return; |
314 | 9 | } |
315 | 101k | print_integer<16>(dst, words[i]); |
316 | 101k | } |
317 | | |
318 | | /// Was it a trailing run of 0x00's? |
319 | 1.48M | if (best.base != -1 && |
320 | 1.48M | static_cast<size_t>(best.base) + static_cast<size_t>(best.len) == words.size()) { |
321 | 1.45M | *dst++ = ':'; |
322 | 1.45M | } |
323 | 1.48M | } |
324 | | |
325 | | /** Unsafe (no bounds-checking for src nor dst), optimized version of parsing an IPv6 string. |
326 | | * |
327 | | * Parses the input string `src` and stores the binary little-endian value into the buffer pointed to by `dst`, |
328 | | * which should be long enough. In case of failure zeroes IPV6_BINARY_LENGTH bytes of the buffer pointed to by `dst`. |
329 | | * Parsing can stop before the end of input (after 8 groups, or at a character that cannot continue the address); use parse_ipv6_whole to require full consumption. |
330 | | * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) |
331 | | * and eof is the ReadBuffer::eof() - therefore the algorithm below does not rely on the buffer's continuity. |
332 | | * To parse strings use the overloads below. |
333 | | * |
334 | | * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. |
335 | | * @param eof - function returning true if the iterator reached the end - warning - can break the iterator's continuity. |
336 | | * @param dst - where to put output bytes in little-endian byte order, expected to be non-null and at least IPV6_BINARY_LENGTH bytes long. |
337 | | * @param first_block - preparsed first block, or a negative value (the default) if nothing has been preparsed |
338 | | * @return - true if parsed successfully, false otherwise. |
339 | | */ |
340 | | template <typename T, typename EOFfunction> |
341 | | requires(std::is_same<typename std::remove_cv<T>::type, char>::value) |
342 | 60.8k | inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) { |
343 | 60.8k | const auto clear_dst = [dst]() { |
344 | 5.42k | std::memset(dst, '\0', IPV6_BINARY_LENGTH); |
345 | 5.42k | return false; |
346 | 5.42k | }; _ZZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_iENKUlvE_clEv Line | Count | Source | 343 | 5.42k | const auto clear_dst = [dst]() { | 344 | 5.42k | std::memset(dst, '\0', IPV6_BINARY_LENGTH); | 345 | 5.42k | return false; | 346 | 5.42k | }; |
Unexecuted instantiation: _ZZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_iENKUlvE_clEv |
347 | | |
348 | 60.8k | if (src == nullptr || eof()) return clear_dst(); |
349 | | |
350 | 60.8k | int groups = 0; /// number of parsed groups |
351 | 60.8k | unsigned char* iter = dst; /// write position inside the dst buffer |
352 | 60.8k | unsigned char* zptr = |
353 | 60.8k | nullptr; /// pointer into the dst buffer where the all-zeroes block ("::") starts |
354 | | |
355 | 60.8k | std::memset(dst, '\0', IPV6_BINARY_LENGTH); |
356 | | |
357 | 60.8k | if (first_block >= 0) { |
358 | 0 | *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu); |
359 | 0 | *iter++ = static_cast<unsigned char>(first_block & 0xffu); |
360 | 0 | if (*src == ':') { |
361 | 0 | zptr = iter; |
362 | 0 | ++src; |
363 | 0 | } |
364 | 0 | ++groups; |
365 | 0 | } |
366 | | |
367 | 60.8k | bool group_start = true; |
368 | | |
369 | 495k | while (!eof() && groups < 8) { |
370 | 436k | if (*src == ':') { |
371 | 372k | ++src; |
372 | 372k | if (eof()) /// trailing colon is not allowed |
373 | 31 | return clear_dst(); |
374 | | |
375 | 372k | group_start = true; |
376 | | |
377 | 372k | if (*src == ':') { |
378 | 3.59k | if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed |
379 | 152 | return clear_dst(); |
380 | 3.43k | zptr = iter; |
381 | 3.43k | ++src; |
382 | 3.43k | if (!eof() && *src == ':') { |
383 | | /// a third consecutive colon is not allowed |
384 | 5 | return clear_dst(); |
385 | 5 | } |
386 | 3.43k | continue; |
387 | 3.43k | } |
388 | 368k | if (groups == 0) /// leading colon is not allowed |
389 | 0 | return clear_dst(); |
390 | 368k | } |
391 | | |
392 | | /// mixed IPv4 parsing |
393 | 433k | if (*src == '.') { |
394 | 78 | if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first |
395 | 0 | return clear_dst(); |
396 | | |
397 | 78 | if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group |
398 | 0 | return clear_dst(); |
399 | | |
400 | 78 | ++src; |
401 | 78 | if (eof()) return clear_dst(); |
402 | | |
403 | | /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4 |
404 | 78 | --groups; |
405 | 78 | iter -= 2; |
406 | | |
407 | 78 | UInt16 num = 0; |
408 | 234 | for (int i = 0; i < 2; ++i) { |
409 | 156 | unsigned char first = (iter[i] >> 4) & 0x0fu; |
410 | 156 | unsigned char second = iter[i] & 0x0fu; |
411 | 156 | if (first > 9 || second > 9) return clear_dst(); |
412 | 156 | (num *= 100) += first * 10 + second; |
413 | 156 | } |
414 | 78 | if (num > 255) return clear_dst(); |
415 | | |
416 | | /// parse IPv4 with known first octet |
417 | 78 | if (!parse_ipv4(src, eof, iter, num)) return clear_dst(); |
418 | | |
419 | | if constexpr (std::endian::native == std::endian::little) |
420 | 76 | std::reverse(iter, iter + IPV4_BINARY_LENGTH); |
421 | | |
422 | 76 | iter += 4; |
423 | 76 | groups += 2; |
424 | 76 | break; /// IPv4 block is the last - end of parsing |
425 | 78 | } |
426 | | |
427 | 433k | if (!group_start) /// end of parsing |
428 | 815 | break; |
429 | 432k | group_start = false; |
430 | | |
431 | 432k | UInt16 val = 0; /// current decoded group |
432 | 432k | int xdigits = 0; /// number of decoded hex digits in current group |
433 | | |
434 | 1.28M | for (; !eof() && xdigits < 4; ++src, ++xdigits) { |
435 | 1.10M | UInt8 num = unhex(*src); |
436 | 1.10M | if (num == 0xFF) break; |
437 | 857k | (val <<= 4) |= num; |
438 | 857k | } |
439 | | |
440 | 432k | if (xdigits == 0) /// end of parsing |
441 | 920 | break; |
442 | | |
443 | 431k | *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu); |
444 | 431k | *iter++ = static_cast<unsigned char>(val & 0xffu); |
445 | 431k | ++groups; |
446 | 431k | } |
447 | | |
448 | | /// either all 8 groups or all-zeroes block should be present |
449 | 60.6k | if (groups < 8 && zptr == nullptr) return clear_dst(); |
450 | | |
451 | | /// process all-zeroes block |
452 | 55.4k | if (zptr != nullptr) { |
453 | 3.24k | if (groups == 8) { |
454 | | /// the all-zeroes block ("::") must stand for at least one group |
455 | | /// e.g. 2001:0db8:86a3::08d3:1319:8a2e:0370:7344 is not valid |
456 | 2 | return clear_dst(); |
457 | 2 | } |
458 | 3.24k | size_t msize = iter - zptr; |
459 | 3.24k | std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize); |
460 | 3.24k | std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst)); |
461 | 3.24k | } |
462 | | |
463 | | /// the logic above fills dst in big-endian order |
464 | | /// but ipv6 in doris is stored in little-endian byte order |
465 | | /// so convert to little-endian byte order |
466 | 55.4k | std::reverse(dst, dst + IPV6_BINARY_LENGTH); |
467 | | |
468 | 55.4k | return true; |
469 | 55.4k | } _ZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i Line | Count | Source | 342 | 60.8k | inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) { | 343 | 60.8k | const auto clear_dst = [dst]() { | 344 | 60.8k | std::memset(dst, '\0', IPV6_BINARY_LENGTH); | 345 | 60.8k | return false; | 346 | 60.8k | }; | 347 | | | 348 | 60.8k | if (src == nullptr || eof()) return clear_dst(); | 349 | | | 350 | 60.8k | int groups = 0; /// number of parsed groups | 351 | 60.8k | unsigned char* iter = dst; /// iterator over dst buffer | 352 | 60.8k | unsigned char* zptr = | 353 | 60.8k | nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started | 354 | | | 355 | 60.8k | std::memset(dst, '\0', IPV6_BINARY_LENGTH); | 356 | | | 357 | 60.8k | if (first_block >= 0) { | 358 | 0 | *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu); | 359 | 0 | *iter++ = static_cast<unsigned char>(first_block & 0xffu); | 360 | 0 | if (*src == ':') { | 361 | 0 | zptr = iter; | 362 | 0 | ++src; | 363 | 0 | } | 364 | 0 | ++groups; | 365 | 0 | } | 366 | | | 367 | 60.8k | bool group_start = true; | 368 | | | 369 | 495k | while (!eof() && groups < 8) { | 370 | 436k | if (*src == ':') { | 371 | 372k | ++src; | 372 | 372k | if (eof()) /// trailing colon is not allowed | 373 | 31 | return clear_dst(); | 374 | | | 375 | 372k | group_start = true; | 376 | | | 377 | 372k | if (*src == ':') { | 378 | 3.59k | if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed | 379 | 152 | return clear_dst(); | 380 | 3.43k | zptr = iter; | 381 | 3.43k | ++src; | 382 | 3.43k | if (!eof() && *src == ':') { | 383 | | /// more than one all-zeroes block is not allowed | 384 | 5 | return clear_dst(); | 385 | 5 | } | 386 | 3.43k | continue; | 387 | 3.43k | } | 388 | 368k | if (groups == 0) /// leading colon is not allowed | 389 | 0 | return clear_dst(); | 390 | 368k | 
} | 391 | | | 392 | | /// mixed IPv4 parsing | 393 | 433k | if (*src == '.') { | 394 | 78 | if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first | 395 | 0 | return clear_dst(); | 396 | | | 397 | 78 | if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group | 398 | 0 | return clear_dst(); | 399 | | | 400 | 78 | ++src; | 401 | 78 | if (eof()) return clear_dst(); | 402 | | | 403 | | /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4 | 404 | 78 | --groups; | 405 | 78 | iter -= 2; | 406 | | | 407 | 78 | UInt16 num = 0; | 408 | 234 | for (int i = 0; i < 2; ++i) { | 409 | 156 | unsigned char first = (iter[i] >> 4) & 0x0fu; | 410 | 156 | unsigned char second = iter[i] & 0x0fu; | 411 | 156 | if (first > 9 || second > 9) return clear_dst(); | 412 | 156 | (num *= 100) += first * 10 + second; | 413 | 156 | } | 414 | 78 | if (num > 255) return clear_dst(); | 415 | | | 416 | | /// parse IPv4 with known first octet | 417 | 78 | if (!parse_ipv4(src, eof, iter, num)) return clear_dst(); | 418 | | | 419 | | if constexpr (std::endian::native == std::endian::little) | 420 | 76 | std::reverse(iter, iter + IPV4_BINARY_LENGTH); | 421 | | | 422 | 76 | iter += 4; | 423 | 76 | groups += 2; | 424 | 76 | break; /// IPv4 block is the last - end of parsing | 425 | 78 | } | 426 | | | 427 | 433k | if (!group_start) /// end of parsing | 428 | 815 | break; | 429 | 432k | group_start = false; | 430 | | | 431 | 432k | UInt16 val = 0; /// current decoded group | 432 | 432k | int xdigits = 0; /// number of decoded hex digits in current group | 433 | | | 434 | 1.28M | for (; !eof() && xdigits < 4; ++src, ++xdigits) { | 435 | 1.10M | UInt8 num = unhex(*src); | 436 | 1.10M | if (num == 0xFF) break; | 437 | 857k | (val <<= 4) |= num; | 438 | 857k | } | 439 | | | 440 | 432k | if (xdigits == 0) /// end of parsing | 441 | 920 | break; | 442 | | | 443 | 431k | *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu); | 444 
| 431k | *iter++ = static_cast<unsigned char>(val & 0xffu); | 445 | 431k | ++groups; | 446 | 431k | } | 447 | | | 448 | | /// either all 8 groups or all-zeroes block should be present | 449 | 60.6k | if (groups < 8 && zptr == nullptr) return clear_dst(); | 450 | | | 451 | | /// process all-zeroes block | 452 | 55.4k | if (zptr != nullptr) { | 453 | 3.24k | if (groups == 8) { | 454 | | /// all-zeroes block at least should be one | 455 | | /// 2001:0db8:86a3::08d3:1319:8a2e:0370:7344 not valid | 456 | 2 | return clear_dst(); | 457 | 2 | } | 458 | 3.24k | size_t msize = iter - zptr; | 459 | 3.24k | std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize); | 460 | 3.24k | std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst)); | 461 | 3.24k | } | 462 | | | 463 | | /// the current function logic is processed in big endian manner | 464 | | /// but ipv6 in doris is stored in little-endian byte order | 465 | | /// so transfer to little-endian | 466 | 55.4k | std::reverse(dst, dst + IPV6_BINARY_LENGTH); | 467 | | | 468 | 55.4k | return true; | 469 | 55.4k | } |
Unexecuted instantiation: _ZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i |
470 | | |
471 | | /// Parses IPv6 text from [src, end). Returns a pointer just past the parsed sequence, or nullptr if parsing failed. |
472 | 60.8k | inline const char* parse_ipv6(const char* src, const char* end, unsigned char* dst) { |
473 | 60.8k | if (parse_ipv6( |
474 | 2.22M | src, [&src, end]() { return src == end; }, dst)) |
475 | 55.4k | return src; |
476 | 5.42k | return nullptr; |
477 | 60.8k | } |
478 | | |
479 | | /// Returns true only if parsing succeeded and stopped exactly at `end`, i.e. the whole buffer [src, end) was consumed. |
480 | 60.8k | inline bool parse_ipv6_whole(const char* src, const char* end, unsigned char* dst) { |
481 | 60.8k | return parse_ipv6(src, end, dst) == end; |
482 | 60.8k | } |
483 | | |
484 | | /// Parses IPv6 text without an explicit end (the eof callback always returns false). Returns a pointer just past the parsed sequence, or nullptr if parsing failed. |
485 | 0 | inline const char* parse_ipv6(const char* src, unsigned char* dst) { |
486 | 0 | if (parse_ipv6( |
487 | 0 | src, []() { return false; }, dst)) |
488 | 0 | return src; |
489 | 0 | return nullptr; |
490 | 0 | } |
491 | | |
492 | | /// Returns true if the whole null-terminated string was parsed successfully, i.e. parsing stopped exactly at the terminating zero byte. |
493 | 0 | inline bool parse_ipv6_whole(const char* src, unsigned char* dst) { |
494 | 0 | const char* end = parse_ipv6(src, dst); |
495 | 0 | return end != nullptr && *end == '\0'; |
496 | 0 | } |
497 | | |
498 | | #include "common/compile_check_end.h" |
499 | | } // namespace doris |