be/src/exec/common/format_ip.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/formatIPv6.h |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #include <algorithm> |
24 | | #include <array> |
25 | | #include <bit> |
26 | | #include <cstdint> |
27 | | #include <cstring> |
28 | | #include <utility> |
29 | | |
30 | | #include "core/types.h" |
31 | | #include "exec/common/hex.h" |
32 | | #include "exec/common/string_utils/string_utils.h" |
33 | | |
/// Size in bytes of a binary IPv4 address.
constexpr size_t IPV4_BINARY_LENGTH = 4;
/// Longest dotted-decimal IPv4 text ("255.255.255.255"); does not count the trailing zero byte.
constexpr size_t IPV4_MAX_TEXT_LENGTH = 15;
/// Eight groups of four hex digits plus seven ':' separators.
/// NOTE(review): presumably also excludes the trailing zero byte, like IPV4_MAX_TEXT_LENGTH - confirm.
constexpr size_t IPV6_MAX_TEXT_LENGTH = 39;
constexpr size_t IPV4_MIN_NUM_VALUE = 0;          /// numeric value of '0.0.0.0'
constexpr size_t IPV4_MAX_NUM_VALUE = 4294967295; /// numeric value of '255.255.255.255'
constexpr int IPV4_MAX_OCTET_VALUE = 255;         /// largest value a single octet may hold
/// Number of bits per octet; used as the shift step when assembling an IPv4 value.
constexpr size_t IPV4_OCTET_BITS = 8;
/// Radix used when accumulating the decimal digits of an octet.
constexpr size_t DECIMAL_BASE = 10;
/// Size in bytes of a binary IPv6 address.
constexpr size_t IPV6_BINARY_LENGTH = 16;
43 | | |
44 | | namespace doris { |
45 | | #include "common/compile_check_begin.h" |
46 | | |
/// Lookup table indexed by a byte value (0..255). format_ipv4() copies `.second` bytes from
/// `.first` to print one octet, so each entry is (text, text length).
/// The table itself is defined outside this header.
/// NOTE(review): presumably the text is the decimal form of the index - confirm at the definition.
extern const std::array<std::pair<const char*, size_t>, 256> one_byte_to_string_lookup_table;
48 | | |
49 | | /** Format 4-byte binary sequesnce as IPv4 text: 'aaa.bbb.ccc.ddd', |
50 | | * expects in out to be in BE-format, that is 0x7f000001 => "127.0.0.1". |
51 | | * |
52 | | * Any number of the tail bytes can be masked with given mask string. |
53 | | * |
54 | | * Assumptions: |
55 | | * src is IPV4_BINARY_LENGTH long, |
56 | | * dst is IPV4_MAX_TEXT_LENGTH long, |
57 | | * mask_tail_octets <= IPV4_BINARY_LENGTH |
58 | | * mask_string is NON-NULL, if mask_tail_octets > 0. |
59 | | * |
60 | | * Examples: |
61 | | * format_ipv4(&0x7f000001, dst, mask_tail_octets = 0, nullptr); |
62 | | * > dst == "127.0.0.1" |
63 | | * format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "xxx"); |
64 | | * > dst == "127.0.0.xxx" |
65 | | * format_ipv4(&0x7f000001, dst, mask_tail_octets = 1, "0"); |
66 | | * > dst == "127.0.0.0" |
67 | | */ |
68 | | inline void format_ipv4(const unsigned char* src, size_t src_size, char*& dst, |
69 | 1.51M | uint8_t mask_tail_octets = 0, const char* mask_string = "xxx") { |
70 | 1.51M | const size_t mask_length = mask_string ? strlen(mask_string) : 0; |
71 | 1.51M | const size_t limit = std::min(IPV4_BINARY_LENGTH, IPV4_BINARY_LENGTH - mask_tail_octets); |
72 | 1.51M | const size_t padding = std::min(4 - src_size, limit); |
73 | 1.51M | for (size_t octet = 0; octet < padding; ++octet) { |
74 | 6 | *dst++ = '0'; |
75 | 6 | *dst++ = '.'; |
76 | 6 | } |
77 | | |
78 | 7.55M | for (size_t octet = 4 - src_size; octet < limit; ++octet) { |
79 | 6.04M | uint8_t value = 0; |
80 | | if constexpr (std::endian::native == std::endian::little) |
81 | 6.04M | value = static_cast<uint8_t>(src[IPV4_BINARY_LENGTH - octet - 1]); |
82 | | else |
83 | | value = static_cast<uint8_t>(src[octet]); |
84 | 6.04M | const uint8_t len = static_cast<uint8_t>(one_byte_to_string_lookup_table[value].second); |
85 | 6.04M | const char* str = one_byte_to_string_lookup_table[value].first; |
86 | | |
87 | 6.04M | memcpy(dst, str, len); |
88 | 6.04M | dst += len; |
89 | | |
90 | 6.04M | *dst++ = '.'; |
91 | 6.04M | } |
92 | | |
93 | 1.51M | for (size_t mask = 0; mask < mask_tail_octets; ++mask) { |
94 | 12 | memcpy(dst, mask_string, mask_length); |
95 | 12 | dst += mask_length; |
96 | | |
97 | 12 | *dst++ = '.'; |
98 | 12 | } |
99 | | |
100 | 1.51M | dst--; |
101 | 1.51M | } |
102 | | |
103 | | inline void format_ipv4(const unsigned char* src, char*& dst, uint8_t mask_tail_octets = 0, |
104 | 1.50M | const char* mask_string = "xxx") { |
105 | 1.50M | format_ipv4(src, 4, dst, mask_tail_octets, mask_string); |
106 | 1.50M | } |
107 | | |
108 | | /** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string. |
109 | | * |
110 | | * Parses the input string `src` and stores binary host-endian value into buffer pointed by `dst`, |
111 | | * which should be long enough. |
112 | | * That is "127.0.0.1" becomes 0x7f000001. |
113 | | * |
114 | | * In case of failure doesn't modify buffer pointed by `dst`. |
115 | | * |
116 | | * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position()) |
117 | | * and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity. |
118 | | * To parse strings use overloads below. |
119 | | * |
120 | | * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed. |
121 | | * @param eof - function returning true if iterator riched the end - warning - can break iterator's continuity. |
122 | | * @param dst - where to put output bytes, expected to be non-null and at IPV4_BINARY_LENGTH-long. |
123 | | * @param first_octet - preparsed first octet |
124 | | * @return - true if parsed successfully, false otherwise. |
125 | | */ |
126 | | template <typename T, typename EOFfunction> |
127 | | requires(std::is_same<typename std::remove_cv<T>::type, char>::value) |
128 | 182k | inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) { |
129 | 182k | if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) { |
130 | 0 | return false; |
131 | 0 | } |
132 | | |
133 | 182k | UInt32 result = 0; |
134 | 182k | int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS; |
135 | 182k | if (first_octet >= 0) { |
136 | 461 | result |= first_octet << offset; |
137 | 461 | offset -= IPV4_OCTET_BITS; |
138 | 461 | } |
139 | | |
140 | 712k | for (; true; offset -= IPV4_OCTET_BITS, ++src) { |
141 | 712k | if (eof()) { |
142 | 66 | return false; |
143 | 66 | } |
144 | | |
145 | 712k | UInt32 value = 0; |
146 | 712k | size_t len = 0; |
147 | 2.08M | while (is_numeric_ascii(*src) && len <= 3) { |
148 | 1.54M | value = value * DECIMAL_BASE + (*src - '0'); |
149 | 1.54M | ++len; |
150 | 1.54M | ++src; |
151 | 1.54M | if (eof()) { |
152 | 175k | break; |
153 | 175k | } |
154 | 1.54M | } |
155 | 712k | if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) { |
156 | 6.31k | return false; |
157 | 6.31k | } |
158 | 706k | result |= value << offset; |
159 | | |
160 | 706k | if (offset == 0) { |
161 | 176k | break; |
162 | 176k | } |
163 | 706k | } |
164 | | |
165 | 176k | memcpy(dst, &result, sizeof(result)); |
166 | 176k | return true; |
167 | 182k | } _ZN5doris10parse_ipv4IKcZNS_10parse_ipv4EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i Line | Count | Source | 128 | 179k | inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) { | 129 | 179k | if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) { | 130 | 0 | return false; | 131 | 0 | } | 132 | | | 133 | 179k | UInt32 result = 0; | 134 | 179k | int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS; | 135 | 179k | if (first_octet >= 0) { | 136 | 0 | result |= first_octet << offset; | 137 | 0 | offset -= IPV4_OCTET_BITS; | 138 | 0 | } | 139 | | | 140 | 704k | for (; true; offset -= IPV4_OCTET_BITS, ++src) { | 141 | 704k | if (eof()) { | 142 | 66 | return false; | 143 | 66 | } | 144 | | | 145 | 704k | UInt32 value = 0; | 146 | 704k | size_t len = 0; | 147 | 2.06M | while (is_numeric_ascii(*src) && len <= 3) { | 148 | 1.53M | value = value * DECIMAL_BASE + (*src - '0'); | 149 | 1.53M | ++len; | 150 | 1.53M | ++src; | 151 | 1.53M | if (eof()) { | 152 | 175k | break; | 153 | 175k | } | 154 | 1.53M | } | 155 | 704k | if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) { | 156 | 3.97k | return false; | 157 | 3.97k | } | 158 | 700k | result |= value << offset; | 159 | | | 160 | 700k | if (offset == 0) { | 161 | 175k | break; | 162 | 175k | } | 163 | 700k | } | 164 | | | 165 | 175k | memcpy(dst, &result, sizeof(result)); | 166 | 175k | return true; | 167 | 179k | } |
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv4EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i Line | Count | Source | 128 | 3.39k | inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) { | 129 | 3.39k | if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) { | 130 | 0 | return false; | 131 | 0 | } | 132 | | | 133 | 3.39k | UInt32 result = 0; | 134 | 3.39k | int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS; | 135 | 3.39k | if (first_octet >= 0) { | 136 | 0 | result |= first_octet << offset; | 137 | 0 | offset -= IPV4_OCTET_BITS; | 138 | 0 | } | 139 | | | 140 | 6.55k | for (; true; offset -= IPV4_OCTET_BITS, ++src) { | 141 | 6.55k | if (eof()) { | 142 | 0 | return false; | 143 | 0 | } | 144 | | | 145 | 6.55k | UInt32 value = 0; | 146 | 6.55k | size_t len = 0; | 147 | 17.0k | while (is_numeric_ascii(*src) && len <= 3) { | 148 | 10.5k | value = value * DECIMAL_BASE + (*src - '0'); | 149 | 10.5k | ++len; | 150 | 10.5k | ++src; | 151 | 10.5k | if (eof()) { | 152 | 0 | break; | 153 | 0 | } | 154 | 10.5k | } | 155 | 6.55k | if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) { | 156 | 2.33k | return false; | 157 | 2.33k | } | 158 | 4.21k | result |= value << offset; | 159 | | | 160 | 4.21k | if (offset == 0) { | 161 | 1.05k | break; | 162 | 1.05k | } | 163 | 4.21k | } | 164 | | | 165 | 1.05k | memcpy(dst, &result, sizeof(result)); | 166 | 1.05k | return true; | 167 | 3.39k | } |
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv6EPS1_S2_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i Line | Count | Source | 128 | 393 | inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) { | 129 | 393 | if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) { | 130 | 0 | return false; | 131 | 0 | } | 132 | | | 133 | 393 | UInt32 result = 0; | 134 | 393 | int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS; | 135 | 393 | if (first_octet >= 0) { | 136 | 393 | result |= first_octet << offset; | 137 | 393 | offset -= IPV4_OCTET_BITS; | 138 | 393 | } | 139 | | | 140 | 1.17k | for (; true; offset -= IPV4_OCTET_BITS, ++src) { | 141 | 1.17k | if (eof()) { | 142 | 0 | return false; | 143 | 0 | } | 144 | | | 145 | 1.17k | UInt32 value = 0; | 146 | 1.17k | size_t len = 0; | 147 | 3.13k | while (is_numeric_ascii(*src) && len <= 3) { | 148 | 2.34k | value = value * DECIMAL_BASE + (*src - '0'); | 149 | 2.34k | ++len; | 150 | 2.34k | ++src; | 151 | 2.34k | if (eof()) { | 152 | 391 | break; | 153 | 391 | } | 154 | 2.34k | } | 155 | 1.17k | if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) { | 156 | 4 | return false; | 157 | 4 | } | 158 | 1.17k | result |= value << offset; | 159 | | | 160 | 1.17k | if (offset == 0) { | 161 | 389 | break; | 162 | 389 | } | 163 | 1.17k | } | 164 | | | 165 | 389 | memcpy(dst, &result, sizeof(result)); | 166 | 389 | return true; | 167 | 393 | } |
_ZN5doris10parse_ipv4IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i Line | Count | Source | 128 | 68 | inline bool parse_ipv4(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_octet = -1) { | 129 | 68 | if (src == nullptr || first_octet > IPV4_MAX_OCTET_VALUE) { | 130 | 0 | return false; | 131 | 0 | } | 132 | | | 133 | 68 | UInt32 result = 0; | 134 | 68 | int offset = (IPV4_BINARY_LENGTH - 1) * IPV4_OCTET_BITS; | 135 | 68 | if (first_octet >= 0) { | 136 | 68 | result |= first_octet << offset; | 137 | 68 | offset -= IPV4_OCTET_BITS; | 138 | 68 | } | 139 | | | 140 | 204 | for (; true; offset -= IPV4_OCTET_BITS, ++src) { | 141 | 204 | if (eof()) { | 142 | 0 | return false; | 143 | 0 | } | 144 | | | 145 | 204 | UInt32 value = 0; | 146 | 204 | size_t len = 0; | 147 | 642 | while (is_numeric_ascii(*src) && len <= 3) { | 148 | 438 | value = value * DECIMAL_BASE + (*src - '0'); | 149 | 438 | ++len; | 150 | 438 | ++src; | 151 | 438 | if (eof()) { | 152 | 0 | break; | 153 | 0 | } | 154 | 438 | } | 155 | 204 | if (len == 0 || value > IPV4_MAX_OCTET_VALUE || (offset > 0 && (eof() || *src != '.'))) { | 156 | 0 | return false; | 157 | 0 | } | 158 | 204 | result |= value << offset; | 159 | | | 160 | 204 | if (offset == 0) { | 161 | 68 | break; | 162 | 68 | } | 163 | 204 | } | 164 | | | 165 | 68 | memcpy(dst, &result, sizeof(result)); | 166 | 68 | return true; | 167 | 68 | } |
|
168 | | |
169 | | /// returns pointer to the right after parsed sequence or null on failed parsing |
170 | 179k | inline const char* parse_ipv4(const char* src, const char* end, unsigned char* dst) { |
171 | 179k | if (parse_ipv4( |
172 | 2.76M | src, [&src, end]() { return src == end; }, dst)) { |
173 | 175k | return src; |
174 | 175k | } |
175 | 4.03k | return nullptr; |
176 | 179k | } |
177 | | |
178 | | /// returns true if whole buffer was parsed successfully |
179 | 179k | inline bool parse_ipv4_whole(const char* src, const char* end, unsigned char* dst) { |
180 | 179k | return parse_ipv4(src, end, dst) == end; |
181 | 179k | } |
182 | | |
183 | | /// returns pointer to the right after parsed sequence or null on failed parsing |
184 | 3.39k | inline const char* parse_ipv4(const char* src, unsigned char* dst) { |
185 | 3.39k | if (parse_ipv4( |
186 | 3.39k | src, []() { return false; }, dst)) { |
187 | 1.05k | return src; |
188 | 1.05k | } |
189 | 2.33k | return nullptr; |
190 | 3.39k | } |
191 | | |
192 | | /// returns true if whole null-terminated string was parsed successfully |
193 | 3.39k | inline bool parse_ipv4_whole(const char* src, unsigned char* dst) { |
194 | 3.39k | const char* end = parse_ipv4(src, dst); |
195 | 3.39k | return end != nullptr && *end == '\0'; |
196 | 3.39k | } |
197 | | |
198 | | /// integer logarithm, return ceil(log(value, base)) (the smallest integer greater or equal than log(value, base) |
199 | 0 | inline constexpr UInt32 int_log(const UInt32 value, const UInt32 base, const bool carry) { |
200 | 0 | return value >= base ? 1 + int_log(value / base, base, value % base || carry) |
201 | 0 | : value % base > 1 || carry; |
202 | 0 | } |
203 | | |
204 | | /// Print integer in desired base, faster than sprintf. |
205 | | /// NOTE This is not the best way. See https://github.com/miloyip/itoa-benchmark |
206 | | /// But it doesn't matter here. |
207 | | template <UInt32 base, typename T> |
208 | 140k | inline void print_integer(char*& out, T value) { |
209 | 140k | if (value == 0) { |
210 | 177 | *out++ = '0'; |
211 | 139k | } else { |
212 | 139k | constexpr size_t buffer_size = sizeof(T) * int_log(256, base, false); |
213 | | |
214 | 139k | char buf[buffer_size]; |
215 | 139k | auto ptr = buf; |
216 | | |
217 | 584k | while (value > 0) { |
218 | 444k | *ptr = hex_digit_lowercase(value % base); |
219 | 444k | ++ptr; |
220 | 444k | value /= base; |
221 | 444k | } |
222 | | |
223 | | /// Copy to out reversed. |
224 | 584k | while (ptr != buf) { |
225 | 444k | --ptr; |
226 | 444k | *out = *ptr; |
227 | 444k | ++out; |
228 | 444k | } |
229 | 139k | } |
230 | 140k | } |
231 | | |
/** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
 * performs significantly faster than the reference implementation due to the absence of sprintf calls,
 * bounds checking, unnecessary string copying and length calculation.
 *
 * The longest run of two or more all-zero 16-bit words is abbreviated as "::" (the first
 * such run wins a tie). When that run starts at word 0 and covers six words, or five words
 * followed by 0xffff, the last four bytes are printed in dotted IPv4 form.
 *
 * No terminating zero byte is written; on return `dst` points right after the last
 * character written.
 *
 * WARNING: the 16 bytes at `src` are reversed in place and are NOT restored before returning.
 *
 * @param src - pointer to IPv6 (16 bytes) stored in little-endian byte order; modified, see above
 * @param dst - where to put format result bytes (reference to pointer, advanced while writing)
 * @param zeroed_tail_bytes_count - the parameter is currently not being used by callers;
 *        in the code it shortens the byte range copied into the word array (the remaining words stay zero)
 *        and, capped at IPV4_BINARY_LENGTH, is forwarded as the mask count for an embedded IPv4 part
 */
inline void format_ipv6(unsigned char* src, char*& dst, uint8_t zeroed_tail_bytes_count = 0) {
    /// `best` is the longest zero run found so far, `cur` the run being scanned;
    /// base == -1 means "no run".
    struct {
        Int64 base, len;
    } best {-1, 0}, cur {-1, 0};
    std::array<UInt16, IPV6_BINARY_LENGTH / sizeof(UInt16)> words {};

    // the current function logic is processed in big endian manner
    // but ipv6 in doris is stored in little-endian byte order
    // so transfer to big-endian byte order first
    // compatible with parse_ipv6 function in format_ip.h
    std::reverse(src, src + IPV6_BINARY_LENGTH);

    /** Preprocess:
     *    Copy the input (bytewise) array into a wordwise array.
     *    Find the longest run of 0x00's in src[] for :: shorthanding. */
    for (size_t i = 0; i < (IPV6_BINARY_LENGTH - zeroed_tail_bytes_count); i += 2) {
        words[i / 2] = (uint16_t)(src[i] << 8) | src[i + 1];
    }

    for (size_t i = 0; i < words.size(); i++) {
        if (words[i] == 0) {
            if (cur.base == -1) {
                cur.base = i;
                cur.len = 1;
            } else {
                cur.len++;
            }
        } else {
            if (cur.base != -1) {
                /// A zero run just ended; keep it only if it is strictly longer than the best one.
                if (best.base == -1 || cur.len > best.len) {
                    best = cur;
                }
                cur.base = -1;
            }
        }
    }

    /// Account for a zero run that extends to the last word.
    if (cur.base != -1) {
        if (best.base == -1 || cur.len > best.len) {
            best = cur;
        }
    }
    /// A single zero word is not abbreviated.
    if (best.base != -1 && best.len < 2) {
        best.base = -1;
    }

    /// Format the result.
    for (size_t i = 0; i < words.size(); i++) {
        /// Are we inside the best run of 0x00's?
        if (best.base != -1) {
            auto best_base = static_cast<size_t>(best.base);
            if (i >= best_base && i < (best_base + best.len)) {
                /// Only the first word of the run emits a ':'; the rest emit nothing.
                if (i == best_base) {
                    *dst++ = ':';
                }
                continue;
            }
        }
        /// Are we following an initial run of 0x00s or any real hex?
        if (i != 0) {
            *dst++ = ':';
        }
        /// Is this address an encapsulated IPv4?
        if (i == 6 && best.base == 0 && (best.len == 6 || (best.len == 5 && words[5] == 0xffffu))) {
            uint8_t ipv4_buffer[IPV4_BINARY_LENGTH] = {0};
            memcpy(ipv4_buffer, src + 12, IPV4_BINARY_LENGTH);
            // src was reversed above, so these four bytes are in big-endian order. On a
            // little-endian host format_ipv4() reads its buffer from the last byte to the
            // first, so flip them here.
            if constexpr (std::endian::native == std::endian::little) {
                std::reverse(std::begin(ipv4_buffer), std::end(ipv4_buffer));
            }
            format_ipv4(ipv4_buffer, dst,
                        std::min(zeroed_tail_bytes_count, static_cast<uint8_t>(IPV4_BINARY_LENGTH)),
                        "0");
            // The IPv4 part is always last. Note that format_ipv4 does not write a
            // terminating zero byte; dst simply points past the text.
            return;
        }
        print_integer<16>(dst, words[i]);
    }

    /// Was it a trailing run of 0x00's?
    if (best.base != -1 &&
        static_cast<size_t>(best.base) + static_cast<size_t>(best.len) == words.size()) {
        *dst++ = ':';
    }
}
324 | | |
/** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv6 string.
 *
 * Parses the input string `src` and stores binary little-endian value into buffer pointed by `dst`,
 * which should be long enough. In case of failure zeroes IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`.
 *
 * Accepted input: up to 8 groups of 1..4 hex digits separated by ':', at most one "::"
 * (standing for at least one all-zero group), and optionally a trailing dotted IPv4 part
 * in place of the last two groups. Parsing stops at the first character that cannot
 * continue the address, so a `true` result does not mean the whole input was consumed -
 * the caller has to check the final `src` position.
 *
 * WARNING - this function is adapted to work with ReadBuffer, where src is the position reference (ReadBuffer::position())
 * and eof is the ReadBuffer::eof() - therefore algorithm below does not rely on buffer's continuity.
 * To parse strings use overloads below.
 *
 * @param src - iterator (reference to pointer) over input string - warning - continuity is not guaranteed.
 * @param eof - function returning true if iterator reached the end - warning - can break iterator's continuity.
 * @param dst - where to put output bytes in little-endian byte order, expected to be non-null and at least IPV6_BINARY_LENGTH long.
 * @param first_block - preparsed first block; a negative value means "not provided"
 * @return - true if parsed successfully, false otherwise.
 */
template <typename T, typename EOFfunction>
    requires(std::is_same<typename std::remove_cv<T>::type, char>::value)
inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) {
    /// Failure path: zero the output buffer and yield false, so it can be used as `return clear_dst();`.
    const auto clear_dst = [dst]() {
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
        return false;
    };

    if (src == nullptr || eof()) return clear_dst();

    int groups = 0;            /// number of parsed groups
    unsigned char* iter = dst; /// iterator over dst buffer
    unsigned char* zptr =
            nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started

    std::memset(dst, '\0', IPV6_BINARY_LENGTH);

    /// A preparsed first group is stored high byte first, like every group below.
    if (first_block >= 0) {
        *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu);
        *iter++ = static_cast<unsigned char>(first_block & 0xffu);
        if (*src == ':') {
            zptr = iter;
            ++src;
        }
        ++groups;
    }

    /// true when the next character is expected to begin a new hex group
    bool group_start = true;

    while (!eof() && groups < 8) {
        if (*src == ':') {
            ++src;
            if (eof()) /// trailing colon is not allowed
                return clear_dst();

            group_start = true;

            if (*src == ':') {
                if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed
                    return clear_dst();
                zptr = iter;
                ++src;
                if (!eof() && *src == ':') {
                    /// more than one all-zeroes block is not allowed
                    return clear_dst();
                }
                continue;
            }
            if (groups == 0) /// leading colon is not allowed
                return clear_dst();
        }

        /// mixed IPv4 parsing
        if (*src == '.') {
            if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first
                return clear_dst();

            if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group
                return clear_dst();

            ++src;
            if (eof()) return clear_dst();

            /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4
            --groups;
            iter -= 2;

            /// The group was stored as hex nibbles; re-read the same nibbles as decimal digits
            /// and reject any nibble above 9.
            UInt16 num = 0;
            for (int i = 0; i < 2; ++i) {
                unsigned char first = (iter[i] >> 4) & 0x0fu;
                unsigned char second = iter[i] & 0x0fu;
                if (first > 9 || second > 9) return clear_dst();
                (num *= 100) += first * 10 + second;
            }
            if (num > 255) return clear_dst();

            /// parse IPv4 with known first octet
            if (!parse_ipv4(src, eof, iter, num)) return clear_dst();

            /// parse_ipv4 stores a host-endian value, while this buffer is filled most
            /// significant byte first until the final reversal below.
            if constexpr (std::endian::native == std::endian::little)
                std::reverse(iter, iter + IPV4_BINARY_LENGTH);

            iter += 4;
            groups += 2;
            break; /// IPv4 block is the last - end of parsing
        }

        if (!group_start) /// end of parsing
            break;
        group_start = false;

        UInt16 val = 0;  /// current decoded group
        int xdigits = 0; /// number of decoded hex digits in current group

        for (; !eof() && xdigits < 4; ++src, ++xdigits) {
            UInt8 num = unhex(*src);
            if (num == 0xFF) break; /// unhex result treated as "not a hex digit"
            (val <<= 4) |= num;
        }

        if (xdigits == 0) /// end of parsing
            break;

        *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu);
        *iter++ = static_cast<unsigned char>(val & 0xffu);
        ++groups;
    }

    /// either all 8 groups or all-zeroes block should be present
    if (groups < 8 && zptr == nullptr) return clear_dst();

    /// process all-zeroes block
    if (zptr != nullptr) {
        if (groups == 8) {
            /// all-zeroes block at least should be one
            /// 2001:0db8:86a3::08d3:1319:8a2e:0370:7344 not valid
            return clear_dst();
        }
        /// Move the bytes written after "::" to the end of the buffer and zero the gap
        /// that opens up where "::" was.
        size_t msize = iter - zptr;
        std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize);
        std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst));
    }

    /// the current function logic is processed in big endian manner
    /// but ipv6 in doris is stored in little-endian byte order
    /// so transfer to little-endian
    std::reverse(dst, dst + IPV6_BINARY_LENGTH);

    return true;
}
// Coverage listing of the instantiation
// _ZN5doris10parse_ipv6IKcZNS_10parse_ipv6EPS1_PhEUlvE_Qsr3std7is_sameINSt9remove_cvIT_E4typeEcEE5valueEEbRPS6_T0_S3_i
// NOTE(review): the template header (declaring T and EOFfunction) is not part of this
// listing; it lives immediately above this function in the original header.
//
/// Parses the textual IPv6 address starting at `src` into `dst`.
///
/// `src` is taken by reference and is advanced past every character that was consumed.
/// `eof` is a callable returning true when `src` has reached the end of the input.
/// `dst` receives IPV6_BINARY_LENGTH bytes; it is zero-filled before parsing and
/// zero-filled again on every failure path.
/// `first_block`, when non-negative, is stored as an already-parsed first group.
///
/// Returns true on success and false on any syntax error.
inline bool parse_ipv6(T*& src, EOFfunction eof, unsigned char* dst, int32_t first_block = -1) {
    /// common failure exit: wipe the output buffer and report failure
    const auto clear_dst = [dst]() {
        std::memset(dst, '\0', IPV6_BINARY_LENGTH);
        return false;
    };

    if (src == nullptr || eof()) return clear_dst();

    int groups = 0;            /// number of parsed groups
    unsigned char* iter = dst; /// iterator over dst buffer
    unsigned char* zptr =
            nullptr; /// pointer into dst buffer array where all-zeroes block ("::") is started

    std::memset(dst, '\0', IPV6_BINARY_LENGTH);

    /// a caller-supplied first group is written high byte first, like every parsed group
    if (first_block >= 0) {
        *iter++ = static_cast<unsigned char>((first_block >> 8) & 0xffu);
        *iter++ = static_cast<unsigned char>(first_block & 0xffu);
        if (*src == ':') {
            /// a colon directly after the supplied group starts the all-zeroes block here
            zptr = iter;
            ++src;
        }
        ++groups;
    }

    /// true when the next thing expected in the input is the start of a group
    bool group_start = true;

    while (!eof() && groups < 8) {
        if (*src == ':') {
            ++src;
            if (eof()) /// trailing colon is not allowed
                return clear_dst();

            group_start = true;

            if (*src == ':') {
                if (zptr != nullptr) /// multiple all-zeroes blocks are not allowed
                    return clear_dst();
                zptr = iter;
                ++src;
                if (!eof() && *src == ':') {
                    /// more than one all-zeroes block is not allowed
                    return clear_dst();
                }
                continue;
            }
            if (groups == 0) /// leading colon is not allowed
                return clear_dst();
        }

        /// mixed IPv4 parsing
        if (*src == '.') {
            if (groups <= 1 && zptr == nullptr) /// IPv4 block can't be the first
                return clear_dst();

            if (group_start) /// first octet of IPv4 should be already parsed as an IPv6 group
                return clear_dst();

            ++src;
            if (eof()) return clear_dst();

            /// last parsed group should be reinterpreted as a decimal value - it's the first octet of IPv4
            --groups;
            iter -= 2;

            /// each nibble of the two stored bytes is one decimal digit of the octet;
            /// any nibble above 9 means the text contained a hex letter
            UInt16 num = 0;
            for (int i = 0; i < 2; ++i) {
                unsigned char first = (iter[i] >> 4) & 0x0fu;
                unsigned char second = iter[i] & 0x0fu;
                if (first > 9 || second > 9) return clear_dst();
                (num *= 100) += first * 10 + second;
            }
            if (num > 255) return clear_dst();

            /// parse IPv4 with known first octet
            if (!parse_ipv4(src, eof, iter, num)) return clear_dst();

            /// NOTE(review): presumably parse_ipv4 stores the address as a native-endian
            /// integer, hence the byte swap on little-endian hosts - confirm against parse_ipv4
            if constexpr (std::endian::native == std::endian::little)
                std::reverse(iter, iter + IPV4_BINARY_LENGTH);

            /// the IPv4 block occupies four bytes, i.e. two IPv6 groups
            iter += 4;
            groups += 2;
            break; /// IPv4 block is the last - end of parsing
        }

        if (!group_start) /// end of parsing
            break;
        group_start = false;

        UInt16 val = 0;  /// current decoded group
        int xdigits = 0; /// number of decoded hex digits in current group

        /// consume at most four hex digits; 0xFF from unhex marks a non-hex character
        for (; !eof() && xdigits < 4; ++src, ++xdigits) {
            UInt8 num = unhex(*src);
            if (num == 0xFF) break;
            (val <<= 4) |= num;
        }

        if (xdigits == 0) /// end of parsing
            break;

        /// store the group high byte first
        *iter++ = static_cast<unsigned char>((val >> 8) & 0xffu);
        *iter++ = static_cast<unsigned char>(val & 0xffu);
        ++groups;
    }

    /// either all 8 groups or all-zeroes block should be present
    if (groups < 8 && zptr == nullptr) return clear_dst();

    /// process all-zeroes block
    if (zptr != nullptr) {
        if (groups == 8) {
            /// all-zeroes block at least should be one
            /// 2001:0db8:86a3::08d3:1319:8a2e:0370:7344 not valid
            return clear_dst();
        }
        /// shift the bytes written after "::" to the end of the buffer,
        /// then zero the gap that the "::" stands for
        size_t msize = iter - zptr;
        std::memmove(dst + IPV6_BINARY_LENGTH - msize, zptr, msize);
        std::memset(zptr, '\0', IPV6_BINARY_LENGTH - (iter - dst));
    }

    /// the current function logic is processed in big endian manner
    /// but ipv6 in doris is stored in little-endian byte order
    /// so transfer to little-endian
    std::reverse(dst, dst + IPV6_BINARY_LENGTH);

    return true;
}
|
470 | | |
471 | | /// returns pointer to the right after parsed sequence or null on failed parsing |
472 | 338k | inline const char* parse_ipv6(const char* src, const char* end, unsigned char* dst) { |
473 | 338k | if (parse_ipv6( |
474 | 16.4M | src, [&src, end]() { return src == end; }, dst)) |
475 | 330k | return src; |
476 | 7.26k | return nullptr; |
477 | 338k | } |
478 | | |
479 | | /// returns true if whole buffer was parsed successfully |
480 | 338k | inline bool parse_ipv6_whole(const char* src, const char* end, unsigned char* dst) { |
481 | 338k | return parse_ipv6(src, end, dst) == end; |
482 | 338k | } |
483 | | |
484 | | /// returns pointer to the right after parsed sequence or null on failed parsing |
485 | 1.87k | inline const char* parse_ipv6(const char* src, unsigned char* dst) { |
486 | 1.87k | if (parse_ipv6( |
487 | 1.87k | src, []() { return false; }, dst)) |
488 | 1.05k | return src; |
489 | 820 | return nullptr; |
490 | 1.87k | } |
491 | | |
492 | | /// returns true if whole null-terminated string was parsed successfully |
493 | 1.87k | inline bool parse_ipv6_whole(const char* src, unsigned char* dst) { |
494 | 1.87k | const char* end = parse_ipv6(src, dst); |
495 | 1.87k | return end != nullptr && *end == '\0'; |
496 | 1.87k | } |
497 | | |
498 | | #include "common/compile_check_end.h" |
499 | | } // namespace doris |