/root/doris/be/src/geo/wkb_parse.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "wkb_parse.h" |
19 | | |
20 | | #include <cstddef> |
21 | | #include <istream> |
22 | | #include <sstream> |
23 | | #include <utility> |
24 | | #include <vector> |
25 | | |
26 | | #include "geo/ByteOrderDataInStream.h" |
27 | | #include "geo/ByteOrderValues.h" |
28 | | #include "geo/geo_types.h" |
29 | | #include "geo/wkb_parse_ctx.h" |
30 | | #include "geo_tobinary_type.h" |
31 | | |
32 | | namespace doris { |
33 | | |
34 | 684 | unsigned char ASCIIHexToUChar(char val) { |
35 | 684 | switch (val) { |
36 | 585 | case '0': |
37 | 585 | return 0; |
38 | 21 | case '1': |
39 | 21 | return 1; |
40 | 6 | case '2': |
41 | 6 | return 2; |
42 | 15 | case '3': |
43 | 15 | return 3; |
44 | 17 | case '4': |
45 | 17 | return 4; |
46 | 0 | case '5': |
47 | 0 | return 5; |
48 | 1 | case '6': |
49 | 1 | return 6; |
50 | 0 | case '7': |
51 | 0 | return 7; |
52 | 2 | case '8': |
53 | 2 | return 8; |
54 | 0 | case '9': |
55 | 0 | return 9; |
56 | 2 | case 'A': |
57 | 2 | case 'a': |
58 | 2 | return 10; |
59 | 0 | case 'B': |
60 | 0 | case 'b': |
61 | 0 | return 11; |
62 | 0 | case 'C': |
63 | 0 | case 'c': |
64 | 0 | return 12; |
65 | 0 | case 'D': |
66 | 0 | case 'd': |
67 | 0 | return 13; |
68 | 2 | case 'E': |
69 | 2 | case 'e': |
70 | 2 | return 14; |
71 | 33 | case 'F': |
72 | 33 | case 'f': |
73 | 33 | return 15; |
74 | 0 | default: |
75 | 0 | return GEO_PARSE_WKB_SYNTAX_ERROR; |
76 | 684 | } |
77 | 684 | } |
78 | | |
79 | 16 | GeoParseStatus WkbParse::parse_wkb(std::istream& is, std::unique_ptr<GeoShape>& shape) { |
80 | 16 | WkbParseContext ctx; |
81 | | |
82 | 16 | WkbParse::read_hex(is, ctx); |
83 | 16 | if (ctx.parse_status == GEO_PARSE_OK) { |
84 | 4 | shape = std::move(ctx.shape); |
85 | 4 | } |
86 | 16 | return ctx.parse_status; |
87 | 16 | } |
88 | | |
89 | 16 | void WkbParse::read_hex(std::istream& is, WkbParseContext& ctx) { |
90 | | // setup input/output stream |
91 | 16 | std::stringstream os(std::ios_base::binary | std::ios_base::in | std::ios_base::out); |
92 | | |
93 | 358 | while (true) { |
94 | 358 | const int input_high = is.get(); |
95 | 358 | if (input_high == std::char_traits<char>::eof()) { |
96 | 15 | break; |
97 | 15 | } |
98 | | |
99 | 343 | const int input_low = is.get(); |
100 | 343 | if (input_low == std::char_traits<char>::eof()) { |
101 | 1 | ctx.parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; |
102 | 1 | return; |
103 | 1 | } |
104 | | |
105 | 342 | const char high = static_cast<char>(input_high); |
106 | 342 | const char low = static_cast<char>(input_low); |
107 | | |
108 | 342 | const unsigned char result_high = ASCIIHexToUChar(high); |
109 | 342 | const unsigned char result_low = ASCIIHexToUChar(low); |
110 | | |
111 | 342 | const auto value = static_cast<unsigned char>((result_high << 4) + result_low); |
112 | | |
113 | | // write the value to the output stream |
114 | 342 | os << value; |
115 | 342 | } |
116 | 15 | WkbParse::read(os, ctx); |
117 | 15 | } |
118 | | |
119 | 15 | void WkbParse::read(std::istream& is, WkbParseContext& ctx) { |
120 | 15 | is.seekg(0, std::ios::end); |
121 | 15 | auto size = is.tellg(); |
122 | 15 | is.seekg(0, std::ios::beg); |
123 | | |
124 | | // Check if size is valid |
125 | 15 | if (size <= 0) { |
126 | 1 | ctx.parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; |
127 | 1 | return; |
128 | 1 | } |
129 | | |
130 | 14 | std::vector<unsigned char> buf(static_cast<size_t>(size)); |
131 | 14 | if (!is.read(reinterpret_cast<char*>(buf.data()), static_cast<std::streamsize>(size))) { |
132 | 0 | ctx.parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; |
133 | 0 | return; |
134 | 0 | } |
135 | | |
136 | | // Ensure we have at least one byte for byte order |
137 | 14 | if (buf.empty()) { |
138 | 0 | ctx.parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; |
139 | 0 | return; |
140 | 0 | } |
141 | | |
142 | | // First read the byte order using machine endian |
143 | 14 | auto byteOrder = buf[0]; |
144 | | |
145 | | // Create ByteOrderDataInStream with the correct byte order |
146 | 14 | if (byteOrder == byteOrder::wkbNDR) { |
147 | 6 | ctx.dis = ByteOrderDataInStream(buf.data(), buf.size()); |
148 | 6 | ctx.dis.setOrder(ByteOrderValues::ENDIAN_LITTLE); |
149 | 8 | } else if (byteOrder == byteOrder::wkbXDR) { |
150 | 1 | ctx.dis = ByteOrderDataInStream(buf.data(), buf.size()); |
151 | 1 | ctx.dis.setOrder(ByteOrderValues::ENDIAN_BIG); |
152 | 7 | } else { |
153 | 7 | ctx.parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; |
154 | 7 | return; |
155 | 7 | } |
156 | | |
157 | 7 | std::unique_ptr<GeoShape> shape = readGeometry(ctx); |
158 | 7 | if (!shape) { |
159 | 3 | ctx.parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; |
160 | 3 | return; |
161 | 3 | } |
162 | | |
163 | 4 | ctx.shape = std::move(shape); |
164 | 4 | } |
165 | | |
166 | 7 | std::unique_ptr<GeoShape> WkbParse::readGeometry(WkbParseContext& ctx) { |
167 | 7 | try { |
168 | | // Ensure we have enough data to read |
169 | 7 | if (ctx.dis.size() < 5) { // At least 1 byte for order and 4 bytes for type |
170 | 1 | return nullptr; |
171 | 1 | } |
172 | | |
173 | | // Skip the byte order as we've already handled it |
174 | 6 | ctx.dis.readByte(); |
175 | | |
176 | 6 | uint32_t typeInt = ctx.dis.readUnsigned(); |
177 | | |
178 | | // Check if geometry has SRID |
179 | 6 | bool has_srid = (typeInt & WKB_SRID_FLAG) != 0; |
180 | | |
181 | | // Read SRID if present |
182 | 6 | if (has_srid) { |
183 | 1 | ctx.dis.readUnsigned(); // Read and store SRID if needed |
184 | 1 | } |
185 | | |
186 | | // Get the base geometry type |
187 | 6 | uint32_t geometryType = typeInt & WKB_TYPE_MASK; |
188 | | |
189 | 6 | std::unique_ptr<GeoShape> shape; |
190 | | |
191 | 6 | switch (geometryType) { |
192 | 3 | case wkbType::wkbPoint: |
193 | 3 | shape = readPoint(ctx); |
194 | 3 | break; |
195 | 1 | case wkbType::wkbLine: |
196 | 1 | shape = readLine(ctx); |
197 | 1 | break; |
198 | 1 | case wkbType::wkbPolygon: |
199 | 1 | shape = readPolygon(ctx); |
200 | 1 | break; |
201 | 1 | default: |
202 | 1 | return nullptr; |
203 | 6 | } |
204 | | |
205 | 5 | return shape; |
206 | 6 | } catch (...) { |
207 | | // Handle any exceptions from reading operations |
208 | 0 | return nullptr; |
209 | 0 | } |
210 | 7 | } |
211 | | |
212 | 3 | std::unique_ptr<GeoPoint> WkbParse::readPoint(WkbParseContext& ctx) { |
213 | 3 | GeoCoordinateList coords = WkbParse::readCoordinateList(1, ctx); |
214 | 3 | if (coords.list.empty()) { |
215 | 0 | return nullptr; |
216 | 0 | } |
217 | | |
218 | 3 | std::unique_ptr<GeoPoint> point = GeoPoint::create_unique(); |
219 | 3 | if (!point || point->from_coord(coords.list[0]) != GEO_PARSE_OK) { |
220 | 0 | return nullptr; |
221 | 0 | } |
222 | | |
223 | 3 | return point; |
224 | 3 | } |
225 | | |
226 | 1 | std::unique_ptr<GeoLine> WkbParse::readLine(WkbParseContext& ctx) { |
227 | 1 | uint32_t size = ctx.dis.readUnsigned(); |
228 | 1 | if (minMemSize(wkbLine, size, ctx) != GEO_PARSE_OK) { |
229 | 0 | return nullptr; |
230 | 0 | } |
231 | | |
232 | 1 | GeoCoordinateList coords = WkbParse::readCoordinateList(size, ctx); |
233 | 1 | if (coords.list.empty()) { |
234 | 0 | return nullptr; |
235 | 0 | } |
236 | | |
237 | 1 | std::unique_ptr<GeoLine> line = GeoLine::create_unique(); |
238 | 1 | if (!line || line->from_coords(coords) != GEO_PARSE_OK) { |
239 | 0 | return nullptr; |
240 | 0 | } |
241 | | |
242 | 1 | return line; |
243 | 1 | } |
244 | | |
245 | 1 | std::unique_ptr<GeoPolygon> WkbParse::readPolygon(WkbParseContext& ctx) { |
246 | 1 | uint32_t num_loops = ctx.dis.readUnsigned(); |
247 | 1 | if (minMemSize(wkbPolygon, num_loops, ctx) != GEO_PARSE_OK) { |
248 | 0 | return nullptr; |
249 | 0 | } |
250 | | |
251 | 1 | GeoCoordinateListList coordss; |
252 | 1 | for (uint32_t i = 0; i < num_loops; ++i) { |
253 | 1 | uint32_t size = ctx.dis.readUnsigned(); |
254 | 1 | if (size < 3) { // A polygon loop must have at least 3 points |
255 | 1 | return nullptr; |
256 | 1 | } |
257 | | |
258 | 0 | auto coords = std::make_unique<GeoCoordinateList>(); |
259 | 0 | *coords = WkbParse::readCoordinateList(size, ctx); |
260 | 0 | if (coords->list.empty()) { |
261 | 0 | return nullptr; |
262 | 0 | } |
263 | 0 | coordss.add(std::move(coords)); |
264 | 0 | } |
265 | | |
266 | 0 | std::unique_ptr<GeoPolygon> polygon = GeoPolygon::create_unique(); |
267 | 0 | if (!polygon || polygon->from_coords(coordss) != GEO_PARSE_OK) { |
268 | 0 | return nullptr; |
269 | 0 | } |
270 | | |
271 | 0 | return polygon; |
272 | 0 | } |
273 | | |
274 | 4 | GeoCoordinateList WkbParse::readCoordinateList(unsigned size, WkbParseContext& ctx) { |
275 | 4 | GeoCoordinateList coords; |
276 | 9 | for (uint32_t i = 0; i < size; i++) { |
277 | 5 | if (!readCoordinate(ctx)) { |
278 | 0 | return GeoCoordinateList(); |
279 | 0 | } |
280 | 5 | unsigned int j = 0; |
281 | 5 | GeoCoordinate coord; |
282 | 5 | coord.x = ctx.ordValues[j++]; |
283 | 5 | coord.y = ctx.ordValues[j++]; |
284 | 5 | coords.add(coord); |
285 | 5 | } |
286 | 4 | return coords; |
287 | 4 | } |
288 | | |
289 | 2 | GeoParseStatus WkbParse::minMemSize(int wkbType, uint64_t size, WkbParseContext& ctx) { |
290 | 2 | uint64_t minSize = 0; |
291 | 2 | constexpr uint64_t minCoordSize = 2 * sizeof(double); |
292 | | //constexpr uint64_t minPtSize = (1+4) + minCoordSize; |
293 | | //constexpr uint64_t minLineSize = (1+4+4); // empty line |
294 | 2 | constexpr uint64_t minLoopSize = 4; // empty loop |
295 | | //constexpr uint64_t minPolySize = (1+4+4); // empty polygon |
296 | | //constexpr uint64_t minGeomSize = minLineSize; |
297 | | |
298 | 2 | switch (wkbType) { |
299 | 1 | case wkbLine: |
300 | 1 | minSize = size * minCoordSize; |
301 | 1 | break; |
302 | 1 | case wkbPolygon: |
303 | 1 | minSize = size * minLoopSize; |
304 | 1 | break; |
305 | 2 | } |
306 | 2 | if (ctx.dis.size() < minSize) { |
307 | 0 | return GEO_PARSE_WKB_SYNTAX_ERROR; |
308 | 0 | } |
309 | 2 | return GEO_PARSE_OK; |
310 | 2 | } |
311 | 5 | bool WkbParse::readCoordinate(WkbParseContext& ctx) { |
312 | 15 | for (std::size_t i = 0; i < ctx.inputDimension; ++i) { |
313 | 10 | ctx.ordValues[i] = ctx.dis.readDouble(); |
314 | 10 | } |
315 | | |
316 | 5 | return true; |
317 | 5 | } |
318 | | |
319 | | } // namespace doris |