/root/doris/be/src/geo/wkb_parse.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
#include "wkb_parse.h"

#include <cctype>
#include <cstddef>
#include <istream>
#include <sstream>
#include <vector>

#include "geo/ByteOrderDataInStream.h"
#include "geo/ByteOrderValues.h"
#include "geo/geo_types.h"
#include "geo/wkb_parse_ctx.h"
#include "geo_tobinary_type.h"
30 | | |
31 | | namespace doris { |
32 | | |
33 | 0 | unsigned char ASCIIHexToUChar(char val) { |
34 | 0 | switch (val) { |
35 | 0 | case '0': |
36 | 0 | return 0; |
37 | 0 | case '1': |
38 | 0 | return 1; |
39 | 0 | case '2': |
40 | 0 | return 2; |
41 | 0 | case '3': |
42 | 0 | return 3; |
43 | 0 | case '4': |
44 | 0 | return 4; |
45 | 0 | case '5': |
46 | 0 | return 5; |
47 | 0 | case '6': |
48 | 0 | return 6; |
49 | 0 | case '7': |
50 | 0 | return 7; |
51 | 0 | case '8': |
52 | 0 | return 8; |
53 | 0 | case '9': |
54 | 0 | return 9; |
55 | 0 | case 'A': |
56 | 0 | case 'a': |
57 | 0 | return 10; |
58 | 0 | case 'B': |
59 | 0 | case 'b': |
60 | 0 | return 11; |
61 | 0 | case 'C': |
62 | 0 | case 'c': |
63 | 0 | return 12; |
64 | 0 | case 'D': |
65 | 0 | case 'd': |
66 | 0 | return 13; |
67 | 0 | case 'E': |
68 | 0 | case 'e': |
69 | 0 | return 14; |
70 | 0 | case 'F': |
71 | 0 | case 'f': |
72 | 0 | return 15; |
73 | 0 | default: |
74 | 0 | return GEO_PARSE_WKB_SYNTAX_ERROR; |
75 | 0 | } |
76 | 0 | } |
77 | | |
78 | 0 | GeoParseStatus WkbParse::parse_wkb(std::istream& is, GeoShape** shape) { |
79 | 0 | WkbParseContext ctx; |
80 | |
|
81 | 0 | ctx = *(WkbParse::read_hex(is, &ctx)); |
82 | 0 | if (ctx.parse_status == GEO_PARSE_OK) { |
83 | 0 | *shape = ctx.shape; |
84 | 0 | } else { |
85 | 0 | ctx.parse_status = GEO_PARSE_WKT_SYNTAX_ERROR; |
86 | 0 | } |
87 | 0 | return ctx.parse_status; |
88 | 0 | } |
89 | | |
90 | 0 | WkbParseContext* WkbParse::read_hex(std::istream& is, WkbParseContext* ctx) { |
91 | | // setup input/output stream |
92 | 0 | std::stringstream os(std::ios_base::binary | std::ios_base::in | std::ios_base::out); |
93 | |
|
94 | 0 | while (true) { |
95 | 0 | const int input_high = is.get(); |
96 | 0 | if (input_high == std::char_traits<char>::eof()) { |
97 | 0 | break; |
98 | 0 | } |
99 | | |
100 | 0 | const int input_low = is.get(); |
101 | 0 | if (input_low == std::char_traits<char>::eof()) { |
102 | 0 | ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; |
103 | 0 | return ctx; |
104 | 0 | } |
105 | | |
106 | 0 | const char high = static_cast<char>(input_high); |
107 | 0 | const char low = static_cast<char>(input_low); |
108 | |
|
109 | 0 | const unsigned char result_high = ASCIIHexToUChar(high); |
110 | 0 | const unsigned char result_low = ASCIIHexToUChar(low); |
111 | |
|
112 | 0 | const unsigned char value = static_cast<unsigned char>((result_high << 4) + result_low); |
113 | | |
114 | | // write the value to the output stream |
115 | 0 | os << value; |
116 | 0 | } |
117 | 0 | return WkbParse::read(os, ctx); |
118 | 0 | } |
119 | | |
120 | 0 | WkbParseContext* WkbParse::read(std::istream& is, WkbParseContext* ctx) { |
121 | 0 | is.seekg(0, std::ios::end); |
122 | 0 | auto size = is.tellg(); |
123 | 0 | is.seekg(0, std::ios::beg); |
124 | | |
125 | | // Check if size is valid |
126 | 0 | if (size <= 0) { |
127 | 0 | ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; |
128 | 0 | return ctx; |
129 | 0 | } |
130 | | |
131 | 0 | std::vector<unsigned char> buf(static_cast<size_t>(size)); |
132 | 0 | if (!is.read(reinterpret_cast<char*>(buf.data()), static_cast<std::streamsize>(size))) { |
133 | 0 | ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; |
134 | 0 | return ctx; |
135 | 0 | } |
136 | | |
137 | | // Ensure we have at least one byte for byte order |
138 | 0 | if (buf.empty()) { |
139 | 0 | ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; |
140 | 0 | return ctx; |
141 | 0 | } |
142 | | |
143 | | // First read the byte order using machine endian |
144 | 0 | auto byteOrder = buf[0]; |
145 | | |
146 | | // Create ByteOrderDataInStream with the correct byte order |
147 | 0 | if (byteOrder == byteOrder::wkbNDR) { |
148 | 0 | ctx->dis = ByteOrderDataInStream(buf.data(), buf.size()); |
149 | 0 | ctx->dis.setOrder(ByteOrderValues::ENDIAN_LITTLE); |
150 | 0 | } else if (byteOrder == byteOrder::wkbXDR) { |
151 | 0 | ctx->dis = ByteOrderDataInStream(buf.data(), buf.size()); |
152 | 0 | ctx->dis.setOrder(ByteOrderValues::ENDIAN_BIG); |
153 | 0 | } else { |
154 | 0 | ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; |
155 | 0 | return ctx; |
156 | 0 | } |
157 | | |
158 | 0 | std::unique_ptr<GeoShape> shape = readGeometry(ctx); |
159 | 0 | if (!shape) { |
160 | 0 | ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; |
161 | 0 | return ctx; |
162 | 0 | } |
163 | | |
164 | 0 | ctx->shape = shape.release(); |
165 | 0 | return ctx; |
166 | 0 | } |
167 | | |
168 | 0 | std::unique_ptr<GeoShape> WkbParse::readGeometry(WkbParseContext* ctx) { |
169 | 0 | try { |
170 | | // Ensure we have enough data to read |
171 | 0 | if (ctx->dis.size() < 5) { // At least 1 byte for order and 4 bytes for type |
172 | 0 | return nullptr; |
173 | 0 | } |
174 | | |
175 | | // Skip the byte order as we've already handled it |
176 | 0 | ctx->dis.readByte(); |
177 | |
|
178 | 0 | uint32_t typeInt = ctx->dis.readUnsigned(); |
179 | | |
180 | | // Check if geometry has SRID |
181 | 0 | bool has_srid = (typeInt & WKB_SRID_FLAG) != 0; |
182 | | |
183 | | // Read SRID if present |
184 | 0 | if (has_srid) { |
185 | 0 | ctx->dis.readUnsigned(); // Read and store SRID if needed |
186 | 0 | } |
187 | | |
188 | | // Get the base geometry type |
189 | 0 | uint32_t geometryType = typeInt & WKB_TYPE_MASK; |
190 | |
|
191 | 0 | std::unique_ptr<GeoShape> shape; |
192 | |
|
193 | 0 | switch (geometryType) { |
194 | 0 | case wkbType::wkbPoint: |
195 | 0 | shape = readPoint(ctx); |
196 | 0 | break; |
197 | 0 | case wkbType::wkbLine: |
198 | 0 | shape = readLine(ctx); |
199 | 0 | break; |
200 | 0 | case wkbType::wkbPolygon: |
201 | 0 | shape = readPolygon(ctx); |
202 | 0 | break; |
203 | 0 | default: |
204 | 0 | return nullptr; |
205 | 0 | } |
206 | | |
207 | 0 | return shape; |
208 | 0 | } catch (...) { |
209 | | // Handle any exceptions from reading operations |
210 | 0 | return nullptr; |
211 | 0 | } |
212 | 0 | } |
213 | | |
214 | 0 | std::unique_ptr<GeoPoint> WkbParse::readPoint(WkbParseContext* ctx) { |
215 | 0 | GeoCoordinateList coords = WkbParse::readCoordinateList(1, ctx); |
216 | 0 | if (coords.list.empty()) { |
217 | 0 | return nullptr; |
218 | 0 | } |
219 | | |
220 | 0 | std::unique_ptr<GeoPoint> point = GeoPoint::create_unique(); |
221 | 0 | if (!point || point->from_coord(coords.list[0]) != GEO_PARSE_OK) { |
222 | 0 | return nullptr; |
223 | 0 | } |
224 | | |
225 | 0 | return point; |
226 | 0 | } |
227 | | |
228 | 0 | std::unique_ptr<GeoLine> WkbParse::readLine(WkbParseContext* ctx) { |
229 | 0 | uint32_t size = ctx->dis.readUnsigned(); |
230 | 0 | if (minMemSize(wkbLine, size, ctx) != GEO_PARSE_OK) { |
231 | 0 | return nullptr; |
232 | 0 | } |
233 | | |
234 | 0 | GeoCoordinateList coords = WkbParse::readCoordinateList(size, ctx); |
235 | 0 | if (coords.list.empty()) { |
236 | 0 | return nullptr; |
237 | 0 | } |
238 | | |
239 | 0 | std::unique_ptr<GeoLine> line = GeoLine::create_unique(); |
240 | 0 | if (!line || line->from_coords(coords) != GEO_PARSE_OK) { |
241 | 0 | return nullptr; |
242 | 0 | } |
243 | | |
244 | 0 | return line; |
245 | 0 | } |
246 | | |
247 | 0 | std::unique_ptr<GeoPolygon> WkbParse::readPolygon(WkbParseContext* ctx) { |
248 | 0 | uint32_t num_loops = ctx->dis.readUnsigned(); |
249 | 0 | if (minMemSize(wkbPolygon, num_loops, ctx) != GEO_PARSE_OK) { |
250 | 0 | return nullptr; |
251 | 0 | } |
252 | | |
253 | 0 | GeoCoordinateListList coordss; |
254 | 0 | for (uint32_t i = 0; i < num_loops; ++i) { |
255 | 0 | uint32_t size = ctx->dis.readUnsigned(); |
256 | 0 | if (size < 3) { // A polygon loop must have at least 3 points |
257 | 0 | return nullptr; |
258 | 0 | } |
259 | | |
260 | 0 | auto coords = std::make_unique<GeoCoordinateList>(); |
261 | 0 | *coords = WkbParse::readCoordinateList(size, ctx); |
262 | 0 | if (coords->list.empty()) { |
263 | 0 | return nullptr; |
264 | 0 | } |
265 | 0 | coordss.add(coords.release()); |
266 | 0 | } |
267 | | |
268 | 0 | std::unique_ptr<GeoPolygon> polygon = GeoPolygon::create_unique(); |
269 | 0 | if (!polygon || polygon->from_coords(coordss) != GEO_PARSE_OK) { |
270 | 0 | return nullptr; |
271 | 0 | } |
272 | | |
273 | 0 | return polygon; |
274 | 0 | } |
275 | | |
276 | 0 | GeoCoordinateList WkbParse::readCoordinateList(unsigned size, WkbParseContext* ctx) { |
277 | 0 | GeoCoordinateList coords; |
278 | 0 | for (uint32_t i = 0; i < size; i++) { |
279 | 0 | if (!readCoordinate(ctx)) { |
280 | 0 | return GeoCoordinateList(); |
281 | 0 | } |
282 | 0 | unsigned int j = 0; |
283 | 0 | GeoCoordinate coord; |
284 | 0 | coord.x = ctx->ordValues[j++]; |
285 | 0 | coord.y = ctx->ordValues[j++]; |
286 | 0 | coords.add(coord); |
287 | 0 | } |
288 | 0 | return coords; |
289 | 0 | } |
290 | | |
291 | 0 | GeoParseStatus WkbParse::minMemSize(int wkbType, uint64_t size, WkbParseContext* ctx) { |
292 | 0 | uint64_t minSize = 0; |
293 | 0 | constexpr uint64_t minCoordSize = 2 * sizeof(double); |
294 | | //constexpr uint64_t minPtSize = (1+4) + minCoordSize; |
295 | | //constexpr uint64_t minLineSize = (1+4+4); // empty line |
296 | 0 | constexpr uint64_t minLoopSize = 4; // empty loop |
297 | | //constexpr uint64_t minPolySize = (1+4+4); // empty polygon |
298 | | //constexpr uint64_t minGeomSize = minLineSize; |
299 | |
|
300 | 0 | switch (wkbType) { |
301 | 0 | case wkbLine: |
302 | 0 | minSize = size * minCoordSize; |
303 | 0 | break; |
304 | 0 | case wkbPolygon: |
305 | 0 | minSize = size * minLoopSize; |
306 | 0 | break; |
307 | 0 | } |
308 | 0 | if (ctx->dis.size() < minSize) { |
309 | 0 | return GEO_PARSE_WKB_SYNTAX_ERROR; |
310 | 0 | } |
311 | 0 | return GEO_PARSE_OK; |
312 | 0 | } |
313 | 0 | bool WkbParse::readCoordinate(WkbParseContext* ctx) { |
314 | 0 | for (std::size_t i = 0; i < ctx->inputDimension; ++i) { |
315 | 0 | ctx->ordValues[i] = ctx->dis.readDouble(); |
316 | 0 | } |
317 | |
|
318 | 0 | return true; |
319 | 0 | } |
320 | | |
321 | | } // namespace doris |