/root/doris/be/src/geo/wkb_parse.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "wkb_parse.h" |
19 | | |
20 | | #include <cstddef> |
21 | | #include <istream> |
22 | | #include <sstream> |
23 | | #include <vector> |
24 | | |
25 | | #include "geo/ByteOrderDataInStream.h" |
26 | | #include "geo/ByteOrderValues.h" |
27 | | #include "geo/geo_types.h" |
28 | | #include "geo/wkb_parse_ctx.h" |
29 | | #include "geo_tobinary_type.h" |
30 | | |
31 | | namespace doris { |
32 | | |
33 | 0 | unsigned char ASCIIHexToUChar(char val) { |
34 | 0 | switch (val) { |
35 | 0 | case '0': |
36 | 0 | return 0; |
37 | 0 | case '1': |
38 | 0 | return 1; |
39 | 0 | case '2': |
40 | 0 | return 2; |
41 | 0 | case '3': |
42 | 0 | return 3; |
43 | 0 | case '4': |
44 | 0 | return 4; |
45 | 0 | case '5': |
46 | 0 | return 5; |
47 | 0 | case '6': |
48 | 0 | return 6; |
49 | 0 | case '7': |
50 | 0 | return 7; |
51 | 0 | case '8': |
52 | 0 | return 8; |
53 | 0 | case '9': |
54 | 0 | return 9; |
55 | 0 | case 'A': |
56 | 0 | case 'a': |
57 | 0 | return 10; |
58 | 0 | case 'B': |
59 | 0 | case 'b': |
60 | 0 | return 11; |
61 | 0 | case 'C': |
62 | 0 | case 'c': |
63 | 0 | return 12; |
64 | 0 | case 'D': |
65 | 0 | case 'd': |
66 | 0 | return 13; |
67 | 0 | case 'E': |
68 | 0 | case 'e': |
69 | 0 | return 14; |
70 | 0 | case 'F': |
71 | 0 | case 'f': |
72 | 0 | return 15; |
73 | 0 | default: |
74 | 0 | return GEO_PARSE_WKB_SYNTAX_ERROR; |
75 | 0 | } |
76 | 0 | } |
77 | | |
78 | 0 | GeoParseStatus WkbParse::parse_wkb(std::istream& is, GeoShape** shape) { |
79 | 0 | WkbParseContext ctx; |
80 | |
|
81 | 0 | ctx = *(WkbParse::read_hex(is, &ctx)); |
82 | 0 | if (ctx.parse_status == GEO_PARSE_OK) { |
83 | 0 | *shape = ctx.shape; |
84 | 0 | } else { |
85 | 0 | ctx.parse_status = GEO_PARSE_WKT_SYNTAX_ERROR; |
86 | 0 | } |
87 | 0 | return ctx.parse_status; |
88 | 0 | } |
89 | | |
90 | 0 | WkbParseContext* WkbParse::read_hex(std::istream& is, WkbParseContext* ctx) { |
91 | | // setup input/output stream |
92 | 0 | std::stringstream os(std::ios_base::binary | std::ios_base::in | std::ios_base::out); |
93 | |
|
94 | 0 | while (true) { |
95 | 0 | const int input_high = is.get(); |
96 | 0 | if (input_high == std::char_traits<char>::eof()) { |
97 | 0 | break; |
98 | 0 | } |
99 | | |
100 | 0 | const int input_low = is.get(); |
101 | 0 | if (input_low == std::char_traits<char>::eof()) { |
102 | 0 | ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; |
103 | 0 | return ctx; |
104 | 0 | } |
105 | | |
106 | 0 | const char high = static_cast<char>(input_high); |
107 | 0 | const char low = static_cast<char>(input_low); |
108 | |
|
109 | 0 | const unsigned char result_high = ASCIIHexToUChar(high); |
110 | 0 | const unsigned char result_low = ASCIIHexToUChar(low); |
111 | |
|
112 | 0 | const unsigned char value = static_cast<unsigned char>((result_high << 4) + result_low); |
113 | | |
114 | | // write the value to the output stream |
115 | 0 | os << value; |
116 | 0 | } |
117 | 0 | return WkbParse::read(os, ctx); |
118 | 0 | } |
119 | | |
120 | 0 | WkbParseContext* WkbParse::read(std::istream& is, WkbParseContext* ctx) { |
121 | 0 | is.seekg(0, std::ios::end); |
122 | 0 | auto size = is.tellg(); |
123 | 0 | is.seekg(0, std::ios::beg); |
124 | |
|
125 | 0 | std::vector<unsigned char> buf(static_cast<size_t>(size)); |
126 | 0 | is.read(reinterpret_cast<char*>(buf.data()), static_cast<std::streamsize>(size)); |
127 | |
|
128 | 0 | ctx->dis = ByteOrderDataInStream(buf.data(), buf.size()); // will default to machine endian |
129 | |
|
130 | 0 | ctx->shape = readGeometry(ctx).release(); |
131 | |
|
132 | 0 | if (!ctx->shape) { |
133 | 0 | ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR; |
134 | 0 | } |
135 | 0 | return ctx; |
136 | 0 | } |
137 | | |
138 | 0 | std::unique_ptr<GeoShape> WkbParse::readGeometry(WkbParseContext* ctx) { |
139 | | // determine byte order |
140 | 0 | unsigned char byteOrder = ctx->dis.readByte(); |
141 | | |
142 | | // default is machine endian |
143 | 0 | if (byteOrder == byteOrder::wkbNDR) { |
144 | 0 | ctx->dis.setOrder(ByteOrderValues::ENDIAN_LITTLE); |
145 | 0 | } else if (byteOrder == byteOrder::wkbXDR) { |
146 | 0 | ctx->dis.setOrder(ByteOrderValues::ENDIAN_BIG); |
147 | 0 | } |
148 | |
|
149 | 0 | uint32_t typeInt = ctx->dis.readUnsigned(); |
150 | |
|
151 | 0 | uint32_t geometryType = (typeInt & 0xffff) % 1000; |
152 | |
|
153 | 0 | std::unique_ptr<GeoShape> shape; |
154 | |
|
155 | 0 | switch (geometryType) { |
156 | 0 | case wkbType::wkbPoint: |
157 | 0 | shape.reset(readPoint(ctx).release()); |
158 | 0 | break; |
159 | 0 | case wkbType::wkbLine: |
160 | 0 | shape.reset(readLine(ctx).release()); |
161 | 0 | break; |
162 | 0 | case wkbType::wkbPolygon: |
163 | 0 | shape.reset(readPolygon(ctx).release()); |
164 | 0 | break; |
165 | 0 | default: |
166 | 0 | return nullptr; |
167 | 0 | } |
168 | 0 | return shape; |
169 | 0 | } |
170 | | |
171 | 0 | std::unique_ptr<GeoPoint> WkbParse::readPoint(WkbParseContext* ctx) { |
172 | 0 | GeoCoordinateList coords = WkbParse::readCoordinateList(1, ctx); |
173 | 0 | std::unique_ptr<GeoPoint> point = GeoPoint::create_unique(); |
174 | |
|
175 | 0 | if (point->from_coord(coords.list[0]) == GEO_PARSE_OK) { |
176 | 0 | return point; |
177 | 0 | } else { |
178 | 0 | return nullptr; |
179 | 0 | } |
180 | 0 | } |
181 | | |
182 | 0 | std::unique_ptr<GeoLine> WkbParse::readLine(WkbParseContext* ctx) { |
183 | 0 | uint32_t size = ctx->dis.readUnsigned(); |
184 | 0 | minMemSize(wkbLine, size, ctx); |
185 | |
|
186 | 0 | GeoCoordinateList coords = WkbParse::readCoordinateList(size, ctx); |
187 | 0 | std::unique_ptr<GeoLine> line = GeoLine::create_unique(); |
188 | |
|
189 | 0 | if (line->from_coords(coords) == GEO_PARSE_OK) { |
190 | 0 | return line; |
191 | 0 | } else { |
192 | 0 | return nullptr; |
193 | 0 | } |
194 | 0 | } |
195 | | |
196 | 0 | std::unique_ptr<GeoPolygon> WkbParse::readPolygon(WkbParseContext* ctx) { |
197 | 0 | uint32_t num_loops = ctx->dis.readUnsigned(); |
198 | 0 | minMemSize(wkbPolygon, num_loops, ctx); |
199 | 0 | GeoCoordinateListList coordss; |
200 | 0 | for (int i = 0; i < num_loops; ++i) { |
201 | 0 | uint32_t size = ctx->dis.readUnsigned(); |
202 | 0 | GeoCoordinateList* coords = new GeoCoordinateList(); |
203 | 0 | *coords = WkbParse::readCoordinateList(size, ctx); |
204 | 0 | coordss.add(coords); |
205 | 0 | } |
206 | |
|
207 | 0 | std::unique_ptr<GeoPolygon> polygon = GeoPolygon::create_unique(); |
208 | |
|
209 | 0 | if (polygon->from_coords(coordss) == GEO_PARSE_OK) { |
210 | 0 | return polygon; |
211 | 0 | } else { |
212 | 0 | return nullptr; |
213 | 0 | } |
214 | 0 | } |
215 | | |
216 | 0 | GeoCoordinateList WkbParse::readCoordinateList(unsigned size, WkbParseContext* ctx) { |
217 | 0 | GeoCoordinateList coords; |
218 | 0 | for (uint32_t i = 0; i < size; i++) { |
219 | 0 | readCoordinate(ctx); |
220 | 0 | unsigned int j = 0; |
221 | 0 | GeoCoordinate coord; |
222 | 0 | coord.x = ctx->ordValues[j++]; |
223 | 0 | coord.y = ctx->ordValues[j++]; |
224 | 0 | coords.add(coord); |
225 | 0 | } |
226 | |
|
227 | 0 | return coords; |
228 | 0 | } |
229 | | |
230 | 0 | GeoParseStatus WkbParse::minMemSize(int wkbType, uint64_t size, WkbParseContext* ctx) { |
231 | 0 | uint64_t minSize = 0; |
232 | 0 | constexpr uint64_t minCoordSize = 2 * sizeof(double); |
233 | | //constexpr uint64_t minPtSize = (1+4) + minCoordSize; |
234 | | //constexpr uint64_t minLineSize = (1+4+4); // empty line |
235 | 0 | constexpr uint64_t minLoopSize = 4; // empty loop |
236 | | //constexpr uint64_t minPolySize = (1+4+4); // empty polygon |
237 | | //constexpr uint64_t minGeomSize = minLineSize; |
238 | |
|
239 | 0 | switch (wkbType) { |
240 | 0 | case wkbLine: |
241 | 0 | minSize = size * minCoordSize; |
242 | 0 | break; |
243 | 0 | case wkbPolygon: |
244 | 0 | minSize = size * minLoopSize; |
245 | 0 | break; |
246 | 0 | } |
247 | 0 | if (ctx->dis.size() < minSize) { |
248 | 0 | return GEO_PARSE_WKB_SYNTAX_ERROR; |
249 | 0 | } |
250 | 0 | return GEO_PARSE_OK; |
251 | 0 | } |
252 | 0 | bool WkbParse::readCoordinate(WkbParseContext* ctx) { |
253 | 0 | for (std::size_t i = 0; i < ctx->inputDimension; ++i) { |
254 | 0 | ctx->ordValues[i] = ctx->dis.readDouble(); |
255 | 0 | } |
256 | |
|
257 | 0 | return true; |
258 | 0 | } |
259 | | |
260 | | } // namespace doris |