Coverage Report

Created: 2025-12-04 21:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/geo/wkb_parse.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "wkb_parse.h"
19
20
#include <cstddef>
21
#include <istream>
22
#include <sstream>
23
#include <utility>
24
#include <vector>
25
26
#include "geo/ByteOrderDataInStream.h"
27
#include "geo/ByteOrderValues.h"
28
#include "geo/geo_types.h"
29
#include "geo/wkb_parse_ctx.h"
30
#include "geo_tobinary_type.h"
31
32
namespace doris {
33
34
684
unsigned char ASCIIHexToUChar(char val) {
35
684
    switch (val) {
36
585
    case '0':
37
585
        return 0;
38
21
    case '1':
39
21
        return 1;
40
6
    case '2':
41
6
        return 2;
42
15
    case '3':
43
15
        return 3;
44
17
    case '4':
45
17
        return 4;
46
0
    case '5':
47
0
        return 5;
48
1
    case '6':
49
1
        return 6;
50
0
    case '7':
51
0
        return 7;
52
2
    case '8':
53
2
        return 8;
54
0
    case '9':
55
0
        return 9;
56
2
    case 'A':
57
2
    case 'a':
58
2
        return 10;
59
0
    case 'B':
60
0
    case 'b':
61
0
        return 11;
62
0
    case 'C':
63
0
    case 'c':
64
0
        return 12;
65
0
    case 'D':
66
0
    case 'd':
67
0
        return 13;
68
2
    case 'E':
69
2
    case 'e':
70
2
        return 14;
71
33
    case 'F':
72
33
    case 'f':
73
33
        return 15;
74
0
    default:
75
0
        return GEO_PARSE_WKB_SYNTAX_ERROR;
76
684
    }
77
684
}
78
79
16
// Parses a hex-encoded WKB geometry from `is`.
//
// A fresh parse context is filled by read_hex(). When the context reports
// GEO_PARSE_OK the decoded geometry is moved into `shape`; for any other
// status `shape` is left untouched.
//
// Returns the status recorded in the parse context.
GeoParseStatus WkbParse::parse_wkb(std::istream& is, std::unique_ptr<GeoShape>& shape) {
    WkbParseContext parse_ctx;
    WkbParse::read_hex(is, parse_ctx);

    // Failure path: report the status and do not touch the caller's shape.
    if (parse_ctx.parse_status != GEO_PARSE_OK) {
        return parse_ctx.parse_status;
    }

    shape = std::move(parse_ctx.shape);
    return parse_ctx.parse_status;
}
88
89
16
void WkbParse::read_hex(std::istream& is, WkbParseContext& ctx) {
90
    // setup input/output stream
91
16
    std::stringstream os(std::ios_base::binary | std::ios_base::in | std::ios_base::out);
92
93
358
    while (true) {
94
358
        const int input_high = is.get();
95
358
        if (input_high == std::char_traits<char>::eof()) {
96
15
            break;
97
15
        }
98
99
343
        const int input_low = is.get();
100
343
        if (input_low == std::char_traits<char>::eof()) {
101
1
            ctx.parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
102
1
            return;
103
1
        }
104
105
342
        const char high = static_cast<char>(input_high);
106
342
        const char low = static_cast<char>(input_low);
107
108
342
        const unsigned char result_high = ASCIIHexToUChar(high);
109
342
        const unsigned char result_low = ASCIIHexToUChar(low);
110
111
342
        const auto value = static_cast<unsigned char>((result_high << 4) + result_low);
112
113
        // write the value to the output stream
114
342
        os << value;
115
342
    }
116
15
    WkbParse::read(os, ctx);
117
15
}
118
119
15
void WkbParse::read(std::istream& is, WkbParseContext& ctx) {
120
15
    is.seekg(0, std::ios::end);
121
15
    auto size = is.tellg();
122
15
    is.seekg(0, std::ios::beg);
123
124
    // Check if size is valid
125
15
    if (size <= 0) {
126
1
        ctx.parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
127
1
        return;
128
1
    }
129
130
14
    std::vector<unsigned char> buf(static_cast<size_t>(size));
131
14
    if (!is.read(reinterpret_cast<char*>(buf.data()), static_cast<std::streamsize>(size))) {
132
0
        ctx.parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
133
0
        return;
134
0
    }
135
136
    // Ensure we have at least one byte for byte order
137
14
    if (buf.empty()) {
138
0
        ctx.parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
139
0
        return;
140
0
    }
141
142
    // First read the byte order using machine endian
143
14
    auto byteOrder = buf[0];
144
145
    // Create ByteOrderDataInStream with the correct byte order
146
14
    if (byteOrder == byteOrder::wkbNDR) {
147
6
        ctx.dis = ByteOrderDataInStream(buf.data(), buf.size());
148
6
        ctx.dis.setOrder(ByteOrderValues::ENDIAN_LITTLE);
149
8
    } else if (byteOrder == byteOrder::wkbXDR) {
150
1
        ctx.dis = ByteOrderDataInStream(buf.data(), buf.size());
151
1
        ctx.dis.setOrder(ByteOrderValues::ENDIAN_BIG);
152
7
    } else {
153
7
        ctx.parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
154
7
        return;
155
7
    }
156
157
7
    std::unique_ptr<GeoShape> shape = readGeometry(ctx);
158
7
    if (!shape) {
159
3
        ctx.parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
160
3
        return;
161
3
    }
162
163
4
    ctx.shape = std::move(shape);
164
4
}
165
166
7
// Reads one geometry from ctx.dis and returns it, or nullptr on failure.
//
// Layout consumed: one byte-order byte (already interpreted by the caller, so
// it is only skipped here), a 32-bit type word, an optional 32-bit SRID when
// the WKB_SRID_FLAG bit is set in the type word, and then the body of the
// geometry. Only point, line and polygon types are handled; any other type
// yields nullptr. Any exception raised while reading is swallowed and also
// reported as nullptr.
std::unique_ptr<GeoShape> WkbParse::readGeometry(WkbParseContext& ctx) {
    try {
        // Need at least 1 byte of byte order plus 4 bytes of type.
        if (ctx.dis.size() < 5) {
            return nullptr;
        }

        // The byte-order marker was handled by the caller; just step over it.
        ctx.dis.readByte();

        const uint32_t type_word = ctx.dis.readUnsigned();

        // An SRID, when flagged, is consumed and discarded.
        if ((type_word & WKB_SRID_FLAG) != 0) {
            ctx.dis.readUnsigned();
        }

        // Strip the flag bits to obtain the base geometry type.
        const uint32_t base_type = type_word & WKB_TYPE_MASK;

        switch (base_type) {
        case wkbType::wkbPoint:
            return readPoint(ctx);
        case wkbType::wkbLine:
            return readLine(ctx);
        case wkbType::wkbPolygon:
            return readPolygon(ctx);
        default:
            return nullptr;
        }
    } catch (...) {
        // Any failure inside the reading helpers is treated as a parse failure.
        return nullptr;
    }
}
211
212
3
// Reads a single coordinate from ctx and builds a GeoPoint from it.
// Returns nullptr when no coordinate could be read, when the point object
// could not be created, or when GeoPoint::from_coord() does not report
// GEO_PARSE_OK.
std::unique_ptr<GeoPoint> WkbParse::readPoint(WkbParseContext& ctx) {
    GeoCoordinateList single = WkbParse::readCoordinateList(1, ctx);
    if (single.list.empty()) {
        return nullptr;
    }

    std::unique_ptr<GeoPoint> result = GeoPoint::create_unique();
    if (!result) {
        return nullptr;
    }
    if (result->from_coord(single.list[0]) != GEO_PARSE_OK) {
        return nullptr;
    }

    return result;
}
225
226
1
// Reads a line string: a 32-bit point count followed by that many coordinates.
// Returns nullptr when the declared count fails the minMemSize() check, when
// the coordinate list comes back empty, when the line object could not be
// created, or when GeoLine::from_coords() does not report GEO_PARSE_OK.
std::unique_ptr<GeoLine> WkbParse::readLine(WkbParseContext& ctx) {
    const uint32_t point_count = ctx.dis.readUnsigned();

    // Reject counts that the input cannot possibly back with data.
    if (minMemSize(wkbLine, point_count, ctx) != GEO_PARSE_OK) {
        return nullptr;
    }

    GeoCoordinateList points = WkbParse::readCoordinateList(point_count, ctx);
    if (points.list.empty()) {
        return nullptr;
    }

    std::unique_ptr<GeoLine> result = GeoLine::create_unique();
    if (!result) {
        return nullptr;
    }
    if (result->from_coords(points) != GEO_PARSE_OK) {
        return nullptr;
    }

    return result;
}
244
245
1
// Reads a polygon: a 32-bit loop count, then for each loop a 32-bit point
// count followed by that many coordinates.
// Returns nullptr when the loop count fails the minMemSize() check, when any
// loop declares fewer than 3 points or produces an empty coordinate list, when
// the polygon object could not be created, or when GeoPolygon::from_coords()
// does not report GEO_PARSE_OK.
std::unique_ptr<GeoPolygon> WkbParse::readPolygon(WkbParseContext& ctx) {
    const uint32_t loop_count = ctx.dis.readUnsigned();
    if (minMemSize(wkbPolygon, loop_count, ctx) != GEO_PARSE_OK) {
        return nullptr;
    }

    GeoCoordinateListList loops;
    uint32_t loop_index = 0;
    while (loop_index < loop_count) {
        const uint32_t point_count = ctx.dis.readUnsigned();
        // A polygon loop must have at least 3 points.
        if (point_count < 3) {
            return nullptr;
        }

        auto loop = std::make_unique<GeoCoordinateList>();
        *loop = WkbParse::readCoordinateList(point_count, ctx);
        if (loop->list.empty()) {
            return nullptr;
        }
        loops.add(std::move(loop));
        ++loop_index;
    }

    std::unique_ptr<GeoPolygon> result = GeoPolygon::create_unique();
    if (!result) {
        return nullptr;
    }
    if (result->from_coords(loops) != GEO_PARSE_OK) {
        return nullptr;
    }

    return result;
}
273
274
4
// Reads `size` coordinates from ctx and returns them as a list.
// Each coordinate is loaded into ctx.ordValues by readCoordinate(); the first
// two ordinates are used as x and y. If readCoordinate() reports failure, an
// empty list is returned instead of the partial result.
GeoCoordinateList WkbParse::readCoordinateList(unsigned size, WkbParseContext& ctx) {
    GeoCoordinateList result;
    for (uint32_t remaining = size; remaining > 0; --remaining) {
        if (!readCoordinate(ctx)) {
            return GeoCoordinateList();
        }
        GeoCoordinate point;
        point.x = ctx.ordValues[0];
        point.y = ctx.ordValues[1];
        result.add(point);
    }
    return result;
}
288
289
2
// Sanity-checks a declared element count against the size of the input.
//
// For wkbLine, `size` is a point count and each point needs at least two
// doubles. For wkbPolygon, `size` is a loop count and each loop needs at least
// its 4-byte point count (an empty loop). Any other type requires nothing.
//
// Returns GEO_PARSE_WKB_SYNTAX_ERROR when ctx.dis.size() is smaller than the
// computed minimum, GEO_PARSE_OK otherwise.
GeoParseStatus WkbParse::minMemSize(int wkbType, uint64_t size, WkbParseContext& ctx) {
    constexpr uint64_t bytes_per_coord = 2 * sizeof(double);
    constexpr uint64_t bytes_per_empty_loop = 4;

    uint64_t required = 0;
    if (wkbType == wkbLine) {
        required = size * bytes_per_coord;
    } else if (wkbType == wkbPolygon) {
        required = size * bytes_per_empty_loop;
    }

    return ctx.dis.size() < required ? GEO_PARSE_WKB_SYNTAX_ERROR : GEO_PARSE_OK;
}
311
5
// Reads ctx.inputDimension doubles from ctx.dis into ctx.ordValues, in order.
// Always returns true; read failures are not reported through the return value.
bool WkbParse::readCoordinate(WkbParseContext& ctx) {
    std::size_t axis = 0;
    while (axis < ctx.inputDimension) {
        ctx.ordValues[axis] = ctx.dis.readDouble();
        ++axis;
    }

    return true;
}
318
319
} // namespace doris