Coverage Report

Created: 2025-03-12 11:55

/root/doris/be/src/geo/wkb_parse.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "wkb_parse.h"
19
20
#include <cstddef>
21
#include <istream>
22
#include <sstream>
23
#include <vector>
24
25
#include "geo/ByteOrderDataInStream.h"
26
#include "geo/ByteOrderValues.h"
27
#include "geo/geo_types.h"
28
#include "geo/wkb_parse_ctx.h"
29
#include "geo_tobinary_type.h"
30
31
namespace doris {
32
33
0
unsigned char ASCIIHexToUChar(char val) {
34
0
    switch (val) {
35
0
    case '0':
36
0
        return 0;
37
0
    case '1':
38
0
        return 1;
39
0
    case '2':
40
0
        return 2;
41
0
    case '3':
42
0
        return 3;
43
0
    case '4':
44
0
        return 4;
45
0
    case '5':
46
0
        return 5;
47
0
    case '6':
48
0
        return 6;
49
0
    case '7':
50
0
        return 7;
51
0
    case '8':
52
0
        return 8;
53
0
    case '9':
54
0
        return 9;
55
0
    case 'A':
56
0
    case 'a':
57
0
        return 10;
58
0
    case 'B':
59
0
    case 'b':
60
0
        return 11;
61
0
    case 'C':
62
0
    case 'c':
63
0
        return 12;
64
0
    case 'D':
65
0
    case 'd':
66
0
        return 13;
67
0
    case 'E':
68
0
    case 'e':
69
0
        return 14;
70
0
    case 'F':
71
0
    case 'f':
72
0
        return 15;
73
0
    default:
74
0
        return GEO_PARSE_WKB_SYNTAX_ERROR;
75
0
    }
76
0
}
77
78
0
/// Parses hex-encoded WKB text from `is` and hands the resulting shape to the caller.
///
/// @param is    Stream of hexadecimal characters, consumed by read_hex().
/// @param shape Out-parameter, written only when parsing succeeds. It receives the
///              raw pointer stored in the parse context, so the caller is responsible
///              for the object from then on.
/// @return GEO_PARSE_OK on success; GEO_PARSE_WKB_SYNTAX_ERROR for every failure.
GeoParseStatus WkbParse::parse_wkb(std::istream& is, GeoShape** shape) {
    WkbParseContext ctx;

    // read_hex() fills in the context it is given and returns that same pointer, so
    // the result is inspected in place instead of being copied back over `ctx`.
    const WkbParseContext* parsed = WkbParse::read_hex(is, &ctx);
    if (parsed->parse_status != GEO_PARSE_OK) {
        // Any failure is reported as a WKB syntax error, matching the status used by
        // the other error paths of this parser.
        return GEO_PARSE_WKB_SYNTAX_ERROR;
    }

    *shape = parsed->shape;
    return GEO_PARSE_OK;
}
89
90
0
/// Decodes a stream of hexadecimal text into raw bytes and parses them with read().
///
/// Characters are consumed in pairs (high nibble first) until end of input. An odd
/// number of characters, or any character outside [0-9A-Fa-f], is a syntax error.
///
/// @param is  Source of hexadecimal characters.
/// @param ctx Parse context; on failure its parse_status is set to
///            GEO_PARSE_WKB_SYNTAX_ERROR.
/// @return `ctx` when decoding fails, otherwise whatever read() returns.
WkbParseContext* WkbParse::read_hex(std::istream& is, WkbParseContext* ctx) {
    // ASCIIHexToUChar() reports bad input through the same unsigned char it uses for
    // valid nibbles, so the characters are validated here before they are decoded.
    const auto is_hex_digit = [](int c) {
        return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
    };

    // Scratch stream that collects the decoded bytes for read().
    std::stringstream os(std::ios_base::binary | std::ios_base::in | std::ios_base::out);

    while (true) {
        const int input_high = is.get();
        if (input_high == std::char_traits<char>::eof()) {
            break; // clean end of input on a pair boundary
        }

        const int input_low = is.get();
        if (input_low == std::char_traits<char>::eof() || !is_hex_digit(input_high) ||
            !is_hex_digit(input_low)) {
            // Dangling half byte or a non-hex character.
            ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
            return ctx;
        }

        const unsigned char result_high = ASCIIHexToUChar(static_cast<char>(input_high));
        const unsigned char result_low = ASCIIHexToUChar(static_cast<char>(input_low));
        const unsigned char value = static_cast<unsigned char>((result_high << 4) + result_low);

        // Append the decoded byte to the scratch stream.
        os.put(static_cast<char>(value));
    }
    return WkbParse::read(os, ctx);
}
119
120
0
/// Parses one binary WKB geometry from `is` into `ctx`.
///
/// The whole stream is loaded into a temporary buffer, the first byte selects the
/// byte order, and readGeometry() builds the shape.
///
/// @param is  Seekable stream holding the raw WKB bytes; it is read from the start
///            regardless of its current position.
/// @param ctx Parse context. On success ctx->shape receives the released shape
///            pointer; on failure ctx->parse_status is set to
///            GEO_PARSE_WKB_SYNTAX_ERROR.
/// @return Always `ctx`.
WkbParseContext* WkbParse::read(std::istream& is, WkbParseContext* ctx) {
    // Measure the stream by seeking to its end, then rewind for the actual read.
    is.seekg(0, std::ios::end);
    auto size = is.tellg();
    is.seekg(0, std::ios::beg);

    // Rejects a failed tellg() (negative) as well as an empty stream, which would
    // not even contain the byte-order marker.
    if (size <= 0) {
        ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
        return ctx;
    }

    std::vector<unsigned char> buf(static_cast<size_t>(size));
    if (!is.read(reinterpret_cast<char*>(buf.data()), static_cast<std::streamsize>(size))) {
        ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
        return ctx;
    }

    // size > 0 was checked above, so buf[0] exists. It is a single byte and therefore
    // needs no endian handling itself.
    auto order_marker = buf[0];
    if (order_marker != byteOrder::wkbNDR && order_marker != byteOrder::wkbXDR) {
        ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
        return ctx;
    }

    // NOTE(review): `buf` is destroyed when this function returns. If
    // ByteOrderDataInStream keeps the pointer instead of copying the bytes, ctx->dis
    // must not be read after this call -- confirm.
    ctx->dis = ByteOrderDataInStream(buf.data(), buf.size());
    if (order_marker == byteOrder::wkbNDR) {
        ctx->dis.setOrder(ByteOrderValues::ENDIAN_LITTLE);
    } else {
        ctx->dis.setOrder(ByteOrderValues::ENDIAN_BIG);
    }

    std::unique_ptr<GeoShape> shape = readGeometry(ctx);
    if (!shape) {
        ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
        return ctx;
    }

    // Ownership leaves the unique_ptr here; the context only stores a raw pointer.
    ctx->shape = shape.release();
    return ctx;
}
167
168
0
/// Reads the geometry header from ctx->dis and dispatches to the matching reader.
///
/// @param ctx Parse context whose `dis` stream is positioned at the byte-order byte.
/// @return The parsed shape, or nullptr when the input is shorter than 5 bytes, the
///         geometry type is not point/line/polygon, the specific reader fails, or any
///         exception escapes the read operations.
std::unique_ptr<GeoShape> WkbParse::readGeometry(WkbParseContext* ctx) {
    try {
        // The header needs 1 byte for the byte order plus 4 bytes for the type word.
        if (ctx->dis.size() < 5) {
            return nullptr;
        }

        // The caller has already interpreted the byte-order byte; just step over it.
        ctx->dis.readByte();

        const uint32_t type_word = ctx->dis.readUnsigned();

        // When the SRID flag is set, one more unsigned value follows the type word.
        // It is consumed and discarded.
        if ((type_word & WKB_SRID_FLAG) != 0) {
            ctx->dis.readUnsigned();
        }

        // Strip the flag bits to obtain the base geometry type.
        const uint32_t base_type = type_word & WKB_TYPE_MASK;

        switch (base_type) {
        case wkbType::wkbPoint:
            return readPoint(ctx);
        case wkbType::wkbLine:
            return readLine(ctx);
        case wkbType::wkbPolygon:
            return readPolygon(ctx);
        default:
            return nullptr;
        }
    } catch (...) {
        // Any exception raised while reading is reported as a parse failure.
        return nullptr;
    }
}
213
214
0
/// Reads a single coordinate from the context and turns it into a GeoPoint.
///
/// @param ctx Parse context positioned at the point's coordinate data.
/// @return The point, or nullptr when no coordinate could be read, the point could
///         not be created, or from_coord() does not return GEO_PARSE_OK.
std::unique_ptr<GeoPoint> WkbParse::readPoint(WkbParseContext* ctx) {
    GeoCoordinateList single = WkbParse::readCoordinateList(1, ctx);
    if (single.list.empty()) {
        return nullptr;
    }

    std::unique_ptr<GeoPoint> result = GeoPoint::create_unique();
    if (result == nullptr) {
        return nullptr;
    }
    if (result->from_coord(single.list[0]) != GEO_PARSE_OK) {
        return nullptr;
    }
    return result;
}
227
228
0
/// Reads a line string: a point count followed by that many coordinates.
///
/// @param ctx Parse context positioned at the line's point count.
/// @return The line, or nullptr when the declared count fails the minMemSize() check,
///         no coordinates were read, the line could not be created, or from_coords()
///         does not return GEO_PARSE_OK.
std::unique_ptr<GeoLine> WkbParse::readLine(WkbParseContext* ctx) {
    const uint32_t point_count = ctx->dis.readUnsigned();

    // Reject counts that the input cannot possibly hold before reading coordinates.
    if (minMemSize(wkbLine, point_count, ctx) != GEO_PARSE_OK) {
        return nullptr;
    }

    GeoCoordinateList points = WkbParse::readCoordinateList(point_count, ctx);
    if (points.list.empty()) {
        return nullptr;
    }

    std::unique_ptr<GeoLine> result = GeoLine::create_unique();
    if (result == nullptr) {
        return nullptr;
    }
    if (result->from_coords(points) != GEO_PARSE_OK) {
        return nullptr;
    }
    return result;
}
246
247
0
/// Reads a polygon: a loop count followed by that many loops, each made of a point
/// count and its coordinates.
///
/// @param ctx Parse context positioned at the polygon's loop count.
/// @return The polygon, or nullptr when the loop count fails the minMemSize() check,
///         a loop declares fewer than 3 points, a loop yields no coordinates, the
///         polygon could not be created, or from_coords() does not return
///         GEO_PARSE_OK.
std::unique_ptr<GeoPolygon> WkbParse::readPolygon(WkbParseContext* ctx) {
    const uint32_t loop_count = ctx->dis.readUnsigned();
    if (minMemSize(wkbPolygon, loop_count, ctx) != GEO_PARSE_OK) {
        return nullptr;
    }

    GeoCoordinateListList loops;
    for (uint32_t loop_idx = 0; loop_idx != loop_count; ++loop_idx) {
        const uint32_t point_count = ctx->dis.readUnsigned();

        // A polygon loop must have at least 3 points.
        if (point_count < 3) {
            return nullptr;
        }

        auto loop_points = std::make_unique<GeoCoordinateList>(
                WkbParse::readCoordinateList(point_count, ctx));
        if (loop_points->list.empty()) {
            return nullptr;
        }

        // The list container receives the raw pointer released from the unique_ptr.
        loops.add(loop_points.release());
    }

    std::unique_ptr<GeoPolygon> result = GeoPolygon::create_unique();
    if (result == nullptr) {
        return nullptr;
    }
    if (result->from_coords(loops) != GEO_PARSE_OK) {
        return nullptr;
    }
    return result;
}
275
276
0
/// Reads `size` coordinates from the context and collects their x/y values.
///
/// @param size Number of coordinates to read.
/// @param ctx  Parse context; each coordinate is staged in ctx->ordValues by
///             readCoordinate() before being copied out.
/// @return The collected coordinates, or an empty list as soon as readCoordinate()
///         reports failure. A `size` of zero also yields an empty list.
GeoCoordinateList WkbParse::readCoordinateList(unsigned size, WkbParseContext* ctx) {
    GeoCoordinateList collected;
    for (unsigned remaining = size; remaining > 0; --remaining) {
        if (!readCoordinate(ctx)) {
            return GeoCoordinateList();
        }

        // Only the first two ordinates are kept: x, then y.
        GeoCoordinate point;
        point.x = ctx->ordValues[0];
        point.y = ctx->ordValues[1];
        collected.add(point);
    }
    return collected;
}
290
291
0
/// Sanity-checks a declared element count against the size of the input stream.
///
/// For a line each element is costed as one 2D coordinate (two doubles); for a
/// polygon each element is costed as an empty loop (4 bytes). Other geometry types
/// have no minimum and always pass.
///
/// @param wkbType Geometry type the count belongs to (wkbLine or wkbPolygon).
/// @param size    Declared number of points (line) or loops (polygon).
/// @param ctx     Parse context whose `dis` stream size is compared to the minimum.
/// @return GEO_PARSE_WKB_SYNTAX_ERROR when ctx->dis.size() is smaller than the
///         computed minimum, GEO_PARSE_OK otherwise.
GeoParseStatus WkbParse::minMemSize(int wkbType, uint64_t size, WkbParseContext* ctx) {
    constexpr uint64_t kCoordBytes = 2 * sizeof(double);
    constexpr uint64_t kEmptyLoopBytes = 4;

    uint64_t required = 0;
    if (wkbType == wkbLine) {
        required = size * kCoordBytes;
    } else if (wkbType == wkbPolygon) {
        required = size * kEmptyLoopBytes;
    }

    return ctx->dis.size() < required ? GEO_PARSE_WKB_SYNTAX_ERROR : GEO_PARSE_OK;
}
313
0
/// Reads one coordinate: ctx->inputDimension doubles from ctx->dis, stored in order
/// into ctx->ordValues.
///
/// @param ctx Parse context supplying the stream, the dimension and the buffer.
/// @return Always true; a failed read can only surface as an exception thrown by
///         readDouble(), if it throws at all.
// NOTE(review): nothing here checks inputDimension against the capacity of
// ordValues -- confirm the context guarantees it fits.
bool WkbParse::readCoordinate(WkbParseContext* ctx) {
    std::size_t axis = 0;
    while (axis < ctx->inputDimension) {
        ctx->ordValues[axis] = ctx->dis.readDouble();
        ++axis;
    }
    return true;
}
320
321
} // namespace doris