Coverage Report

Created: 2024-11-21 16:04

/root/doris/be/src/geo/wkb_parse.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "wkb_parse.h"
19
20
#include <cstddef>
21
#include <istream>
22
#include <sstream>
23
#include <vector>
24
25
#include "geo/ByteOrderDataInStream.h"
26
#include "geo/ByteOrderValues.h"
27
#include "geo/geo_types.h"
28
#include "geo/wkb_parse_ctx.h"
29
#include "geo_tobinary_type.h"
30
31
namespace doris {
32
33
0
unsigned char ASCIIHexToUChar(char val) {
34
0
    switch (val) {
35
0
    case '0':
36
0
        return 0;
37
0
    case '1':
38
0
        return 1;
39
0
    case '2':
40
0
        return 2;
41
0
    case '3':
42
0
        return 3;
43
0
    case '4':
44
0
        return 4;
45
0
    case '5':
46
0
        return 5;
47
0
    case '6':
48
0
        return 6;
49
0
    case '7':
50
0
        return 7;
51
0
    case '8':
52
0
        return 8;
53
0
    case '9':
54
0
        return 9;
55
0
    case 'A':
56
0
    case 'a':
57
0
        return 10;
58
0
    case 'B':
59
0
    case 'b':
60
0
        return 11;
61
0
    case 'C':
62
0
    case 'c':
63
0
        return 12;
64
0
    case 'D':
65
0
    case 'd':
66
0
        return 13;
67
0
    case 'E':
68
0
    case 'e':
69
0
        return 14;
70
0
    case 'F':
71
0
    case 'f':
72
0
        return 15;
73
0
    default:
74
0
        return GEO_PARSE_WKB_SYNTAX_ERROR;
75
0
    }
76
0
}
77
78
0
// Parses a hex-encoded WKB geometry from `is`.
//
// is:    stream of ASCII hex digits; read_hex() consumes it until EOF.
// shape: out-parameter. It is assigned the parsed geometry only when parsing
//        succeeds and is left untouched otherwise. The pointer originates from
//        a released std::unique_ptr, so the caller presumably takes ownership
//        -- TODO confirm against callers.
// Returns GEO_PARSE_OK on success; every failure is reported as
// GEO_PARSE_WKT_SYNTAX_ERROR.
GeoParseStatus WkbParse::parse_wkb(std::istream& is, GeoShape** shape) {
    WkbParseContext ctx;

    // read_hex() fills `ctx` in place and returns the very pointer it was
    // given, so there is nothing to copy back. (The former
    // `ctx = *read_hex(is, &ctx)` copy-assigned the context onto itself.)
    WkbParse::read_hex(is, &ctx);

    if (ctx.parse_status != GEO_PARSE_OK) {
        // NOTE(review): the more specific status recorded in ctx is replaced by
        // the WKT (not WKB) syntax error. Kept as-is because callers may
        // compare against this exact value -- confirm whether
        // GEO_PARSE_WKB_SYNTAX_ERROR was intended.
        return GEO_PARSE_WKT_SYNTAX_ERROR;
    }

    *shape = ctx.shape;
    return GEO_PARSE_OK;
}
89
90
0
// Decodes a stream of ASCII hex digits into raw bytes and parses those bytes as WKB.
//
// is:  source of hex characters, consumed two at a time until EOF.
// ctx: parse context that receives the result; the same pointer is returned.
//
// On malformed input (an odd number of characters, or any character that is not
// a hexadecimal digit) ctx->parse_status is set to GEO_PARSE_WKB_SYNTAX_ERROR
// and the function returns without attempting to parse. Otherwise the decoded
// bytes are handed to read().
WkbParseContext* WkbParse::read_hex(std::istream& is, WkbParseContext* ctx) {
    // ASCIIHexToUChar() reports bad input through its unsigned char result,
    // which cannot be told apart from a decoded digit reliably, so the
    // characters are validated here before they are converted. EOF (a negative
    // int) is rejected by this check as well.
    const auto is_hex_digit = [](int c) {
        return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
    };

    // Binary in/out buffer that collects the decoded bytes for read().
    std::stringstream os(std::ios_base::binary | std::ios_base::in | std::ios_base::out);

    while (true) {
        const int input_high = is.get();
        if (input_high == std::char_traits<char>::eof()) {
            break; // clean end of input on a byte boundary
        }

        // A missing low nibble means the input had an odd number of characters.
        const int input_low = is.get();
        if (input_low == std::char_traits<char>::eof() || !is_hex_digit(input_high) ||
            !is_hex_digit(input_low)) {
            ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
            return ctx;
        }

        const unsigned char result_high = ASCIIHexToUChar(static_cast<char>(input_high));
        const unsigned char result_low = ASCIIHexToUChar(static_cast<char>(input_low));
        const unsigned char value = static_cast<unsigned char>((result_high << 4) + result_low);

        // Unformatted single-byte write of the decoded value.
        os.put(static_cast<char>(value));
    }
    return WkbParse::read(os, ctx);
}
119
120
0
// Reads the whole of `is` into memory and parses it as one WKB geometry.
//
// is:  seekable stream holding raw WKB bytes; it is rewound to the beginning
//      and read to the end.
// ctx: parse context. ctx->dis is pointed at the bytes, ctx->shape receives the
//      parsed geometry (released from its unique_ptr, or nullptr on failure)
//      and ctx->parse_status is set to GEO_PARSE_WKB_SYNTAX_ERROR on failure.
// Returns `ctx`.
WkbParseContext* WkbParse::read(std::istream& is, WkbParseContext* ctx) {
    is.seekg(0, std::ios::end);
    const auto length = static_cast<std::streamoff>(is.tellg());
    is.seekg(0, std::ios::beg);

    // tellg() signals failure with -1; converting that to size_t would request
    // an enormous buffer. An empty stream cannot contain a geometry either, so
    // both cases are rejected before anything is allocated or read.
    if (length <= 0) {
        ctx->shape = nullptr;
        ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
        return ctx;
    }

    std::vector<unsigned char> buf(static_cast<size_t>(length));
    if (!is.read(reinterpret_cast<char*>(buf.data()), static_cast<std::streamsize>(length))) {
        // Fewer bytes than announced: do not parse a partially filled buffer.
        ctx->shape = nullptr;
        ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
        return ctx;
    }

    // NOTE(review): `buf` is local to this function. If ByteOrderDataInStream
    // keeps the pointer instead of copying the bytes, ctx->dis must not be used
    // after this function returns -- confirm.
    ctx->dis = ByteOrderDataInStream(buf.data(), buf.size()); // will default to machine endian

    ctx->shape = readGeometry(ctx).release();

    if (!ctx->shape) {
        ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
    }
    return ctx;
}
137
138
0
// Reads one geometry (byte-order marker, type word, payload) from ctx->dis.
//
// Returns the parsed shape, or nullptr when the geometry type is not point,
// line or polygon, or when the type-specific reader fails.
std::unique_ptr<GeoShape> WkbParse::readGeometry(WkbParseContext* ctx) {
    // The leading byte selects the byte order used for the rest of the record.
    // Any value other than the two known markers leaves the stream's current
    // order (machine endian by default) in place.
    const unsigned char order_marker = ctx->dis.readByte();
    if (order_marker == byteOrder::wkbNDR) {
        ctx->dis.setOrder(ByteOrderValues::ENDIAN_LITTLE);
    } else if (order_marker == byteOrder::wkbXDR) {
        ctx->dis.setOrder(ByteOrderValues::ENDIAN_BIG);
    }

    // Keep the low 16 bits of the type word and reduce them modulo 1000.
    // NOTE(review): presumably this strips EWKB flag bits and the ISO
    // 1000/2000/3000 dimension offsets -- confirm.
    const uint32_t raw_type = ctx->dis.readUnsigned();
    const uint32_t base_type = (raw_type & 0xffff) % 1000;

    switch (base_type) {
    case wkbType::wkbPoint:
        return readPoint(ctx);
    case wkbType::wkbLine:
        return readLine(ctx);
    case wkbType::wkbPolygon:
        return readPolygon(ctx);
    default:
        return nullptr;
    }
}
170
171
0
// Reads a single coordinate from ctx->dis and builds a GeoPoint from it.
//
// Returns the point, or nullptr when GeoPoint::from_coord() rejects the coordinate.
std::unique_ptr<GeoPoint> WkbParse::readPoint(WkbParseContext* ctx) {
    // Exactly one coordinate is requested, so list[0] is the point's position.
    GeoCoordinateList single = WkbParse::readCoordinateList(1, ctx);
    std::unique_ptr<GeoPoint> result = GeoPoint::create_unique();

    if (result->from_coord(single.list[0]) != GEO_PARSE_OK) {
        return nullptr;
    }
    return result;
}
181
182
0
// Reads a line string: a 32-bit point count followed by that many coordinates.
//
// Returns the line, or nullptr when the announced point count cannot fit in the
// input or when GeoLine::from_coords() rejects the coordinates.
std::unique_ptr<GeoLine> WkbParse::readLine(WkbParseContext* ctx) {
    const uint32_t num_points = ctx->dis.readUnsigned();

    // The count comes straight from the input. Refuse counts the buffer cannot
    // possibly satisfy instead of looping over up to 2^32 coordinates. (The
    // result of this check used to be discarded.)
    if (minMemSize(wkbLine, num_points, ctx) != GEO_PARSE_OK) {
        return nullptr;
    }

    GeoCoordinateList coords = WkbParse::readCoordinateList(num_points, ctx);
    std::unique_ptr<GeoLine> line = GeoLine::create_unique();

    if (line->from_coords(coords) != GEO_PARSE_OK) {
        return nullptr;
    }
    return line;
}
195
196
0
// Reads a polygon: a 32-bit ring count, then for every ring a 32-bit point
// count followed by that many coordinates.
//
// Returns the polygon, or nullptr when a ring or point count cannot fit in the
// input or when GeoPolygon::from_coords() rejects the rings.
std::unique_ptr<GeoPolygon> WkbParse::readPolygon(WkbParseContext* ctx) {
    const uint32_t num_loops = ctx->dis.readUnsigned();

    // Counts are untrusted input; reject values the buffer cannot satisfy.
    // (The result of this check used to be discarded.)
    if (minMemSize(wkbPolygon, num_loops, ctx) != GEO_PARSE_OK) {
        return nullptr;
    }

    GeoCoordinateListList coordss;
    // Unsigned index: the bound is a uint32_t, so an int counter could overflow.
    for (uint32_t i = 0; i < num_loops; ++i) {
        const uint32_t num_points = ctx->dis.readUnsigned();
        // A ring is a coordinate sequence, so the line-string bound applies to it.
        if (minMemSize(wkbLine, num_points, ctx) != GEO_PARSE_OK) {
            return nullptr;
        }

        // Hold the ring in a unique_ptr until it is handed over, so it is not
        // leaked if reading throws.
        // NOTE(review): add() receives a raw pointer and presumably takes
        // ownership of it -- confirm.
        auto ring = std::make_unique<GeoCoordinateList>(
                WkbParse::readCoordinateList(num_points, ctx));
        coordss.add(ring.release());
    }

    std::unique_ptr<GeoPolygon> polygon = GeoPolygon::create_unique();

    if (polygon->from_coords(coordss) != GEO_PARSE_OK) {
        return nullptr;
    }
    return polygon;
}
215
216
0
// Reads `size` consecutive coordinates from ctx->dis.
//
// size: number of coordinates to read.
// ctx:  parse context; each coordinate is staged in ctx->ordValues by
//       readCoordinate() before being copied out.
// Returns the coordinates in input order. Only the first two ordinates of each
// coordinate (x, y) are kept.
GeoCoordinateList WkbParse::readCoordinateList(unsigned size, WkbParseContext* ctx) {
    GeoCoordinateList result;
    for (unsigned remaining = size; remaining > 0; --remaining) {
        readCoordinate(ctx);

        GeoCoordinate point;
        point.x = ctx->ordValues[0];
        point.y = ctx->ordValues[1];
        result.add(point);
    }
    return result;
}
229
230
0
// Checks that an element count read from the input could fit in the data held
// by ctx->dis.
//
// wkbType: geometry kind the count belongs to. wkbLine counts coordinates (two
//          doubles each); wkbPolygon counts rings (at least a 4-byte point
//          count each). Any other kind is not checked.
// size:    the announced element count.
// ctx:     parse context whose stream size is the upper bound.
// Returns GEO_PARSE_WKB_SYNTAX_ERROR when the elements would need more bytes
// than ctx->dis.size() reports, GEO_PARSE_OK otherwise.
GeoParseStatus WkbParse::minMemSize(int wkbType, uint64_t size, WkbParseContext* ctx) {
    constexpr uint64_t minCoordSize = 2 * sizeof(double);
    constexpr uint64_t minLoopSize = 4; // empty loop

    uint64_t bytes_per_element = 0;
    switch (wkbType) {
    case wkbLine:
        bytes_per_element = minCoordSize;
        break;
    case wkbPolygon:
        bytes_per_element = minLoopSize;
        break;
    default:
        // No minimum is defined for other kinds.
        return GEO_PARSE_OK;
    }

    // Compare by division rather than computing size * bytes_per_element: the
    // product can wrap around in 64 bits and let an oversized count slip
    // through. For non-wrapping values both forms give the same answer.
    const uint64_t available = static_cast<uint64_t>(ctx->dis.size());
    if (size > available / bytes_per_element) {
        return GEO_PARSE_WKB_SYNTAX_ERROR;
    }
    return GEO_PARSE_OK;
}
252
0
// Reads one coordinate from ctx->dis into ctx->ordValues.
//
// ctx->inputDimension doubles are read, one per ordinate, and stored at
// ordValues[0 .. inputDimension - 1]. Always returns true.
bool WkbParse::readCoordinate(WkbParseContext* ctx) {
    std::size_t axis = 0;
    while (axis < ctx->inputDimension) {
        ctx->ordValues[axis] = ctx->dis.readDouble();
        ++axis;
    }
    return true;
}
259
260
} // namespace doris