be/src/format/jni/jni_data_bridge.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "jni_data_bridge.h" |
19 | | |
20 | | #include <glog/logging.h> |
21 | | |
22 | | #include <sstream> |
23 | | #include <variant> |
24 | | |
25 | | #include "core/block/block.h" |
26 | | #include "core/column/column_array.h" |
27 | | #include "core/column/column_map.h" |
28 | | #include "core/column/column_nullable.h" |
29 | | #include "core/column/column_string.h" |
30 | | #include "core/column/column_struct.h" |
31 | | #include "core/column/column_varbinary.h" |
32 | | #include "core/data_type/data_type_array.h" |
33 | | #include "core/data_type/data_type_map.h" |
34 | | #include "core/data_type/data_type_nullable.h" |
35 | | #include "core/data_type/data_type_struct.h" |
36 | | #include "core/data_type/data_type_varbinary.h" |
37 | | #include "core/data_type/define_primitive_type.h" |
38 | | #include "core/data_type/primitive_type.h" |
39 | | #include "core/types.h" |
40 | | #include "core/value/decimalv2_value.h" |
41 | | |
42 | | namespace doris { |
43 | | |
// X-macro listing every fixed-width type handled by this bridge.
// Each row is expanded as ENTRY(primitive type tag, Doris column class, C++ element type).
// fill_column() and _fill_column_meta() pass their local DISPATCH macro as ENTRY to
// generate one switch case per row.
#define FOR_FIXED_LENGTH_TYPES(ENTRY)                                  \
    ENTRY(PrimitiveType::TYPE_TINYINT, ColumnInt8, Int8)               \
    ENTRY(PrimitiveType::TYPE_BOOLEAN, ColumnUInt8, UInt8)             \
    ENTRY(PrimitiveType::TYPE_SMALLINT, ColumnInt16, Int16)            \
    ENTRY(PrimitiveType::TYPE_INT, ColumnInt32, Int32)                 \
    ENTRY(PrimitiveType::TYPE_BIGINT, ColumnInt64, Int64)              \
    ENTRY(PrimitiveType::TYPE_LARGEINT, ColumnInt128, Int128)          \
    ENTRY(PrimitiveType::TYPE_FLOAT, ColumnFloat32, Float32)           \
    ENTRY(PrimitiveType::TYPE_DOUBLE, ColumnFloat64, Float64)          \
    ENTRY(PrimitiveType::TYPE_DECIMALV2, ColumnDecimal128V2, Int128)   \
    ENTRY(PrimitiveType::TYPE_DECIMAL128I, ColumnDecimal128V3, Int128) \
    ENTRY(PrimitiveType::TYPE_DECIMAL32, ColumnDecimal32, Int32)       \
    ENTRY(PrimitiveType::TYPE_DECIMAL64, ColumnDecimal64, Int64)       \
    ENTRY(PrimitiveType::TYPE_DATE, ColumnDate, Int64)                 \
    ENTRY(PrimitiveType::TYPE_DATEV2, ColumnDateV2, UInt32)            \
    ENTRY(PrimitiveType::TYPE_DATETIME, ColumnDateTime, Int64)         \
    ENTRY(PrimitiveType::TYPE_DATETIMEV2, ColumnDateTimeV2, UInt64)    \
    ENTRY(PrimitiveType::TYPE_TIMESTAMPTZ, ColumnTimeStampTz, UInt64)  \
    ENTRY(PrimitiveType::TYPE_IPV4, ColumnIPv4, IPv4)                  \
    ENTRY(PrimitiveType::TYPE_IPV6, ColumnIPv6, IPv6)
64 | | |
65 | 0 | Status JniDataBridge::fill_block(Block* block, const ColumnNumbers& arguments, long table_address) { |
66 | 0 | if (table_address == 0) { |
67 | 0 | return Status::InternalError("table_address is 0"); |
68 | 0 | } |
69 | 0 | TableMetaAddress table_meta(table_address); |
70 | 0 | long num_rows = table_meta.next_meta_as_long(); |
71 | 0 | for (size_t i : arguments) { |
72 | 0 | if (block->get_by_position(i).column.get() == nullptr) { |
73 | 0 | auto return_type = block->get_data_type(i); |
74 | 0 | bool result_nullable = return_type->is_nullable(); |
75 | 0 | ColumnUInt8::MutablePtr null_col = nullptr; |
76 | 0 | if (result_nullable) { |
77 | 0 | return_type = remove_nullable(return_type); |
78 | 0 | null_col = ColumnUInt8::create(); |
79 | 0 | } |
80 | 0 | auto res_col = return_type->create_column(); |
81 | 0 | if (result_nullable) { |
82 | 0 | block->replace_by_position( |
83 | 0 | i, ColumnNullable::create(std::move(res_col), std::move(null_col))); |
84 | 0 | } else { |
85 | 0 | block->replace_by_position(i, std::move(res_col)); |
86 | 0 | } |
87 | 0 | } else if (is_column_const(*(block->get_by_position(i).column))) { |
88 | 0 | auto doris_column = block->get_by_position(i).column->convert_to_full_column_if_const(); |
89 | 0 | bool is_nullable = block->get_by_position(i).type->is_nullable(); |
90 | 0 | block->replace_by_position(i, is_nullable ? make_nullable(doris_column) : doris_column); |
91 | 0 | } |
92 | 0 | auto& column_with_type_and_name = block->get_by_position(i); |
93 | 0 | auto& column_ptr = column_with_type_and_name.column; |
94 | 0 | auto& column_type = column_with_type_and_name.type; |
95 | 0 | RETURN_IF_ERROR(fill_column(table_meta, column_ptr, column_type, num_rows)); |
96 | 0 | } |
97 | 0 | return Status::OK(); |
98 | 0 | } |
99 | | |
100 | | Status JniDataBridge::fill_column(TableMetaAddress& address, ColumnPtr& doris_column, |
101 | 0 | const DataTypePtr& data_type, size_t num_rows) { |
102 | 0 | auto logical_type = data_type->get_primitive_type(); |
103 | 0 | void* null_map_ptr = address.next_meta_as_ptr(); |
104 | 0 | if (null_map_ptr == nullptr) { |
105 | | // org.apache.doris.common.jni.vec.ColumnType.Type#UNSUPPORTED will set column address as 0 |
106 | 0 | return Status::InternalError("Unsupported type {} in java side", data_type->get_name()); |
107 | 0 | } |
108 | 0 | MutableColumnPtr data_column; |
109 | 0 | if (doris_column->is_nullable()) { |
110 | 0 | auto* nullable_column = |
111 | 0 | reinterpret_cast<ColumnNullable*>(doris_column->assume_mutable().get()); |
112 | 0 | data_column = nullable_column->get_nested_column_ptr(); |
113 | 0 | NullMap& null_map = nullable_column->get_null_map_data(); |
114 | 0 | size_t origin_size = null_map.size(); |
115 | 0 | null_map.resize(origin_size + num_rows); |
116 | 0 | memcpy(null_map.data() + origin_size, static_cast<bool*>(null_map_ptr), num_rows); |
117 | 0 | } else { |
118 | 0 | data_column = doris_column->assume_mutable(); |
119 | 0 | } |
120 | | // Date and DateTime are deprecated and not supported. |
121 | 0 | switch (logical_type) { |
122 | 0 | #define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE) \ |
123 | 0 | case TYPE_INDEX: \ |
124 | 0 | return _fill_fixed_length_column<COLUMN_TYPE, CPP_TYPE>( \ |
125 | 0 | data_column, reinterpret_cast<CPP_TYPE*>(address.next_meta_as_ptr()), num_rows); |
126 | 0 | FOR_FIXED_LENGTH_TYPES(DISPATCH) |
127 | 0 | #undef DISPATCH |
128 | 0 | case PrimitiveType::TYPE_STRING: |
129 | 0 | [[fallthrough]]; |
130 | 0 | case PrimitiveType::TYPE_CHAR: |
131 | 0 | [[fallthrough]]; |
132 | 0 | case PrimitiveType::TYPE_VARCHAR: |
133 | 0 | return _fill_string_column(address, data_column, num_rows); |
134 | 0 | case PrimitiveType::TYPE_ARRAY: |
135 | 0 | return _fill_array_column(address, data_column, data_type, num_rows); |
136 | 0 | case PrimitiveType::TYPE_MAP: |
137 | 0 | return _fill_map_column(address, data_column, data_type, num_rows); |
138 | 0 | case PrimitiveType::TYPE_STRUCT: |
139 | 0 | return _fill_struct_column(address, data_column, data_type, num_rows); |
140 | 0 | case PrimitiveType::TYPE_VARBINARY: |
141 | 0 | return _fill_varbinary_column(address, data_column, num_rows); |
142 | 0 | default: |
143 | 0 | return Status::InvalidArgument("Unsupported type {} in jni scanner", data_type->get_name()); |
144 | 0 | } |
145 | 0 | return Status::OK(); |
146 | 0 | } |
147 | | |
148 | | Status JniDataBridge::_fill_varbinary_column(TableMetaAddress& address, |
149 | 0 | MutableColumnPtr& doris_column, size_t num_rows) { |
150 | 0 | auto* meta_base = reinterpret_cast<char*>(address.next_meta_as_ptr()); |
151 | 0 | auto& varbinary_col = assert_cast<ColumnVarbinary&>(*doris_column); |
152 | | // Java side writes per-row metadata as 16 bytes: [len: long][addr: long] |
153 | 0 | for (size_t i = 0; i < num_rows; ++i) { |
154 | | // Read length (first 8 bytes) |
155 | 0 | int64_t len = 0; |
156 | 0 | memcpy(&len, meta_base + 16 * i, sizeof(len)); |
157 | 0 | if (len <= 0) { |
158 | 0 | varbinary_col.insert_default(); |
159 | 0 | } else { |
160 | | // Read address (next 8 bytes) |
161 | 0 | uint64_t addr_u = 0; |
162 | 0 | memcpy(&addr_u, meta_base + 16 * i + 8, sizeof(addr_u)); |
163 | 0 | const char* src = reinterpret_cast<const char*>(addr_u); |
164 | 0 | varbinary_col.insert_data(src, static_cast<size_t>(len)); |
165 | 0 | } |
166 | 0 | } |
167 | 0 | return Status::OK(); |
168 | 0 | } |
169 | | |
170 | | Status JniDataBridge::_fill_string_column(TableMetaAddress& address, MutableColumnPtr& doris_column, |
171 | 0 | size_t num_rows) { |
172 | 0 | auto& string_col = static_cast<ColumnString&>(*doris_column); |
173 | 0 | ColumnString::Chars& string_chars = string_col.get_chars(); |
174 | 0 | ColumnString::Offsets& string_offsets = string_col.get_offsets(); |
175 | 0 | int* offsets = reinterpret_cast<int*>(address.next_meta_as_ptr()); |
176 | 0 | char* chars = reinterpret_cast<char*>(address.next_meta_as_ptr()); |
177 | | |
178 | | // This judgment is necessary, otherwise the following statement `offsets[num_rows - 1]` out of bounds |
179 | | // What's more, This judgment must be placed after `address.next_meta_as_ptr()` |
180 | | // because `address.next_meta_as_ptr` will make `address._meta_index` plus 1 |
181 | 0 | if (num_rows == 0) { |
182 | 0 | return Status::OK(); |
183 | 0 | } |
184 | | |
185 | 0 | size_t origin_chars_size = string_chars.size(); |
186 | 0 | string_chars.resize(origin_chars_size + offsets[num_rows - 1]); |
187 | 0 | memcpy(string_chars.data() + origin_chars_size, chars, offsets[num_rows - 1]); |
188 | |
|
189 | 0 | size_t origin_offsets_size = string_offsets.size(); |
190 | 0 | size_t start_offset = string_offsets[origin_offsets_size - 1]; |
191 | 0 | string_offsets.resize(origin_offsets_size + num_rows); |
192 | 0 | for (size_t i = 0; i < num_rows; ++i) { |
193 | 0 | string_offsets[origin_offsets_size + i] = |
194 | 0 | static_cast<unsigned int>(offsets[i] + start_offset); |
195 | 0 | } |
196 | 0 | return Status::OK(); |
197 | 0 | } |
198 | | |
199 | | Status JniDataBridge::_fill_array_column(TableMetaAddress& address, MutableColumnPtr& doris_column, |
200 | 0 | const DataTypePtr& data_type, size_t num_rows) { |
201 | 0 | ColumnPtr& element_column = static_cast<ColumnArray&>(*doris_column).get_data_ptr(); |
202 | 0 | const DataTypePtr& element_type = |
203 | 0 | (assert_cast<const DataTypeArray*>(remove_nullable(data_type).get())) |
204 | 0 | ->get_nested_type(); |
205 | 0 | ColumnArray::Offsets64& offsets_data = static_cast<ColumnArray&>(*doris_column).get_offsets(); |
206 | |
|
207 | 0 | int64_t* offsets = reinterpret_cast<int64_t*>(address.next_meta_as_ptr()); |
208 | 0 | size_t origin_size = offsets_data.size(); |
209 | 0 | offsets_data.resize(origin_size + num_rows); |
210 | 0 | size_t start_offset = offsets_data[origin_size - 1]; |
211 | 0 | for (size_t i = 0; i < num_rows; ++i) { |
212 | 0 | offsets_data[origin_size + i] = offsets[i] + start_offset; |
213 | 0 | } |
214 | |
|
215 | 0 | return fill_column(address, element_column, element_type, |
216 | 0 | offsets_data[origin_size + num_rows - 1] - start_offset); |
217 | 0 | } |
218 | | |
219 | | Status JniDataBridge::_fill_map_column(TableMetaAddress& address, MutableColumnPtr& doris_column, |
220 | 0 | const DataTypePtr& data_type, size_t num_rows) { |
221 | 0 | auto& map = static_cast<ColumnMap&>(*doris_column); |
222 | 0 | const DataTypePtr& key_type = |
223 | 0 | reinterpret_cast<const DataTypeMap*>(remove_nullable(data_type).get())->get_key_type(); |
224 | 0 | const DataTypePtr& value_type = |
225 | 0 | reinterpret_cast<const DataTypeMap*>(remove_nullable(data_type).get()) |
226 | 0 | ->get_value_type(); |
227 | 0 | ColumnPtr& key_column = map.get_keys_ptr(); |
228 | 0 | ColumnPtr& value_column = map.get_values_ptr(); |
229 | 0 | ColumnArray::Offsets64& map_offsets = map.get_offsets(); |
230 | |
|
231 | 0 | int64_t* offsets = reinterpret_cast<int64_t*>(address.next_meta_as_ptr()); |
232 | 0 | size_t origin_size = map_offsets.size(); |
233 | 0 | map_offsets.resize(origin_size + num_rows); |
234 | 0 | size_t start_offset = map_offsets[origin_size - 1]; |
235 | 0 | for (size_t i = 0; i < num_rows; ++i) { |
236 | 0 | map_offsets[origin_size + i] = offsets[i] + start_offset; |
237 | 0 | } |
238 | |
|
239 | 0 | RETURN_IF_ERROR(fill_column(address, key_column, key_type, |
240 | 0 | map_offsets[origin_size + num_rows - 1] - start_offset)); |
241 | 0 | RETURN_IF_ERROR(fill_column(address, value_column, value_type, |
242 | 0 | map_offsets[origin_size + num_rows - 1] - start_offset)); |
243 | 0 | return Status::OK(); |
244 | 0 | } |
245 | | |
246 | | Status JniDataBridge::_fill_struct_column(TableMetaAddress& address, MutableColumnPtr& doris_column, |
247 | 0 | const DataTypePtr& data_type, size_t num_rows) { |
248 | 0 | auto& doris_struct = static_cast<ColumnStruct&>(*doris_column); |
249 | 0 | const DataTypeStruct* doris_struct_type = |
250 | 0 | reinterpret_cast<const DataTypeStruct*>(remove_nullable(data_type).get()); |
251 | 0 | for (int i = 0; i < doris_struct.tuple_size(); ++i) { |
252 | 0 | ColumnPtr& struct_field = doris_struct.get_column_ptr(i); |
253 | 0 | const DataTypePtr& field_type = doris_struct_type->get_element(i); |
254 | 0 | RETURN_IF_ERROR(fill_column(address, struct_field, field_type, num_rows)); |
255 | 0 | } |
256 | 0 | return Status::OK(); |
257 | 0 | } |
258 | | |
259 | 0 | std::string JniDataBridge::get_jni_type(const DataTypePtr& data_type) { |
260 | 0 | DataTypePtr type = remove_nullable(data_type); |
261 | 0 | std::ostringstream buffer; |
262 | 0 | switch (type->get_primitive_type()) { |
263 | 0 | case TYPE_BOOLEAN: |
264 | 0 | return "boolean"; |
265 | 0 | case TYPE_TINYINT: |
266 | 0 | return "tinyint"; |
267 | 0 | case TYPE_SMALLINT: |
268 | 0 | return "smallint"; |
269 | 0 | case TYPE_INT: |
270 | 0 | return "int"; |
271 | 0 | case TYPE_BIGINT: |
272 | 0 | return "bigint"; |
273 | 0 | case TYPE_LARGEINT: |
274 | 0 | return "largeint"; |
275 | 0 | case TYPE_FLOAT: |
276 | 0 | return "float"; |
277 | 0 | case TYPE_DOUBLE: |
278 | 0 | return "double"; |
279 | 0 | case TYPE_IPV4: |
280 | 0 | return "ipv4"; |
281 | 0 | case TYPE_IPV6: |
282 | 0 | return "ipv6"; |
283 | 0 | case TYPE_VARCHAR: |
284 | 0 | [[fallthrough]]; |
285 | 0 | case TYPE_CHAR: |
286 | 0 | [[fallthrough]]; |
287 | 0 | case TYPE_STRING: |
288 | 0 | return "string"; |
289 | 0 | case TYPE_DATE: |
290 | 0 | return "datev1"; |
291 | 0 | case TYPE_DATEV2: |
292 | 0 | return "datev2"; |
293 | 0 | case TYPE_DATETIME: |
294 | 0 | return "datetimev1"; |
295 | 0 | case TYPE_DATETIMEV2: |
296 | 0 | [[fallthrough]]; |
297 | 0 | case TYPE_TIMEV2: { |
298 | 0 | buffer << "datetimev2(" << type->get_scale() << ")"; |
299 | 0 | return buffer.str(); |
300 | 0 | } |
301 | 0 | case TYPE_TIMESTAMPTZ: { |
302 | 0 | buffer << "timestamptz(" << type->get_scale() << ")"; |
303 | 0 | return buffer.str(); |
304 | 0 | } |
305 | 0 | case TYPE_BINARY: |
306 | 0 | return "binary"; |
307 | 0 | case TYPE_DECIMALV2: { |
308 | 0 | buffer << "decimalv2(" << DecimalV2Value::PRECISION << "," << DecimalV2Value::SCALE << ")"; |
309 | 0 | return buffer.str(); |
310 | 0 | } |
311 | 0 | case TYPE_DECIMAL32: { |
312 | 0 | buffer << "decimal32(" << type->get_precision() << "," << type->get_scale() << ")"; |
313 | 0 | return buffer.str(); |
314 | 0 | } |
315 | 0 | case TYPE_DECIMAL64: { |
316 | 0 | buffer << "decimal64(" << type->get_precision() << "," << type->get_scale() << ")"; |
317 | 0 | return buffer.str(); |
318 | 0 | } |
319 | 0 | case TYPE_DECIMAL128I: { |
320 | 0 | buffer << "decimal128(" << type->get_precision() << "," << type->get_scale() << ")"; |
321 | 0 | return buffer.str(); |
322 | 0 | } |
323 | 0 | case TYPE_STRUCT: { |
324 | 0 | const DataTypeStruct* struct_type = reinterpret_cast<const DataTypeStruct*>(type.get()); |
325 | 0 | buffer << "struct<"; |
326 | 0 | for (int i = 0; i < struct_type->get_elements().size(); ++i) { |
327 | 0 | if (i != 0) { |
328 | 0 | buffer << ","; |
329 | 0 | } |
330 | 0 | buffer << struct_type->get_element_names()[i] << ":" |
331 | 0 | << get_jni_type(struct_type->get_element(i)); |
332 | 0 | } |
333 | 0 | buffer << ">"; |
334 | 0 | return buffer.str(); |
335 | 0 | } |
336 | 0 | case TYPE_ARRAY: { |
337 | 0 | const DataTypeArray* array_type = reinterpret_cast<const DataTypeArray*>(type.get()); |
338 | 0 | buffer << "array<" << get_jni_type(array_type->get_nested_type()) << ">"; |
339 | 0 | return buffer.str(); |
340 | 0 | } |
341 | 0 | case TYPE_MAP: { |
342 | 0 | const DataTypeMap* map_type = reinterpret_cast<const DataTypeMap*>(type.get()); |
343 | 0 | buffer << "map<" << get_jni_type(map_type->get_key_type()) << "," |
344 | 0 | << get_jni_type(map_type->get_value_type()) << ">"; |
345 | 0 | return buffer.str(); |
346 | 0 | } |
347 | 0 | case TYPE_VARBINARY: |
348 | 0 | return "varbinary"; |
349 | | // bitmap, hll, quantile_state, jsonb are transferred as strings via JNI |
350 | 0 | case TYPE_BITMAP: |
351 | 0 | [[fallthrough]]; |
352 | 0 | case TYPE_HLL: |
353 | 0 | [[fallthrough]]; |
354 | 0 | case TYPE_QUANTILE_STATE: |
355 | 0 | [[fallthrough]]; |
356 | 0 | case TYPE_JSONB: |
357 | 0 | return "string"; |
358 | 0 | default: |
359 | 0 | return "unsupported"; |
360 | 0 | } |
361 | 0 | } |
362 | | |
363 | 0 | std::string JniDataBridge::get_jni_type_with_different_string(const DataTypePtr& data_type) { |
364 | 0 | DataTypePtr type = remove_nullable(data_type); |
365 | 0 | std::ostringstream buffer; |
366 | 0 | switch (data_type->get_primitive_type()) { |
367 | 0 | case TYPE_BOOLEAN: |
368 | 0 | return "boolean"; |
369 | 0 | case TYPE_TINYINT: |
370 | 0 | return "tinyint"; |
371 | 0 | case TYPE_SMALLINT: |
372 | 0 | return "smallint"; |
373 | 0 | case TYPE_INT: |
374 | 0 | return "int"; |
375 | 0 | case TYPE_BIGINT: |
376 | 0 | return "bigint"; |
377 | 0 | case TYPE_LARGEINT: |
378 | 0 | return "largeint"; |
379 | 0 | case TYPE_FLOAT: |
380 | 0 | return "float"; |
381 | 0 | case TYPE_DOUBLE: |
382 | 0 | return "double"; |
383 | 0 | case TYPE_IPV4: |
384 | 0 | return "ipv4"; |
385 | 0 | case TYPE_IPV6: |
386 | 0 | return "ipv6"; |
387 | 0 | case TYPE_VARCHAR: { |
388 | 0 | buffer << "varchar(" |
389 | 0 | << assert_cast<const DataTypeString*>(remove_nullable(data_type).get())->len() |
390 | 0 | << ")"; |
391 | 0 | return buffer.str(); |
392 | 0 | } |
393 | 0 | case TYPE_DATE: |
394 | 0 | return "datev1"; |
395 | 0 | case TYPE_DATEV2: |
396 | 0 | return "datev2"; |
397 | 0 | case TYPE_DATETIME: |
398 | 0 | return "datetimev1"; |
399 | 0 | case TYPE_DATETIMEV2: |
400 | 0 | [[fallthrough]]; |
401 | 0 | case TYPE_TIMEV2: { |
402 | 0 | buffer << "datetimev2(" << data_type->get_scale() << ")"; |
403 | 0 | return buffer.str(); |
404 | 0 | } |
405 | 0 | case TYPE_TIMESTAMPTZ: { |
406 | 0 | buffer << "timestamptz(" << data_type->get_scale() << ")"; |
407 | 0 | return buffer.str(); |
408 | 0 | } |
409 | 0 | case TYPE_BINARY: |
410 | 0 | return "binary"; |
411 | 0 | case TYPE_CHAR: { |
412 | 0 | buffer << "char(" |
413 | 0 | << assert_cast<const DataTypeString*>(remove_nullable(data_type).get())->len() |
414 | 0 | << ")"; |
415 | 0 | return buffer.str(); |
416 | 0 | } |
417 | 0 | case TYPE_STRING: |
418 | 0 | return "string"; |
419 | 0 | case TYPE_VARBINARY: |
420 | 0 | buffer << "varbinary(" |
421 | 0 | << assert_cast<const DataTypeVarbinary*>(remove_nullable(data_type).get())->len() |
422 | 0 | << ")"; |
423 | 0 | return buffer.str(); |
424 | 0 | case TYPE_DECIMALV2: { |
425 | 0 | buffer << "decimalv2(" << DecimalV2Value::PRECISION << "," << DecimalV2Value::SCALE << ")"; |
426 | 0 | return buffer.str(); |
427 | 0 | } |
428 | 0 | case TYPE_DECIMAL32: { |
429 | 0 | buffer << "decimal32(" << data_type->get_precision() << "," << data_type->get_scale() |
430 | 0 | << ")"; |
431 | 0 | return buffer.str(); |
432 | 0 | } |
433 | 0 | case TYPE_DECIMAL64: { |
434 | 0 | buffer << "decimal64(" << data_type->get_precision() << "," << data_type->get_scale() |
435 | 0 | << ")"; |
436 | 0 | return buffer.str(); |
437 | 0 | } |
438 | 0 | case TYPE_DECIMAL128I: { |
439 | 0 | buffer << "decimal128(" << data_type->get_precision() << "," << data_type->get_scale() |
440 | 0 | << ")"; |
441 | 0 | return buffer.str(); |
442 | 0 | } |
443 | 0 | case TYPE_STRUCT: { |
444 | 0 | const auto* type_struct = |
445 | 0 | assert_cast<const DataTypeStruct*>(remove_nullable(data_type).get()); |
446 | 0 | buffer << "struct<"; |
447 | 0 | for (int i = 0; i < type_struct->get_elements().size(); ++i) { |
448 | 0 | if (i != 0) { |
449 | 0 | buffer << ","; |
450 | 0 | } |
451 | 0 | buffer << type_struct->get_element_name(i) << ":" |
452 | 0 | << get_jni_type_with_different_string(type_struct->get_element(i)); |
453 | 0 | } |
454 | 0 | buffer << ">"; |
455 | 0 | return buffer.str(); |
456 | 0 | } |
457 | 0 | case TYPE_ARRAY: { |
458 | 0 | const auto* type_arr = assert_cast<const DataTypeArray*>(remove_nullable(data_type).get()); |
459 | 0 | buffer << "array<" << get_jni_type_with_different_string(type_arr->get_nested_type()) |
460 | 0 | << ">"; |
461 | 0 | return buffer.str(); |
462 | 0 | } |
463 | 0 | case TYPE_MAP: { |
464 | 0 | const auto* type_map = assert_cast<const DataTypeMap*>(remove_nullable(data_type).get()); |
465 | 0 | buffer << "map<" << get_jni_type_with_different_string(type_map->get_key_type()) << "," |
466 | 0 | << get_jni_type_with_different_string(type_map->get_value_type()) << ">"; |
467 | 0 | return buffer.str(); |
468 | 0 | } |
469 | | // bitmap, hll, quantile_state, jsonb are transferred as strings via JNI |
470 | 0 | case TYPE_BITMAP: |
471 | 0 | [[fallthrough]]; |
472 | 0 | case TYPE_HLL: |
473 | 0 | [[fallthrough]]; |
474 | 0 | case TYPE_QUANTILE_STATE: |
475 | 0 | [[fallthrough]]; |
476 | 0 | case TYPE_JSONB: |
477 | 0 | return "string"; |
478 | 0 | default: |
479 | 0 | return "unsupported"; |
480 | 0 | } |
481 | 0 | } |
482 | | |
483 | | Status JniDataBridge::_fill_column_meta(const ColumnPtr& doris_column, const DataTypePtr& data_type, |
484 | 0 | std::vector<long>& meta_data) { |
485 | 0 | auto logical_type = data_type->get_primitive_type(); |
486 | 0 | const IColumn* column = nullptr; |
487 | | // insert const flag |
488 | 0 | if (is_column_const(*doris_column)) { |
489 | 0 | meta_data.emplace_back((long)1); |
490 | 0 | const auto& const_column = assert_cast<const ColumnConst&>(*doris_column); |
491 | 0 | column = &(const_column.get_data_column()); |
492 | 0 | } else { |
493 | 0 | meta_data.emplace_back((long)0); |
494 | 0 | column = &(*doris_column); |
495 | 0 | } |
496 | | |
497 | | // insert null map address |
498 | 0 | const IColumn* data_column = nullptr; |
499 | 0 | if (column->is_nullable()) { |
500 | 0 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*column); |
501 | 0 | data_column = &(nullable_column.get_nested_column()); |
502 | 0 | const auto& null_map = nullable_column.get_null_map_data(); |
503 | 0 | meta_data.emplace_back((long)null_map.data()); |
504 | 0 | } else { |
505 | 0 | meta_data.emplace_back(0); |
506 | 0 | data_column = column; |
507 | 0 | } |
508 | 0 | switch (logical_type) { |
509 | 0 | #define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE) \ |
510 | 0 | case TYPE_INDEX: { \ |
511 | 0 | meta_data.emplace_back(_get_fixed_length_column_address<COLUMN_TYPE>(*data_column)); \ |
512 | 0 | break; \ |
513 | 0 | } |
514 | 0 | FOR_FIXED_LENGTH_TYPES(DISPATCH) |
515 | 0 | #undef DISPATCH |
516 | 0 | case PrimitiveType::TYPE_STRING: |
517 | 0 | [[fallthrough]]; |
518 | 0 | case PrimitiveType::TYPE_CHAR: |
519 | 0 | [[fallthrough]]; |
520 | 0 | case PrimitiveType::TYPE_VARCHAR: { |
521 | 0 | const auto& string_column = assert_cast<const ColumnString&>(*data_column); |
522 | | // insert offsets |
523 | 0 | meta_data.emplace_back((long)string_column.get_offsets().data()); |
524 | 0 | meta_data.emplace_back((long)string_column.get_chars().data()); |
525 | 0 | break; |
526 | 0 | } |
527 | 0 | case PrimitiveType::TYPE_ARRAY: { |
528 | 0 | const auto& element_column = assert_cast<const ColumnArray&>(*data_column).get_data_ptr(); |
529 | 0 | meta_data.emplace_back( |
530 | 0 | (long)assert_cast<const ColumnArray&>(*data_column).get_offsets().data()); |
531 | 0 | const auto& element_type = assert_cast<const DataTypePtr&>( |
532 | 0 | (assert_cast<const DataTypeArray*>(remove_nullable(data_type).get())) |
533 | 0 | ->get_nested_type()); |
534 | 0 | RETURN_IF_ERROR(_fill_column_meta(element_column, element_type, meta_data)); |
535 | 0 | break; |
536 | 0 | } |
537 | 0 | case PrimitiveType::TYPE_STRUCT: { |
538 | 0 | const auto& doris_struct = assert_cast<const ColumnStruct&>(*data_column); |
539 | 0 | const auto* doris_struct_type = |
540 | 0 | assert_cast<const DataTypeStruct*>(remove_nullable(data_type).get()); |
541 | 0 | for (int i = 0; i < doris_struct.tuple_size(); ++i) { |
542 | 0 | const auto& struct_field = doris_struct.get_column_ptr(i); |
543 | 0 | const auto& field_type = |
544 | 0 | assert_cast<const DataTypePtr&>(doris_struct_type->get_element(i)); |
545 | 0 | RETURN_IF_ERROR(_fill_column_meta(struct_field, field_type, meta_data)); |
546 | 0 | } |
547 | 0 | break; |
548 | 0 | } |
549 | 0 | case PrimitiveType::TYPE_MAP: { |
550 | 0 | const auto& map = assert_cast<const ColumnMap&>(*data_column); |
551 | 0 | const auto& key_type = assert_cast<const DataTypePtr&>( |
552 | 0 | assert_cast<const DataTypeMap*>(remove_nullable(data_type).get())->get_key_type()); |
553 | 0 | const auto& value_type = assert_cast<const DataTypePtr&>( |
554 | 0 | assert_cast<const DataTypeMap*>(remove_nullable(data_type).get()) |
555 | 0 | ->get_value_type()); |
556 | 0 | const auto& key_column = map.get_keys_ptr(); |
557 | 0 | const auto& value_column = map.get_values_ptr(); |
558 | 0 | meta_data.emplace_back((long)map.get_offsets().data()); |
559 | 0 | RETURN_IF_ERROR(_fill_column_meta(key_column, key_type, meta_data)); |
560 | 0 | RETURN_IF_ERROR(_fill_column_meta(value_column, value_type, meta_data)); |
561 | 0 | break; |
562 | 0 | } |
563 | 0 | case PrimitiveType::TYPE_VARBINARY: { |
564 | 0 | const auto& varbinary_col = assert_cast<const ColumnVarbinary&>(*data_column); |
565 | 0 | meta_data.emplace_back( |
566 | 0 | (long)assert_cast<const ColumnVarbinary&>(varbinary_col).get_data().data()); |
567 | 0 | break; |
568 | 0 | } |
569 | 0 | default: |
570 | 0 | return Status::InternalError("Unsupported type: {}", data_type->get_name()); |
571 | 0 | } |
572 | 0 | return Status::OK(); |
573 | 0 | } |
574 | | |
575 | 0 | Status JniDataBridge::to_java_table(Block* block, std::unique_ptr<long[]>& meta) { |
576 | 0 | ColumnNumbers arguments; |
577 | 0 | for (size_t i = 0; i < block->columns(); ++i) { |
578 | 0 | arguments.emplace_back(i); |
579 | 0 | } |
580 | 0 | return to_java_table(block, block->rows(), arguments, meta); |
581 | 0 | } |
582 | | |
583 | | Status JniDataBridge::to_java_table(Block* block, size_t num_rows, const ColumnNumbers& arguments, |
584 | 0 | std::unique_ptr<long[]>& meta) { |
585 | 0 | std::vector<long> meta_data; |
586 | | // insert number of rows |
587 | 0 | meta_data.emplace_back(num_rows); |
588 | 0 | for (size_t i : arguments) { |
589 | 0 | auto& column_with_type_and_name = block->get_by_position(i); |
590 | 0 | RETURN_IF_ERROR(_fill_column_meta(column_with_type_and_name.column, |
591 | 0 | column_with_type_and_name.type, meta_data)); |
592 | 0 | } |
593 | | |
594 | 0 | meta.reset(new long[meta_data.size()]); |
595 | 0 | memcpy(meta.get(), &meta_data[0], meta_data.size() * 8); |
596 | 0 | return Status::OK(); |
597 | 0 | } |
598 | | |
599 | | std::pair<std::string, std::string> JniDataBridge::parse_table_schema( |
600 | 0 | Block* block, const ColumnNumbers& arguments, bool ignore_column_name) { |
601 | | // prepare table schema |
602 | 0 | std::ostringstream required_fields; |
603 | 0 | std::ostringstream columns_types; |
604 | 0 | for (int i = 0; i < arguments.size(); ++i) { |
605 | 0 | std::string type = JniDataBridge::get_jni_type(block->get_by_position(arguments[i]).type); |
606 | 0 | if (i == 0) { |
607 | 0 | if (ignore_column_name) { |
608 | 0 | required_fields << "_col_" << arguments[i]; |
609 | 0 | } else { |
610 | 0 | required_fields << block->get_by_position(arguments[i]).name; |
611 | 0 | } |
612 | 0 | columns_types << type; |
613 | 0 | } else { |
614 | 0 | if (ignore_column_name) { |
615 | 0 | required_fields << "," |
616 | 0 | << "_col_" << arguments[i]; |
617 | 0 | } else { |
618 | 0 | required_fields << "," << block->get_by_position(arguments[i]).name; |
619 | 0 | } |
620 | 0 | columns_types << "#" << type; |
621 | 0 | } |
622 | 0 | } |
623 | 0 | return std::make_pair(required_fields.str(), columns_types.str()); |
624 | 0 | } |
625 | | |
626 | 0 | std::pair<std::string, std::string> JniDataBridge::parse_table_schema(Block* block) { |
627 | 0 | ColumnNumbers arguments; |
628 | 0 | for (size_t i = 0; i < block->columns(); ++i) { |
629 | 0 | arguments.emplace_back(i); |
630 | 0 | } |
631 | 0 | return parse_table_schema(block, arguments, true); |
632 | 0 | } |
633 | | |
634 | | } // namespace doris |