be/src/format/jni/jni_data_bridge.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "jni_data_bridge.h" |
19 | | |
20 | | #include <glog/logging.h> |
21 | | |
22 | | #include <sstream> |
23 | | #include <variant> |
24 | | |
25 | | #include "core/block/block.h" |
26 | | #include "core/column/column_array.h" |
27 | | #include "core/column/column_map.h" |
28 | | #include "core/column/column_nullable.h" |
29 | | #include "core/column/column_string.h" |
30 | | #include "core/column/column_struct.h" |
31 | | #include "core/column/column_varbinary.h" |
32 | | #include "core/data_type/data_type_array.h" |
33 | | #include "core/data_type/data_type_map.h" |
34 | | #include "core/data_type/data_type_nullable.h" |
35 | | #include "core/data_type/data_type_struct.h" |
36 | | #include "core/data_type/data_type_varbinary.h" |
37 | | #include "core/data_type/define_primitive_type.h" |
38 | | #include "core/data_type/primitive_type.h" |
39 | | #include "core/types.h" |
40 | | #include "core/value/decimalv2_value.h" |
41 | | |
42 | | namespace doris { |
43 | | #include "common/compile_check_begin.h" |
44 | | |
// X-macro enumerating every fixed-width primitive type this bridge can copy.
// Each entry is M(primitive type tag, column class, element C++ type). The
// DISPATCH macros defined inside the switch statements of this file are passed
// as M to generate one `case` per entry.
#define FOR_FIXED_LENGTH_TYPES(M)                                   \
    M(PrimitiveType::TYPE_TINYINT,     ColumnInt8,         Int8)    \
    M(PrimitiveType::TYPE_BOOLEAN,     ColumnUInt8,        UInt8)   \
    M(PrimitiveType::TYPE_SMALLINT,    ColumnInt16,        Int16)   \
    M(PrimitiveType::TYPE_INT,         ColumnInt32,        Int32)   \
    M(PrimitiveType::TYPE_BIGINT,      ColumnInt64,        Int64)   \
    M(PrimitiveType::TYPE_LARGEINT,    ColumnInt128,       Int128)  \
    M(PrimitiveType::TYPE_FLOAT,       ColumnFloat32,      Float32) \
    M(PrimitiveType::TYPE_DOUBLE,      ColumnFloat64,      Float64) \
    M(PrimitiveType::TYPE_DECIMALV2,   ColumnDecimal128V2, Int128)  \
    M(PrimitiveType::TYPE_DECIMAL128I, ColumnDecimal128V3, Int128)  \
    M(PrimitiveType::TYPE_DECIMAL32,   ColumnDecimal32,    Int32)   \
    M(PrimitiveType::TYPE_DECIMAL64,   ColumnDecimal64,    Int64)   \
    M(PrimitiveType::TYPE_DATE,        ColumnDate,         Int64)   \
    M(PrimitiveType::TYPE_DATEV2,      ColumnDateV2,       UInt32)  \
    M(PrimitiveType::TYPE_DATETIME,    ColumnDateTime,     Int64)   \
    M(PrimitiveType::TYPE_DATETIMEV2,  ColumnDateTimeV2,   UInt64)  \
    M(PrimitiveType::TYPE_TIMESTAMPTZ, ColumnTimeStampTz,  UInt64)  \
    M(PrimitiveType::TYPE_IPV4,        ColumnIPv4,         IPv4)    \
    M(PrimitiveType::TYPE_IPV6,        ColumnIPv6,         IPv6)
65 | | |
66 | 0 | Status JniDataBridge::fill_block(Block* block, const ColumnNumbers& arguments, long table_address) { |
67 | 0 | if (table_address == 0) { |
68 | 0 | return Status::InternalError("table_address is 0"); |
69 | 0 | } |
70 | 0 | TableMetaAddress table_meta(table_address); |
71 | 0 | long num_rows = table_meta.next_meta_as_long(); |
72 | 0 | for (size_t i : arguments) { |
73 | 0 | if (block->get_by_position(i).column.get() == nullptr) { |
74 | 0 | auto return_type = block->get_data_type(i); |
75 | 0 | bool result_nullable = return_type->is_nullable(); |
76 | 0 | ColumnUInt8::MutablePtr null_col = nullptr; |
77 | 0 | if (result_nullable) { |
78 | 0 | return_type = remove_nullable(return_type); |
79 | 0 | null_col = ColumnUInt8::create(); |
80 | 0 | } |
81 | 0 | auto res_col = return_type->create_column(); |
82 | 0 | if (result_nullable) { |
83 | 0 | block->replace_by_position( |
84 | 0 | i, ColumnNullable::create(std::move(res_col), std::move(null_col))); |
85 | 0 | } else { |
86 | 0 | block->replace_by_position(i, std::move(res_col)); |
87 | 0 | } |
88 | 0 | } else if (is_column_const(*(block->get_by_position(i).column))) { |
89 | 0 | auto doris_column = block->get_by_position(i).column->convert_to_full_column_if_const(); |
90 | 0 | bool is_nullable = block->get_by_position(i).type->is_nullable(); |
91 | 0 | block->replace_by_position(i, is_nullable ? make_nullable(doris_column) : doris_column); |
92 | 0 | } |
93 | 0 | auto& column_with_type_and_name = block->get_by_position(i); |
94 | 0 | auto& column_ptr = column_with_type_and_name.column; |
95 | 0 | auto& column_type = column_with_type_and_name.type; |
96 | 0 | RETURN_IF_ERROR(fill_column(table_meta, column_ptr, column_type, num_rows)); |
97 | 0 | } |
98 | 0 | return Status::OK(); |
99 | 0 | } |
100 | | |
101 | | Status JniDataBridge::fill_column(TableMetaAddress& address, ColumnPtr& doris_column, |
102 | 0 | const DataTypePtr& data_type, size_t num_rows) { |
103 | 0 | auto logical_type = data_type->get_primitive_type(); |
104 | 0 | void* null_map_ptr = address.next_meta_as_ptr(); |
105 | 0 | if (null_map_ptr == nullptr) { |
106 | | // org.apache.doris.common.jni.vec.ColumnType.Type#UNSUPPORTED will set column address as 0 |
107 | 0 | return Status::InternalError("Unsupported type {} in java side", data_type->get_name()); |
108 | 0 | } |
109 | 0 | MutableColumnPtr data_column; |
110 | 0 | if (doris_column->is_nullable()) { |
111 | 0 | auto* nullable_column = |
112 | 0 | reinterpret_cast<ColumnNullable*>(doris_column->assume_mutable().get()); |
113 | 0 | data_column = nullable_column->get_nested_column_ptr(); |
114 | 0 | NullMap& null_map = nullable_column->get_null_map_data(); |
115 | 0 | size_t origin_size = null_map.size(); |
116 | 0 | null_map.resize(origin_size + num_rows); |
117 | 0 | memcpy(null_map.data() + origin_size, static_cast<bool*>(null_map_ptr), num_rows); |
118 | 0 | } else { |
119 | 0 | data_column = doris_column->assume_mutable(); |
120 | 0 | } |
121 | | // Date and DateTime are deprecated and not supported. |
122 | 0 | switch (logical_type) { |
123 | 0 | #define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE) \ |
124 | 0 | case TYPE_INDEX: \ |
125 | 0 | return _fill_fixed_length_column<COLUMN_TYPE, CPP_TYPE>( \ |
126 | 0 | data_column, reinterpret_cast<CPP_TYPE*>(address.next_meta_as_ptr()), num_rows); |
127 | 0 | FOR_FIXED_LENGTH_TYPES(DISPATCH) |
128 | 0 | #undef DISPATCH |
129 | 0 | case PrimitiveType::TYPE_STRING: |
130 | 0 | [[fallthrough]]; |
131 | 0 | case PrimitiveType::TYPE_CHAR: |
132 | 0 | [[fallthrough]]; |
133 | 0 | case PrimitiveType::TYPE_VARCHAR: |
134 | 0 | return _fill_string_column(address, data_column, num_rows); |
135 | 0 | case PrimitiveType::TYPE_ARRAY: |
136 | 0 | return _fill_array_column(address, data_column, data_type, num_rows); |
137 | 0 | case PrimitiveType::TYPE_MAP: |
138 | 0 | return _fill_map_column(address, data_column, data_type, num_rows); |
139 | 0 | case PrimitiveType::TYPE_STRUCT: |
140 | 0 | return _fill_struct_column(address, data_column, data_type, num_rows); |
141 | 0 | case PrimitiveType::TYPE_VARBINARY: |
142 | 0 | return _fill_varbinary_column(address, data_column, num_rows); |
143 | 0 | default: |
144 | 0 | return Status::InvalidArgument("Unsupported type {} in jni scanner", data_type->get_name()); |
145 | 0 | } |
146 | 0 | return Status::OK(); |
147 | 0 | } |
148 | | |
149 | | Status JniDataBridge::_fill_varbinary_column(TableMetaAddress& address, |
150 | 0 | MutableColumnPtr& doris_column, size_t num_rows) { |
151 | 0 | auto* meta_base = reinterpret_cast<char*>(address.next_meta_as_ptr()); |
152 | 0 | auto& varbinary_col = assert_cast<ColumnVarbinary&>(*doris_column); |
153 | | // Java side writes per-row metadata as 16 bytes: [len: long][addr: long] |
154 | 0 | for (size_t i = 0; i < num_rows; ++i) { |
155 | | // Read length (first 8 bytes) |
156 | 0 | int64_t len = 0; |
157 | 0 | memcpy(&len, meta_base + 16 * i, sizeof(len)); |
158 | 0 | if (len <= 0) { |
159 | 0 | varbinary_col.insert_default(); |
160 | 0 | } else { |
161 | | // Read address (next 8 bytes) |
162 | 0 | uint64_t addr_u = 0; |
163 | 0 | memcpy(&addr_u, meta_base + 16 * i + 8, sizeof(addr_u)); |
164 | 0 | const char* src = reinterpret_cast<const char*>(addr_u); |
165 | 0 | varbinary_col.insert_data(src, static_cast<size_t>(len)); |
166 | 0 | } |
167 | 0 | } |
168 | 0 | return Status::OK(); |
169 | 0 | } |
170 | | |
171 | | Status JniDataBridge::_fill_string_column(TableMetaAddress& address, MutableColumnPtr& doris_column, |
172 | 0 | size_t num_rows) { |
173 | 0 | auto& string_col = static_cast<ColumnString&>(*doris_column); |
174 | 0 | ColumnString::Chars& string_chars = string_col.get_chars(); |
175 | 0 | ColumnString::Offsets& string_offsets = string_col.get_offsets(); |
176 | 0 | int* offsets = reinterpret_cast<int*>(address.next_meta_as_ptr()); |
177 | 0 | char* chars = reinterpret_cast<char*>(address.next_meta_as_ptr()); |
178 | | |
179 | | // This judgment is necessary, otherwise the following statement `offsets[num_rows - 1]` out of bounds |
180 | | // What's more, This judgment must be placed after `address.next_meta_as_ptr()` |
181 | | // because `address.next_meta_as_ptr` will make `address._meta_index` plus 1 |
182 | 0 | if (num_rows == 0) { |
183 | 0 | return Status::OK(); |
184 | 0 | } |
185 | | |
186 | 0 | size_t origin_chars_size = string_chars.size(); |
187 | 0 | string_chars.resize(origin_chars_size + offsets[num_rows - 1]); |
188 | 0 | memcpy(string_chars.data() + origin_chars_size, chars, offsets[num_rows - 1]); |
189 | |
|
190 | 0 | size_t origin_offsets_size = string_offsets.size(); |
191 | 0 | size_t start_offset = string_offsets[origin_offsets_size - 1]; |
192 | 0 | string_offsets.resize(origin_offsets_size + num_rows); |
193 | 0 | for (size_t i = 0; i < num_rows; ++i) { |
194 | 0 | string_offsets[origin_offsets_size + i] = |
195 | 0 | static_cast<unsigned int>(offsets[i] + start_offset); |
196 | 0 | } |
197 | 0 | return Status::OK(); |
198 | 0 | } |
199 | | |
200 | | Status JniDataBridge::_fill_array_column(TableMetaAddress& address, MutableColumnPtr& doris_column, |
201 | 0 | const DataTypePtr& data_type, size_t num_rows) { |
202 | 0 | ColumnPtr& element_column = static_cast<ColumnArray&>(*doris_column).get_data_ptr(); |
203 | 0 | const DataTypePtr& element_type = |
204 | 0 | (assert_cast<const DataTypeArray*>(remove_nullable(data_type).get())) |
205 | 0 | ->get_nested_type(); |
206 | 0 | ColumnArray::Offsets64& offsets_data = static_cast<ColumnArray&>(*doris_column).get_offsets(); |
207 | |
|
208 | 0 | int64_t* offsets = reinterpret_cast<int64_t*>(address.next_meta_as_ptr()); |
209 | 0 | size_t origin_size = offsets_data.size(); |
210 | 0 | offsets_data.resize(origin_size + num_rows); |
211 | 0 | size_t start_offset = offsets_data[origin_size - 1]; |
212 | 0 | for (size_t i = 0; i < num_rows; ++i) { |
213 | 0 | offsets_data[origin_size + i] = offsets[i] + start_offset; |
214 | 0 | } |
215 | |
|
216 | 0 | return fill_column(address, element_column, element_type, |
217 | 0 | offsets_data[origin_size + num_rows - 1] - start_offset); |
218 | 0 | } |
219 | | |
220 | | Status JniDataBridge::_fill_map_column(TableMetaAddress& address, MutableColumnPtr& doris_column, |
221 | 0 | const DataTypePtr& data_type, size_t num_rows) { |
222 | 0 | auto& map = static_cast<ColumnMap&>(*doris_column); |
223 | 0 | const DataTypePtr& key_type = |
224 | 0 | reinterpret_cast<const DataTypeMap*>(remove_nullable(data_type).get())->get_key_type(); |
225 | 0 | const DataTypePtr& value_type = |
226 | 0 | reinterpret_cast<const DataTypeMap*>(remove_nullable(data_type).get()) |
227 | 0 | ->get_value_type(); |
228 | 0 | ColumnPtr& key_column = map.get_keys_ptr(); |
229 | 0 | ColumnPtr& value_column = map.get_values_ptr(); |
230 | 0 | ColumnArray::Offsets64& map_offsets = map.get_offsets(); |
231 | |
|
232 | 0 | int64_t* offsets = reinterpret_cast<int64_t*>(address.next_meta_as_ptr()); |
233 | 0 | size_t origin_size = map_offsets.size(); |
234 | 0 | map_offsets.resize(origin_size + num_rows); |
235 | 0 | size_t start_offset = map_offsets[origin_size - 1]; |
236 | 0 | for (size_t i = 0; i < num_rows; ++i) { |
237 | 0 | map_offsets[origin_size + i] = offsets[i] + start_offset; |
238 | 0 | } |
239 | |
|
240 | 0 | RETURN_IF_ERROR(fill_column(address, key_column, key_type, |
241 | 0 | map_offsets[origin_size + num_rows - 1] - start_offset)); |
242 | 0 | RETURN_IF_ERROR(fill_column(address, value_column, value_type, |
243 | 0 | map_offsets[origin_size + num_rows - 1] - start_offset)); |
244 | 0 | return Status::OK(); |
245 | 0 | } |
246 | | |
247 | | Status JniDataBridge::_fill_struct_column(TableMetaAddress& address, MutableColumnPtr& doris_column, |
248 | 0 | const DataTypePtr& data_type, size_t num_rows) { |
249 | 0 | auto& doris_struct = static_cast<ColumnStruct&>(*doris_column); |
250 | 0 | const DataTypeStruct* doris_struct_type = |
251 | 0 | reinterpret_cast<const DataTypeStruct*>(remove_nullable(data_type).get()); |
252 | 0 | for (int i = 0; i < doris_struct.tuple_size(); ++i) { |
253 | 0 | ColumnPtr& struct_field = doris_struct.get_column_ptr(i); |
254 | 0 | const DataTypePtr& field_type = doris_struct_type->get_element(i); |
255 | 0 | RETURN_IF_ERROR(fill_column(address, struct_field, field_type, num_rows)); |
256 | 0 | } |
257 | 0 | return Status::OK(); |
258 | 0 | } |
259 | | |
260 | 0 | std::string JniDataBridge::get_jni_type(const DataTypePtr& data_type) { |
261 | 0 | DataTypePtr type = remove_nullable(data_type); |
262 | 0 | std::ostringstream buffer; |
263 | 0 | switch (type->get_primitive_type()) { |
264 | 0 | case TYPE_BOOLEAN: |
265 | 0 | return "boolean"; |
266 | 0 | case TYPE_TINYINT: |
267 | 0 | return "tinyint"; |
268 | 0 | case TYPE_SMALLINT: |
269 | 0 | return "smallint"; |
270 | 0 | case TYPE_INT: |
271 | 0 | return "int"; |
272 | 0 | case TYPE_BIGINT: |
273 | 0 | return "bigint"; |
274 | 0 | case TYPE_LARGEINT: |
275 | 0 | return "largeint"; |
276 | 0 | case TYPE_FLOAT: |
277 | 0 | return "float"; |
278 | 0 | case TYPE_DOUBLE: |
279 | 0 | return "double"; |
280 | 0 | case TYPE_IPV4: |
281 | 0 | return "ipv4"; |
282 | 0 | case TYPE_IPV6: |
283 | 0 | return "ipv6"; |
284 | 0 | case TYPE_VARCHAR: |
285 | 0 | [[fallthrough]]; |
286 | 0 | case TYPE_CHAR: |
287 | 0 | [[fallthrough]]; |
288 | 0 | case TYPE_STRING: |
289 | 0 | return "string"; |
290 | 0 | case TYPE_DATE: |
291 | 0 | return "datev1"; |
292 | 0 | case TYPE_DATEV2: |
293 | 0 | return "datev2"; |
294 | 0 | case TYPE_DATETIME: |
295 | 0 | return "datetimev1"; |
296 | 0 | case TYPE_DATETIMEV2: |
297 | 0 | [[fallthrough]]; |
298 | 0 | case TYPE_TIMEV2: { |
299 | 0 | buffer << "datetimev2(" << type->get_scale() << ")"; |
300 | 0 | return buffer.str(); |
301 | 0 | } |
302 | 0 | case TYPE_TIMESTAMPTZ: { |
303 | 0 | buffer << "timestamptz(" << type->get_scale() << ")"; |
304 | 0 | return buffer.str(); |
305 | 0 | } |
306 | 0 | case TYPE_BINARY: |
307 | 0 | return "binary"; |
308 | 0 | case TYPE_DECIMALV2: { |
309 | 0 | buffer << "decimalv2(" << DecimalV2Value::PRECISION << "," << DecimalV2Value::SCALE << ")"; |
310 | 0 | return buffer.str(); |
311 | 0 | } |
312 | 0 | case TYPE_DECIMAL32: { |
313 | 0 | buffer << "decimal32(" << type->get_precision() << "," << type->get_scale() << ")"; |
314 | 0 | return buffer.str(); |
315 | 0 | } |
316 | 0 | case TYPE_DECIMAL64: { |
317 | 0 | buffer << "decimal64(" << type->get_precision() << "," << type->get_scale() << ")"; |
318 | 0 | return buffer.str(); |
319 | 0 | } |
320 | 0 | case TYPE_DECIMAL128I: { |
321 | 0 | buffer << "decimal128(" << type->get_precision() << "," << type->get_scale() << ")"; |
322 | 0 | return buffer.str(); |
323 | 0 | } |
324 | 0 | case TYPE_STRUCT: { |
325 | 0 | const DataTypeStruct* struct_type = reinterpret_cast<const DataTypeStruct*>(type.get()); |
326 | 0 | buffer << "struct<"; |
327 | 0 | for (int i = 0; i < struct_type->get_elements().size(); ++i) { |
328 | 0 | if (i != 0) { |
329 | 0 | buffer << ","; |
330 | 0 | } |
331 | 0 | buffer << struct_type->get_element_names()[i] << ":" |
332 | 0 | << get_jni_type(struct_type->get_element(i)); |
333 | 0 | } |
334 | 0 | buffer << ">"; |
335 | 0 | return buffer.str(); |
336 | 0 | } |
337 | 0 | case TYPE_ARRAY: { |
338 | 0 | const DataTypeArray* array_type = reinterpret_cast<const DataTypeArray*>(type.get()); |
339 | 0 | buffer << "array<" << get_jni_type(array_type->get_nested_type()) << ">"; |
340 | 0 | return buffer.str(); |
341 | 0 | } |
342 | 0 | case TYPE_MAP: { |
343 | 0 | const DataTypeMap* map_type = reinterpret_cast<const DataTypeMap*>(type.get()); |
344 | 0 | buffer << "map<" << get_jni_type(map_type->get_key_type()) << "," |
345 | 0 | << get_jni_type(map_type->get_value_type()) << ">"; |
346 | 0 | return buffer.str(); |
347 | 0 | } |
348 | 0 | case TYPE_VARBINARY: |
349 | 0 | return "varbinary"; |
350 | | // bitmap, hll, quantile_state, jsonb are transferred as strings via JNI |
351 | 0 | case TYPE_BITMAP: |
352 | 0 | [[fallthrough]]; |
353 | 0 | case TYPE_HLL: |
354 | 0 | [[fallthrough]]; |
355 | 0 | case TYPE_QUANTILE_STATE: |
356 | 0 | [[fallthrough]]; |
357 | 0 | case TYPE_JSONB: |
358 | 0 | return "string"; |
359 | 0 | default: |
360 | 0 | return "unsupported"; |
361 | 0 | } |
362 | 0 | } |
363 | | |
364 | 0 | std::string JniDataBridge::get_jni_type_with_different_string(const DataTypePtr& data_type) { |
365 | 0 | DataTypePtr type = remove_nullable(data_type); |
366 | 0 | std::ostringstream buffer; |
367 | 0 | switch (data_type->get_primitive_type()) { |
368 | 0 | case TYPE_BOOLEAN: |
369 | 0 | return "boolean"; |
370 | 0 | case TYPE_TINYINT: |
371 | 0 | return "tinyint"; |
372 | 0 | case TYPE_SMALLINT: |
373 | 0 | return "smallint"; |
374 | 0 | case TYPE_INT: |
375 | 0 | return "int"; |
376 | 0 | case TYPE_BIGINT: |
377 | 0 | return "bigint"; |
378 | 0 | case TYPE_LARGEINT: |
379 | 0 | return "largeint"; |
380 | 0 | case TYPE_FLOAT: |
381 | 0 | return "float"; |
382 | 0 | case TYPE_DOUBLE: |
383 | 0 | return "double"; |
384 | 0 | case TYPE_IPV4: |
385 | 0 | return "ipv4"; |
386 | 0 | case TYPE_IPV6: |
387 | 0 | return "ipv6"; |
388 | 0 | case TYPE_VARCHAR: { |
389 | 0 | buffer << "varchar(" |
390 | 0 | << assert_cast<const DataTypeString*>(remove_nullable(data_type).get())->len() |
391 | 0 | << ")"; |
392 | 0 | return buffer.str(); |
393 | 0 | } |
394 | 0 | case TYPE_DATE: |
395 | 0 | return "datev1"; |
396 | 0 | case TYPE_DATEV2: |
397 | 0 | return "datev2"; |
398 | 0 | case TYPE_DATETIME: |
399 | 0 | return "datetimev1"; |
400 | 0 | case TYPE_DATETIMEV2: |
401 | 0 | [[fallthrough]]; |
402 | 0 | case TYPE_TIMEV2: { |
403 | 0 | buffer << "datetimev2(" << data_type->get_scale() << ")"; |
404 | 0 | return buffer.str(); |
405 | 0 | } |
406 | 0 | case TYPE_TIMESTAMPTZ: { |
407 | 0 | buffer << "timestamptz(" << data_type->get_scale() << ")"; |
408 | 0 | return buffer.str(); |
409 | 0 | } |
410 | 0 | case TYPE_BINARY: |
411 | 0 | return "binary"; |
412 | 0 | case TYPE_CHAR: { |
413 | 0 | buffer << "char(" |
414 | 0 | << assert_cast<const DataTypeString*>(remove_nullable(data_type).get())->len() |
415 | 0 | << ")"; |
416 | 0 | return buffer.str(); |
417 | 0 | } |
418 | 0 | case TYPE_STRING: |
419 | 0 | return "string"; |
420 | 0 | case TYPE_VARBINARY: |
421 | 0 | buffer << "varbinary(" |
422 | 0 | << assert_cast<const DataTypeVarbinary*>(remove_nullable(data_type).get())->len() |
423 | 0 | << ")"; |
424 | 0 | return buffer.str(); |
425 | 0 | case TYPE_DECIMALV2: { |
426 | 0 | buffer << "decimalv2(" << DecimalV2Value::PRECISION << "," << DecimalV2Value::SCALE << ")"; |
427 | 0 | return buffer.str(); |
428 | 0 | } |
429 | 0 | case TYPE_DECIMAL32: { |
430 | 0 | buffer << "decimal32(" << data_type->get_precision() << "," << data_type->get_scale() |
431 | 0 | << ")"; |
432 | 0 | return buffer.str(); |
433 | 0 | } |
434 | 0 | case TYPE_DECIMAL64: { |
435 | 0 | buffer << "decimal64(" << data_type->get_precision() << "," << data_type->get_scale() |
436 | 0 | << ")"; |
437 | 0 | return buffer.str(); |
438 | 0 | } |
439 | 0 | case TYPE_DECIMAL128I: { |
440 | 0 | buffer << "decimal128(" << data_type->get_precision() << "," << data_type->get_scale() |
441 | 0 | << ")"; |
442 | 0 | return buffer.str(); |
443 | 0 | } |
444 | 0 | case TYPE_STRUCT: { |
445 | 0 | const auto* type_struct = |
446 | 0 | assert_cast<const DataTypeStruct*>(remove_nullable(data_type).get()); |
447 | 0 | buffer << "struct<"; |
448 | 0 | for (int i = 0; i < type_struct->get_elements().size(); ++i) { |
449 | 0 | if (i != 0) { |
450 | 0 | buffer << ","; |
451 | 0 | } |
452 | 0 | buffer << type_struct->get_element_name(i) << ":" |
453 | 0 | << get_jni_type_with_different_string(type_struct->get_element(i)); |
454 | 0 | } |
455 | 0 | buffer << ">"; |
456 | 0 | return buffer.str(); |
457 | 0 | } |
458 | 0 | case TYPE_ARRAY: { |
459 | 0 | const auto* type_arr = assert_cast<const DataTypeArray*>(remove_nullable(data_type).get()); |
460 | 0 | buffer << "array<" << get_jni_type_with_different_string(type_arr->get_nested_type()) |
461 | 0 | << ">"; |
462 | 0 | return buffer.str(); |
463 | 0 | } |
464 | 0 | case TYPE_MAP: { |
465 | 0 | const auto* type_map = assert_cast<const DataTypeMap*>(remove_nullable(data_type).get()); |
466 | 0 | buffer << "map<" << get_jni_type_with_different_string(type_map->get_key_type()) << "," |
467 | 0 | << get_jni_type_with_different_string(type_map->get_value_type()) << ">"; |
468 | 0 | return buffer.str(); |
469 | 0 | } |
470 | | // bitmap, hll, quantile_state, jsonb are transferred as strings via JNI |
471 | 0 | case TYPE_BITMAP: |
472 | 0 | [[fallthrough]]; |
473 | 0 | case TYPE_HLL: |
474 | 0 | [[fallthrough]]; |
475 | 0 | case TYPE_QUANTILE_STATE: |
476 | 0 | [[fallthrough]]; |
477 | 0 | case TYPE_JSONB: |
478 | 0 | return "string"; |
479 | 0 | default: |
480 | 0 | return "unsupported"; |
481 | 0 | } |
482 | 0 | } |
483 | | |
484 | | Status JniDataBridge::_fill_column_meta(const ColumnPtr& doris_column, const DataTypePtr& data_type, |
485 | 0 | std::vector<long>& meta_data) { |
486 | 0 | auto logical_type = data_type->get_primitive_type(); |
487 | 0 | const IColumn* column = nullptr; |
488 | | // insert const flag |
489 | 0 | if (is_column_const(*doris_column)) { |
490 | 0 | meta_data.emplace_back((long)1); |
491 | 0 | const auto& const_column = assert_cast<const ColumnConst&>(*doris_column); |
492 | 0 | column = &(const_column.get_data_column()); |
493 | 0 | } else { |
494 | 0 | meta_data.emplace_back((long)0); |
495 | 0 | column = &(*doris_column); |
496 | 0 | } |
497 | | |
498 | | // insert null map address |
499 | 0 | const IColumn* data_column = nullptr; |
500 | 0 | if (column->is_nullable()) { |
501 | 0 | const auto& nullable_column = assert_cast<const ColumnNullable&>(*column); |
502 | 0 | data_column = &(nullable_column.get_nested_column()); |
503 | 0 | const auto& null_map = nullable_column.get_null_map_data(); |
504 | 0 | meta_data.emplace_back((long)null_map.data()); |
505 | 0 | } else { |
506 | 0 | meta_data.emplace_back(0); |
507 | 0 | data_column = column; |
508 | 0 | } |
509 | 0 | switch (logical_type) { |
510 | 0 | #define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE) \ |
511 | 0 | case TYPE_INDEX: { \ |
512 | 0 | meta_data.emplace_back(_get_fixed_length_column_address<COLUMN_TYPE>(*data_column)); \ |
513 | 0 | break; \ |
514 | 0 | } |
515 | 0 | FOR_FIXED_LENGTH_TYPES(DISPATCH) |
516 | 0 | #undef DISPATCH |
517 | 0 | case PrimitiveType::TYPE_STRING: |
518 | 0 | [[fallthrough]]; |
519 | 0 | case PrimitiveType::TYPE_CHAR: |
520 | 0 | [[fallthrough]]; |
521 | 0 | case PrimitiveType::TYPE_VARCHAR: { |
522 | 0 | const auto& string_column = assert_cast<const ColumnString&>(*data_column); |
523 | | // insert offsets |
524 | 0 | meta_data.emplace_back((long)string_column.get_offsets().data()); |
525 | 0 | meta_data.emplace_back((long)string_column.get_chars().data()); |
526 | 0 | break; |
527 | 0 | } |
528 | 0 | case PrimitiveType::TYPE_ARRAY: { |
529 | 0 | const auto& element_column = assert_cast<const ColumnArray&>(*data_column).get_data_ptr(); |
530 | 0 | meta_data.emplace_back( |
531 | 0 | (long)assert_cast<const ColumnArray&>(*data_column).get_offsets().data()); |
532 | 0 | const auto& element_type = assert_cast<const DataTypePtr&>( |
533 | 0 | (assert_cast<const DataTypeArray*>(remove_nullable(data_type).get())) |
534 | 0 | ->get_nested_type()); |
535 | 0 | RETURN_IF_ERROR(_fill_column_meta(element_column, element_type, meta_data)); |
536 | 0 | break; |
537 | 0 | } |
538 | 0 | case PrimitiveType::TYPE_STRUCT: { |
539 | 0 | const auto& doris_struct = assert_cast<const ColumnStruct&>(*data_column); |
540 | 0 | const auto* doris_struct_type = |
541 | 0 | assert_cast<const DataTypeStruct*>(remove_nullable(data_type).get()); |
542 | 0 | for (int i = 0; i < doris_struct.tuple_size(); ++i) { |
543 | 0 | const auto& struct_field = doris_struct.get_column_ptr(i); |
544 | 0 | const auto& field_type = |
545 | 0 | assert_cast<const DataTypePtr&>(doris_struct_type->get_element(i)); |
546 | 0 | RETURN_IF_ERROR(_fill_column_meta(struct_field, field_type, meta_data)); |
547 | 0 | } |
548 | 0 | break; |
549 | 0 | } |
550 | 0 | case PrimitiveType::TYPE_MAP: { |
551 | 0 | const auto& map = assert_cast<const ColumnMap&>(*data_column); |
552 | 0 | const auto& key_type = assert_cast<const DataTypePtr&>( |
553 | 0 | assert_cast<const DataTypeMap*>(remove_nullable(data_type).get())->get_key_type()); |
554 | 0 | const auto& value_type = assert_cast<const DataTypePtr&>( |
555 | 0 | assert_cast<const DataTypeMap*>(remove_nullable(data_type).get()) |
556 | 0 | ->get_value_type()); |
557 | 0 | const auto& key_column = map.get_keys_ptr(); |
558 | 0 | const auto& value_column = map.get_values_ptr(); |
559 | 0 | meta_data.emplace_back((long)map.get_offsets().data()); |
560 | 0 | RETURN_IF_ERROR(_fill_column_meta(key_column, key_type, meta_data)); |
561 | 0 | RETURN_IF_ERROR(_fill_column_meta(value_column, value_type, meta_data)); |
562 | 0 | break; |
563 | 0 | } |
564 | 0 | case PrimitiveType::TYPE_VARBINARY: { |
565 | 0 | const auto& varbinary_col = assert_cast<const ColumnVarbinary&>(*data_column); |
566 | 0 | meta_data.emplace_back( |
567 | 0 | (long)assert_cast<const ColumnVarbinary&>(varbinary_col).get_data().data()); |
568 | 0 | break; |
569 | 0 | } |
570 | 0 | default: |
571 | 0 | return Status::InternalError("Unsupported type: {}", data_type->get_name()); |
572 | 0 | } |
573 | 0 | return Status::OK(); |
574 | 0 | } |
575 | | |
576 | 0 | Status JniDataBridge::to_java_table(Block* block, std::unique_ptr<long[]>& meta) { |
577 | 0 | ColumnNumbers arguments; |
578 | 0 | for (size_t i = 0; i < block->columns(); ++i) { |
579 | 0 | arguments.emplace_back(i); |
580 | 0 | } |
581 | 0 | return to_java_table(block, block->rows(), arguments, meta); |
582 | 0 | } |
583 | | |
584 | | Status JniDataBridge::to_java_table(Block* block, size_t num_rows, const ColumnNumbers& arguments, |
585 | 0 | std::unique_ptr<long[]>& meta) { |
586 | 0 | std::vector<long> meta_data; |
587 | | // insert number of rows |
588 | 0 | meta_data.emplace_back(num_rows); |
589 | 0 | for (size_t i : arguments) { |
590 | 0 | auto& column_with_type_and_name = block->get_by_position(i); |
591 | 0 | RETURN_IF_ERROR(_fill_column_meta(column_with_type_and_name.column, |
592 | 0 | column_with_type_and_name.type, meta_data)); |
593 | 0 | } |
594 | | |
595 | 0 | meta.reset(new long[meta_data.size()]); |
596 | 0 | memcpy(meta.get(), &meta_data[0], meta_data.size() * 8); |
597 | 0 | return Status::OK(); |
598 | 0 | } |
599 | | |
600 | | std::pair<std::string, std::string> JniDataBridge::parse_table_schema( |
601 | 0 | Block* block, const ColumnNumbers& arguments, bool ignore_column_name) { |
602 | | // prepare table schema |
603 | 0 | std::ostringstream required_fields; |
604 | 0 | std::ostringstream columns_types; |
605 | 0 | for (int i = 0; i < arguments.size(); ++i) { |
606 | 0 | std::string type = JniDataBridge::get_jni_type(block->get_by_position(arguments[i]).type); |
607 | 0 | if (i == 0) { |
608 | 0 | if (ignore_column_name) { |
609 | 0 | required_fields << "_col_" << arguments[i]; |
610 | 0 | } else { |
611 | 0 | required_fields << block->get_by_position(arguments[i]).name; |
612 | 0 | } |
613 | 0 | columns_types << type; |
614 | 0 | } else { |
615 | 0 | if (ignore_column_name) { |
616 | 0 | required_fields << "," |
617 | 0 | << "_col_" << arguments[i]; |
618 | 0 | } else { |
619 | 0 | required_fields << "," << block->get_by_position(arguments[i]).name; |
620 | 0 | } |
621 | 0 | columns_types << "#" << type; |
622 | 0 | } |
623 | 0 | } |
624 | 0 | return std::make_pair(required_fields.str(), columns_types.str()); |
625 | 0 | } |
626 | | |
627 | 0 | std::pair<std::string, std::string> JniDataBridge::parse_table_schema(Block* block) { |
628 | 0 | ColumnNumbers arguments; |
629 | 0 | for (size_t i = 0; i < block->columns(); ++i) { |
630 | 0 | arguments.emplace_back(i); |
631 | 0 | } |
632 | 0 | return parse_table_schema(block, arguments, true); |
633 | 0 | } |
634 | | |
635 | | #include "common/compile_check_end.h" |
636 | | } // namespace doris |