Coverage Report

Created: 2026-04-10 04:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/jni/jni_data_bridge.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "jni_data_bridge.h"
19
20
#include <glog/logging.h>
21
22
#include <sstream>
23
#include <variant>
24
25
#include "core/block/block.h"
26
#include "core/column/column_array.h"
27
#include "core/column/column_map.h"
28
#include "core/column/column_nullable.h"
29
#include "core/column/column_string.h"
30
#include "core/column/column_struct.h"
31
#include "core/column/column_varbinary.h"
32
#include "core/data_type/data_type_array.h"
33
#include "core/data_type/data_type_map.h"
34
#include "core/data_type/data_type_nullable.h"
35
#include "core/data_type/data_type_struct.h"
36
#include "core/data_type/data_type_varbinary.h"
37
#include "core/data_type/define_primitive_type.h"
38
#include "core/data_type/primitive_type.h"
39
#include "core/types.h"
40
#include "core/value/decimalv2_value.h"
41
42
namespace doris {
43
44
// X-macro enumerating the fixed-width primitive types handled in this file.
// APPLY is expanded once per entry as
//     APPLY(<PrimitiveType tag>, <column class>, <element C++ type>)
// which lets a caller stamp out one `case` label per type inside a switch.
#define FOR_FIXED_LENGTH_TYPES(APPLY)                                  \
    APPLY(PrimitiveType::TYPE_TINYINT, ColumnInt8, Int8)               \
    APPLY(PrimitiveType::TYPE_BOOLEAN, ColumnUInt8, UInt8)             \
    APPLY(PrimitiveType::TYPE_SMALLINT, ColumnInt16, Int16)            \
    APPLY(PrimitiveType::TYPE_INT, ColumnInt32, Int32)                 \
    APPLY(PrimitiveType::TYPE_BIGINT, ColumnInt64, Int64)              \
    APPLY(PrimitiveType::TYPE_LARGEINT, ColumnInt128, Int128)          \
    APPLY(PrimitiveType::TYPE_FLOAT, ColumnFloat32, Float32)           \
    APPLY(PrimitiveType::TYPE_DOUBLE, ColumnFloat64, Float64)          \
    APPLY(PrimitiveType::TYPE_DECIMALV2, ColumnDecimal128V2, Int128)   \
    APPLY(PrimitiveType::TYPE_DECIMAL128I, ColumnDecimal128V3, Int128) \
    APPLY(PrimitiveType::TYPE_DECIMAL32, ColumnDecimal32, Int32)       \
    APPLY(PrimitiveType::TYPE_DECIMAL64, ColumnDecimal64, Int64)       \
    APPLY(PrimitiveType::TYPE_DATE, ColumnDate, Int64)                 \
    APPLY(PrimitiveType::TYPE_DATEV2, ColumnDateV2, UInt32)            \
    APPLY(PrimitiveType::TYPE_DATETIME, ColumnDateTime, Int64)         \
    APPLY(PrimitiveType::TYPE_DATETIMEV2, ColumnDateTimeV2, UInt64)    \
    APPLY(PrimitiveType::TYPE_TIMESTAMPTZ, ColumnTimeStampTz, UInt64)  \
    APPLY(PrimitiveType::TYPE_IPV4, ColumnIPv4, IPv4)                  \
    APPLY(PrimitiveType::TYPE_IPV6, ColumnIPv6, IPv6)
64
65
0
// Appends the rows described by a Java-side table to the selected columns of `block`.
//
// block         - destination block; the columns at the positions in `arguments` are filled.
// arguments     - positions of the columns to fill, in the order the Java side wrote them.
// table_address - address of the meta array; 0 is rejected. Its first entry is read as the
//                 row count and the following entries are consumed by fill_column().
//
// Returns an InternalError for a zero address or a negative row count, otherwise the first
// error reported by fill_column(), or OK.
Status JniDataBridge::fill_block(Block* block, const ColumnNumbers& arguments, long table_address) {
    if (table_address == 0) {
        return Status::InternalError("table_address is 0");
    }
    TableMetaAddress table_meta(table_address);
    const long num_rows = table_meta.next_meta_as_long();
    // The count is later used as an unsigned size; a negative value would wrap around to a
    // huge number and drive the resize/memcpy calls in the fill routines.
    if (num_rows < 0) {
        return Status::InternalError("invalid number of rows {} in java table", num_rows);
    }
    for (size_t i : arguments) {
        if (block->get_by_position(i).column.get() == nullptr) {
            // No column yet: create an empty one (wrapped as nullable when the type says so).
            auto return_type = block->get_data_type(i);
            bool result_nullable = return_type->is_nullable();
            ColumnUInt8::MutablePtr null_col = nullptr;
            if (result_nullable) {
                return_type = remove_nullable(return_type);
                null_col = ColumnUInt8::create();
            }
            auto res_col = return_type->create_column();
            if (result_nullable) {
                block->replace_by_position(
                        i, ColumnNullable::create(std::move(res_col), std::move(null_col)));
            } else {
                block->replace_by_position(i, std::move(res_col));
            }
        } else if (is_column_const(*(block->get_by_position(i).column))) {
            // A const column cannot be appended to; expand it to a full column first.
            auto doris_column = block->get_by_position(i).column->convert_to_full_column_if_const();
            bool is_nullable = block->get_by_position(i).type->is_nullable();
            block->replace_by_position(i, is_nullable ? make_nullable(doris_column) : doris_column);
        }
        auto& column_with_type_and_name = block->get_by_position(i);
        auto& column_ptr = column_with_type_and_name.column;
        auto& column_type = column_with_type_and_name.type;
        RETURN_IF_ERROR(fill_column(table_meta, column_ptr, column_type,
                                    static_cast<size_t>(num_rows)));
    }
    return Status::OK();
}
99
100
// Appends `num_rows` values to `doris_column`, reading their addresses from `address`.
//
// The next meta entry is taken as the null-map address; a null address is reported as an
// unsupported type. For a nullable column the null map is extended with `num_rows` bytes
// copied from that address and the nested column receives the data. The remaining meta
// entries are consumed by the type-specific fill routine selected from the primitive type.
//
// Returns InvalidArgument for a primitive type with no fill routine, otherwise the status of
// the selected routine.
Status JniDataBridge::fill_column(TableMetaAddress& address, ColumnPtr& doris_column,
                                  const DataTypePtr& data_type, size_t num_rows) {
    auto logical_type = data_type->get_primitive_type();
    void* null_map_ptr = address.next_meta_as_ptr();
    if (null_map_ptr == nullptr) {
        // org.apache.doris.common.jni.vec.ColumnType.Type#UNSUPPORTED will set column address as 0
        return Status::InternalError("Unsupported type {} in java side", data_type->get_name());
    }
    MutableColumnPtr data_column;
    if (doris_column->is_nullable()) {
        // assert_cast (rather than reinterpret_cast) keeps the downcast checked, matching the
        // other column casts in this file.
        auto* nullable_column = assert_cast<ColumnNullable*>(doris_column->assume_mutable().get());
        data_column = nullable_column->get_nested_column_ptr();
        NullMap& null_map = nullable_column->get_null_map_data();
        size_t origin_size = null_map.size();
        null_map.resize(origin_size + num_rows);
        memcpy(null_map.data() + origin_size, static_cast<bool*>(null_map_ptr), num_rows);
    } else {
        data_column = doris_column->assume_mutable();
    }
    // Every branch below returns: fixed-width types (see FOR_FIXED_LENGTH_TYPES) are copied
    // from the next meta address, the other types go through their dedicated routine.
    switch (logical_type) {
#define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE)              \
    case TYPE_INDEX:                                             \
        return _fill_fixed_length_column<COLUMN_TYPE, CPP_TYPE>( \
                data_column, reinterpret_cast<CPP_TYPE*>(address.next_meta_as_ptr()), num_rows);
        FOR_FIXED_LENGTH_TYPES(DISPATCH)
#undef DISPATCH
    case PrimitiveType::TYPE_STRING:
        [[fallthrough]];
    case PrimitiveType::TYPE_CHAR:
        [[fallthrough]];
    case PrimitiveType::TYPE_VARCHAR:
        return _fill_string_column(address, data_column, num_rows);
    case PrimitiveType::TYPE_ARRAY:
        return _fill_array_column(address, data_column, data_type, num_rows);
    case PrimitiveType::TYPE_MAP:
        return _fill_map_column(address, data_column, data_type, num_rows);
    case PrimitiveType::TYPE_STRUCT:
        return _fill_struct_column(address, data_column, data_type, num_rows);
    case PrimitiveType::TYPE_VARBINARY:
        return _fill_varbinary_column(address, data_column, num_rows);
    default:
        return Status::InvalidArgument("Unsupported type {} in jni scanner", data_type->get_name());
    }
}
147
148
// Appends `num_rows` varbinary values to `doris_column`.
//
// The next meta entry points at an array of 16-byte records written by the Java side, one
// per row: [len: long][addr: long]. A record with len <= 0 appends a default value; otherwise
// `len` bytes starting at `addr` are inserted.
Status JniDataBridge::_fill_varbinary_column(TableMetaAddress& address,
                                             MutableColumnPtr& doris_column, size_t num_rows) {
    constexpr size_t kRecordBytes = 16;
    constexpr size_t kAddrOffset = 8;

    auto* records = reinterpret_cast<char*>(address.next_meta_as_ptr());
    auto& target = assert_cast<ColumnVarbinary&>(*doris_column);
    for (size_t row = 0; row < num_rows; ++row) {
        const char* record = records + kRecordBytes * row;

        // memcpy instead of a typed load so no alignment assumption is made on the record.
        int64_t length = 0;
        memcpy(&length, record, sizeof(length));
        if (length <= 0) {
            target.insert_default();
            continue;
        }

        uint64_t raw_addr = 0;
        memcpy(&raw_addr, record + kAddrOffset, sizeof(raw_addr));
        target.insert_data(reinterpret_cast<const char*>(raw_addr), static_cast<size_t>(length));
    }
    return Status::OK();
}
169
170
// Appends `num_rows` strings to `doris_column`.
//
// Two meta entries are consumed: an array of `int` end offsets (one per row, relative to the
// start of this batch) and the address of the concatenated character data.
Status JniDataBridge::_fill_string_column(TableMetaAddress& address, MutableColumnPtr& doris_column,
                                          size_t num_rows) {
    auto& target = static_cast<ColumnString&>(*doris_column);
    ColumnString::Chars& dst_chars = target.get_chars();
    ColumnString::Offsets& dst_offsets = target.get_offsets();

    // Both entries must be consumed before the empty-batch check below: each call advances
    // the meta cursor, and the following columns rely on it having moved past this one.
    int* src_offsets = reinterpret_cast<int*>(address.next_meta_as_ptr());
    char* src_chars = reinterpret_cast<char*>(address.next_meta_as_ptr());

    // Guard the `src_offsets[num_rows - 1]` access for an empty batch.
    if (num_rows == 0) {
        return Status::OK();
    }

    // The last end offset is the total byte count of the batch.
    const int batch_bytes = src_offsets[num_rows - 1];
    const size_t old_chars_size = dst_chars.size();
    dst_chars.resize(old_chars_size + batch_bytes);
    memcpy(dst_chars.data() + old_chars_size, src_chars, batch_bytes);

    // Rebase the batch-relative offsets onto the end of the existing data.
    // NOTE(review): for an empty column this reads the slot just before the first offset;
    // presumably the offsets container guarantees that reads as 0 - confirm.
    const size_t old_rows = dst_offsets.size();
    const size_t base_offset = dst_offsets[old_rows - 1];
    dst_offsets.resize(old_rows + num_rows);
    for (size_t row = 0; row < num_rows; ++row) {
        dst_offsets[old_rows + row] = static_cast<unsigned int>(src_offsets[row] + base_offset);
    }
    return Status::OK();
}
198
199
// Appends `num_rows` arrays to `doris_column`.
//
// One meta entry is consumed for the `int64_t` end offsets of this batch (one per row); the
// element values are then read recursively through fill_column() using the nested type.
Status JniDataBridge::_fill_array_column(TableMetaAddress& address, MutableColumnPtr& doris_column,
                                         const DataTypePtr& data_type, size_t num_rows) {
    auto& array_column = static_cast<ColumnArray&>(*doris_column);
    ColumnPtr& element_column = array_column.get_data_ptr();
    const DataTypePtr& element_type =
            assert_cast<const DataTypeArray*>(remove_nullable(data_type).get())->get_nested_type();
    ColumnArray::Offsets64& dst_offsets = array_column.get_offsets();

    int64_t* src_offsets = reinterpret_cast<int64_t*>(address.next_meta_as_ptr());
    const size_t old_rows = dst_offsets.size();
    dst_offsets.resize(old_rows + num_rows);
    // Offsets from the Java side are relative to this batch; shift them by the offset the
    // column ended at before the append.
    const size_t base_offset = dst_offsets[old_rows - 1];
    for (size_t row = 0; row < num_rows; ++row) {
        dst_offsets[old_rows + row] = src_offsets[row] + base_offset;
    }

    // Number of elements contributed by this batch = new last offset - previous last offset.
    const size_t element_rows = dst_offsets[old_rows + num_rows - 1] - base_offset;
    return fill_column(address, element_column, element_type, element_rows);
}
218
219
// Appends `num_rows` maps to `doris_column`.
//
// One meta entry is consumed for the `int64_t` end offsets of this batch (one per row); the
// keys and then the values are read recursively through fill_column().
Status JniDataBridge::_fill_map_column(TableMetaAddress& address, MutableColumnPtr& doris_column,
                                       const DataTypePtr& data_type, size_t num_rows) {
    auto& map = static_cast<ColumnMap&>(*doris_column);
    // Strip nullable once and keep the result alive for the key/value type references below.
    const DataTypePtr nested_type = remove_nullable(data_type);
    // assert_cast (not reinterpret_cast) so the downcast is checked, as in _fill_array_column.
    const auto* map_type = assert_cast<const DataTypeMap*>(nested_type.get());
    const DataTypePtr& key_type = map_type->get_key_type();
    const DataTypePtr& value_type = map_type->get_value_type();
    ColumnPtr& key_column = map.get_keys_ptr();
    ColumnPtr& value_column = map.get_values_ptr();
    ColumnArray::Offsets64& map_offsets = map.get_offsets();

    int64_t* offsets = reinterpret_cast<int64_t*>(address.next_meta_as_ptr());
    size_t origin_size = map_offsets.size();
    map_offsets.resize(origin_size + num_rows);
    // Offsets from the Java side are relative to this batch; rebase them onto the column.
    size_t start_offset = map_offsets[origin_size - 1];
    for (size_t i = 0; i < num_rows; ++i) {
        map_offsets[origin_size + i] = offsets[i] + start_offset;
    }

    // Keys and values share the same entry count: new last offset - previous last offset.
    const size_t entry_count = map_offsets[origin_size + num_rows - 1] - start_offset;
    RETURN_IF_ERROR(fill_column(address, key_column, key_type, entry_count));
    RETURN_IF_ERROR(fill_column(address, value_column, value_type, entry_count));
    return Status::OK();
}
245
246
// Appends `num_rows` structs to `doris_column` by filling each field column in declaration
// order through fill_column(); every field receives the same `num_rows`.
Status JniDataBridge::_fill_struct_column(TableMetaAddress& address, MutableColumnPtr& doris_column,
                                          const DataTypePtr& data_type, size_t num_rows) {
    auto& doris_struct = static_cast<ColumnStruct&>(*doris_column);
    // Keep the nullable-stripped type alive while its element types are referenced.
    const DataTypePtr nested_type = remove_nullable(data_type);
    // assert_cast (not reinterpret_cast) so the downcast is checked, as elsewhere in the file.
    const auto* doris_struct_type = assert_cast<const DataTypeStruct*>(nested_type.get());
    const size_t field_count = doris_struct.tuple_size();
    for (size_t i = 0; i < field_count; ++i) {
        ColumnPtr& struct_field = doris_struct.get_column_ptr(i);
        const DataTypePtr& field_type = doris_struct_type->get_element(i);
        RETURN_IF_ERROR(fill_column(address, struct_field, field_type, num_rows));
    }
    return Status::OK();
}
258
259
0
std::string JniDataBridge::get_jni_type(const DataTypePtr& data_type) {
260
0
    DataTypePtr type = remove_nullable(data_type);
261
0
    std::ostringstream buffer;
262
0
    switch (type->get_primitive_type()) {
263
0
    case TYPE_BOOLEAN:
264
0
        return "boolean";
265
0
    case TYPE_TINYINT:
266
0
        return "tinyint";
267
0
    case TYPE_SMALLINT:
268
0
        return "smallint";
269
0
    case TYPE_INT:
270
0
        return "int";
271
0
    case TYPE_BIGINT:
272
0
        return "bigint";
273
0
    case TYPE_LARGEINT:
274
0
        return "largeint";
275
0
    case TYPE_FLOAT:
276
0
        return "float";
277
0
    case TYPE_DOUBLE:
278
0
        return "double";
279
0
    case TYPE_IPV4:
280
0
        return "ipv4";
281
0
    case TYPE_IPV6:
282
0
        return "ipv6";
283
0
    case TYPE_VARCHAR:
284
0
        [[fallthrough]];
285
0
    case TYPE_CHAR:
286
0
        [[fallthrough]];
287
0
    case TYPE_STRING:
288
0
        return "string";
289
0
    case TYPE_DATE:
290
0
        return "datev1";
291
0
    case TYPE_DATEV2:
292
0
        return "datev2";
293
0
    case TYPE_DATETIME:
294
0
        return "datetimev1";
295
0
    case TYPE_DATETIMEV2:
296
0
        [[fallthrough]];
297
0
    case TYPE_TIMEV2: {
298
0
        buffer << "datetimev2(" << type->get_scale() << ")";
299
0
        return buffer.str();
300
0
    }
301
0
    case TYPE_TIMESTAMPTZ: {
302
0
        buffer << "timestamptz(" << type->get_scale() << ")";
303
0
        return buffer.str();
304
0
    }
305
0
    case TYPE_BINARY:
306
0
        return "binary";
307
0
    case TYPE_DECIMALV2: {
308
0
        buffer << "decimalv2(" << DecimalV2Value::PRECISION << "," << DecimalV2Value::SCALE << ")";
309
0
        return buffer.str();
310
0
    }
311
0
    case TYPE_DECIMAL32: {
312
0
        buffer << "decimal32(" << type->get_precision() << "," << type->get_scale() << ")";
313
0
        return buffer.str();
314
0
    }
315
0
    case TYPE_DECIMAL64: {
316
0
        buffer << "decimal64(" << type->get_precision() << "," << type->get_scale() << ")";
317
0
        return buffer.str();
318
0
    }
319
0
    case TYPE_DECIMAL128I: {
320
0
        buffer << "decimal128(" << type->get_precision() << "," << type->get_scale() << ")";
321
0
        return buffer.str();
322
0
    }
323
0
    case TYPE_STRUCT: {
324
0
        const DataTypeStruct* struct_type = reinterpret_cast<const DataTypeStruct*>(type.get());
325
0
        buffer << "struct<";
326
0
        for (int i = 0; i < struct_type->get_elements().size(); ++i) {
327
0
            if (i != 0) {
328
0
                buffer << ",";
329
0
            }
330
0
            buffer << struct_type->get_element_names()[i] << ":"
331
0
                   << get_jni_type(struct_type->get_element(i));
332
0
        }
333
0
        buffer << ">";
334
0
        return buffer.str();
335
0
    }
336
0
    case TYPE_ARRAY: {
337
0
        const DataTypeArray* array_type = reinterpret_cast<const DataTypeArray*>(type.get());
338
0
        buffer << "array<" << get_jni_type(array_type->get_nested_type()) << ">";
339
0
        return buffer.str();
340
0
    }
341
0
    case TYPE_MAP: {
342
0
        const DataTypeMap* map_type = reinterpret_cast<const DataTypeMap*>(type.get());
343
0
        buffer << "map<" << get_jni_type(map_type->get_key_type()) << ","
344
0
               << get_jni_type(map_type->get_value_type()) << ">";
345
0
        return buffer.str();
346
0
    }
347
0
    case TYPE_VARBINARY:
348
0
        return "varbinary";
349
    // bitmap, hll, quantile_state, jsonb are transferred as strings via JNI
350
0
    case TYPE_BITMAP:
351
0
        [[fallthrough]];
352
0
    case TYPE_HLL:
353
0
        [[fallthrough]];
354
0
    case TYPE_QUANTILE_STATE:
355
0
        [[fallthrough]];
356
0
    case TYPE_JSONB:
357
0
        return "string";
358
0
    default:
359
0
        return "unsupported";
360
0
    }
361
0
}
362
363
0
std::string JniDataBridge::get_jni_type_with_different_string(const DataTypePtr& data_type) {
364
0
    DataTypePtr type = remove_nullable(data_type);
365
0
    std::ostringstream buffer;
366
0
    switch (data_type->get_primitive_type()) {
367
0
    case TYPE_BOOLEAN:
368
0
        return "boolean";
369
0
    case TYPE_TINYINT:
370
0
        return "tinyint";
371
0
    case TYPE_SMALLINT:
372
0
        return "smallint";
373
0
    case TYPE_INT:
374
0
        return "int";
375
0
    case TYPE_BIGINT:
376
0
        return "bigint";
377
0
    case TYPE_LARGEINT:
378
0
        return "largeint";
379
0
    case TYPE_FLOAT:
380
0
        return "float";
381
0
    case TYPE_DOUBLE:
382
0
        return "double";
383
0
    case TYPE_IPV4:
384
0
        return "ipv4";
385
0
    case TYPE_IPV6:
386
0
        return "ipv6";
387
0
    case TYPE_VARCHAR: {
388
0
        buffer << "varchar("
389
0
               << assert_cast<const DataTypeString*>(remove_nullable(data_type).get())->len()
390
0
               << ")";
391
0
        return buffer.str();
392
0
    }
393
0
    case TYPE_DATE:
394
0
        return "datev1";
395
0
    case TYPE_DATEV2:
396
0
        return "datev2";
397
0
    case TYPE_DATETIME:
398
0
        return "datetimev1";
399
0
    case TYPE_DATETIMEV2:
400
0
        [[fallthrough]];
401
0
    case TYPE_TIMEV2: {
402
0
        buffer << "datetimev2(" << data_type->get_scale() << ")";
403
0
        return buffer.str();
404
0
    }
405
0
    case TYPE_TIMESTAMPTZ: {
406
0
        buffer << "timestamptz(" << data_type->get_scale() << ")";
407
0
        return buffer.str();
408
0
    }
409
0
    case TYPE_BINARY:
410
0
        return "binary";
411
0
    case TYPE_CHAR: {
412
0
        buffer << "char("
413
0
               << assert_cast<const DataTypeString*>(remove_nullable(data_type).get())->len()
414
0
               << ")";
415
0
        return buffer.str();
416
0
    }
417
0
    case TYPE_STRING:
418
0
        return "string";
419
0
    case TYPE_VARBINARY:
420
0
        buffer << "varbinary("
421
0
               << assert_cast<const DataTypeVarbinary*>(remove_nullable(data_type).get())->len()
422
0
               << ")";
423
0
        return buffer.str();
424
0
    case TYPE_DECIMALV2: {
425
0
        buffer << "decimalv2(" << DecimalV2Value::PRECISION << "," << DecimalV2Value::SCALE << ")";
426
0
        return buffer.str();
427
0
    }
428
0
    case TYPE_DECIMAL32: {
429
0
        buffer << "decimal32(" << data_type->get_precision() << "," << data_type->get_scale()
430
0
               << ")";
431
0
        return buffer.str();
432
0
    }
433
0
    case TYPE_DECIMAL64: {
434
0
        buffer << "decimal64(" << data_type->get_precision() << "," << data_type->get_scale()
435
0
               << ")";
436
0
        return buffer.str();
437
0
    }
438
0
    case TYPE_DECIMAL128I: {
439
0
        buffer << "decimal128(" << data_type->get_precision() << "," << data_type->get_scale()
440
0
               << ")";
441
0
        return buffer.str();
442
0
    }
443
0
    case TYPE_STRUCT: {
444
0
        const auto* type_struct =
445
0
                assert_cast<const DataTypeStruct*>(remove_nullable(data_type).get());
446
0
        buffer << "struct<";
447
0
        for (int i = 0; i < type_struct->get_elements().size(); ++i) {
448
0
            if (i != 0) {
449
0
                buffer << ",";
450
0
            }
451
0
            buffer << type_struct->get_element_name(i) << ":"
452
0
                   << get_jni_type_with_different_string(type_struct->get_element(i));
453
0
        }
454
0
        buffer << ">";
455
0
        return buffer.str();
456
0
    }
457
0
    case TYPE_ARRAY: {
458
0
        const auto* type_arr = assert_cast<const DataTypeArray*>(remove_nullable(data_type).get());
459
0
        buffer << "array<" << get_jni_type_with_different_string(type_arr->get_nested_type())
460
0
               << ">";
461
0
        return buffer.str();
462
0
    }
463
0
    case TYPE_MAP: {
464
0
        const auto* type_map = assert_cast<const DataTypeMap*>(remove_nullable(data_type).get());
465
0
        buffer << "map<" << get_jni_type_with_different_string(type_map->get_key_type()) << ","
466
0
               << get_jni_type_with_different_string(type_map->get_value_type()) << ">";
467
0
        return buffer.str();
468
0
    }
469
    // bitmap, hll, quantile_state, jsonb are transferred as strings via JNI
470
0
    case TYPE_BITMAP:
471
0
        [[fallthrough]];
472
0
    case TYPE_HLL:
473
0
        [[fallthrough]];
474
0
    case TYPE_QUANTILE_STATE:
475
0
        [[fallthrough]];
476
0
    case TYPE_JSONB:
477
0
        return "string";
478
0
    default:
479
0
        return "unsupported";
480
0
    }
481
0
}
482
483
// Appends the meta entries describing `doris_column` to `meta_data`, in this order:
//   1. const flag: 1 when the column is a ColumnConst (its data column is described), else 0;
//   2. address of the null map, or 0 when the column is not nullable;
//   3. type-specific addresses: the data address for fixed-width types; offsets then chars
//      for strings; offsets followed by the nested column(s) meta for array/map; each field's
//      meta for struct; the data address for varbinary.
//
// Returns InternalError for a primitive type that has no layout here, or the first error
// from a nested column.
Status JniDataBridge::_fill_column_meta(const ColumnPtr& doris_column, const DataTypePtr& data_type,
                                        std::vector<long>& meta_data) {
    auto logical_type = data_type->get_primitive_type();
    const IColumn* column = nullptr;
    // insert const flag
    if (is_column_const(*doris_column)) {
        meta_data.emplace_back(1L);
        const auto& const_column = assert_cast<const ColumnConst&>(*doris_column);
        column = &(const_column.get_data_column());
    } else {
        meta_data.emplace_back(0L);
        column = &(*doris_column);
    }

    // insert null map address
    const IColumn* data_column = nullptr;
    if (column->is_nullable()) {
        const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
        data_column = &(nullable_column.get_nested_column());
        const auto& null_map = nullable_column.get_null_map_data();
        meta_data.emplace_back(reinterpret_cast<long>(null_map.data()));
    } else {
        meta_data.emplace_back(0L);
        data_column = column;
    }
    switch (logical_type) {
#define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE)                                          \
    case TYPE_INDEX: {                                                                       \
        meta_data.emplace_back(_get_fixed_length_column_address<COLUMN_TYPE>(*data_column)); \
        break;                                                                               \
    }
        FOR_FIXED_LENGTH_TYPES(DISPATCH)
#undef DISPATCH
    case PrimitiveType::TYPE_STRING:
        [[fallthrough]];
    case PrimitiveType::TYPE_CHAR:
        [[fallthrough]];
    case PrimitiveType::TYPE_VARCHAR: {
        const auto& string_column = assert_cast<const ColumnString&>(*data_column);
        // insert offsets, then chars
        meta_data.emplace_back(reinterpret_cast<long>(string_column.get_offsets().data()));
        meta_data.emplace_back(reinterpret_cast<long>(string_column.get_chars().data()));
        break;
    }
    case PrimitiveType::TYPE_ARRAY: {
        const auto& array_column = assert_cast<const ColumnArray&>(*data_column);
        const auto& element_column = array_column.get_data_ptr();
        meta_data.emplace_back(reinterpret_cast<long>(array_column.get_offsets().data()));
        // Keep the nullable-stripped type alive while its nested type is referenced.
        const DataTypePtr nested_type = remove_nullable(data_type);
        const auto& element_type =
                assert_cast<const DataTypeArray*>(nested_type.get())->get_nested_type();
        RETURN_IF_ERROR(_fill_column_meta(element_column, element_type, meta_data));
        break;
    }
    case PrimitiveType::TYPE_STRUCT: {
        const auto& doris_struct = assert_cast<const ColumnStruct&>(*data_column);
        const DataTypePtr nested_type = remove_nullable(data_type);
        const auto* doris_struct_type = assert_cast<const DataTypeStruct*>(nested_type.get());
        const size_t field_count = doris_struct.tuple_size();
        for (size_t i = 0; i < field_count; ++i) {
            const auto& struct_field = doris_struct.get_column_ptr(i);
            const auto& field_type = doris_struct_type->get_element(i);
            RETURN_IF_ERROR(_fill_column_meta(struct_field, field_type, meta_data));
        }
        break;
    }
    case PrimitiveType::TYPE_MAP: {
        const auto& map = assert_cast<const ColumnMap&>(*data_column);
        const DataTypePtr nested_type = remove_nullable(data_type);
        const auto* map_type = assert_cast<const DataTypeMap*>(nested_type.get());
        const auto& key_type = map_type->get_key_type();
        const auto& value_type = map_type->get_value_type();
        const auto& key_column = map.get_keys_ptr();
        const auto& value_column = map.get_values_ptr();
        meta_data.emplace_back(reinterpret_cast<long>(map.get_offsets().data()));
        RETURN_IF_ERROR(_fill_column_meta(key_column, key_type, meta_data));
        RETURN_IF_ERROR(_fill_column_meta(value_column, value_type, meta_data));
        break;
    }
    case PrimitiveType::TYPE_VARBINARY: {
        const auto& varbinary_col = assert_cast<const ColumnVarbinary&>(*data_column);
        meta_data.emplace_back(reinterpret_cast<long>(varbinary_col.get_data().data()));
        break;
    }
    default:
        return Status::InternalError("Unsupported type: {}", data_type->get_name());
    }
    return Status::OK();
}
574
575
0
// Convenience overload: exports every column of `block`, in positional order, with the
// block's own row count, by delegating to the overload that takes explicit positions.
Status JniDataBridge::to_java_table(Block* block, std::unique_ptr<long[]>& meta) {
    ColumnNumbers all_positions;
    for (size_t position = 0; position < block->columns(); ++position) {
        all_positions.emplace_back(position);
    }
    const size_t row_count = block->rows();
    return to_java_table(block, row_count, all_positions, meta);
}
582
583
// Builds the meta array handed to the Java side for the selected columns of `block`.
//
// The array starts with `num_rows`, followed by the entries produced by _fill_column_meta()
// for each position in `arguments`, in order. On success `meta` owns a freshly allocated
// copy of that array; on error `meta` is left untouched and the error is returned.
Status JniDataBridge::to_java_table(Block* block, size_t num_rows, const ColumnNumbers& arguments,
                                    std::unique_ptr<long[]>& meta) {
    std::vector<long> meta_data;
    // insert number of rows
    meta_data.emplace_back(static_cast<long>(num_rows));
    for (size_t i : arguments) {
        auto& column_with_type_and_name = block->get_by_position(i);
        RETURN_IF_ERROR(_fill_column_meta(column_with_type_and_name.column,
                                          column_with_type_and_name.type, meta_data));
    }

    meta.reset(new long[meta_data.size()]);
    // Size the copy from the element type instead of assuming a `long` is 8 bytes.
    memcpy(meta.get(), meta_data.data(), meta_data.size() * sizeof(long));
    return Status::OK();
}
598
599
std::pair<std::string, std::string> JniDataBridge::parse_table_schema(
600
0
        Block* block, const ColumnNumbers& arguments, bool ignore_column_name) {
601
    // prepare table schema
602
0
    std::ostringstream required_fields;
603
0
    std::ostringstream columns_types;
604
0
    for (int i = 0; i < arguments.size(); ++i) {
605
0
        std::string type = JniDataBridge::get_jni_type(block->get_by_position(arguments[i]).type);
606
0
        if (i == 0) {
607
0
            if (ignore_column_name) {
608
0
                required_fields << "_col_" << arguments[i];
609
0
            } else {
610
0
                required_fields << block->get_by_position(arguments[i]).name;
611
0
            }
612
0
            columns_types << type;
613
0
        } else {
614
0
            if (ignore_column_name) {
615
0
                required_fields << ","
616
0
                                << "_col_" << arguments[i];
617
0
            } else {
618
0
                required_fields << "," << block->get_by_position(arguments[i]).name;
619
0
            }
620
0
            columns_types << "#" << type;
621
0
        }
622
0
    }
623
0
    return std::make_pair(required_fields.str(), columns_types.str());
624
0
}
625
626
0
// Convenience overload: describes every column of `block`, in positional order, using the
// generated "_col_<position>" names rather than the columns' own names.
std::pair<std::string, std::string> JniDataBridge::parse_table_schema(Block* block) {
    ColumnNumbers all_positions;
    for (size_t position = 0; position < block->columns(); ++position) {
        all_positions.emplace_back(position);
    }
    constexpr bool kIgnoreColumnName = true;
    return parse_table_schema(block, all_positions, kIgnoreColumnName);
}
633
634
} // namespace doris