Coverage Report

Created: 2026-05-25 13:19

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/jni/jni_data_bridge.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "jni_data_bridge.h"
19
20
#include <glog/logging.h>
21
22
#include <sstream>
23
#include <variant>
24
25
#include "core/block/block.h"
26
#include "core/column/column_array.h"
27
#include "core/column/column_map.h"
28
#include "core/column/column_nullable.h"
29
#include "core/column/column_string.h"
30
#include "core/column/column_struct.h"
31
#include "core/column/column_varbinary.h"
32
#include "core/data_type/data_type_array.h"
33
#include "core/data_type/data_type_map.h"
34
#include "core/data_type/data_type_nullable.h"
35
#include "core/data_type/data_type_struct.h"
36
#include "core/data_type/data_type_varbinary.h"
37
#include "core/data_type/define_primitive_type.h"
38
#include "core/data_type/primitive_type.h"
39
#include "core/types.h"
40
#include "core/value/decimalv2_value.h"
41
42
namespace doris {
43
44
// X-macro listing every primitive type that the JNI bridge handles as a fixed-length column.
// APPLY is expanded once per entry as
//     APPLY(primitive type tag, column class, C++ element type)
// Callers in this file use it to generate the `case` labels of a switch statement.
#define FOR_FIXED_LENGTH_TYPES(APPLY)                                  \
    APPLY(PrimitiveType::TYPE_TINYINT, ColumnInt8, Int8)               \
    APPLY(PrimitiveType::TYPE_BOOLEAN, ColumnUInt8, UInt8)             \
    APPLY(PrimitiveType::TYPE_SMALLINT, ColumnInt16, Int16)            \
    APPLY(PrimitiveType::TYPE_INT, ColumnInt32, Int32)                 \
    APPLY(PrimitiveType::TYPE_BIGINT, ColumnInt64, Int64)              \
    APPLY(PrimitiveType::TYPE_LARGEINT, ColumnInt128, Int128)          \
    APPLY(PrimitiveType::TYPE_FLOAT, ColumnFloat32, Float32)           \
    APPLY(PrimitiveType::TYPE_DOUBLE, ColumnFloat64, Float64)          \
    APPLY(PrimitiveType::TYPE_DECIMALV2, ColumnDecimal128V2, Int128)   \
    APPLY(PrimitiveType::TYPE_DECIMAL128I, ColumnDecimal128V3, Int128) \
    APPLY(PrimitiveType::TYPE_DECIMAL32, ColumnDecimal32, Int32)       \
    APPLY(PrimitiveType::TYPE_DECIMAL64, ColumnDecimal64, Int64)       \
    APPLY(PrimitiveType::TYPE_DATE, ColumnDate, Int64)                 \
    APPLY(PrimitiveType::TYPE_DATEV2, ColumnDateV2, UInt32)            \
    APPLY(PrimitiveType::TYPE_DATETIME, ColumnDateTime, Int64)         \
    APPLY(PrimitiveType::TYPE_DATETIMEV2, ColumnDateTimeV2, UInt64)    \
    APPLY(PrimitiveType::TYPE_TIMESTAMPTZ, ColumnTimeStampTz, UInt64)  \
    APPLY(PrimitiveType::TYPE_IPV4, ColumnIPv4, IPv4)                  \
    APPLY(PrimitiveType::TYPE_IPV6, ColumnIPv6, IPv6)
64
65
0
// Appends the rows of a Java-side table to the columns of `block` listed in `arguments`.
//
// `table_address` is the address of the meta array describing the table. Its first slot is
// read as the row count; the remaining slots are consumed column by column by fill_column().
// A column that is still null in the block is created from its data type (wrapped in a
// ColumnNullable when the type is nullable), and a const column is expanded to a full column
// before being filled.
//
// Returns InternalError when `table_address` is 0 or the row count is negative; otherwise
// the first error reported by fill_column(), or OK.
Status JniDataBridge::fill_block(Block* block, const ColumnNumbers& arguments, long table_address) {
    if (table_address == 0) {
        return Status::InternalError("table_address is 0");
    }
    TableMetaAddress table_meta(table_address);
    const long num_rows = table_meta.next_meta_as_long();
    // fill_column() takes the row count as size_t; a negative value would silently turn
    // into an enormous resize, so reject it here.
    if (num_rows < 0) {
        return Status::InternalError("Invalid number of rows {} in java table", num_rows);
    }
    for (size_t i : arguments) {
        if (block->get_by_position(i).column.get() == nullptr) {
            // No column yet: build an empty one that matches the declared type.
            auto return_type = block->get_data_type(i);
            bool result_nullable = return_type->is_nullable();
            ColumnUInt8::MutablePtr null_col = nullptr;
            if (result_nullable) {
                return_type = remove_nullable(return_type);
                null_col = ColumnUInt8::create();
            }
            auto res_col = return_type->create_column();
            if (result_nullable) {
                block->replace_by_position(
                        i, ColumnNullable::create(std::move(res_col), std::move(null_col)));
            } else {
                block->replace_by_position(i, std::move(res_col));
            }
        } else if (is_column_const(*(block->get_by_position(i).column))) {
            // Const columns cannot be appended to; materialize them first.
            auto doris_column = block->get_by_position(i).column->convert_to_full_column_if_const();
            bool is_nullable = block->get_by_position(i).type->is_nullable();
            block->replace_by_position(i, is_nullable ? make_nullable(doris_column) : doris_column);
        }
        auto& column_with_type_and_name = block->get_by_position(i);
        auto& column_ptr = column_with_type_and_name.column;
        auto& column_type = column_with_type_and_name.type;
        RETURN_IF_ERROR(fill_column(table_meta, column_ptr, column_type,
                                    static_cast<size_t>(num_rows)));
    }
    return Status::OK();
}
99
100
// Consumes the metadata of one column from `address` and appends `num_rows` values to
// `doris_column`.
//
// The first meta slot of a column is the address of its null map; a zero address is treated
// as "type unsupported on the Java side". For a nullable Doris column the null map bytes are
// appended to the column's own null map and the values go into the nested column. The
// remaining meta slots are consumed by the type-specific filler chosen from the primitive
// type of `data_type`.
//
// `doris_column` is mutated in place and reassigned on every path that reaches the switch.
Status JniDataBridge::fill_column(TableMetaAddress& address, ColumnPtr& doris_column,
                                  const DataTypePtr& data_type, size_t num_rows) {
    const auto primitive_type = data_type->get_primitive_type();
    void* java_null_map = address.next_meta_as_ptr();
    if (java_null_map == nullptr) {
        // org.apache.doris.common.jni.vec.ColumnType.Type#UNSUPPORTED will set column address as 0
        return Status::InternalError("Unsupported type {} in java side", data_type->get_name());
    }
    auto mutable_column = IColumn::mutate(std::move(doris_column));
    MutableColumnPtr value_column;
    if (!mutable_column->is_nullable()) {
        value_column = mutable_column->get_ptr();
    } else {
        auto* nullable = assert_cast<ColumnNullable*>(mutable_column.get());
        value_column = nullable->get_nested_column_ptr();
        // Append the Java-side null flags (one byte per row) after the existing ones.
        NullMap& null_flags = nullable->get_null_map_data();
        const size_t rows_before = null_flags.size();
        null_flags.resize(rows_before + num_rows);
        memcpy(null_flags.data() + rows_before, static_cast<bool*>(java_null_map), num_rows);
    }
    Status result = Status::OK();
    switch (primitive_type) {
#define DISPATCH(TYPE_TAG, COLUMN_CLASS, VALUE_TYPE)                                             \
    case TYPE_TAG: {                                                                             \
        auto* values = reinterpret_cast<VALUE_TYPE*>(address.next_meta_as_ptr());                \
        result = _fill_fixed_length_column<COLUMN_CLASS, VALUE_TYPE>(value_column, values,       \
                                                                     num_rows);                  \
        break;                                                                                   \
    }
        FOR_FIXED_LENGTH_TYPES(DISPATCH)
#undef DISPATCH
    case PrimitiveType::TYPE_STRING:
        [[fallthrough]];
    case PrimitiveType::TYPE_CHAR:
        [[fallthrough]];
    case PrimitiveType::TYPE_VARCHAR:
        result = _fill_string_column(address, value_column, num_rows);
        break;
    case PrimitiveType::TYPE_ARRAY:
        result = _fill_array_column(address, value_column, data_type, num_rows);
        break;
    case PrimitiveType::TYPE_MAP:
        result = _fill_map_column(address, value_column, data_type, num_rows);
        break;
    case PrimitiveType::TYPE_STRUCT:
        result = _fill_struct_column(address, value_column, data_type, num_rows);
        break;
    case PrimitiveType::TYPE_VARBINARY:
        result = _fill_varbinary_column(address, value_column, num_rows);
        break;
    default:
        result = Status::InvalidArgument("Unsupported type {} in jni scanner",
                                         data_type->get_name());
        break;
    }
    // Hand the (possibly modified) column back to the caller even when filling failed.
    doris_column = std::move(mutable_column);
    return result;
}
158
159
// Appends `num_rows` varbinary values to `doris_column`.
//
// The next meta slot of `address` points at a packed array of per-row records written by
// the Java side, 16 bytes each: [len: long][addr: long]. A row whose length is <= 0 is
// stored as the column's default value; otherwise `len` bytes are copied from `addr`.
Status JniDataBridge::_fill_varbinary_column(TableMetaAddress& address,
                                             MutableColumnPtr& doris_column, size_t num_rows) {
    constexpr size_t kRecordSize = 16;
    constexpr size_t kAddrOffset = 8;

    const char* record = reinterpret_cast<char*>(address.next_meta_as_ptr());
    auto& target = assert_cast<ColumnVarbinary&>(*doris_column);
    for (size_t row = 0; row < num_rows; ++row, record += kRecordSize) {
        // memcpy avoids assuming the record is aligned for a direct 8-byte load.
        int64_t length = 0;
        memcpy(&length, record, sizeof(length));
        if (length > 0) {
            uint64_t raw_addr = 0;
            memcpy(&raw_addr, record + kAddrOffset, sizeof(raw_addr));
            target.insert_data(reinterpret_cast<const char*>(raw_addr),
                               static_cast<size_t>(length));
        } else {
            target.insert_default();
        }
    }
    return Status::OK();
}
180
181
// Appends `num_rows` strings to `doris_column`.
//
// Two meta slots are consumed from `address`: an int array of cumulative end offsets (one
// per row) and the concatenated character data. The characters are copied in one shot and
// the offsets are rebased onto the end of the data already held by the column.
Status JniDataBridge::_fill_string_column(TableMetaAddress& address, MutableColumnPtr& doris_column,
                                          size_t num_rows) {
    auto& target = static_cast<ColumnString&>(*doris_column);
    ColumnString::Chars& dst_chars = target.get_chars();
    ColumnString::Offsets& dst_offsets = target.get_offsets();

    // Both slots must be consumed before the empty-input check below, because every call to
    // next_meta_as_ptr() advances the meta cursor and later columns rely on its position.
    int* src_offsets = reinterpret_cast<int*>(address.next_meta_as_ptr());
    char* src_chars = reinterpret_cast<char*>(address.next_meta_as_ptr());

    // Without this guard `src_offsets[num_rows - 1]` would index out of bounds.
    if (num_rows == 0) {
        return Status::OK();
    }

    // The last cumulative offset is the total number of bytes to copy.
    const int total_bytes = src_offsets[num_rows - 1];
    const size_t chars_before = dst_chars.size();
    dst_chars.resize(chars_before + total_bytes);
    memcpy(dst_chars.data() + chars_before, src_chars, total_bytes);

    const size_t rows_before = dst_offsets.size();
    // NOTE(review): when the column is empty this reads index -1; presumably the offsets
    // container tolerates that and yields 0 — confirm against its definition.
    const size_t base_offset = dst_offsets[rows_before - 1];
    dst_offsets.resize(rows_before + num_rows);
    for (size_t row = 0; row < num_rows; ++row) {
        dst_offsets[rows_before + row] = static_cast<unsigned int>(src_offsets[row] + base_offset);
    }
    return Status::OK();
}
209
210
// Appends `num_rows` arrays to `doris_column`.
//
// One meta slot is consumed for the int64 array of cumulative end offsets (one per row);
// they are rebased onto the offsets already in the column. The element column is then
// filled recursively with as many elements as the new rows contain.
Status JniDataBridge::_fill_array_column(TableMetaAddress& address, MutableColumnPtr& doris_column,
                                         const DataTypePtr& data_type, size_t num_rows) {
    auto& array_column = static_cast<ColumnArray&>(*doris_column);
    ColumnPtr& nested_column = array_column.get_data_ptr();
    const DataTypePtr& nested_type =
            assert_cast<const DataTypeArray*>(remove_nullable(data_type).get())->get_nested_type();
    ColumnArray::Offsets64& dst_offsets = array_column.get_offsets();

    int64_t* src_offsets = reinterpret_cast<int64_t*>(address.next_meta_as_ptr());
    const size_t rows_before = dst_offsets.size();
    dst_offsets.resize(rows_before + num_rows);
    // NOTE(review): reads index -1 when the column was empty; presumably the offsets
    // container yields 0 there — confirm against its definition.
    const size_t base_offset = dst_offsets[rows_before - 1];
    for (size_t row = 0; row < num_rows; ++row) {
        dst_offsets[rows_before + row] = src_offsets[row] + base_offset;
    }

    // Last new offset minus the previous end = number of elements appended by this batch.
    return fill_column(address, nested_column, nested_type,
                       dst_offsets[rows_before + num_rows - 1] - base_offset);
}
229
230
// Appends `num_rows` maps to `doris_column`.
//
// One meta slot is consumed for the int64 array of cumulative end offsets (one per row);
// they are rebased onto the offsets already in the column. The key column and then the
// value column are filled recursively, each with the number of entries the new rows contain.
Status JniDataBridge::_fill_map_column(TableMetaAddress& address, MutableColumnPtr& doris_column,
                                       const DataTypePtr& data_type, size_t num_rows) {
    auto& map = assert_cast<ColumnMap&>(*doris_column);
    // Keep the stripped type alive in a local so the references below stay valid, and use
    // the checked assert_cast like the other fillers instead of reinterpret_cast.
    const DataTypePtr map_data_type = remove_nullable(data_type);
    const auto* map_type = assert_cast<const DataTypeMap*>(map_data_type.get());
    const DataTypePtr& key_type = map_type->get_key_type();
    const DataTypePtr& value_type = map_type->get_value_type();
    ColumnPtr& key_column = map.get_keys_ptr();
    ColumnPtr& value_column = map.get_values_ptr();
    ColumnArray::Offsets64& map_offsets = map.get_offsets();

    int64_t* offsets = reinterpret_cast<int64_t*>(address.next_meta_as_ptr());
    const size_t origin_size = map_offsets.size();
    map_offsets.resize(origin_size + num_rows);
    // NOTE(review): reads index -1 when the column was empty; presumably the offsets
    // container yields 0 there — confirm against its definition.
    const size_t start_offset = map_offsets[origin_size - 1];
    for (size_t i = 0; i < num_rows; ++i) {
        map_offsets[origin_size + i] = offsets[i] + start_offset;
    }

    // Keys and values have the same length: the number of entries added by this batch.
    const size_t num_entries = map_offsets[origin_size + num_rows - 1] - start_offset;
    RETURN_IF_ERROR(fill_column(address, key_column, key_type, num_entries));
    RETURN_IF_ERROR(fill_column(address, value_column, value_type, num_entries));
    return Status::OK();
}
256
257
// Appends `num_rows` structs to `doris_column` by filling each field column in declaration
// order; every field consumes its own metadata from `address` through fill_column().
// Returns the first error reported for a field, or OK.
Status JniDataBridge::_fill_struct_column(TableMetaAddress& address, MutableColumnPtr& doris_column,
                                          const DataTypePtr& data_type, size_t num_rows) {
    auto& doris_struct = assert_cast<ColumnStruct&>(*doris_column);
    // Keep the stripped type alive in a local and use the checked assert_cast, matching
    // _fill_array_column and _fill_column_meta, instead of reinterpret_cast.
    const DataTypePtr struct_data_type = remove_nullable(data_type);
    const auto* doris_struct_type = assert_cast<const DataTypeStruct*>(struct_data_type.get());
    const size_t field_count = doris_struct.tuple_size();
    for (size_t i = 0; i < field_count; ++i) {
        ColumnPtr& struct_field = doris_struct.get_column_ptr(i);
        const DataTypePtr& field_type = doris_struct_type->get_element(i);
        RETURN_IF_ERROR(fill_column(address, struct_field, field_type, num_rows));
    }
    return Status::OK();
}
269
270
0
std::string JniDataBridge::get_jni_type(const DataTypePtr& data_type) {
271
0
    DataTypePtr type = remove_nullable(data_type);
272
0
    std::ostringstream buffer;
273
0
    switch (type->get_primitive_type()) {
274
0
    case TYPE_BOOLEAN:
275
0
        return "boolean";
276
0
    case TYPE_TINYINT:
277
0
        return "tinyint";
278
0
    case TYPE_SMALLINT:
279
0
        return "smallint";
280
0
    case TYPE_INT:
281
0
        return "int";
282
0
    case TYPE_BIGINT:
283
0
        return "bigint";
284
0
    case TYPE_LARGEINT:
285
0
        return "largeint";
286
0
    case TYPE_FLOAT:
287
0
        return "float";
288
0
    case TYPE_DOUBLE:
289
0
        return "double";
290
0
    case TYPE_IPV4:
291
0
        return "ipv4";
292
0
    case TYPE_IPV6:
293
0
        return "ipv6";
294
0
    case TYPE_VARCHAR:
295
0
        [[fallthrough]];
296
0
    case TYPE_CHAR:
297
0
        [[fallthrough]];
298
0
    case TYPE_STRING:
299
0
        return "string";
300
0
    case TYPE_DATE:
301
0
        return "datev1";
302
0
    case TYPE_DATEV2:
303
0
        return "datev2";
304
0
    case TYPE_DATETIME:
305
0
        return "datetimev1";
306
0
    case TYPE_DATETIMEV2:
307
0
        [[fallthrough]];
308
0
    case TYPE_TIMEV2: {
309
0
        buffer << "datetimev2(" << type->get_scale() << ")";
310
0
        return buffer.str();
311
0
    }
312
0
    case TYPE_TIMESTAMPTZ: {
313
0
        buffer << "timestamptz(" << type->get_scale() << ")";
314
0
        return buffer.str();
315
0
    }
316
0
    case TYPE_BINARY:
317
0
        return "binary";
318
0
    case TYPE_DECIMALV2: {
319
0
        buffer << "decimalv2(" << DecimalV2Value::PRECISION << "," << DecimalV2Value::SCALE << ")";
320
0
        return buffer.str();
321
0
    }
322
0
    case TYPE_DECIMAL32: {
323
0
        buffer << "decimal32(" << type->get_precision() << "," << type->get_scale() << ")";
324
0
        return buffer.str();
325
0
    }
326
0
    case TYPE_DECIMAL64: {
327
0
        buffer << "decimal64(" << type->get_precision() << "," << type->get_scale() << ")";
328
0
        return buffer.str();
329
0
    }
330
0
    case TYPE_DECIMAL128I: {
331
0
        buffer << "decimal128(" << type->get_precision() << "," << type->get_scale() << ")";
332
0
        return buffer.str();
333
0
    }
334
0
    case TYPE_STRUCT: {
335
0
        const DataTypeStruct* struct_type = reinterpret_cast<const DataTypeStruct*>(type.get());
336
0
        buffer << "struct<";
337
0
        for (int i = 0; i < struct_type->get_elements().size(); ++i) {
338
0
            if (i != 0) {
339
0
                buffer << ",";
340
0
            }
341
0
            buffer << struct_type->get_element_names()[i] << ":"
342
0
                   << get_jni_type(struct_type->get_element(i));
343
0
        }
344
0
        buffer << ">";
345
0
        return buffer.str();
346
0
    }
347
0
    case TYPE_ARRAY: {
348
0
        const DataTypeArray* array_type = reinterpret_cast<const DataTypeArray*>(type.get());
349
0
        buffer << "array<" << get_jni_type(array_type->get_nested_type()) << ">";
350
0
        return buffer.str();
351
0
    }
352
0
    case TYPE_MAP: {
353
0
        const DataTypeMap* map_type = reinterpret_cast<const DataTypeMap*>(type.get());
354
0
        buffer << "map<" << get_jni_type(map_type->get_key_type()) << ","
355
0
               << get_jni_type(map_type->get_value_type()) << ">";
356
0
        return buffer.str();
357
0
    }
358
0
    case TYPE_VARBINARY:
359
0
        return "varbinary";
360
    // bitmap, hll, quantile_state, jsonb are transferred as strings via JNI
361
0
    case TYPE_BITMAP:
362
0
        [[fallthrough]];
363
0
    case TYPE_HLL:
364
0
        [[fallthrough]];
365
0
    case TYPE_QUANTILE_STATE:
366
0
        [[fallthrough]];
367
0
    case TYPE_JSONB:
368
0
        return "string";
369
0
    default:
370
0
        return "unsupported";
371
0
    }
372
0
}
373
374
0
std::string JniDataBridge::get_jni_type_with_different_string(const DataTypePtr& data_type) {
375
0
    DataTypePtr type = remove_nullable(data_type);
376
0
    std::ostringstream buffer;
377
0
    switch (data_type->get_primitive_type()) {
378
0
    case TYPE_BOOLEAN:
379
0
        return "boolean";
380
0
    case TYPE_TINYINT:
381
0
        return "tinyint";
382
0
    case TYPE_SMALLINT:
383
0
        return "smallint";
384
0
    case TYPE_INT:
385
0
        return "int";
386
0
    case TYPE_BIGINT:
387
0
        return "bigint";
388
0
    case TYPE_LARGEINT:
389
0
        return "largeint";
390
0
    case TYPE_FLOAT:
391
0
        return "float";
392
0
    case TYPE_DOUBLE:
393
0
        return "double";
394
0
    case TYPE_IPV4:
395
0
        return "ipv4";
396
0
    case TYPE_IPV6:
397
0
        return "ipv6";
398
0
    case TYPE_VARCHAR: {
399
0
        buffer << "varchar("
400
0
               << assert_cast<const DataTypeString*>(remove_nullable(data_type).get())->len()
401
0
               << ")";
402
0
        return buffer.str();
403
0
    }
404
0
    case TYPE_DATE:
405
0
        return "datev1";
406
0
    case TYPE_DATEV2:
407
0
        return "datev2";
408
0
    case TYPE_DATETIME:
409
0
        return "datetimev1";
410
0
    case TYPE_DATETIMEV2:
411
0
        [[fallthrough]];
412
0
    case TYPE_TIMEV2: {
413
0
        buffer << "datetimev2(" << data_type->get_scale() << ")";
414
0
        return buffer.str();
415
0
    }
416
0
    case TYPE_TIMESTAMPTZ: {
417
0
        buffer << "timestamptz(" << data_type->get_scale() << ")";
418
0
        return buffer.str();
419
0
    }
420
0
    case TYPE_BINARY:
421
0
        return "binary";
422
0
    case TYPE_CHAR: {
423
0
        buffer << "char("
424
0
               << assert_cast<const DataTypeString*>(remove_nullable(data_type).get())->len()
425
0
               << ")";
426
0
        return buffer.str();
427
0
    }
428
0
    case TYPE_STRING:
429
0
        return "string";
430
0
    case TYPE_VARBINARY:
431
0
        buffer << "varbinary("
432
0
               << assert_cast<const DataTypeVarbinary*>(remove_nullable(data_type).get())->len()
433
0
               << ")";
434
0
        return buffer.str();
435
0
    case TYPE_DECIMALV2: {
436
0
        buffer << "decimalv2(" << DecimalV2Value::PRECISION << "," << DecimalV2Value::SCALE << ")";
437
0
        return buffer.str();
438
0
    }
439
0
    case TYPE_DECIMAL32: {
440
0
        buffer << "decimal32(" << data_type->get_precision() << "," << data_type->get_scale()
441
0
               << ")";
442
0
        return buffer.str();
443
0
    }
444
0
    case TYPE_DECIMAL64: {
445
0
        buffer << "decimal64(" << data_type->get_precision() << "," << data_type->get_scale()
446
0
               << ")";
447
0
        return buffer.str();
448
0
    }
449
0
    case TYPE_DECIMAL128I: {
450
0
        buffer << "decimal128(" << data_type->get_precision() << "," << data_type->get_scale()
451
0
               << ")";
452
0
        return buffer.str();
453
0
    }
454
0
    case TYPE_STRUCT: {
455
0
        const auto* type_struct =
456
0
                assert_cast<const DataTypeStruct*>(remove_nullable(data_type).get());
457
0
        buffer << "struct<";
458
0
        for (int i = 0; i < type_struct->get_elements().size(); ++i) {
459
0
            if (i != 0) {
460
0
                buffer << ",";
461
0
            }
462
0
            buffer << type_struct->get_element_name(i) << ":"
463
0
                   << get_jni_type_with_different_string(type_struct->get_element(i));
464
0
        }
465
0
        buffer << ">";
466
0
        return buffer.str();
467
0
    }
468
0
    case TYPE_ARRAY: {
469
0
        const auto* type_arr = assert_cast<const DataTypeArray*>(remove_nullable(data_type).get());
470
0
        buffer << "array<" << get_jni_type_with_different_string(type_arr->get_nested_type())
471
0
               << ">";
472
0
        return buffer.str();
473
0
    }
474
0
    case TYPE_MAP: {
475
0
        const auto* type_map = assert_cast<const DataTypeMap*>(remove_nullable(data_type).get());
476
0
        buffer << "map<" << get_jni_type_with_different_string(type_map->get_key_type()) << ","
477
0
               << get_jni_type_with_different_string(type_map->get_value_type()) << ">";
478
0
        return buffer.str();
479
0
    }
480
    // bitmap, hll, quantile_state, jsonb are transferred as strings via JNI
481
0
    case TYPE_BITMAP:
482
0
        [[fallthrough]];
483
0
    case TYPE_HLL:
484
0
        [[fallthrough]];
485
0
    case TYPE_QUANTILE_STATE:
486
0
        [[fallthrough]];
487
0
    case TYPE_JSONB:
488
0
        return "string";
489
0
    default:
490
0
        return "unsupported";
491
0
    }
492
0
}
493
494
// Appends the metadata describing `doris_column` to `meta_data`, as consumed by the Java side.
//
// Slots are pushed in this order:
//   1. const flag: 1 when the column is a ColumnConst (its data column is described), else 0
//   2. null map address, or 0 when the column is not nullable
//   3. type-specific addresses: the data address for fixed-length types; offsets then chars
//      for strings; offsets followed by the nested column(s) for array and map; each field
//      in order for struct; the data address for varbinary
//
// Returns InternalError for a primitive type that has no mapping.
Status JniDataBridge::_fill_column_meta(const ColumnPtr& doris_column, const DataTypePtr& data_type,
                                        std::vector<long>& meta_data) {
    auto logical_type = data_type->get_primitive_type();
    const IColumn* column = nullptr;
    // insert const flag
    if (is_column_const(*doris_column)) {
        meta_data.emplace_back(1L);
        const auto& const_column = assert_cast<const ColumnConst&>(*doris_column);
        column = &(const_column.get_data_column());
    } else {
        meta_data.emplace_back(0L);
        column = &(*doris_column);
    }

    // insert null map address
    const IColumn* data_column = nullptr;
    if (column->is_nullable()) {
        const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
        data_column = &(nullable_column.get_nested_column());
        const auto& null_map = nullable_column.get_null_map_data();
        meta_data.emplace_back(reinterpret_cast<long>(null_map.data()));
    } else {
        meta_data.emplace_back(0);
        data_column = column;
    }
    switch (logical_type) {
#define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE)                                          \
    case TYPE_INDEX: {                                                                       \
        meta_data.emplace_back(_get_fixed_length_column_address<COLUMN_TYPE>(*data_column)); \
        break;                                                                               \
    }
        FOR_FIXED_LENGTH_TYPES(DISPATCH)
#undef DISPATCH
    case PrimitiveType::TYPE_STRING:
        [[fallthrough]];
    case PrimitiveType::TYPE_CHAR:
        [[fallthrough]];
    case PrimitiveType::TYPE_VARCHAR: {
        const auto& string_column = assert_cast<const ColumnString&>(*data_column);
        // insert offsets, then chars
        meta_data.emplace_back(reinterpret_cast<long>(string_column.get_offsets().data()));
        meta_data.emplace_back(reinterpret_cast<long>(string_column.get_chars().data()));
        break;
    }
    case PrimitiveType::TYPE_ARRAY: {
        const auto& array_column = assert_cast<const ColumnArray&>(*data_column);
        const auto& element_column = array_column.get_data_ptr();
        meta_data.emplace_back(reinterpret_cast<long>(array_column.get_offsets().data()));
        const auto& element_type =
                (assert_cast<const DataTypeArray*>(remove_nullable(data_type).get()))
                        ->get_nested_type();
        RETURN_IF_ERROR(_fill_column_meta(element_column, element_type, meta_data));
        break;
    }
    case PrimitiveType::TYPE_STRUCT: {
        const auto& doris_struct = assert_cast<const ColumnStruct&>(*data_column);
        const auto* doris_struct_type =
                assert_cast<const DataTypeStruct*>(remove_nullable(data_type).get());
        const size_t field_count = doris_struct.tuple_size();
        for (size_t i = 0; i < field_count; ++i) {
            const auto& struct_field = doris_struct.get_column_ptr(i);
            const auto& field_type = doris_struct_type->get_element(i);
            RETURN_IF_ERROR(_fill_column_meta(struct_field, field_type, meta_data));
        }
        break;
    }
    case PrimitiveType::TYPE_MAP: {
        const auto& map = assert_cast<const ColumnMap&>(*data_column);
        const auto* map_type = assert_cast<const DataTypeMap*>(remove_nullable(data_type).get());
        const auto& key_type = map_type->get_key_type();
        const auto& value_type = map_type->get_value_type();
        const auto& key_column = map.get_keys_ptr();
        const auto& value_column = map.get_values_ptr();
        meta_data.emplace_back(reinterpret_cast<long>(map.get_offsets().data()));
        RETURN_IF_ERROR(_fill_column_meta(key_column, key_type, meta_data));
        RETURN_IF_ERROR(_fill_column_meta(value_column, value_type, meta_data));
        break;
    }
    case PrimitiveType::TYPE_VARBINARY: {
        const auto& varbinary_col = assert_cast<const ColumnVarbinary&>(*data_column);
        meta_data.emplace_back(reinterpret_cast<long>(varbinary_col.get_data().data()));
        break;
    }
    default:
        return Status::InternalError("Unsupported type: {}", data_type->get_name());
    }
    return Status::OK();
}
582
583
0
// Convenience overload: exports every column and every row of `block` by forwarding to the
// overload that takes an explicit row count and column list.
Status JniDataBridge::to_java_table(Block* block, std::unique_ptr<long[]>& meta) {
    ColumnNumbers all_columns;
    const size_t column_count = block->columns();
    for (size_t position = 0; position != column_count; ++position) {
        all_columns.emplace_back(position);
    }
    return to_java_table(block, block->rows(), all_columns, meta);
}
590
591
// Builds the meta array that describes the columns of `block` listed in `arguments`.
//
// The array starts with `num_rows`, followed by the slots that _fill_column_meta() emits
// for each selected column in order. On success `meta` owns a freshly allocated copy of
// the array; on failure `meta` is left untouched and the error from _fill_column_meta()
// is returned.
Status JniDataBridge::to_java_table(Block* block, size_t num_rows, const ColumnNumbers& arguments,
                                    std::unique_ptr<long[]>& meta) {
    std::vector<long> meta_data;
    // insert number of rows
    meta_data.emplace_back(static_cast<long>(num_rows));
    for (size_t i : arguments) {
        auto& column_with_type_and_name = block->get_by_position(i);
        RETURN_IF_ERROR(_fill_column_meta(column_with_type_and_name.column,
                                          column_with_type_and_name.type, meta_data));
    }

    meta = std::make_unique<long[]>(meta_data.size());
    // Size the copy from the element type rather than assuming a `long` is 8 bytes.
    memcpy(meta.get(), meta_data.data(), meta_data.size() * sizeof(long));
    return Status::OK();
}
606
607
std::pair<std::string, std::string> JniDataBridge::parse_table_schema(
608
0
        Block* block, const ColumnNumbers& arguments, bool ignore_column_name) {
609
    // prepare table schema
610
0
    std::ostringstream required_fields;
611
0
    std::ostringstream columns_types;
612
0
    for (int i = 0; i < arguments.size(); ++i) {
613
0
        std::string type = JniDataBridge::get_jni_type(block->get_by_position(arguments[i]).type);
614
0
        if (i == 0) {
615
0
            if (ignore_column_name) {
616
0
                required_fields << "_col_" << arguments[i];
617
0
            } else {
618
0
                required_fields << block->get_by_position(arguments[i]).name;
619
0
            }
620
0
            columns_types << type;
621
0
        } else {
622
0
            if (ignore_column_name) {
623
0
                required_fields << ","
624
0
                                << "_col_" << arguments[i];
625
0
            } else {
626
0
                required_fields << "," << block->get_by_position(arguments[i]).name;
627
0
            }
628
0
            columns_types << "#" << type;
629
0
        }
630
0
    }
631
0
    return std::make_pair(required_fields.str(), columns_types.str());
632
0
}
633
634
0
// Convenience overload: describes every column of `block`, always using the synthetic
// "_col_<position>" names (ignore_column_name is passed as true).
std::pair<std::string, std::string> JniDataBridge::parse_table_schema(Block* block) {
    ColumnNumbers all_columns;
    const size_t column_count = block->columns();
    for (size_t position = 0; position != column_count; ++position) {
        all_columns.emplace_back(position);
    }
    return parse_table_schema(block, all_columns, true);
}
641
642
} // namespace doris