Coverage Report

Created: 2026-06-12 21:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/jni/jni_data_bridge.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "jni_data_bridge.h"
19
20
#include <glog/logging.h>
21
22
#include <sstream>
23
#include <variant>
24
25
#include "core/block/block.h"
26
#include "core/column/column_array.h"
27
#include "core/column/column_map.h"
28
#include "core/column/column_nullable.h"
29
#include "core/column/column_string.h"
30
#include "core/column/column_struct.h"
31
#include "core/column/column_varbinary.h"
32
#include "core/data_type/data_type_array.h"
33
#include "core/data_type/data_type_map.h"
34
#include "core/data_type/data_type_nullable.h"
35
#include "core/data_type/data_type_struct.h"
36
#include "core/data_type/data_type_varbinary.h"
37
#include "core/data_type/define_primitive_type.h"
38
#include "core/data_type/primitive_type.h"
39
#include "core/types.h"
40
#include "core/value/decimalv2_value.h"
41
42
namespace doris {
43
44
// X-macro listing the fixed-width primitive types handled by this bridge.
// Each entry expands to M(primitive type tag, column class, C++ element type).
// Functions in this file define a local DISPATCH macro and pass it as M to
// generate one `case` label per entry inside a switch on the primitive type.
#define FOR_FIXED_LENGTH_TYPES(M)                                  \
45
0
    M(PrimitiveType::TYPE_TINYINT, ColumnInt8, Int8)               \
46
0
    M(PrimitiveType::TYPE_BOOLEAN, ColumnUInt8, UInt8)             \
47
0
    M(PrimitiveType::TYPE_SMALLINT, ColumnInt16, Int16)            \
48
0
    M(PrimitiveType::TYPE_INT, ColumnInt32, Int32)                 \
49
0
    M(PrimitiveType::TYPE_BIGINT, ColumnInt64, Int64)              \
50
0
    M(PrimitiveType::TYPE_LARGEINT, ColumnInt128, Int128)          \
51
0
    M(PrimitiveType::TYPE_FLOAT, ColumnFloat32, Float32)           \
52
0
    M(PrimitiveType::TYPE_DOUBLE, ColumnFloat64, Float64)          \
53
0
    M(PrimitiveType::TYPE_DECIMALV2, ColumnDecimal128V2, Int128)   \
54
0
    M(PrimitiveType::TYPE_DECIMAL128I, ColumnDecimal128V3, Int128) \
55
0
    M(PrimitiveType::TYPE_DECIMAL32, ColumnDecimal32, Int32)       \
56
0
    M(PrimitiveType::TYPE_DECIMAL64, ColumnDecimal64, Int64)       \
57
0
    M(PrimitiveType::TYPE_DATE, ColumnDate, Int64)                 \
58
0
    M(PrimitiveType::TYPE_DATEV2, ColumnDateV2, UInt32)            \
59
0
    M(PrimitiveType::TYPE_DATETIME, ColumnDateTime, Int64)         \
60
0
    M(PrimitiveType::TYPE_DATETIMEV2, ColumnDateTimeV2, UInt64)    \
61
0
    M(PrimitiveType::TYPE_TIMESTAMPTZ, ColumnTimeStampTz, UInt64)  \
62
0
    M(PrimitiveType::TYPE_IPV4, ColumnIPv4, IPv4)                  \
63
0
    M(PrimitiveType::TYPE_IPV6, ColumnIPv6, IPv6)
64
65
0
// Fills the columns of `block` selected by `arguments` from the table meta found at
// `table_address`.
//
// The first meta entry is read as the row count; every selected column is then filled
// with that many rows via fill_column(). Before filling, a column slot that is still
// null gets a freshly created (optionally nullable) column, and a const column is
// expanded to a full column.
//
// Returns InternalError when `table_address` is 0, otherwise the first error reported
// by fill_column(), or OK.
Status JniDataBridge::fill_block(Block* block, const ColumnNumbers& arguments, long table_address) {
    if (table_address == 0) {
        return Status::InternalError("table_address is 0");
    }

    TableMetaAddress table_meta(table_address);
    long num_rows = table_meta.next_meta_as_long();

    for (size_t pos : arguments) {
        if (block->get_by_position(pos).column.get() == nullptr) {
            // No column materialized yet: create an empty one matching the declared type.
            auto result_type = block->get_data_type(pos);
            const bool wrap_nullable = result_type->is_nullable();
            ColumnUInt8::MutablePtr null_flags = nullptr;
            if (wrap_nullable) {
                result_type = remove_nullable(result_type);
                null_flags = ColumnUInt8::create();
            }
            auto nested = result_type->create_column();
            if (!wrap_nullable) {
                block->replace_by_position(pos, std::move(nested));
            } else {
                block->replace_by_position(
                        pos, ColumnNullable::create(std::move(nested), std::move(null_flags)));
            }
        } else if (is_column_const(*(block->get_by_position(pos).column))) {
            // Const columns are expanded to full columns before rows are appended.
            auto full_column =
                    block->get_by_position(pos).column->convert_to_full_column_if_const();
            const bool type_nullable = block->get_by_position(pos).type->is_nullable();
            block->replace_by_position(pos,
                                       type_nullable ? make_nullable(full_column) : full_column);
        }

        auto& entry = block->get_by_position(pos);
        RETURN_IF_ERROR(fill_column(table_meta, entry.column, entry.type, num_rows));
    }
    return Status::OK();
}
99
100
// Appends `num_rows` values to `doris_column`, reading addresses from `address`.
//
// Meta entries are consumed in order: first the null-map address, then the
// type-specific address(es). For a nullable column the null map bytes are appended to
// the column's null map and the nested column receives the data. Nested types
// (array/map/struct) are handled by the _fill_* helpers.
//
// Returns InternalError when the null-map address is null, InvalidArgument for a
// primitive type with no handler, otherwise the status of the type-specific fill.
// `doris_column` is handed back to the caller on every path after it has been mutated.
Status JniDataBridge::fill_column(TableMetaAddress& address, ColumnPtr& doris_column,
                                  const DataTypePtr& data_type, size_t num_rows) {
    const auto primitive_type = data_type->get_primitive_type();
    void* java_null_map = address.next_meta_as_ptr();
    if (java_null_map == nullptr) {
        // org.apache.doris.common.jni.vec.ColumnType.Type#UNSUPPORTED will set column address as 0
        return Status::InternalError("Unsupported type {} in java side", data_type->get_name());
    }

    auto owned_column = IColumn::mutate(std::move(doris_column));
    MutableColumnPtr data_column;
    auto* nullable_column = check_and_get_column<ColumnNullable>(owned_column.get());
    if (nullable_column == nullptr) {
        data_column = owned_column->get_ptr();
    } else {
        data_column = nullable_column->get_nested_column_ptr();
        NullMap& null_map = nullable_column->get_null_map_data();
        const size_t previous_rows = null_map.size();
        null_map.resize(previous_rows + num_rows);
        memcpy(null_map.data() + previous_rows, static_cast<bool*>(java_null_map), num_rows);
    }

    Status result = Status::OK();
    switch (primitive_type) {
// One case per fixed-width type: the next meta entry is the address of the value array.
#define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE)                                             \
    case TYPE_INDEX: {                                                                          \
        auto* data = reinterpret_cast<CPP_TYPE*>(address.next_meta_as_ptr());                   \
        result = _fill_fixed_length_column<COLUMN_TYPE, CPP_TYPE>(data_column, data, num_rows); \
        break;                                                                                  \
    }
        FOR_FIXED_LENGTH_TYPES(DISPATCH)
#undef DISPATCH
    case PrimitiveType::TYPE_STRING:
    case PrimitiveType::TYPE_CHAR:
    case PrimitiveType::TYPE_VARCHAR:
        result = _fill_string_column(address, data_column, num_rows);
        break;
    case PrimitiveType::TYPE_ARRAY:
        result = _fill_array_column(address, data_column, data_type, num_rows);
        break;
    case PrimitiveType::TYPE_MAP:
        result = _fill_map_column(address, data_column, data_type, num_rows);
        break;
    case PrimitiveType::TYPE_STRUCT:
        result = _fill_struct_column(address, data_column, data_type, num_rows);
        break;
    case PrimitiveType::TYPE_VARBINARY:
        result = _fill_varbinary_column(address, data_column, num_rows);
        break;
    default:
        result = Status::InvalidArgument("Unsupported type {} in jni scanner",
                                         data_type->get_name());
        break;
    }

    // Return ownership to the caller regardless of the outcome.
    doris_column = std::move(owned_column);
    return result;
}
157
158
// Appends `num_rows` varbinary values to `doris_column`.
//
// The next meta entry points at an array of 16-byte records, one per row, laid out as
// [len: 8 bytes][addr: 8 bytes]. A record whose length is <= 0 produces a default
// value; otherwise `len` bytes are copied from `addr`.
Status JniDataBridge::_fill_varbinary_column(TableMetaAddress& address,
                                             MutableColumnPtr& doris_column, size_t num_rows) {
    auto* records = reinterpret_cast<char*>(address.next_meta_as_ptr());
    auto& target = assert_cast<ColumnVarbinary&>(*doris_column);

    for (size_t row = 0; row < num_rows; ++row) {
        const char* record = records + 16 * row;

        // memcpy instead of a typed load, so no alignment assumption is made on the buffer.
        int64_t length = 0;
        memcpy(&length, record, sizeof(length));
        if (length <= 0) {
            target.insert_default();
            continue;
        }

        uint64_t raw_address = 0;
        memcpy(&raw_address, record + 8, sizeof(raw_address));
        target.insert_data(reinterpret_cast<const char*>(raw_address),
                           static_cast<size_t>(length));
    }
    return Status::OK();
}
179
180
// Appends `num_rows` strings to `doris_column`.
//
// Two meta entries are consumed: the address of an int offsets array and the address
// of the character data. offsets[i] is used as the end position of row i within the
// character data, so offsets[num_rows - 1] is the total number of bytes to copy.
Status JniDataBridge::_fill_string_column(TableMetaAddress& address, MutableColumnPtr& doris_column,
                                          size_t num_rows) {
    auto& target = static_cast<ColumnString&>(*doris_column);
    ColumnString::Chars& dst_chars = target.get_chars();
    ColumnString::Offsets& dst_offsets = target.get_offsets();

    // Both entries must be consumed even for an empty batch, because every call to
    // next_meta_as_ptr() advances the meta cursor.
    int* src_offsets = reinterpret_cast<int*>(address.next_meta_as_ptr());
    char* src_chars = reinterpret_cast<char*>(address.next_meta_as_ptr());

    // Guard against reading src_offsets[num_rows - 1] out of bounds.
    if (num_rows == 0) {
        return Status::OK();
    }

    const size_t chars_before = dst_chars.size();
    dst_chars.resize(chars_before + src_offsets[num_rows - 1]);
    memcpy(dst_chars.data() + chars_before, src_chars, src_offsets[num_rows - 1]);

    const size_t rows_before = dst_offsets.size();
    // Offset of the last existing row; incoming offsets are rebased onto it.
    const size_t base_offset = dst_offsets[rows_before - 1];
    dst_offsets.resize(rows_before + num_rows);
    for (size_t row = 0; row != num_rows; ++row) {
        dst_offsets[rows_before + row] = static_cast<unsigned int>(src_offsets[row] + base_offset);
    }
    return Status::OK();
}
208
209
// Appends `num_rows` arrays to `doris_column`.
//
// The next meta entry is the address of an int64 offsets array; each incoming offset is
// rebased onto the last offset already stored in the column. The element column is then
// filled recursively with as many elements as the newly appended offsets span.
Status JniDataBridge::_fill_array_column(TableMetaAddress& address, MutableColumnPtr& doris_column,
                                         const DataTypePtr& data_type, size_t num_rows) {
    auto& array_column = static_cast<ColumnArray&>(*doris_column);
    ColumnPtr& elements = array_column.get_data_ptr();

    const DataTypePtr unwrapped_type = remove_nullable(data_type);
    const DataTypePtr& element_type =
            assert_cast<const DataTypeArray*>(unwrapped_type.get())->get_nested_type();

    ColumnArray::Offsets64& dst_offsets = array_column.get_offsets();

    int64_t* src_offsets = reinterpret_cast<int64_t*>(address.next_meta_as_ptr());
    const size_t rows_before = dst_offsets.size();
    dst_offsets.resize(rows_before + num_rows);
    const size_t base_offset = dst_offsets[rows_before - 1];
    for (size_t row = 0; row != num_rows; ++row) {
        dst_offsets[rows_before + row] = src_offsets[row] + base_offset;
    }

    const size_t last_index = rows_before + num_rows - 1;
    return fill_column(address, elements, element_type, dst_offsets[last_index] - base_offset);
}
228
229
// Appends `num_rows` maps to `doris_column`.
//
// The next meta entry is the address of an int64 offsets array; each incoming offset is
// rebased onto the last offset already stored in the column. The key column and then
// the value column are filled recursively, each with the number of entries spanned by
// the newly appended offsets.
//
// Returns the first error reported while filling keys or values, or OK.
Status JniDataBridge::_fill_map_column(TableMetaAddress& address, MutableColumnPtr& doris_column,
                                       const DataTypePtr& data_type, size_t num_rows) {
    auto& map = assert_cast<ColumnMap&>(*doris_column);

    // Keep the unwrapped type alive for as long as the key/value type references are used.
    const DataTypePtr unwrapped_type = remove_nullable(data_type);
    // assert_cast (not reinterpret_cast) so the downcast is a proper hierarchy cast,
    // matching the array handling in this file.
    const auto* map_type = assert_cast<const DataTypeMap*>(unwrapped_type.get());
    const DataTypePtr& key_type = map_type->get_key_type();
    const DataTypePtr& value_type = map_type->get_value_type();

    ColumnPtr& key_column = map.get_keys_ptr();
    ColumnPtr& value_column = map.get_values_ptr();
    ColumnArray::Offsets64& map_offsets = map.get_offsets();

    int64_t* offsets = reinterpret_cast<int64_t*>(address.next_meta_as_ptr());
    const size_t origin_size = map_offsets.size();
    map_offsets.resize(origin_size + num_rows);
    const size_t start_offset = map_offsets[origin_size - 1];
    for (size_t i = 0; i < num_rows; ++i) {
        map_offsets[origin_size + i] = offsets[i] + start_offset;
    }

    // Keys and values share the same entry count; compute it once.
    const size_t num_entries = map_offsets[origin_size + num_rows - 1] - start_offset;
    RETURN_IF_ERROR(fill_column(address, key_column, key_type, num_entries));
    RETURN_IF_ERROR(fill_column(address, value_column, value_type, num_entries));
    return Status::OK();
}
255
256
// Appends `num_rows` struct values to `doris_column` by filling every field column, in
// field order, with `num_rows` rows each.
//
// Returns the first error reported by fill_column() for a field, or OK.
Status JniDataBridge::_fill_struct_column(TableMetaAddress& address, MutableColumnPtr& doris_column,
                                          const DataTypePtr& data_type, size_t num_rows) {
    auto& doris_struct = assert_cast<ColumnStruct&>(*doris_column);

    // Keep the unwrapped type alive while field type references are in use.
    const DataTypePtr unwrapped_type = remove_nullable(data_type);
    // assert_cast (not reinterpret_cast) so the downcast is a proper hierarchy cast.
    const auto* doris_struct_type = assert_cast<const DataTypeStruct*>(unwrapped_type.get());

    const size_t field_count = doris_struct.tuple_size();
    for (size_t i = 0; i < field_count; ++i) {
        ColumnPtr& struct_field = doris_struct.get_column_ptr(i);
        const DataTypePtr& field_type = doris_struct_type->get_element(i);
        RETURN_IF_ERROR(fill_column(address, struct_field, field_type, num_rows));
    }
    return Status::OK();
}
268
269
0
std::string JniDataBridge::get_jni_type(const DataTypePtr& data_type) {
270
0
    DataTypePtr type = remove_nullable(data_type);
271
0
    std::ostringstream buffer;
272
0
    switch (type->get_primitive_type()) {
273
0
    case TYPE_BOOLEAN:
274
0
        return "boolean";
275
0
    case TYPE_TINYINT:
276
0
        return "tinyint";
277
0
    case TYPE_SMALLINT:
278
0
        return "smallint";
279
0
    case TYPE_INT:
280
0
        return "int";
281
0
    case TYPE_BIGINT:
282
0
        return "bigint";
283
0
    case TYPE_LARGEINT:
284
0
        return "largeint";
285
0
    case TYPE_FLOAT:
286
0
        return "float";
287
0
    case TYPE_DOUBLE:
288
0
        return "double";
289
0
    case TYPE_IPV4:
290
0
        return "ipv4";
291
0
    case TYPE_IPV6:
292
0
        return "ipv6";
293
0
    case TYPE_VARCHAR:
294
0
        [[fallthrough]];
295
0
    case TYPE_CHAR:
296
0
        [[fallthrough]];
297
0
    case TYPE_STRING:
298
0
        return "string";
299
0
    case TYPE_DATE:
300
0
        return "datev1";
301
0
    case TYPE_DATEV2:
302
0
        return "datev2";
303
0
    case TYPE_DATETIME:
304
0
        return "datetimev1";
305
0
    case TYPE_DATETIMEV2:
306
0
        [[fallthrough]];
307
0
    case TYPE_TIMEV2: {
308
0
        buffer << "datetimev2(" << type->get_scale() << ")";
309
0
        return buffer.str();
310
0
    }
311
0
    case TYPE_TIMESTAMPTZ: {
312
0
        buffer << "timestamptz(" << type->get_scale() << ")";
313
0
        return buffer.str();
314
0
    }
315
0
    case TYPE_BINARY:
316
0
        return "binary";
317
0
    case TYPE_DECIMALV2: {
318
0
        buffer << "decimalv2(" << DecimalV2Value::PRECISION << "," << DecimalV2Value::SCALE << ")";
319
0
        return buffer.str();
320
0
    }
321
0
    case TYPE_DECIMAL32: {
322
0
        buffer << "decimal32(" << type->get_precision() << "," << type->get_scale() << ")";
323
0
        return buffer.str();
324
0
    }
325
0
    case TYPE_DECIMAL64: {
326
0
        buffer << "decimal64(" << type->get_precision() << "," << type->get_scale() << ")";
327
0
        return buffer.str();
328
0
    }
329
0
    case TYPE_DECIMAL128I: {
330
0
        buffer << "decimal128(" << type->get_precision() << "," << type->get_scale() << ")";
331
0
        return buffer.str();
332
0
    }
333
0
    case TYPE_STRUCT: {
334
0
        const DataTypeStruct* struct_type = reinterpret_cast<const DataTypeStruct*>(type.get());
335
0
        buffer << "struct<";
336
0
        for (int i = 0; i < struct_type->get_elements().size(); ++i) {
337
0
            if (i != 0) {
338
0
                buffer << ",";
339
0
            }
340
0
            buffer << struct_type->get_element_names()[i] << ":"
341
0
                   << get_jni_type(struct_type->get_element(i));
342
0
        }
343
0
        buffer << ">";
344
0
        return buffer.str();
345
0
    }
346
0
    case TYPE_ARRAY: {
347
0
        const DataTypeArray* array_type = reinterpret_cast<const DataTypeArray*>(type.get());
348
0
        buffer << "array<" << get_jni_type(array_type->get_nested_type()) << ">";
349
0
        return buffer.str();
350
0
    }
351
0
    case TYPE_MAP: {
352
0
        const DataTypeMap* map_type = reinterpret_cast<const DataTypeMap*>(type.get());
353
0
        buffer << "map<" << get_jni_type(map_type->get_key_type()) << ","
354
0
               << get_jni_type(map_type->get_value_type()) << ">";
355
0
        return buffer.str();
356
0
    }
357
0
    case TYPE_VARBINARY:
358
0
        return "varbinary";
359
    // bitmap, hll, quantile_state, jsonb are transferred as strings via JNI
360
0
    case TYPE_BITMAP:
361
0
        [[fallthrough]];
362
0
    case TYPE_HLL:
363
0
        [[fallthrough]];
364
0
    case TYPE_QUANTILE_STATE:
365
0
        [[fallthrough]];
366
0
    case TYPE_JSONB:
367
0
        return "string";
368
0
    default:
369
0
        return "unsupported";
370
0
    }
371
0
}
372
373
0
std::string JniDataBridge::get_jni_type_with_different_string(const DataTypePtr& data_type) {
374
0
    DataTypePtr type = remove_nullable(data_type);
375
0
    std::ostringstream buffer;
376
0
    switch (data_type->get_primitive_type()) {
377
0
    case TYPE_BOOLEAN:
378
0
        return "boolean";
379
0
    case TYPE_TINYINT:
380
0
        return "tinyint";
381
0
    case TYPE_SMALLINT:
382
0
        return "smallint";
383
0
    case TYPE_INT:
384
0
        return "int";
385
0
    case TYPE_BIGINT:
386
0
        return "bigint";
387
0
    case TYPE_LARGEINT:
388
0
        return "largeint";
389
0
    case TYPE_FLOAT:
390
0
        return "float";
391
0
    case TYPE_DOUBLE:
392
0
        return "double";
393
0
    case TYPE_IPV4:
394
0
        return "ipv4";
395
0
    case TYPE_IPV6:
396
0
        return "ipv6";
397
0
    case TYPE_VARCHAR: {
398
0
        buffer << "varchar("
399
0
               << assert_cast<const DataTypeString*>(remove_nullable(data_type).get())->len()
400
0
               << ")";
401
0
        return buffer.str();
402
0
    }
403
0
    case TYPE_DATE:
404
0
        return "datev1";
405
0
    case TYPE_DATEV2:
406
0
        return "datev2";
407
0
    case TYPE_DATETIME:
408
0
        return "datetimev1";
409
0
    case TYPE_DATETIMEV2:
410
0
        [[fallthrough]];
411
0
    case TYPE_TIMEV2: {
412
0
        buffer << "datetimev2(" << data_type->get_scale() << ")";
413
0
        return buffer.str();
414
0
    }
415
0
    case TYPE_TIMESTAMPTZ: {
416
0
        buffer << "timestamptz(" << data_type->get_scale() << ")";
417
0
        return buffer.str();
418
0
    }
419
0
    case TYPE_BINARY:
420
0
        return "binary";
421
0
    case TYPE_CHAR: {
422
0
        buffer << "char("
423
0
               << assert_cast<const DataTypeString*>(remove_nullable(data_type).get())->len()
424
0
               << ")";
425
0
        return buffer.str();
426
0
    }
427
0
    case TYPE_STRING:
428
0
        return "string";
429
0
    case TYPE_VARBINARY:
430
0
        buffer << "varbinary("
431
0
               << assert_cast<const DataTypeVarbinary*>(remove_nullable(data_type).get())->len()
432
0
               << ")";
433
0
        return buffer.str();
434
0
    case TYPE_DECIMALV2: {
435
0
        buffer << "decimalv2(" << DecimalV2Value::PRECISION << "," << DecimalV2Value::SCALE << ")";
436
0
        return buffer.str();
437
0
    }
438
0
    case TYPE_DECIMAL32: {
439
0
        buffer << "decimal32(" << data_type->get_precision() << "," << data_type->get_scale()
440
0
               << ")";
441
0
        return buffer.str();
442
0
    }
443
0
    case TYPE_DECIMAL64: {
444
0
        buffer << "decimal64(" << data_type->get_precision() << "," << data_type->get_scale()
445
0
               << ")";
446
0
        return buffer.str();
447
0
    }
448
0
    case TYPE_DECIMAL128I: {
449
0
        buffer << "decimal128(" << data_type->get_precision() << "," << data_type->get_scale()
450
0
               << ")";
451
0
        return buffer.str();
452
0
    }
453
0
    case TYPE_STRUCT: {
454
0
        const auto* type_struct =
455
0
                assert_cast<const DataTypeStruct*>(remove_nullable(data_type).get());
456
0
        buffer << "struct<";
457
0
        for (int i = 0; i < type_struct->get_elements().size(); ++i) {
458
0
            if (i != 0) {
459
0
                buffer << ",";
460
0
            }
461
0
            buffer << type_struct->get_element_name(i) << ":"
462
0
                   << get_jni_type_with_different_string(type_struct->get_element(i));
463
0
        }
464
0
        buffer << ">";
465
0
        return buffer.str();
466
0
    }
467
0
    case TYPE_ARRAY: {
468
0
        const auto* type_arr = assert_cast<const DataTypeArray*>(remove_nullable(data_type).get());
469
0
        buffer << "array<" << get_jni_type_with_different_string(type_arr->get_nested_type())
470
0
               << ">";
471
0
        return buffer.str();
472
0
    }
473
0
    case TYPE_MAP: {
474
0
        const auto* type_map = assert_cast<const DataTypeMap*>(remove_nullable(data_type).get());
475
0
        buffer << "map<" << get_jni_type_with_different_string(type_map->get_key_type()) << ","
476
0
               << get_jni_type_with_different_string(type_map->get_value_type()) << ">";
477
0
        return buffer.str();
478
0
    }
479
    // bitmap, hll, quantile_state, jsonb are transferred as strings via JNI
480
0
    case TYPE_BITMAP:
481
0
        [[fallthrough]];
482
0
    case TYPE_HLL:
483
0
        [[fallthrough]];
484
0
    case TYPE_QUANTILE_STATE:
485
0
        [[fallthrough]];
486
0
    case TYPE_JSONB:
487
0
        return "string";
488
0
    default:
489
0
        return "unsupported";
490
0
    }
491
0
}
492
493
// Appends the meta entries describing `doris_column` to `meta_data`.
//
// Entries are written in this order:
//   1. const flag: 1 when the column is a ColumnConst (its inner data column is then
//      described), otherwise 0;
//   2. null-map address, or 0 when the column is not nullable;
//   3. type-specific addresses: the value array for fixed-width types; offsets then
//      chars for strings; offsets followed by the recursively described element column
//      for arrays; every field column for structs; offsets followed by the key and
//      value columns for maps; the data array for varbinary.
//
// Only addresses are recorded, so the described columns must outlive any use of
// `meta_data`.
//
// Returns InternalError for a primitive type with no handler, otherwise the first error
// from a nested column, or OK.
Status JniDataBridge::_fill_column_meta(const ColumnPtr& doris_column, const DataTypePtr& data_type,
                                        std::vector<long>& meta_data) {
    // Single place where a buffer address is turned into a meta entry.
    const auto as_meta = [](const void* ptr) { return reinterpret_cast<long>(ptr); };

    const auto logical_type = data_type->get_primitive_type();

    // insert const flag
    const IColumn* column = nullptr;
    if (is_column_const(*doris_column)) {
        meta_data.emplace_back(1L);
        const auto& const_column = assert_cast<const ColumnConst&>(*doris_column);
        column = &(const_column.get_data_column());
    } else {
        meta_data.emplace_back(0L);
        column = &(*doris_column);
    }

    // insert null map address
    const IColumn* data_column = nullptr;
    if (const auto* nullable_column = check_and_get_column<ColumnNullable>(column)) {
        data_column = &(nullable_column->get_nested_column());
        meta_data.emplace_back(as_meta(nullable_column->get_null_map_data().data()));
    } else {
        meta_data.emplace_back(0L);
        data_column = column;
    }

    switch (logical_type) {
#define DISPATCH(TYPE_INDEX, COLUMN_TYPE, CPP_TYPE)                                          \
    case TYPE_INDEX: {                                                                       \
        meta_data.emplace_back(_get_fixed_length_column_address<COLUMN_TYPE>(*data_column)); \
        break;                                                                               \
    }
        FOR_FIXED_LENGTH_TYPES(DISPATCH)
#undef DISPATCH
    case PrimitiveType::TYPE_STRING:
    case PrimitiveType::TYPE_CHAR:
    case PrimitiveType::TYPE_VARCHAR: {
        const auto& string_column = assert_cast<const ColumnString&>(*data_column);
        // offsets first, then chars
        meta_data.emplace_back(as_meta(string_column.get_offsets().data()));
        meta_data.emplace_back(as_meta(string_column.get_chars().data()));
        break;
    }
    case PrimitiveType::TYPE_ARRAY: {
        const auto& array_column = assert_cast<const ColumnArray&>(*data_column);
        const auto& element_column = array_column.get_data_ptr();
        meta_data.emplace_back(as_meta(array_column.get_offsets().data()));
        const DataTypePtr unwrapped_type = remove_nullable(data_type);
        const auto& element_type =
                assert_cast<const DataTypeArray*>(unwrapped_type.get())->get_nested_type();
        RETURN_IF_ERROR(_fill_column_meta(element_column, element_type, meta_data));
        break;
    }
    case PrimitiveType::TYPE_STRUCT: {
        const auto& doris_struct = assert_cast<const ColumnStruct&>(*data_column);
        const DataTypePtr unwrapped_type = remove_nullable(data_type);
        const auto* doris_struct_type = assert_cast<const DataTypeStruct*>(unwrapped_type.get());
        const size_t field_count = doris_struct.tuple_size();
        for (size_t i = 0; i < field_count; ++i) {
            const auto& struct_field = doris_struct.get_column_ptr(i);
            const auto& field_type = doris_struct_type->get_element(i);
            RETURN_IF_ERROR(_fill_column_meta(struct_field, field_type, meta_data));
        }
        break;
    }
    case PrimitiveType::TYPE_MAP: {
        const auto& map = assert_cast<const ColumnMap&>(*data_column);
        const DataTypePtr unwrapped_type = remove_nullable(data_type);
        const auto* map_type = assert_cast<const DataTypeMap*>(unwrapped_type.get());
        const auto& key_type = map_type->get_key_type();
        const auto& value_type = map_type->get_value_type();
        const auto& key_column = map.get_keys_ptr();
        const auto& value_column = map.get_values_ptr();
        meta_data.emplace_back(as_meta(map.get_offsets().data()));
        RETURN_IF_ERROR(_fill_column_meta(key_column, key_type, meta_data));
        RETURN_IF_ERROR(_fill_column_meta(value_column, value_type, meta_data));
        break;
    }
    case PrimitiveType::TYPE_VARBINARY: {
        const auto& varbinary_col = assert_cast<const ColumnVarbinary&>(*data_column);
        meta_data.emplace_back(as_meta(varbinary_col.get_data().data()));
        break;
    }
    default:
        return Status::InternalError("Unsupported type: {}", data_type->get_name());
    }
    return Status::OK();
}
580
581
0
// Convenience overload: describes every column of `block`, using block->rows() as the
// row count, by delegating to the overload that takes explicit column positions.
Status JniDataBridge::to_java_table(Block* block, std::unique_ptr<long[]>& meta) {
    ColumnNumbers all_columns;
    const size_t column_count = block->columns();
    for (size_t position = 0; position != column_count; ++position) {
        all_columns.emplace_back(position);
    }
    return to_java_table(block, block->rows(), all_columns, meta);
}
588
589
// Builds the meta array describing the columns of `block` selected by `arguments`.
//
// The first entry is `num_rows`; the entries produced by _fill_column_meta() for each
// selected column follow in order. On success `meta` is reset to a newly allocated
// array holding a copy of these entries. On failure `meta` is left untouched and the
// error from _fill_column_meta() is returned.
Status JniDataBridge::to_java_table(Block* block, size_t num_rows, const ColumnNumbers& arguments,
                                    std::unique_ptr<long[]>& meta) {
    std::vector<long> meta_data;
    // insert number of rows
    meta_data.emplace_back(num_rows);
    for (size_t i : arguments) {
        auto& column_with_type_and_name = block->get_by_position(i);
        RETURN_IF_ERROR(_fill_column_meta(column_with_type_and_name.column,
                                          column_with_type_and_name.type, meta_data));
    }

    meta.reset(new long[meta_data.size()]);
    // Size the copy by the element type rather than a literal 8, so the byte count
    // always matches the `long[]` allocation above.
    memcpy(meta.get(), meta_data.data(), meta_data.size() * sizeof(long));
    return Status::OK();
}
604
605
std::pair<std::string, std::string> JniDataBridge::parse_table_schema(
606
0
        Block* block, const ColumnNumbers& arguments, bool ignore_column_name) {
607
    // prepare table schema
608
0
    std::ostringstream required_fields;
609
0
    std::ostringstream columns_types;
610
0
    for (int i = 0; i < arguments.size(); ++i) {
611
0
        std::string type = JniDataBridge::get_jni_type(block->get_by_position(arguments[i]).type);
612
0
        if (i == 0) {
613
0
            if (ignore_column_name) {
614
0
                required_fields << "_col_" << arguments[i];
615
0
            } else {
616
0
                required_fields << block->get_by_position(arguments[i]).name;
617
0
            }
618
0
            columns_types << type;
619
0
        } else {
620
0
            if (ignore_column_name) {
621
0
                required_fields << ","
622
0
                                << "_col_" << arguments[i];
623
0
            } else {
624
0
                required_fields << "," << block->get_by_position(arguments[i]).name;
625
0
            }
626
0
            columns_types << "#" << type;
627
0
        }
628
0
    }
629
0
    return std::make_pair(required_fields.str(), columns_types.str());
630
0
}
631
632
0
// Convenience overload: builds the schema strings for every column of `block`, using
// generated "_col_<position>" names instead of the block's column names.
std::pair<std::string, std::string> JniDataBridge::parse_table_schema(Block* block) {
    ColumnNumbers all_columns;
    const size_t column_count = block->columns();
    for (size_t position = 0; position != column_count; ++position) {
        all_columns.emplace_back(position);
    }
    return parse_table_schema(block, all_columns, true);
}
639
640
} // namespace doris