Coverage Report

Created: 2026-04-02 09:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/table_function/vexplode_v2.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exprs/table_function/vexplode_v2.h"
19
20
#include <glog/logging.h>
21
22
#include <algorithm>
23
#include <cstdint>
24
#include <ostream>
25
26
#include "common/status.h"
27
#include "core/assert_cast.h"
28
#include "core/block/block.h"
29
#include "core/block/column_with_type_and_name.h"
30
#include "core/column/column.h"
31
#include "core/column/column_array.h"
32
#include "core/column/column_nothing.h"
33
#include "core/column/column_variant.h"
34
#include "core/data_type/data_type.h"
35
#include "core/data_type/data_type_array.h"
36
#include "core/data_type/data_type_nothing.h"
37
#include "core/data_type/primitive_type.h"
38
#include "exprs/function/function_helpers.h"
39
#include "exprs/vexpr.h"
40
#include "exprs/vexpr_context.h"
41
42
namespace doris {
43
44
#include "common/compile_check_begin.h"
45
#include "core/column/column_struct.h"
46
47
1.97k
// Constructs the table function and records "vexplode" as its function name.
VExplodeV2TableFunction::VExplodeV2TableFunction() {
    this->_fn_name = "vexplode";
}
50
51
// Prepares the array column and per-child detail for a VARIANT-typed argument.
//
// The column at `value_column_idx` has its nullable wrapper removed, is expanded if it is a
// const column, and is then finalized as a ColumnVariant. The detail entry at
// `children_column_idx` is flagged as variant output and inherits the variant's doc-mode flag.
//
// - Null root: the array slot receives a nullable array-of-Nothing column padded with
//   `variant.size()` default rows, and the nested type becomes DataTypeNothing.
// - Otherwise: the array slot receives the variant root and the nested type is taken from the
//   root's array type.
//
// Returns NotSupported when the (non-null) root type is not an array type, OK otherwise.
Status VExplodeV2TableFunction::_process_init_variant(Block* block, int value_column_idx,
                                                      int children_column_idx) {
    auto stripped = remove_nullable(block->get_by_position(value_column_idx).column);
    auto materialized = stripped->convert_to_full_column_if_const();
    auto& variant = assert_cast<ColumnVariant&>(*(materialized->assume_mutable()));
    variant.finalize();

    auto& detail = _multi_detail[children_column_idx];
    auto& array_slot = _array_columns[children_column_idx];
    detail.output_as_variant = true;
    detail.variant_enable_doc_mode = variant.enable_doc_mode();

    if (variant.is_null_root()) {
        // Nothing to explode: stand in an array-of-Nothing column with one default row per
        // variant row so later stages still see a column of matching length.
        array_slot = ColumnNullable::create(ColumnArray::create(ColumnNothing::create(0)),
                                            ColumnUInt8::create(0));
        array_slot->assume_mutable()->insert_many_defaults(variant.size());
        detail.nested_type = std::make_shared<DataTypeNothing>();
        return Status::OK();
    }

    array_slot = variant.get_root();
    // The exploded values are emitted wrapped as variant output (see output_as_variant above),
    // so the element type of the root array has to be remembered here.
    const auto* root_array_type =
            check_and_get_data_type<DataTypeArray>(remove_nullable(variant.get_root_type()).get());
    if (root_array_type == nullptr) {
        return Status::NotSupported("explode not support none array type {}",
                                    variant.get_root_type()->get_name());
    }
    detail.nested_type = root_array_type->get_nested_type();
    return Status::OK();
}
81
82
941
// Evaluates every child expression of the table function against `block` and caches, per
// child, the array column plus the array info extracted from it.
//
// VARIANT-typed results are routed through _process_init_variant(); every other result is
// stored after expanding a const column to a full one.
//
// Aborts via CHECK if the root expression has no children. Returns the first error from child
// evaluation or variant handling, NotSupported when extract_column_array_info() rejects a
// column, and OK otherwise.
Status VExplodeV2TableFunction::process_init(Block* block, RuntimeState* state) {
    const auto child_count = _expr_context->root()->children().size();
    CHECK(child_count >= 1) << "VExplodeV2TableFunction support one or more child but has "
                            << child_count;

    _multi_detail.resize(child_count);
    _array_offsets.resize(child_count);
    _array_columns.resize(child_count);

    int result_column_id = -1;
    for (int child = 0; child < child_count; ++child) {
        RETURN_IF_ERROR(_expr_context->root()->children()[child]->execute(
                _expr_context.get(), block, &result_column_id));

        const bool is_variant =
                block->get_by_position(result_column_id).type->get_primitive_type() ==
                TYPE_VARIANT;
        if (is_variant) {
            RETURN_IF_ERROR(_process_init_variant(block, result_column_id, child));
        } else {
            _array_columns[child] = block->get_by_position(result_column_id)
                                            .column->convert_to_full_column_if_const();
        }

        if (extract_column_array_info(*_array_columns[child], _multi_detail[child])) {
            continue;
        }
        return Status::NotSupported("column type {} not supported now",
                                    block->get_by_position(result_column_id).column->get_name());
    }

    return Status::OK();
}
110
111
17.2k
bool VExplodeV2TableFunction::support_block_fast_path() const {
112
17.2k
    return !_is_outer && !_generate_row_index && _multi_detail.size() == 1;
113
}
114
34.8k
115
17.5k
// Fills `ctx` with the pointers describing the single exploded array (null maps, offsets and
// nested column) taken from the first detail entry.
//
// Debug builds assert that support_block_fast_path() holds. Returns InternalError when the
// detail has no offsets pointer or no nested column, OK otherwise.
Status VExplodeV2TableFunction::prepare_block_fast_path(Block* /*block*/, RuntimeState* /*state*/,
                                                        BlockFastPathContext* ctx) {
    DCHECK(support_block_fast_path());

    const auto& only_detail = _multi_detail[0];
    const bool initialized =
            only_detail.offsets_ptr != nullptr && only_detail.nested_col.get() != nullptr;
    if (!initialized) {
        return Status::InternalError("vexplode block fast path not initialized");
    }

    ctx->offsets_ptr = only_detail.offsets_ptr;
    ctx->nested_col = only_detail.nested_col;
    ctx->array_nullmap_data = only_detail.array_nullmap_data;
    ctx->nested_nullmap_data = only_detail.nested_nullmap_data;
    return Status::OK();
}
128
927
129
927
void VExplodeV2TableFunction::process_row(size_t row_idx) {
130
927
    TableFunction::process_row(row_idx);
131
927
132
    for (int i = 0; i < _multi_detail.size(); i++) {
133
31
        auto& detail = _multi_detail[i];
134
31
        if (!detail.array_nullmap_data || !detail.array_nullmap_data[row_idx]) {
135
6
            _array_offsets[i] = (*detail.offsets_ptr)[row_idx - 1];
136
6
            // find max size in array
137
6
            auto cur_size = (*detail.offsets_ptr)[row_idx] - _array_offsets[i];
138
25
            _cur_size = std::max<unsigned long>(_cur_size, cur_size);
139
25
        }
140
    }
141
25
    _row_idx = row_idx;
142
}
143
25
144
9
void VExplodeV2TableFunction::process_close() {
145
9
    _multi_detail.clear();
146
9
    _array_offsets.clear();
147
9
    _array_columns.clear();
148
9
    _row_idx = 0;
149
9
}
150
9
151
// Appends `length` copies of the value at the current offset (_cur_offset) of the current row
// (_row_idx) to `column`.
//
// Output layout:
//   - With several array arguments, or when a row index is generated, `column` is a struct
//     column (wrapped in a nullable column when _is_nullable). Field 0 holds the position when
//     _generate_row_index is set; one field per array argument follows.
//   - Otherwise `column` itself receives the values of the single array argument.
//
// Per argument:
//   - the row's array is null                       -> `length` defaults,
//   - the current offset is past the array's end,
//     or the addressed element is null              -> `length` defaults,
//   - otherwise                                     -> `length` copies of the element.
//
// When the current row is empty (current_empty()), `length` defaults are appended to `column`
// and nothing else happens.
void VExplodeV2TableFunction::get_same_many_values(MutableColumnPtr& column, int length) {
    if (current_empty()) {
        column->insert_many_defaults(length);
        return;
    }

    ColumnStruct* struct_column = nullptr;
    std::vector<IColumn*> columns;

    const bool multi_sub_columns = _multi_detail.size() > 1 || _generate_row_index;

    if (multi_sub_columns) {
        if (_is_nullable) {
            auto* nullable_column = assert_cast<ColumnNullable*>(column.get());
            struct_column =
                    assert_cast<ColumnStruct*>(nullable_column->get_nested_column_ptr().get());
            auto* nullmap_column =
                    assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
            // The struct fields are filled one by one below, so the outer null map is extended
            // here separately.
            nullmap_column->insert_many_defaults(length);
        } else {
            struct_column = assert_cast<ColumnStruct*>(column.get());
        }

        // Field 0 is reserved for the position when a row index is generated.
        for (size_t i = 0; i != _multi_detail.size(); ++i) {
            columns.emplace_back(&struct_column->get_column(i + (_generate_row_index ? 1 : 0)));
        }
    } else {
        columns.push_back(column.get());
    }

    if (_generate_row_index) {
        auto& pos_column = assert_cast<ColumnInt32&>(struct_column->get_column(0));
        pos_column.insert_many_vals(static_cast<int32_t>(_cur_offset), length);
    }

    for (int i = 0; i < _multi_detail.size(); i++) {
        auto& detail = _multi_detail[i];
        size_t pos = _array_offsets[i] + _cur_offset;
        size_t element_size = detail.array_col->size_at(_row_idx);
        auto& struct_field = *columns.at(i);
        if (detail.array_nullmap_data && detail.array_nullmap_data[_row_idx]) {
            struct_field.insert_many_defaults(length);
            continue;
        }

        auto* nullable_column = assert_cast<ColumnNullable*>(struct_field.get_ptr().get());
        auto* nullmap_column =
                assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());

        // `pos` addresses an element of this row only while _cur_offset < element_size. At
        // _cur_offset == element_size it is already one past the row's last element (the next
        // row's first element, or past the end of the nested data), so that case must emit
        // defaults too. Checking it first also keeps the null-map lookup below in range.
        if (element_size <= _cur_offset ||
            (detail.nested_nullmap_data && detail.nested_nullmap_data[pos])) {
            nullable_column->insert_many_defaults(length);
        } else {
            nullable_column->get_nested_column_ptr()->insert_many_from(*detail.nested_col, pos,
                                                                       length);
            nullmap_column->insert_many_defaults(length);
        }
    }
}
209
261
210
261
// Appends up to `max_step` exploded rows of the current input row to `column`, starting at
// _cur_offset, then advances through forward() and returns the number of rows produced.
//
// `max_step` is first clamped to the rows still left in the current row
// (_cur_size - _cur_offset). When the current row is empty (current_empty()), exactly one
// default value is appended and 1 is returned.
//
// Output layout:
//   - With several array arguments, or when a row index is generated, `column` is a struct
//     column (wrapped in a nullable column when _is_nullable). Field 0 receives the positions
//     [_cur_offset, _cur_offset + max_step) when _generate_row_index is set; one field per
//     array argument follows.
//   - Otherwise `column` itself receives the values of the single array argument.
//
// An argument whose array is null for this row, or that has run out of elements, is padded
// with defaults so that every field grows by the same number of rows.
int VExplodeV2TableFunction::get_value(MutableColumnPtr& column, int max_step) {
    max_step = std::min(max_step, static_cast<int>(_cur_size - _cur_offset));

    if (current_empty()) {
        column->insert_default();
        max_step = 1;
        forward(max_step);
        return max_step;
    }

    const bool wrapped_in_struct = _generate_row_index || _multi_detail.size() > 1;
    ColumnStruct* struct_column = nullptr;
    std::vector<IColumn*> targets;

    if (wrapped_in_struct) {
        if (_is_nullable) {
            auto* outer_nullable = assert_cast<ColumnNullable*>(column.get());
            struct_column =
                    assert_cast<ColumnStruct*>(outer_nullable->get_nested_column_ptr().get());
            auto* outer_nullmap =
                    assert_cast<ColumnUInt8*>(outer_nullable->get_null_map_column_ptr().get());
            // The struct fields are filled individually below; extend the outer null map here.
            outer_nullmap->insert_many_defaults(max_step);
        } else {
            struct_column = assert_cast<ColumnStruct*>(column.get());
        }

        // Field 0 is reserved for the position when a row index is generated.
        for (size_t field = 0; field != _multi_detail.size(); ++field) {
            targets.emplace_back(
                    &struct_column->get_column(field + (_generate_row_index ? 1 : 0)));
        }
    } else {
        targets.emplace_back(column.get());
    }

    if (_generate_row_index) {
        auto& pos_column = assert_cast<ColumnInt32&>(struct_column->get_column(0));
        const auto first_pos = static_cast<int32_t>(_cur_offset);
        const auto end_pos = static_cast<int32_t>(_cur_offset + max_step);
        pos_column.insert_range_of_integer(first_pos, end_pos);
    }

    for (int i = 0; i < _multi_detail.size(); i++) {
        auto& detail = _multi_detail[i];
        size_t pos = _array_offsets[i] + _cur_offset;
        size_t element_size = detail.array_col->size_at(_row_idx);
        auto& target = *targets.at(i);

        if (detail.array_nullmap_data && detail.array_nullmap_data[_row_idx]) {
            target.insert_many_defaults(max_step);
            continue;
        }

        auto* nullable_target = assert_cast<ColumnNullable*>(target.get_ptr().get());
        auto* target_nullmap =
                assert_cast<ColumnUInt8*>(nullable_target->get_null_map_column_ptr().get());

        // Copies `count` elements starting at `pos` from the source nested column together
        // with their null flags (all "not null" when the source has no nested null map).
        auto append_from_nested = [&](auto count) {
            nullable_target->get_nested_column_ptr()->insert_range_from(*detail.nested_col, pos,
                                                                        count);
            if (detail.nested_nullmap_data) {
                size_t old_size = target_nullmap->size();
                target_nullmap->resize(old_size + count);
                memcpy(target_nullmap->get_data().data() + old_size,
                       detail.nested_nullmap_data + pos, count * sizeof(UInt8));
            } else {
                target_nullmap->insert_many_defaults(count);
            }
        };

        if (element_size >= _cur_offset + max_step) {
            // The array still holds enough elements for the whole step.
            append_from_nested(max_step);
        } else if (element_size > _cur_offset) {
            // The array ends inside this step: copy what is left, pad the rest with defaults.
            auto remaining = element_size - _cur_offset;
            append_from_nested(remaining);
            nullable_target->insert_many_defaults(max_step - remaining);
        } else {
            // The array is already exhausted.
            nullable_target->insert_many_defaults(max_step);
        }
    }

    forward(max_step);
    return max_step;
}
293
294
#include "common/compile_check_end.h"
295
296
} // namespace doris