Coverage Report

Created: 2026-05-13 01:09

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/table_function/vexplode_v2.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exprs/table_function/vexplode_v2.h"
19
20
#include <glog/logging.h>
21
22
#include <algorithm>
23
#include <cstdint>
24
#include <ostream>
25
26
#include "common/status.h"
27
#include "core/assert_cast.h"
28
#include "core/block/block.h"
29
#include "core/block/column_with_type_and_name.h"
30
#include "core/column/column.h"
31
#include "core/column/column_array.h"
32
#include "core/column/column_nothing.h"
33
#include "core/column/column_struct.h"
34
#include "core/column/column_variant.h"
35
#include "core/data_type/data_type.h"
36
#include "core/data_type/data_type_array.h"
37
#include "core/data_type/data_type_nothing.h"
38
#include "core/data_type/primitive_type.h"
39
#include "exprs/function/function_helpers.h"
40
#include "exprs/vexpr.h"
41
#include "exprs/vexpr_context.h"
42
43
namespace doris {
44
45
13
VExplodeV2TableFunction::VExplodeV2TableFunction() {
46
13
    _fn_name = "vexplode";
47
13
}
48
49
Status VExplodeV2TableFunction::_process_init_variant(Block* block, int value_column_idx,
50
0
                                                      int children_column_idx) {
51
    // explode variant array
52
0
    auto column_without_nullable = remove_nullable(block->get_by_position(value_column_idx).column);
53
0
    auto column = column_without_nullable->convert_to_full_column_if_const();
54
0
    auto& variant_column = assert_cast<ColumnVariant&>(*(column->assume_mutable()));
55
0
    variant_column.finalize();
56
0
    _multi_detail[children_column_idx].output_as_variant = true;
57
0
    _multi_detail[children_column_idx].variant_enable_doc_mode = variant_column.enable_doc_mode();
58
0
    if (!variant_column.is_null_root()) {
59
0
        _array_columns[children_column_idx] = variant_column.get_root();
60
        // We need to wrap the output nested column within a variant column.
61
        // Otherwise the type is missmatched
62
0
        const auto* array_type = check_and_get_data_type<DataTypeArray>(
63
0
                remove_nullable(variant_column.get_root_type()).get());
64
0
        if (array_type == nullptr) {
65
0
            return Status::NotSupported("explode not support none array type {}",
66
0
                                        variant_column.get_root_type()->get_name());
67
0
        }
68
0
        _multi_detail[children_column_idx].nested_type = array_type->get_nested_type();
69
0
    } else {
70
        // null root, use nothing type
71
0
        _array_columns[children_column_idx] = ColumnNullable::create(
72
0
                ColumnArray::create(ColumnNothing::create(0)), ColumnUInt8::create(0));
73
0
        _array_columns[children_column_idx]->assume_mutable()->insert_many_defaults(
74
0
                variant_column.size());
75
0
        _multi_detail[children_column_idx].nested_type = std::make_shared<DataTypeNothing>();
76
0
    }
77
0
    return Status::OK();
78
0
}
79
80
16
Status VExplodeV2TableFunction::process_init(Block* block, RuntimeState* state) {
81
16
    auto expr_size = _expr_context->root()->children().size();
82
16
    CHECK(expr_size >= 1) << "VExplodeV2TableFunction support one or more child but has "
83
0
                          << expr_size;
84
85
16
    int value_column_idx = -1;
86
16
    _multi_detail.resize(expr_size);
87
16
    _array_offsets.resize(expr_size);
88
16
    _array_columns.resize(expr_size);
89
90
38
    for (int i = 0; i < expr_size; i++) {
91
22
        RETURN_IF_ERROR(_expr_context->root()->children()[i]->execute(_expr_context.get(), block,
92
22
                                                                      &value_column_idx));
93
22
        if (block->get_by_position(value_column_idx).type->get_primitive_type() == TYPE_VARIANT) {
94
0
            RETURN_IF_ERROR(_process_init_variant(block, value_column_idx, i));
95
22
        } else {
96
22
            _array_columns[i] = block->get_by_position(value_column_idx)
97
22
                                        .column->convert_to_full_column_if_const();
98
22
        }
99
22
        if (!extract_column_array_info(*_array_columns[i], _multi_detail[i])) {
100
0
            return Status::NotSupported(
101
0
                    "column type {} not supported now",
102
0
                    block->get_by_position(value_column_idx).column->get_name());
103
0
        }
104
22
    }
105
106
16
    return Status::OK();
107
16
}
108
109
9
bool VExplodeV2TableFunction::support_block_fast_path() const {
110
9
    return _multi_detail.size() == 1;
111
9
}
112
113
Status VExplodeV2TableFunction::prepare_block_fast_path(Block* /*block*/, RuntimeState* /*state*/,
114
4
                                                        BlockFastPathContext* ctx) {
115
4
    DCHECK(support_block_fast_path());
116
4
    const auto& detail = _multi_detail[0];
117
4
    if (detail.offsets_ptr == nullptr || detail.nested_col.get() == nullptr) {
118
0
        return Status::InternalError("vexplode block fast path not initialized");
119
0
    }
120
4
    ctx->array_nullmap_data = detail.array_nullmap_data;
121
4
    ctx->offsets_ptr = detail.offsets_ptr;
122
4
    ctx->nested_col = detail.nested_col;
123
4
    ctx->nested_nullmap_data = detail.nested_nullmap_data;
124
4
    ctx->generate_row_index = _generate_row_index;
125
4
    return Status::OK();
126
4
}
127
128
27
void VExplodeV2TableFunction::process_row(size_t row_idx) {
129
27
    TableFunction::process_row(row_idx);
130
131
60
    for (int i = 0; i < _multi_detail.size(); i++) {
132
33
        auto& detail = _multi_detail[i];
133
33
        if (!detail.array_nullmap_data || !detail.array_nullmap_data[row_idx]) {
134
26
            _array_offsets[i] = (*detail.offsets_ptr)[row_idx - 1];
135
            // find max size in array
136
26
            auto cur_size = (*detail.offsets_ptr)[row_idx] - _array_offsets[i];
137
26
            _cur_size = std::max<unsigned long>(_cur_size, cur_size);
138
26
        }
139
33
    }
140
27
    _row_idx = row_idx;
141
27
}
142
143
5
void VExplodeV2TableFunction::process_close() {
144
5
    _multi_detail.clear();
145
5
    _array_offsets.clear();
146
5
    _array_columns.clear();
147
5
    _row_idx = 0;
148
5
}
149
150
31
void VExplodeV2TableFunction::get_same_many_values(MutableColumnPtr& column, int length) {
151
31
    if (current_empty()) {
152
6
        column->insert_many_defaults(length);
153
6
        return;
154
6
    }
155
25
    ColumnStruct* struct_column = nullptr;
156
25
    std::vector<IColumn*> columns;
157
158
25
    const bool multi_sub_columns = _multi_detail.size() > 1 || _generate_row_index;
159
160
25
    if (multi_sub_columns) {
161
9
        if (_is_nullable) {
162
9
            auto* nullable_column = assert_cast<ColumnNullable*>(column.get());
163
9
            struct_column =
164
9
                    assert_cast<ColumnStruct*>(nullable_column->get_nested_column_ptr().get());
165
9
            auto* nullmap_column =
166
9
                    assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
167
9
            nullmap_column->insert_many_defaults(length);
168
169
9
        } else {
170
0
            struct_column = assert_cast<ColumnStruct*>(column.get());
171
0
        }
172
173
27
        for (size_t i = 0; i != _multi_detail.size(); ++i) {
174
18
            columns.emplace_back(&struct_column->get_column(i + (_generate_row_index ? 1 : 0)));
175
18
        }
176
16
    } else {
177
16
        columns.push_back(column.get());
178
16
    }
179
180
25
    if (_generate_row_index) {
181
0
        auto& pos_column = assert_cast<ColumnInt32&>(struct_column->get_column(0));
182
0
        pos_column.insert_many_vals(static_cast<int32_t>(_cur_offset), length);
183
0
    }
184
185
59
    for (int i = 0; i < _multi_detail.size(); i++) {
186
34
        auto& detail = _multi_detail[i];
187
34
        size_t pos = _array_offsets[i] + _cur_offset;
188
34
        size_t element_size = _multi_detail[i].array_col->size_at(_row_idx);
189
34
        auto& struct_field = *columns.at(i);
190
34
        if ((detail.array_nullmap_data && detail.array_nullmap_data[_row_idx])) {
191
3
            struct_field.insert_many_defaults(length);
192
31
        } else {
193
31
            auto* nullable_column = assert_cast<ColumnNullable*>(struct_field.get_ptr().get());
194
31
            auto* nullmap_column =
195
31
                    assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
196
            // only need to check if the value at position pos is null
197
31
            if (element_size < _cur_offset ||
198
31
                (detail.nested_nullmap_data && detail.nested_nullmap_data[pos])) {
199
3
                nullable_column->insert_many_defaults(length);
200
28
            } else {
201
28
                nullable_column->get_nested_column_ptr()->insert_many_from(*detail.nested_col, pos,
202
28
                                                                           length);
203
28
                nullmap_column->insert_many_defaults(length);
204
28
            }
205
31
        }
206
34
    }
207
25
}
208
209
5
int VExplodeV2TableFunction::get_value(MutableColumnPtr& column, int max_step) {
210
5
    max_step = std::min(max_step, (int)(_cur_size - _cur_offset));
211
5
    const bool multi_sub_columns = _multi_detail.size() > 1 || _generate_row_index;
212
213
5
    ColumnStruct* struct_column = nullptr;
214
5
    std::vector<IColumn*> columns;
215
216
5
    if (current_empty()) {
217
0
        column->insert_default();
218
0
        max_step = 1;
219
5
    } else {
220
5
        if (multi_sub_columns) {
221
3
            if (_is_nullable) {
222
3
                auto* nullable_column = assert_cast<ColumnNullable*>(column.get());
223
3
                struct_column =
224
3
                        assert_cast<ColumnStruct*>(nullable_column->get_nested_column_ptr().get());
225
3
                auto* nullmap_column =
226
3
                        assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
227
3
                nullmap_column->insert_many_defaults(max_step);
228
229
3
            } else {
230
0
                struct_column = assert_cast<ColumnStruct*>(column.get());
231
0
            }
232
233
9
            for (size_t i = 0; i != _multi_detail.size(); ++i) {
234
6
                columns.emplace_back(&struct_column->get_column(i + (_generate_row_index ? 1 : 0)));
235
6
            }
236
3
        } else {
237
2
            columns.emplace_back(column.get());
238
2
        }
239
240
5
        if (_generate_row_index) {
241
0
            auto& pos_column = assert_cast<ColumnInt32&>(struct_column->get_column(0));
242
0
            pos_column.insert_range_of_integer(static_cast<int32_t>(_cur_offset),
243
0
                                               static_cast<int32_t>(_cur_offset + max_step));
244
0
        }
245
246
13
        for (int i = 0; i < _multi_detail.size(); i++) {
247
8
            auto& detail = _multi_detail[i];
248
8
            size_t pos = _array_offsets[i] + _cur_offset;
249
8
            size_t element_size = _multi_detail[i].array_col->size_at(_row_idx);
250
8
            auto& struct_field = *columns.at(i);
251
8
            if (detail.array_nullmap_data && detail.array_nullmap_data[_row_idx]) {
252
1
                struct_field.insert_many_defaults(max_step);
253
7
            } else {
254
7
                auto* nullable_column = assert_cast<ColumnNullable*>(struct_field.get_ptr().get());
255
7
                auto* nullmap_column =
256
7
                        assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
257
7
                if (element_size >= _cur_offset + max_step) {
258
7
                    nullable_column->get_nested_column_ptr()->insert_range_from(*detail.nested_col,
259
7
                                                                                pos, max_step);
260
7
                    if (detail.nested_nullmap_data) {
261
7
                        size_t old_size = nullmap_column->size();
262
7
                        nullmap_column->resize(old_size + max_step);
263
7
                        memcpy(nullmap_column->get_data().data() + old_size,
264
7
                               detail.nested_nullmap_data + pos, max_step * sizeof(UInt8));
265
7
                    } else {
266
0
                        nullmap_column->insert_many_defaults(max_step);
267
0
                    }
268
7
                } else if (element_size > _cur_offset) {
269
0
                    auto current_insert_num = element_size - _cur_offset;
270
0
                    nullable_column->get_nested_column_ptr()->insert_range_from(
271
0
                            *detail.nested_col, pos, current_insert_num);
272
0
                    if (detail.nested_nullmap_data) {
273
0
                        size_t old_size = nullmap_column->size();
274
0
                        nullmap_column->resize(old_size + current_insert_num);
275
0
                        memcpy(nullmap_column->get_data().data() + old_size,
276
0
                               detail.nested_nullmap_data + pos,
277
0
                               current_insert_num * sizeof(UInt8));
278
0
                    } else {
279
0
                        nullmap_column->insert_many_defaults(current_insert_num);
280
0
                    }
281
0
                    nullable_column->insert_many_defaults(max_step - current_insert_num);
282
0
                } else {
283
0
                    nullable_column->insert_many_defaults(max_step);
284
0
                }
285
7
            }
286
8
        }
287
5
    }
288
289
5
    forward(max_step);
290
5
    return max_step;
291
5
}
292
293
} // namespace doris