Coverage Report

Created: 2026-03-14 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/array/function_array_split.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
//
18
// This file is copied from
19
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/array/arraySplit.cpp
20
// and modified by Doris
21
22
#include <cstddef>
23
#include <memory>
24
#include <utility>
25
26
#include "common/status.h"
27
#include "core/assert_cast.h"
28
#include "core/block/block.h"
29
#include "core/block/column_numbers.h"
30
#include "core/block/column_with_type_and_name.h"
31
#include "core/column/column.h"
32
#include "core/column/column_array.h"
33
#include "core/column/column_const.h"
34
#include "core/column/column_nullable.h"
35
#include "core/data_type/data_type.h"
36
#include "core/data_type/data_type_array.h"
37
#include "core/data_type/data_type_nullable.h"
38
#include "core/types.h"
39
#include "exprs/aggregate/aggregate_function.h"
40
#include "exprs/function/function.h"
41
#include "exprs/function/simple_function_factory.h"
42
43
namespace doris {
44
class FunctionContext;
45
} // namespace doris
46
47
namespace doris {
48
#include "common/compile_check_begin.h"
49
// IFunction implementation that regroups the elements of an array into an
// array of sub-arrays, driven by a second array of boolean flags.
//
// The template parameter picks one of two variants (see do_loop):
//   reverse == false ("array_split"): a new sub-array starts at every element
//       whose flag is true; the flag of the first element of a row is skipped.
//   reverse == true ("array_reverse_split"): the current sub-array ends right
//       after every element whose flag is true; the flag of the last element
//       of a row is skipped.
// Both arrays of a row must have the same length, otherwise execution fails
// with an InvalidArgument status.
template <bool reverse>
50
class FunctionArraySplit : public IFunction {
51
public:
52
    // Function name, chosen at compile time from `reverse`. It is returned by
    // get_name() and used in the "uneven arguments" error message.
    static constexpr auto name = reverse ? "array_reverse_split" : "array_split";
53
4
    // Factory: returns a newly allocated, shared instance of this instantiation.
    static FunctionPtr create() { return std::make_shared<FunctionArraySplit>(); }
_ZN5doris18FunctionArraySplitILb1EE6createEv
Line
Count
Source
53
2
    static FunctionPtr create() { return std::make_shared<FunctionArraySplit>(); }
_ZN5doris18FunctionArraySplitILb0EE6createEv
Line
Count
Source
53
2
    static FunctionPtr create() { return std::make_shared<FunctionArraySplit>(); }
54
2
    // Returns `name`: "array_reverse_split" when reverse is true, "array_split" otherwise.
    String get_name() const override { return name; }
_ZNK5doris18FunctionArraySplitILb1EE8get_nameB5cxx11Ev
Line
Count
Source
54
1
    String get_name() const override { return name; }
_ZNK5doris18FunctionArraySplitILb0EE8get_nameB5cxx11Ev
Line
Count
Source
54
1
    String get_name() const override { return name; }
55
56
0
    // The function takes exactly two arguments; execute_impl reads arguments[0]
    // (the array to split) and arguments[1] (the array of cut flags).
    size_t get_number_of_arguments() const override { return 2; }
Unexecuted instantiation: _ZNK5doris18FunctionArraySplitILb1EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris18FunctionArraySplitILb0EE23get_number_of_argumentsEv
57
58
0
    // Result type: an array whose element type is the nullable form of the
    // first argument's type, i.e. Array(Nullable(typeof(arguments[0]))).
    // Only arguments[0] is inspected; the type of the flag array is not
    // validated here.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
59
0
        return std::make_shared<DataTypeArray>(make_nullable(arguments[0]));
60
0
    // (The ';' after the closing brace is an empty declaration.)
    };
Unexecuted instantiation: _ZNK5doris18FunctionArraySplitILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Unexecuted instantiation: _ZNK5doris18FunctionArraySplitILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
61
62
    // Splits every row of the array column at arguments[0] according to the
    // flag array column at arguments[1] and stores the resulting
    // array-of-arrays column at position `result` of `block`.
    //
    // The element data of the source array is reused as-is; only two new
    // offset columns are computed (inner: sub-array boundaries, outer: number
    // of sub-arrays per row). `context` and `input_rows_count` are not
    // referenced in this body.
    //
    // Returns Status::OK() on success, or the error produced by do_loop when a
    // row's two arrays differ in length.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
63
0
                        uint32_t result, size_t input_rows_count) const override {
64
        // <Nullable>(Array(<Nullable>(Int)))
65
0
        // Materialize constant columns so both arguments can be cast to ColumnArray.
        // NOTE(review): both columns are cast straight to ColumnArray below, so
        // they are presumably never wrapped in an outer Nullable here - confirm.
        auto src_column =
66
0
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
67
0
        auto spliter_column =
68
0
                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
69
70
        // only change its split(i.e. offsets)
71
0
        // Flattened element data and per-row end offsets of the array being split.
        const auto& src_data = assert_cast<const ColumnArray&>(*src_column).get_data_ptr();
72
0
        const auto& src_offsets = assert_cast<const ColumnArray&>(*src_column).get_offsets();
73
74
0
        // Flattened flag data and per-row end offsets of the flag array.
        auto split_col = assert_cast<const ColumnArray*>(spliter_column.get())->get_data_ptr();
75
0
        const auto& split_offsets = assert_cast<const ColumnArray&>(*spliter_column)
76
0
                                            .get_offsets(); // for check uneven array
77
78
0
        // null_map stays nullptr unless the flag data is nullable AND reports
        // has_null(); in either case a nullable flag column is unwrapped to its
        // nested column.
        const NullMap* null_map = nullptr;
79
0
        if (split_col->is_nullable()) {
80
0
            if (split_col->has_null()) {
81
0
                null_map =
82
0
                        &assert_cast<const ColumnNullable*>(split_col.get())->get_null_map_data();
83
0
            }
84
0
            split_col =
85
0
                    assert_cast<const ColumnNullable*>(split_col.get())->get_nested_column_ptr();
86
0
        }
87
88
0
        // The (unwrapped) flags are read as a ColumnBool, one entry per source element.
        const IColumn::Filter& cut = assert_cast<const ColumnBool*>(split_col.get())->get_data();
89
90
0
        // Inner offsets delimit the produced sub-arrays inside src_data; outer
        // offsets record the cumulative number of sub-arrays after each row.
        auto col_offsets_inner = ColumnArray::ColumnOffsets::create();
91
0
        auto col_offsets_outer = ColumnArray::ColumnOffsets::create();
92
0
        auto& offsets_inner = col_offsets_inner->get_data();
93
0
        auto& offsets_outer = col_offsets_outer->get_data();
94
0
        offsets_inner.reserve(src_offsets.size()); // assume the actual size to be equal or larger
95
0
        offsets_outer.reserve(src_offsets.size());
96
97
0
        // Select the do_loop instantiation at runtime so the null check is only
        // compiled into the loop when a null map was captured above.
        if (null_map != nullptr) {
98
0
            RETURN_IF_ERROR(do_loop<true>(src_offsets, split_offsets, cut, null_map, offsets_inner,
99
0
                                          offsets_outer));
100
0
        } else {
101
0
            RETURN_IF_ERROR(do_loop<false>(src_offsets, split_offsets, cut, null_map, offsets_inner,
102
0
                                           offsets_outer));
103
0
        }
104
105
0
        // Inner array column: the original element data with the new sub-array offsets.
        auto inner_result = ColumnArray::create(src_data, std::move(col_offsets_inner));
106
0
        // Outer array column: every sub-array is wrapped as Nullable with an
        // all-zero null map (matching the Array(Nullable(...)) return type).
        // NOTE(review): inner_result is passed through std::move and also
        // dereferenced (inner_result->size()) within the same argument list.
        // This is only safe if create() does not consume the moved pointer
        // before its sibling argument has been evaluated - confirm, or read the
        // size into a local first.
        auto outer_result = ColumnArray::create(
107
0
                ColumnNullable::create(std::move(inner_result),
108
0
                                       ColumnUInt8::create(inner_result->size(), 0)),
109
0
                std::move(col_offsets_outer));
110
0
        block.replace_by_position(result, std::move(outer_result));
111
0
        return Status::OK();
112
0
    }
Unexecuted instantiation: _ZNK5doris18FunctionArraySplitILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Unexecuted instantiation: _ZNK5doris18FunctionArraySplitILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
113
114
    // Computes the offsets of the split result.
    //
    // Parameters:
    //   src_offsets   - per-row end offsets of the array being split.
    //   split_offsets - per-row end offsets of the flag array; must equal
    //                   src_offsets row by row.
    //   cut           - flattened flags, indexed by flattened element position.
    //   null_map      - null flags for `cut`; dereferenced only when
    //                   CONSIDER_NULL is true, where a null flag counts as
    //                   "no cut".
    //   offsets_inner - output: end offset of every produced sub-array.
    //   offsets_outer - output: cumulative number of sub-arrays after each row.
    //
    // Returns Status::OK(), or Status::InvalidArgument at the first row whose
    // two offsets differ.
    //
    // Example for one row [1,2,3,4,5] producing [[1,2,3],[4,5]]:
    //   reverse == false: flags [0,0,0,1,0] (the cut happens before the flagged element)
    //   reverse == true : flags [0,0,1,0,0] (the cut happens after the flagged element)
    // An empty row adds no inner offsets and becomes an empty outer array.
    template <bool CONSIDER_NULL>
115
    static Status do_loop(const IColumn::Offsets64& src_offsets,
116
                          const IColumn::Offsets64& split_offsets, const IColumn::Filter& cut,
117
                          const NullMap* null_map, PaddedPODArray<IColumn::Offset64>& offsets_inner,
118
0
                          PaddedPODArray<IColumn::Offset64>& offsets_outer) {
119
0
        // Flattened element position; it carries over from one row to the next.
        size_t pos = 0;
120
0
        // NOTE(review): `auto i = 0` deduces int, which is then compared with the
        // unsigned size() and used as an index; a size_t index would avoid the
        // signed/unsigned mix - consider changing it in the source file.
        // NOTE(review): split_offsets[i] is read without a size check; this
        // presumably relies on both columns having the same row count - confirm.
        for (auto i = 0; i < src_offsets.size(); i++) { // per cells
121
0
            auto in_offset = src_offsets[i];
122
0
            auto sp_offset = split_offsets[i];
123
0
            // Offsets are cumulative, so requiring equality at every row means
            // both arrays have the same length in each row checked so far.
            if (in_offset != sp_offset) [[unlikely]] {
124
0
                return Status::InvalidArgument("function {} has uneven arguments on row {}", name,
125
0
                                               i);
126
0
            }
127
128
            // [1,2,3,4,5]
129
0
            if (pos < in_offset) { // values in a cell
130
0
                // Forward variant: skip the first element of the row, so its flag
                // is never examined (a sub-array already starts there).
                pos += !reverse;
131
0
                // Reverse variant: stop one element early, so the flag of the last
                // element is never examined (the tail offset below closes the
                // row). in_offset is at least 1 here, so the subtraction cannot wrap.
                for (; pos < in_offset - reverse; ++pos) {
132
0
                    if constexpr (CONSIDER_NULL) {
133
0
                        // A null flag is treated like false: no cut.
                        if (cut[pos] && !(*null_map)[pos]) {
134
0
                            offsets_inner.push_back(pos + reverse); // cut a array [1,2,3]
135
0
                        }
136
0
                    } else {
137
0
                        if (cut[pos]) {
138
0
                            offsets_inner.push_back(pos + reverse); // cut a array [1,2,3]
139
0
                        }
140
0
                    }
141
0
                }
142
0
                // Reverse variant: step over the last element; afterwards pos equals in_offset.
                pos += reverse;
143
                // put the tail offset, always last.
144
0
                offsets_inner.push_back(pos); // put [4,5]
145
0
            }
146
147
0
            // Number of sub-arrays emitted so far is the end offset of this outer row.
            offsets_outer.push_back(offsets_inner.size());
148
0
        }
149
0
        return Status::OK();
150
0
    }
Unexecuted instantiation: _ZN5doris18FunctionArraySplitILb1EE7do_loopILb1EEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESA_RKNS4_IhLm4096ES7_Lm16ELm15EEEPSC_RS8_SF_
Unexecuted instantiation: _ZN5doris18FunctionArraySplitILb1EE7do_loopILb0EEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESA_RKNS4_IhLm4096ES7_Lm16ELm15EEEPSC_RS8_SF_
Unexecuted instantiation: _ZN5doris18FunctionArraySplitILb0EE7do_loopILb1EEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESA_RKNS4_IhLm4096ES7_Lm16ELm15EEEPSC_RS8_SF_
Unexecuted instantiation: _ZN5doris18FunctionArraySplitILb0EE7do_loopILb0EEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESA_RKNS4_IhLm4096ES7_Lm16ELm15EEEPSC_RS8_SF_
151
};
152
153
1
// Registers both instantiations with the given factory:
// FunctionArraySplit<true> (named "array_reverse_split") and
// FunctionArraySplit<false> (named "array_split").
void register_function_array_splits(SimpleFunctionFactory& factory) {
154
1
    factory.register_function<FunctionArraySplit<true>>();
155
1
    factory.register_function<FunctionArraySplit<false>>();
156
1
}
157
} // namespace doris