be/src/exprs/function/array/function_array_split.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // This file is copied from |
19 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/array/arraySplit.cpp |
20 | | // and modified by Doris |
21 | | |
22 | | #include <cstddef> |
23 | | #include <memory> |
24 | | #include <utility> |
25 | | |
26 | | #include "common/status.h" |
27 | | #include "core/assert_cast.h" |
28 | | #include "core/block/block.h" |
29 | | #include "core/block/column_numbers.h" |
30 | | #include "core/block/column_with_type_and_name.h" |
31 | | #include "core/column/column.h" |
32 | | #include "core/column/column_array.h" |
33 | | #include "core/column/column_const.h" |
34 | | #include "core/column/column_nullable.h" |
35 | | #include "core/data_type/data_type.h" |
36 | | #include "core/data_type/data_type_array.h" |
37 | | #include "core/data_type/data_type_nullable.h" |
38 | | #include "core/types.h" |
39 | | #include "exprs/aggregate/aggregate_function.h" |
40 | | #include "exprs/function/function.h" |
41 | | #include "exprs/function/simple_function_factory.h" |
42 | | |
43 | | namespace doris { |
44 | | class FunctionContext; |
45 | | } // namespace doris |
46 | | |
47 | | namespace doris { |
48 | | #include "common/compile_check_begin.h" |
49 | | template <bool reverse> |
50 | | class FunctionArraySplit : public IFunction { |
51 | | public: |
52 | | static constexpr auto name = reverse ? "array_reverse_split" : "array_split"; |
53 | 4 | static FunctionPtr create() { return std::make_shared<FunctionArraySplit>(); }_ZN5doris18FunctionArraySplitILb1EE6createEv Line | Count | Source | 53 | 2 | static FunctionPtr create() { return std::make_shared<FunctionArraySplit>(); } |
_ZN5doris18FunctionArraySplitILb0EE6createEv Line | Count | Source | 53 | 2 | static FunctionPtr create() { return std::make_shared<FunctionArraySplit>(); } |
|
54 | 2 | String get_name() const override { return name; }_ZNK5doris18FunctionArraySplitILb1EE8get_nameB5cxx11Ev Line | Count | Source | 54 | 1 | String get_name() const override { return name; } |
_ZNK5doris18FunctionArraySplitILb0EE8get_nameB5cxx11Ev Line | Count | Source | 54 | 1 | String get_name() const override { return name; } |
|
55 | | |
56 | 0 | size_t get_number_of_arguments() const override { return 2; }Unexecuted instantiation: _ZNK5doris18FunctionArraySplitILb1EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris18FunctionArraySplitILb0EE23get_number_of_argumentsEv |
57 | | |
58 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
59 | 0 | return std::make_shared<DataTypeArray>(make_nullable(arguments[0])); |
60 | 0 | }; Unexecuted instantiation: _ZNK5doris18FunctionArraySplitILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE Unexecuted instantiation: _ZNK5doris18FunctionArraySplitILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE |
61 | | |
62 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
63 | 0 | uint32_t result, size_t input_rows_count) const override { |
64 | | // <Nullable>(Array(<Nullable>(Int))) |
65 | 0 | auto src_column = |
66 | 0 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
67 | 0 | auto spliter_column = |
68 | 0 | block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); |
69 | | |
70 | | // only change its split(i.e. offsets) |
71 | 0 | const auto& src_data = assert_cast<const ColumnArray&>(*src_column).get_data_ptr(); |
72 | 0 | const auto& src_offsets = assert_cast<const ColumnArray&>(*src_column).get_offsets(); |
73 | |
|
74 | 0 | auto split_col = assert_cast<const ColumnArray*>(spliter_column.get())->get_data_ptr(); |
75 | 0 | const auto& split_offsets = assert_cast<const ColumnArray&>(*spliter_column) |
76 | 0 | .get_offsets(); // for check uneven array |
77 | |
|
78 | 0 | const NullMap* null_map = nullptr; |
79 | 0 | if (split_col->is_nullable()) { |
80 | 0 | if (split_col->has_null()) { |
81 | 0 | null_map = |
82 | 0 | &assert_cast<const ColumnNullable*>(split_col.get())->get_null_map_data(); |
83 | 0 | } |
84 | 0 | split_col = |
85 | 0 | assert_cast<const ColumnNullable*>(split_col.get())->get_nested_column_ptr(); |
86 | 0 | } |
87 | |
|
88 | 0 | const IColumn::Filter& cut = assert_cast<const ColumnBool*>(split_col.get())->get_data(); |
89 | |
|
90 | 0 | auto col_offsets_inner = ColumnArray::ColumnOffsets::create(); |
91 | 0 | auto col_offsets_outer = ColumnArray::ColumnOffsets::create(); |
92 | 0 | auto& offsets_inner = col_offsets_inner->get_data(); |
93 | 0 | auto& offsets_outer = col_offsets_outer->get_data(); |
94 | 0 | offsets_inner.reserve(src_offsets.size()); // assume the actual size to be equal or larger |
95 | 0 | offsets_outer.reserve(src_offsets.size()); |
96 | |
|
97 | 0 | if (null_map != nullptr) { |
98 | 0 | RETURN_IF_ERROR(do_loop<true>(src_offsets, split_offsets, cut, null_map, offsets_inner, |
99 | 0 | offsets_outer)); |
100 | 0 | } else { |
101 | 0 | RETURN_IF_ERROR(do_loop<false>(src_offsets, split_offsets, cut, null_map, offsets_inner, |
102 | 0 | offsets_outer)); |
103 | 0 | } |
104 | | |
105 | 0 | auto inner_result = ColumnArray::create(src_data, std::move(col_offsets_inner)); |
106 | 0 | auto outer_result = ColumnArray::create( |
107 | 0 | ColumnNullable::create(std::move(inner_result), |
108 | 0 | ColumnUInt8::create(inner_result->size(), 0)), |
109 | 0 | std::move(col_offsets_outer)); |
110 | 0 | block.replace_by_position(result, std::move(outer_result)); |
111 | 0 | return Status::OK(); |
112 | 0 | } Unexecuted instantiation: _ZNK5doris18FunctionArraySplitILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris18FunctionArraySplitILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm |
113 | | |
114 | | template <bool CONSIDER_NULL> |
115 | | static Status do_loop(const IColumn::Offsets64& src_offsets, |
116 | | const IColumn::Offsets64& split_offsets, const IColumn::Filter& cut, |
117 | | const NullMap* null_map, PaddedPODArray<IColumn::Offset64>& offsets_inner, |
118 | 0 | PaddedPODArray<IColumn::Offset64>& offsets_outer) { |
119 | 0 | size_t pos = 0; |
120 | 0 | for (auto i = 0; i < src_offsets.size(); i++) { // per cells |
121 | 0 | auto in_offset = src_offsets[i]; |
122 | 0 | auto sp_offset = split_offsets[i]; |
123 | 0 | if (in_offset != sp_offset) [[unlikely]] { |
124 | 0 | return Status::InvalidArgument("function {} has uneven arguments on row {}", name, |
125 | 0 | i); |
126 | 0 | } |
127 | | |
128 | | // [1,2,3,4,5] |
129 | 0 | if (pos < in_offset) { // values in a cell |
130 | 0 | pos += !reverse; |
131 | 0 | for (; pos < in_offset - reverse; ++pos) { |
132 | 0 | if constexpr (CONSIDER_NULL) { |
133 | 0 | if (cut[pos] && !(*null_map)[pos]) { |
134 | 0 | offsets_inner.push_back(pos + reverse); // cut a array [1,2,3] |
135 | 0 | } |
136 | 0 | } else { |
137 | 0 | if (cut[pos]) { |
138 | 0 | offsets_inner.push_back(pos + reverse); // cut a array [1,2,3] |
139 | 0 | } |
140 | 0 | } |
141 | 0 | } |
142 | 0 | pos += reverse; |
143 | | // put the tail offset, always last. |
144 | 0 | offsets_inner.push_back(pos); // put [4,5] |
145 | 0 | } |
146 | |
|
147 | 0 | offsets_outer.push_back(offsets_inner.size()); |
148 | 0 | } |
149 | 0 | return Status::OK(); |
150 | 0 | } Unexecuted instantiation: _ZN5doris18FunctionArraySplitILb1EE7do_loopILb1EEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESA_RKNS4_IhLm4096ES7_Lm16ELm15EEEPSC_RS8_SF_ Unexecuted instantiation: _ZN5doris18FunctionArraySplitILb1EE7do_loopILb0EEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESA_RKNS4_IhLm4096ES7_Lm16ELm15EEEPSC_RS8_SF_ Unexecuted instantiation: _ZN5doris18FunctionArraySplitILb0EE7do_loopILb1EEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESA_RKNS4_IhLm4096ES7_Lm16ELm15EEEPSC_RS8_SF_ Unexecuted instantiation: _ZN5doris18FunctionArraySplitILb0EE7do_loopILb0EEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESA_RKNS4_IhLm4096ES7_Lm16ELm15EEEPSC_RS8_SF_ |
151 | | }; |
152 | | |
153 | 1 | void register_function_array_splits(SimpleFunctionFactory& factory) { |
154 | 1 | factory.register_function<FunctionArraySplit<true>>(); |
155 | 1 | factory.register_function<FunctionArraySplit<false>>(); |
156 | 1 | } |
157 | | } // namespace doris |