be/src/exprs/function/array/function_array_split.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // |
18 | | // This file is copied from |
19 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/array/arraySplit.cpp |
20 | | // and modified by Doris |
21 | | |
22 | | #include <cstddef> |
23 | | #include <memory> |
24 | | #include <utility> |
25 | | |
26 | | #include "common/status.h" |
27 | | #include "core/assert_cast.h" |
28 | | #include "core/block/block.h" |
29 | | #include "core/block/column_numbers.h" |
30 | | #include "core/block/column_with_type_and_name.h" |
31 | | #include "core/column/column.h" |
32 | | #include "core/column/column_array.h" |
33 | | #include "core/column/column_const.h" |
34 | | #include "core/column/column_nullable.h" |
35 | | #include "core/data_type/data_type.h" |
36 | | #include "core/data_type/data_type_array.h" |
37 | | #include "core/data_type/data_type_nullable.h" |
38 | | #include "core/types.h" |
39 | | #include "exprs/aggregate/aggregate_function.h" |
40 | | #include "exprs/function/function.h" |
41 | | #include "exprs/function/simple_function_factory.h" |
42 | | |
43 | | namespace doris { |
44 | | class FunctionContext; |
45 | | } // namespace doris |
46 | | |
47 | | namespace doris { |
48 | | template <bool reverse> |
49 | | class FunctionArraySplit : public IFunction { |
50 | | public: |
51 | | static constexpr auto name = reverse ? "array_reverse_split" : "array_split"; |
52 | 4 | static FunctionPtr create() { return std::make_shared<FunctionArraySplit>(); }_ZN5doris18FunctionArraySplitILb1EE6createEv Line | Count | Source | 52 | 2 | static FunctionPtr create() { return std::make_shared<FunctionArraySplit>(); } |
_ZN5doris18FunctionArraySplitILb0EE6createEv Line | Count | Source | 52 | 2 | static FunctionPtr create() { return std::make_shared<FunctionArraySplit>(); } |
|
53 | 2 | String get_name() const override { return name; }_ZNK5doris18FunctionArraySplitILb1EE8get_nameB5cxx11Ev Line | Count | Source | 53 | 1 | String get_name() const override { return name; } |
_ZNK5doris18FunctionArraySplitILb0EE8get_nameB5cxx11Ev Line | Count | Source | 53 | 1 | String get_name() const override { return name; } |
|
54 | | |
55 | 0 | size_t get_number_of_arguments() const override { return 2; }Unexecuted instantiation: _ZNK5doris18FunctionArraySplitILb1EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris18FunctionArraySplitILb0EE23get_number_of_argumentsEv |
56 | | |
57 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
58 | 0 | return std::make_shared<DataTypeArray>(make_nullable(arguments[0])); |
59 | 0 | }; Unexecuted instantiation: _ZNK5doris18FunctionArraySplitILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE Unexecuted instantiation: _ZNK5doris18FunctionArraySplitILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE |
60 | | |
61 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
62 | 0 | uint32_t result, size_t input_rows_count) const override { |
63 | | // <Nullable>(Array(<Nullable>(Int))) |
64 | 0 | auto src_column = |
65 | 0 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
66 | 0 | auto spliter_column = |
67 | 0 | block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); |
68 | | |
69 | | // only change its split(i.e. offsets) |
70 | 0 | const auto& src_data = assert_cast<const ColumnArray&>(*src_column).get_data_ptr(); |
71 | 0 | const auto& src_offsets = assert_cast<const ColumnArray&>(*src_column).get_offsets(); |
72 | |
|
73 | 0 | auto split_col = assert_cast<const ColumnArray*>(spliter_column.get())->get_data_ptr(); |
74 | 0 | const auto& split_offsets = assert_cast<const ColumnArray&>(*spliter_column) |
75 | 0 | .get_offsets(); // for check uneven array |
76 | |
|
77 | 0 | const NullMap* null_map = nullptr; |
78 | 0 | if (split_col->is_nullable()) { |
79 | 0 | if (split_col->has_null()) { |
80 | 0 | null_map = |
81 | 0 | &assert_cast<const ColumnNullable*>(split_col.get())->get_null_map_data(); |
82 | 0 | } |
83 | 0 | split_col = |
84 | 0 | assert_cast<const ColumnNullable*>(split_col.get())->get_nested_column_ptr(); |
85 | 0 | } |
86 | |
|
87 | 0 | const IColumn::Filter& cut = assert_cast<const ColumnBool*>(split_col.get())->get_data(); |
88 | |
|
89 | 0 | auto col_offsets_inner = ColumnArray::ColumnOffsets::create(); |
90 | 0 | auto col_offsets_outer = ColumnArray::ColumnOffsets::create(); |
91 | 0 | auto& offsets_inner = col_offsets_inner->get_data(); |
92 | 0 | auto& offsets_outer = col_offsets_outer->get_data(); |
93 | 0 | offsets_inner.reserve(src_offsets.size()); // assume the actual size to be equal or larger |
94 | 0 | offsets_outer.reserve(src_offsets.size()); |
95 | |
|
96 | 0 | if (null_map != nullptr) { |
97 | 0 | RETURN_IF_ERROR(do_loop<true>(src_offsets, split_offsets, cut, null_map, offsets_inner, |
98 | 0 | offsets_outer)); |
99 | 0 | } else { |
100 | 0 | RETURN_IF_ERROR(do_loop<false>(src_offsets, split_offsets, cut, null_map, offsets_inner, |
101 | 0 | offsets_outer)); |
102 | 0 | } |
103 | | |
104 | 0 | auto inner_result = ColumnArray::create(src_data, std::move(col_offsets_inner)); |
105 | 0 | auto outer_result = ColumnArray::create( |
106 | 0 | ColumnNullable::create(std::move(inner_result), |
107 | 0 | ColumnUInt8::create(inner_result->size(), 0)), |
108 | 0 | std::move(col_offsets_outer)); |
109 | 0 | block.replace_by_position(result, std::move(outer_result)); |
110 | 0 | return Status::OK(); |
111 | 0 | } Unexecuted instantiation: _ZNK5doris18FunctionArraySplitILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris18FunctionArraySplitILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm |
112 | | |
113 | | template <bool CONSIDER_NULL> |
114 | | static Status do_loop(const IColumn::Offsets64& src_offsets, |
115 | | const IColumn::Offsets64& split_offsets, const IColumn::Filter& cut, |
116 | | const NullMap* null_map, PaddedPODArray<IColumn::Offset64>& offsets_inner, |
117 | 0 | PaddedPODArray<IColumn::Offset64>& offsets_outer) { |
118 | 0 | size_t pos = 0; |
119 | 0 | for (auto i = 0; i < src_offsets.size(); i++) { // per cells |
120 | 0 | auto in_offset = src_offsets[i]; |
121 | 0 | auto sp_offset = split_offsets[i]; |
122 | 0 | if (in_offset != sp_offset) [[unlikely]] { |
123 | 0 | return Status::InvalidArgument("function {} has uneven arguments on row {}", name, |
124 | 0 | i); |
125 | 0 | } |
126 | | |
127 | | // [1,2,3,4,5] |
128 | 0 | if (pos < in_offset) { // values in a cell |
129 | 0 | pos += !reverse; |
130 | 0 | for (; pos < in_offset - reverse; ++pos) { |
131 | 0 | if constexpr (CONSIDER_NULL) { |
132 | 0 | if (cut[pos] && !(*null_map)[pos]) { |
133 | 0 | offsets_inner.push_back(pos + reverse); // cut a array [1,2,3] |
134 | 0 | } |
135 | 0 | } else { |
136 | 0 | if (cut[pos]) { |
137 | 0 | offsets_inner.push_back(pos + reverse); // cut a array [1,2,3] |
138 | 0 | } |
139 | 0 | } |
140 | 0 | } |
141 | 0 | pos += reverse; |
142 | | // put the tail offset, always last. |
143 | 0 | offsets_inner.push_back(pos); // put [4,5] |
144 | 0 | } |
145 | |
|
146 | 0 | offsets_outer.push_back(offsets_inner.size()); |
147 | 0 | } |
148 | 0 | return Status::OK(); |
149 | 0 | } Unexecuted instantiation: _ZN5doris18FunctionArraySplitILb1EE7do_loopILb1EEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESA_RKNS4_IhLm4096ES7_Lm16ELm15EEEPSC_RS8_SF_ Unexecuted instantiation: _ZN5doris18FunctionArraySplitILb1EE7do_loopILb0EEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESA_RKNS4_IhLm4096ES7_Lm16ELm15EEEPSC_RS8_SF_ Unexecuted instantiation: _ZN5doris18FunctionArraySplitILb0EE7do_loopILb1EEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESA_RKNS4_IhLm4096ES7_Lm16ELm15EEEPSC_RS8_SF_ Unexecuted instantiation: _ZN5doris18FunctionArraySplitILb0EE7do_loopILb0EEENS_6StatusERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESA_RKNS4_IhLm4096ES7_Lm16ELm15EEEPSC_RS8_SF_ |
150 | | }; |
151 | | |
152 | 1 | void register_function_array_splits(SimpleFunctionFactory& factory) { |
153 | 1 | factory.register_function<FunctionArraySplit<true>>(); |
154 | 1 | factory.register_function<FunctionArraySplit<false>>(); |
155 | 1 | } |
156 | | } // namespace doris |