be/src/exprs/aggregate/aggregate_function_foreachv2.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/Combinators/AggregateFunctionForEach.cpp |
19 | | // and modified by Doris |
20 | | |
21 | | #include <memory> |
22 | | |
23 | | #include "common/logging.h" |
24 | | #include "core/data_type/data_type_array.h" |
25 | | #include "core/data_type/data_type_nullable.h" |
26 | | #include "exprs/aggregate/aggregate_function.h" |
27 | | #include "exprs/aggregate/aggregate_function_foreach.h" |
28 | | #include "exprs/aggregate/aggregate_function_simple_factory.h" |
29 | | #include "exprs/aggregate/helpers.h" |
30 | | |
31 | | namespace doris { |
32 | | |
33 | | // The difference between AggregateFunctionForEachV2 and AggregateFunctionForEach is that its return value array is always an Array<Nullable<T>>. |
34 | | // For example, AggregateFunctionForEach's count_foreach([1,2,3]) returns Array<Int64>, which is not ideal |
35 | | // because we may have already assumed that the array's elements are always nullable types, and many places have such checks. |
36 | | // V1 code is kept to ensure compatibility during upgrades and downgrades. |
37 | | // V2 code differs from V1 only in the return type and insert_into logic; all other logic is exactly the same. |
38 | | class AggregateFunctionForEachV2 : public AggregateFunctionForEach { |
39 | | public: |
40 | | constexpr static auto AGG_FOREACH_SUFFIX = "_foreachv2"; |
41 | | AggregateFunctionForEachV2(AggregateFunctionPtr nested_function_, const DataTypes& arguments) |
42 | 0 | : AggregateFunctionForEach(nested_function_, arguments) {} |
43 | | |
44 | 0 | DataTypePtr get_return_type() const override { |
45 | 0 | return std::make_shared<DataTypeArray>(make_nullable(nested_function->get_return_type())); |
46 | 0 | } |
47 | | |
48 | 0 | void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { |
49 | 0 | const AggregateFunctionForEachData& state = data(place); |
50 | |
|
51 | 0 | auto& arr_to = assert_cast<ColumnArray&>(to); |
52 | 0 | auto& offsets_to = arr_to.get_offsets(); |
53 | 0 | IColumn& elems_nullable = arr_to.get_data(); |
54 | |
|
55 | 0 | DCHECK(elems_nullable.is_nullable()); |
56 | 0 | auto& elems_to = assert_cast<ColumnNullable&>(elems_nullable).get_nested_column(); |
57 | 0 | auto& elements_null_map = |
58 | 0 | assert_cast<ColumnNullable&>(elems_nullable).get_null_map_column(); |
59 | |
|
60 | 0 | if (nested_function->get_return_type()->is_nullable()) { |
61 | 0 | char* nested_state = state.array_of_aggregate_datas; |
62 | 0 | for (size_t i = 0; i < state.dynamic_array_size; ++i) { |
63 | 0 | nested_function->insert_result_into(nested_state, elems_nullable); |
64 | 0 | nested_state += nested_size_of_data; |
65 | 0 | } |
66 | 0 | } else { |
67 | 0 | char* nested_state = state.array_of_aggregate_datas; |
68 | 0 | for (size_t i = 0; i < state.dynamic_array_size; ++i) { |
69 | 0 | nested_function->insert_result_into(nested_state, elems_to); |
70 | 0 | elements_null_map.insert_default(); // not null |
71 | 0 | nested_state += nested_size_of_data; |
72 | 0 | } |
73 | 0 | } |
74 | 0 | offsets_to.push_back(offsets_to.back() + state.dynamic_array_size); |
75 | 0 | } |
76 | | }; |
77 | | |
78 | 1 | void register_aggregate_function_combinator_foreachv2(AggregateFunctionSimpleFactory& factory) { |
79 | 1 | AggregateFunctionCreator creator = |
80 | 1 | [&](const std::string& name, const DataTypes& types, const DataTypePtr& result_type, |
81 | 1 | const bool result_is_nullable, |
82 | 1 | const AggregateFunctionAttr& attr) -> AggregateFunctionPtr { |
83 | 0 | const std::string& suffix = AggregateFunctionForEachV2::AGG_FOREACH_SUFFIX; |
84 | 0 | DataTypes transform_arguments; |
85 | 0 | for (const auto& t : types) { |
86 | 0 | auto item_type = |
87 | 0 | assert_cast<const DataTypeArray*>(remove_nullable(t).get())->get_nested_type(); |
88 | 0 | transform_arguments.push_back(item_type); |
89 | 0 | } |
90 | 0 | auto result_item_type = |
91 | 0 | assert_cast<const DataTypeArray*>(remove_nullable(result_type).get()) |
92 | 0 | ->get_nested_type(); |
93 | 0 | auto nested_function_name = name.substr(0, name.size() - suffix.size()); |
94 | 0 | auto nested_function = |
95 | 0 | factory.get(nested_function_name, transform_arguments, result_item_type, true, |
96 | 0 | BeExecVersionManager::get_newest_version(), attr); |
97 | 0 | if (!nested_function) { |
98 | 0 | throw Exception( |
99 | 0 | ErrorCode::INTERNAL_ERROR, |
100 | 0 | "The combiner did not find a foreach combiner function. nested function " |
101 | 0 | "name {} , args {}", |
102 | 0 | nested_function_name, types_name(types)); |
103 | 0 | } |
104 | 0 | return creator_without_type::create<AggregateFunctionForEachV2>(types, result_is_nullable, |
105 | 0 | attr, nested_function); |
106 | 0 | }; |
107 | 1 | factory.register_foreach_function_combinator( |
108 | 1 | creator, AggregateFunctionForEachV2::AGG_FOREACH_SUFFIX, true); |
109 | 1 | factory.register_foreach_function_combinator( |
110 | 1 | creator, AggregateFunctionForEachV2::AGG_FOREACH_SUFFIX, false); |
111 | 1 | } |
112 | | } // namespace doris |