Coverage Report

Created: 2026-03-14 20:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/array/function_array_difference.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <fmt/format.h>
21
#include <glog/logging.h>
22
#include <stddef.h>
23
24
#include <algorithm>
25
#include <boost/iterator/iterator_facade.hpp>
26
#include <memory>
27
#include <ostream>
28
#include <string>
29
#include <utility>
30
31
#include "common/status.h"
32
#include "core/assert_cast.h"
33
#include "core/block/block.h"
34
#include "core/block/column_numbers.h"
35
#include "core/block/column_with_type_and_name.h"
36
#include "core/column/column.h"
37
#include "core/column/column_array.h"
38
#include "core/column/column_decimal.h"
39
#include "core/column/column_nullable.h"
40
#include "core/column/column_vector.h"
41
#include "core/data_type/data_type.h"
42
#include "core/data_type/data_type_array.h"
43
#include "core/data_type/data_type_nullable.h"
44
#include "core/data_type/data_type_number.h"
45
#include "core/types.h"
46
#include "exec/common/util.hpp"
47
#include "exprs/aggregate/aggregate_function.h"
48
#include "exprs/function/function.h"
49
50
namespace doris {
51
class FunctionContext;
52
} // namespace doris
53
54
namespace doris {
55
56
class FunctionArrayDifference : public IFunction {
57
public:
58
    // Name of this function; returned by get_name() and embedded in the
    // diagnostics produced by get_return_type_impl().
    static constexpr auto name = "array_difference";
59
60
2
    /// Factory: builds a new FunctionArrayDifference held by a shared pointer.
    static FunctionPtr create() {
        return std::make_shared<FunctionArrayDifference>();
    }
61
62
1
    /// Returns the function's name, i.e. the `name` constant of this class.
    String get_name() const override {
        return name;
    }
63
64
1
    /// Always false: the argument count is fixed (see get_number_of_arguments()).
    bool is_variadic() const override {
        return false;
    }
65
66
0
    /// The function takes exactly one argument (the array to difference).
    size_t get_number_of_arguments() const override {
        return 1;
    }
67
68
0
    /// Derives the result type from the single array argument.
    ///
    /// Integer and floating-point element types are widened so that the
    /// difference of two elements is less likely to overflow (for example an
    /// int32 element yields an int64 result element); the nullability of the
    /// element type is carried over to the widened type. Decimal element types
    /// are not widened: the argument type itself is returned unchanged.
    ///
    /// @param arguments argument types; arguments[0] must be an array type.
    /// @return the array type of the result.
    /// @throws doris::Exception (INVALID_ARGUMENT) for any other element type.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        const auto& array_type = arguments[0];
        DCHECK(array_type->get_primitive_type() == TYPE_ARRAY)
                << "argument for function: " << name << " should be DataTypeArray but it has type "
                << array_type->get_name() << ".";
        auto element_type = assert_cast<const DataTypeArray&>(*array_type).get_nested_type();

        // Pick the widened element type; every case either assigns it, returns, or throws.
        DataTypePtr widened_type = nullptr;
        switch (element_type->get_primitive_type()) {
        case TYPE_BOOLEAN:
        case TYPE_TINYINT:
            widened_type = std::make_shared<DataTypeInt16>();
            break;
        case TYPE_SMALLINT:
            widened_type = std::make_shared<DataTypeInt32>();
            break;
        case TYPE_INT:
            widened_type = std::make_shared<DataTypeInt64>();
            break;
        case TYPE_BIGINT:
        case TYPE_LARGEINT:
            widened_type = std::make_shared<DataTypeInt128>();
            break;
        case TYPE_FLOAT:
        case TYPE_DOUBLE:
            widened_type = std::make_shared<DataTypeFloat64>();
            break;
        case TYPE_DECIMAL32:
        case TYPE_DECIMAL64:
        case TYPE_DECIMALV2:
        case TYPE_DECIMAL128I:
        case TYPE_DECIMAL256:
            // Decimals keep their type, so the argument type is also the result type.
            return array_type;
        default:
            throw doris::Exception(
                    ErrorCode::INVALID_ARGUMENT,
                    "Function of {}, return type get wrong: and input argument is: {}", name,
                    array_type->get_name());
        }

        // Nullable elements stay nullable after widening.
        if (element_type->is_nullable()) {
            return std::make_shared<DataTypeArray>(make_nullable(widened_type));
        }
        return std::make_shared<DataTypeArray>(widened_type);
    }
114
115
    /// Evaluates array_difference on the argument column and stores the
    /// computed column at position `result` of `block`.
    ///
    /// @param context          unused here.
    /// @param block            block holding the argument and result columns.
    /// @param arguments        positions of the arguments; only arguments[0] is read.
    /// @param result           position in `block` that receives the result column.
    /// @param input_rows_count number of rows, forwarded to _execute_non_nullable().
    /// @return Status::OK() on success, or a RuntimeError naming the argument
    ///         type when _execute_non_nullable() yields no column.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        const ColumnWithTypeAndName& input = block.get_by_position(arguments[0]);
        auto diff_column = _execute_non_nullable(input, input_rows_count);
        if (diff_column) {
            // The result must have one entry per input row.
            DCHECK_EQ(input.column->size(), diff_column->size());
            block.replace_by_position(result, std::move(diff_column));
            return Status::OK();
        }
        // A null column means the element type has no implementation.
        return Status::RuntimeError(fmt::format("unsupported types for function {}({})",
                                                get_name(), input.type->get_name()));
    }
128
129
private:
130
    /// Writes the adjacent differences of one array row into `dst`.
    ///
    /// The row occupies the half-open index range [begin, end) of the
    /// flattened element data. dst[begin] is value-initialized (zero for
    /// arithmetic types) and, for every other index i in the range,
    /// dst[i] = Result(src[i]) - Result(src[i - 1]). An empty range writes
    /// nothing. Only indices inside [begin, end) are ever read or written.
    ///
    /// Both operands are converted to Result before subtracting, so a Result
    /// wider than Element keeps the difference from overflowing.
    /// NOTE(review): NO_SANITIZE_UNDEFINED is presumably kept for the
    /// instantiations where Result is not wider than Element — confirm.
    ///
    /// @param src   flattened source elements.
    /// @param dst   flattened destination, indexed exactly like `src`.
    /// @param begin index of the first element of the row.
    /// @param end   index one past the last element of the row.
    template <typename Element, typename Result>
    NO_SANITIZE_UNDEFINED static void impl(const Element* __restrict src, Result* __restrict dst,
                                           size_t begin, size_t end) {
        if (begin >= end) {
            return;
        }
        Element prev_element = src[begin];
        dst[begin] = {};
        for (size_t curr_pos = begin + 1; curr_pos < end; ++curr_pos) {
            // Load strictly inside the loop: reading src[curr_pos] before the
            // bounds check would touch src[end], which lies outside this row
            // and, for the last row, outside the element data altogether.
            const Element curr_element = src[curr_pos];
            dst[curr_pos] = static_cast<Result>(curr_element) - static_cast<Result>(prev_element);
            prev_element = curr_element;
        }
    }
Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implIhsEEvPKT_PT0_mm
Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implIasEEvPKT_PT0_mm
Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implIsiEEvPKT_PT0_mm
Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implIilEEvPKT_PT0_mm
Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implIlnEEvPKT_PT0_mm
Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implInnEEvPKT_PT0_mm
Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implIfdEEvPKT_PT0_mm
Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implIddEEvPKT_PT0_mm
Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implINS_7DecimalIiEES3_EEvPKT_PT0_mm
Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implINS_7DecimalIlEES3_EEvPKT_PT0_mm
Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implINS_12Decimal128V3ES2_EEvPKT_PT0_mm
Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implINS_14DecimalV2ValueES2_EEvPKT_PT0_mm
Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implINS_7DecimalIN4wide7integerILm256EiEEEES6_EEvPKT_PT0_mm
147
148
    template <PrimitiveType Element, PrimitiveType Result>
149
    ColumnPtr _execute_number_expanded(const ColumnArray::Offsets64& offsets,
150
                                       const IColumn& nested_column,
151
0
                                       ColumnPtr nested_null_map) const {
152
0
        using ColVecType = typename PrimitiveTypeTraits<Element>::ColumnType;
153
0
        using ColVecResult = typename PrimitiveTypeTraits<Result>::ColumnType;
154
0
        typename ColVecResult::MutablePtr res_nested = nullptr;
155
156
0
        const auto& src_data = reinterpret_cast<const ColVecType&>(nested_column).get_data();
157
0
        if constexpr (is_decimal(Result)) {
158
0
            res_nested = ColVecResult::create(0, src_data.get_scale());
159
0
        } else {
160
0
            res_nested = ColVecResult::create();
161
0
        }
162
0
        auto size = nested_column.size();
163
0
        typename ColVecResult::Container& res_values = res_nested->get_data();
164
0
        res_values.resize(size);
165
166
0
        size_t pos = 0;
167
0
        for (auto offset : offsets) {
168
0
            impl(src_data.data(), res_values.data(), pos, offset);
169
0
            pos = offset;
170
0
        }
171
0
        if (nested_null_map) {
172
0
            auto null_map_col = ColumnUInt8::create(size, 0);
173
0
            auto& null_map_col_data = null_map_col->get_data();
174
0
            auto nested_colum_data = static_cast<const ColumnUInt8*>(nested_null_map.get());
175
0
            VectorizedUtils::update_null_map(null_map_col_data, nested_colum_data->get_data());
176
0
            for (size_t row = 0; row < offsets.size(); ++row) {
177
0
                auto off = offsets[row - 1];
178
0
                auto len = offsets[row] - off;
179
0
                auto nested_pos = len ? len - 1 : 0;
180
0
                for (; nested_pos > 0; --nested_pos) {
181
0
                    if (null_map_col_data[nested_pos + off - 1]) {
182
0
                        null_map_col_data[nested_pos + off] = 1;
183
0
                    }
184
0
                }
185
0
            }
186
0
            return ColumnNullable::create(std::move(res_nested), std::move(null_map_col));
187
0
        } else {
188
0
            return res_nested;
189
0
        }
190
0
    }
Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE2ELS2_4EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_
Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE3ELS2_4EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_
Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE4ELS2_5EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_
Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE5ELS2_6EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_
Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE6ELS2_7EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_
Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE7ELS2_7EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_
Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE8ELS2_9EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_
Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE9ELS2_9EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_
Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE28ELS2_28EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_
Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE29ELS2_29EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_
Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE30ELS2_30EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_
Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE20ELS2_20EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_
Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE35ELS2_35EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_
191
192
    /// Builds the result array column for one array argument.
    ///
    /// The argument column is materialized if it is constant, its flattened
    /// elements (and their null map, when the elements are nullable) are
    /// extracted, and the work is dispatched on the element's primitive type
    /// to _execute_number_expanded() with the matching result type.
    ///
    /// @param arg              the array argument (column and type).
    /// @param input_rows_count expected number of rows; only checked via DCHECK.
    /// @return a ColumnArray made of the difference elements and the input's
    ///         offsets, or nullptr when the element type is not supported.
    ColumnPtr _execute_non_nullable(const ColumnWithTypeAndName& arg,
                                    size_t input_rows_count) const {
        auto full_column = arg.column->convert_to_full_column_if_const();
        const auto& array_column = reinterpret_cast<const ColumnArray&>(*full_column);
        const auto& offsets = array_column.get_offsets();
        DCHECK(offsets.size() == input_rows_count);

        // Split nullable elements into their values and their null map.
        ColumnPtr elements = nullptr;
        ColumnPtr element_nulls = nullptr;
        if (!is_column_nullable(array_column.get_data())) {
            elements = array_column.get_data_ptr();
        } else {
            const auto& nullable_elements =
                    reinterpret_cast<const ColumnNullable&>(array_column.get_data());
            elements = nullable_elements.get_nested_column_ptr();
            element_nulls = nullable_elements.get_null_map_column_ptr();
        }

        auto element_type =
                remove_nullable(assert_cast<const DataTypeArray&>(*arg.type).get_nested_type());

        // Each case pairs the element type with the result type declared by
        // get_return_type_impl().
        ColumnPtr diff_elements = nullptr;
        switch (element_type->get_primitive_type()) {
        case TYPE_BOOLEAN:
            diff_elements = _execute_number_expanded<TYPE_BOOLEAN, TYPE_SMALLINT>(
                    offsets, *elements, element_nulls);
            break;
        case TYPE_TINYINT:
            diff_elements = _execute_number_expanded<TYPE_TINYINT, TYPE_SMALLINT>(
                    offsets, *elements, element_nulls);
            break;
        case TYPE_SMALLINT:
            diff_elements = _execute_number_expanded<TYPE_SMALLINT, TYPE_INT>(
                    offsets, *elements, element_nulls);
            break;
        case TYPE_INT:
            diff_elements = _execute_number_expanded<TYPE_INT, TYPE_BIGINT>(
                    offsets, *elements, element_nulls);
            break;
        case TYPE_BIGINT:
            diff_elements = _execute_number_expanded<TYPE_BIGINT, TYPE_LARGEINT>(
                    offsets, *elements, element_nulls);
            break;
        case TYPE_LARGEINT:
            diff_elements = _execute_number_expanded<TYPE_LARGEINT, TYPE_LARGEINT>(
                    offsets, *elements, element_nulls);
            break;
        case TYPE_FLOAT:
            diff_elements = _execute_number_expanded<TYPE_FLOAT, TYPE_DOUBLE>(
                    offsets, *elements, element_nulls);
            break;
        case TYPE_DOUBLE:
            diff_elements = _execute_number_expanded<TYPE_DOUBLE, TYPE_DOUBLE>(
                    offsets, *elements, element_nulls);
            break;
        case TYPE_DECIMAL32:
            diff_elements = _execute_number_expanded<TYPE_DECIMAL32, TYPE_DECIMAL32>(
                    offsets, *elements, element_nulls);
            break;
        case TYPE_DECIMAL64:
            diff_elements = _execute_number_expanded<TYPE_DECIMAL64, TYPE_DECIMAL64>(
                    offsets, *elements, element_nulls);
            break;
        case TYPE_DECIMAL128I:
            diff_elements = _execute_number_expanded<TYPE_DECIMAL128I, TYPE_DECIMAL128I>(
                    offsets, *elements, element_nulls);
            break;
        case TYPE_DECIMALV2:
            diff_elements = _execute_number_expanded<TYPE_DECIMALV2, TYPE_DECIMALV2>(
                    offsets, *elements, element_nulls);
            break;
        case TYPE_DECIMAL256:
            diff_elements = _execute_number_expanded<TYPE_DECIMAL256, TYPE_DECIMAL256>(
                    offsets, *elements, element_nulls);
            break;
        default:
            // Unsupported element type: the caller turns this into an error status.
            return nullptr;
        }
        // The result has the same row layout as the input, so its offsets are reused.
        return ColumnArray::create(diff_elements, array_column.get_offsets_ptr());
    }
272
};
273
274
} // namespace doris