Coverage Report

Created: 2026-03-14 20:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/array/function_array_shuffle.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
#include <fmt/format.h>
18
#include <glog/logging.h>
19
#include <stdint.h>
20
#include <time.h>
21
22
#include <algorithm>
23
#include <memory>
24
#include <ostream>
25
#include <random>
26
#include <string>
27
#include <utility>
28
29
#include "common/status.h"
30
#include "core/assert_cast.h"
31
#include "core/block/block.h"
32
#include "core/block/column_numbers.h"
33
#include "core/block/column_with_type_and_name.h"
34
#include "core/column/column.h"
35
#include "core/column/column_array.h"
36
#include "core/data_type/data_type.h"
37
#include "core/types.h"
38
#include "exprs/aggregate/aggregate_function.h"
39
#include "exprs/function/function.h"
40
#include "exprs/function/simple_function_factory.h"
41
42
namespace doris {
43
class FunctionContext;
44
} // namespace doris
45
46
namespace doris {
47
#include "common/compile_check_begin.h"
48
49
class FunctionArrayShuffle : public IFunction {
50
public:
51
    static constexpr auto name = "array_shuffle";
52
2
    static FunctionPtr create() { return std::make_shared<FunctionArrayShuffle>(); }
53
54
    /// Get function name.
55
0
    String get_name() const override { return name; }
56
57
1
    bool is_variadic() const override { return true; }
58
59
0
    size_t get_number_of_arguments() const override { return 1; }
60
61
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
62
0
        DCHECK(arguments[0]->get_primitive_type() == TYPE_ARRAY)
63
0
                << "first argument for function: " << name << " should be DataTypeArray"
64
0
                << " and arguments[0] is " << arguments[0]->get_name();
65
0
        return arguments[0];
66
0
    }
67
68
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
69
0
                        uint32_t result, size_t input_rows_count) const override {
70
0
        ColumnPtr src_column =
71
0
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
72
0
        const auto& src_column_array = assert_cast<const ColumnArray&>(*src_column);
73
74
0
        size_t seed = time(nullptr);
75
0
        if (arguments.size() == 2) {
76
0
            ColumnPtr seed_column =
77
0
                    block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
78
0
            seed = assert_cast<const ColumnInt64*>(seed_column.get())->get_element(0);
79
0
        }
80
81
        // time() and seed will not exceed the range of uint32.
82
0
        std::mt19937 g(cast_set<uint32_t>(seed));
83
0
        auto dest_column_ptr = _execute(src_column_array, g);
84
0
        if (!dest_column_ptr) {
85
0
            return Status::RuntimeError(
86
0
                    fmt::format("execute failed or unsupported types for function {}({})",
87
0
                                get_name(), block.get_by_position(arguments[0]).type->get_name()));
88
0
        }
89
90
0
        block.replace_by_position(result, std::move(dest_column_ptr));
91
0
        return Status::OK();
92
0
    }
93
94
private:
95
0
    ColumnPtr _execute(const ColumnArray& src_column_array, std::mt19937& g) const {
96
0
        const auto& src_offsets = src_column_array.get_offsets();
97
0
        const auto src_nested_column = src_column_array.get_data_ptr();
98
99
0
        ColumnArray::Offset64 src_offsets_size = src_offsets.size();
100
0
        IColumn::Permutation permutation(src_nested_column->size());
101
102
0
        for (size_t i = 0; i < src_nested_column->size(); ++i) {
103
0
            permutation[i] = i;
104
0
        }
105
106
0
        for (size_t i = 0; i < src_offsets_size; ++i) {
107
0
            auto last_offset = src_offsets[i - 1];
108
0
            auto src_offset = src_offsets[i];
109
110
0
            std::shuffle(&permutation[last_offset], &permutation[src_offset], g);
111
0
        }
112
0
        return ColumnArray::create(src_nested_column->permute(permutation, 0),
113
0
                                   src_column_array.get_offsets_ptr());
114
0
    }
115
};
116
117
1
/// Registers FunctionArrayShuffle with the factory and exposes it under the
/// additional name "shuffle".
void register_function_array_shuffle(SimpleFunctionFactory& factory) {
    factory.register_function<FunctionArrayShuffle>();
    // FunctionArrayShuffle::name is "array_shuffle"; "shuffle" is registered as its alias.
    factory.register_alias(FunctionArrayShuffle::name, "shuffle");
}
121
122
} // namespace doris