Coverage Report

Created: 2025-04-24 12:23

/root/doris/be/src/vec/functions/random.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <fmt/format.h>
19
#include <glog/logging.h>
20
21
#include <algorithm>
22
#include <boost/iterator/iterator_facade.hpp>
23
#include <cstdint>
24
#include <cstdlib>
25
#include <memory>
26
#include <random>
27
#include <utility>
28
29
#include "common/status.h"
30
#include "udf/udf.h"
31
#include "vec/aggregate_functions/aggregate_function.h"
32
#include "vec/columns/column.h"
33
#include "vec/columns/column_vector.h"
34
#include "vec/columns/columns_number.h"
35
#include "vec/common/assert_cast.h"
36
#include "vec/core/block.h"
37
#include "vec/core/column_numbers.h"
38
#include "vec/core/types.h"
39
#include "vec/data_types/data_type_number.h"
40
#include "vec/functions/function.h"
41
#include "vec/functions/simple_function_factory.h"
42
43
namespace doris::vectorized {
44
class Random : public IFunction {
45
public:
46
    static constexpr auto name = "random";
47
48
8
    static FunctionPtr create() { return std::make_shared<Random>(); }
49
50
0
    String get_name() const override { return name; }
51
52
11
    bool use_default_implementation_for_constants() const override { return false; }
53
54
0
    size_t get_number_of_arguments() const override { return 0; }
55
56
7
    bool is_variadic() const override { return true; }
57
58
6
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
59
6
        if (arguments.size() == 2) {
60
0
            return std::make_shared<DataTypeInt64>();
61
0
        }
62
6
        return std::make_shared<DataTypeFloat64>();
63
6
    }
64
65
12
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
66
12
        std::shared_ptr<std::mt19937_64> generator(new std::mt19937_64());
67
12
        context->set_function_state(scope, generator);
68
12
        if (scope == FunctionContext::THREAD_LOCAL) {
69
6
            if (context->get_num_args() == 1) {
70
                // This is a call to RandSeed, initialize the seed
71
                // TODO: should we support non-constant seed?
72
6
                if (!context->is_col_constant(0)) {
73
0
                    return Status::InvalidArgument("The param of rand function must be literal");
74
0
                }
75
6
                uint32_t seed = 0;
76
6
                if (!context->get_constant_col(0)->column_ptr->is_null_at(0)) {
77
5
                    seed = context->get_constant_col(0)->column_ptr->get64(0);
78
5
                }
79
6
                generator->seed(seed);
80
6
            } else if (context->get_num_args() == 2) {
81
0
                if (!context->is_col_constant(0) || !context->is_col_constant(1)) {
82
0
                    return Status::InvalidArgument("The param of rand function must be literal");
83
0
                }
84
0
                generator->seed(std::random_device()());
85
0
            } else { // zero args
86
0
                generator->seed(std::random_device()());
87
0
            }
88
6
        }
89
90
12
        return Status::OK();
91
12
    }
92
93
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
94
5
                        size_t result, size_t input_rows_count) const override {
95
5
        if (arguments.size() == 2) {
96
0
            return _execute_int_range(context, block, arguments, result, input_rows_count);
97
0
        }
98
5
        return _execute_float(context, block, arguments, result, input_rows_count);
99
5
    }
100
101
12
    Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
102
12
        return Status::OK();
103
12
    }
104
105
private:
106
    static Status _execute_int_range(FunctionContext* context, Block& block,
107
                                     const ColumnNumbers& arguments, size_t result,
108
0
                                     size_t input_rows_count) {
109
0
        auto res_column = ColumnInt64::create(input_rows_count);
110
0
        auto& res_data = static_cast<ColumnInt64&>(*res_column).get_data();
111
112
0
        auto* generator = reinterpret_cast<std::mt19937_64*>(
113
0
                context->get_function_state(FunctionContext::THREAD_LOCAL));
114
0
        DCHECK(generator != nullptr);
115
116
        // checked in open()
117
0
        Int64 min = assert_cast<const ColumnInt64*>(
118
0
                            assert_cast<const ColumnConst*>(
119
0
                                    block.get_by_position(arguments[0]).column.get())
120
0
                                    ->get_data_column_ptr()
121
0
                                    .get())
122
0
                            ->get_element(0);
123
0
        Int64 max = assert_cast<const ColumnInt64*>(
124
0
                            assert_cast<const ColumnConst*>(
125
0
                                    block.get_by_position(arguments[1]).column.get())
126
0
                                    ->get_data_column_ptr()
127
0
                                    .get())
128
0
                            ->get_element(0);
129
0
        if (min >= max) {
130
0
            return Status::InvalidArgument(fmt::format(
131
0
                    "random's lower bound should less than upper bound, but got [{}, {})", min,
132
0
                    max));
133
0
        }
134
135
0
        std::uniform_int_distribution<int64_t> distribution(min, max);
136
0
        for (int i = 0; i < input_rows_count; i++) {
137
0
            res_data[i] = distribution(*generator);
138
0
        }
139
140
0
        block.replace_by_position(result, std::move(res_column));
141
0
        return Status::OK();
142
0
    }
143
144
    static Status _execute_float(FunctionContext* context, Block& block,
145
                                 const ColumnNumbers& arguments, size_t result,
146
5
                                 size_t input_rows_count) {
147
5
        static const double min = 0.0;
148
5
        static const double max = 1.0;
149
5
        auto res_column = ColumnFloat64::create(input_rows_count);
150
5
        auto& res_data = static_cast<ColumnFloat64&>(*res_column).get_data();
151
152
5
        auto* generator = reinterpret_cast<std::mt19937_64*>(
153
5
                context->get_function_state(FunctionContext::THREAD_LOCAL));
154
5
        DCHECK(generator != nullptr);
155
156
5
        std::uniform_real_distribution<double> distribution(min, max);
157
10
        for (int i = 0; i < input_rows_count; i++) {
158
5
            res_data[i] = distribution(*generator);
159
5
        }
160
161
5
        block.replace_by_position(result, std::move(res_column));
162
5
        return Status::OK();
163
5
    }
164
};
165
166
1
/// Registers the Random function with `factory` and makes "rand" an alias of Random::name.
void register_function_random(SimpleFunctionFactory& factory) {
    constexpr const char* alias_name = "rand";

    factory.register_function<Random>();
    factory.register_alias(Random::name, alias_name);
}
170
171
} // namespace doris::vectorized