Coverage Report

Created: 2026-04-10 18:35

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/partitioner/partitioner.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <algorithm>
21
22
#include "core/block/block.h"
23
#include "exprs/vexpr.h"
24
#include "exprs/vexpr_context.h"
25
26
namespace doris {
27
28
class PartitionerBase {
29
public:
30
    using HashValType = uint32_t;
31
32
478k
    PartitionerBase(HashValType partition_count) : _partition_count(partition_count) {}
33
479k
    virtual ~PartitionerBase() = default;
34
35
    virtual Status init(const std::vector<TExpr>& texprs) = 0;
36
37
    virtual Status prepare(RuntimeState* state, const RowDescriptor& row_desc) = 0;
38
39
    virtual Status open(RuntimeState* state) = 0;
40
41
    virtual Status close(RuntimeState* state) = 0;
42
43
    virtual Status do_partitioning(RuntimeState* state, Block* block) const = 0;
44
45
    virtual const std::vector<HashValType>& get_channel_ids() const = 0;
46
47
    virtual Status clone(RuntimeState* state, std::unique_ptr<PartitionerBase>& partitioner) = 0;
48
49
    // _partition_count doubles as the invalid sentinel value, since the modulo operation yields a result in [0, partition_count - 1]
50
1.06k
    HashValType partition_count() const { return _partition_count; }
51
    // use a separate function to highlight its special meaning
52
1.01k
    HashValType invalid_sentinel() const { return partition_count(); }
53
54
protected:
55
    const HashValType _partition_count;
56
};
57
58
class PartitionFunction {
59
public:
60
    using HashValType = PartitionerBase::HashValType;
61
62
964
    virtual ~PartitionFunction() = default;
63
64
    virtual Status init(const std::vector<TExpr>& texprs) = 0;
65
66
    virtual Status prepare(RuntimeState* state, const RowDescriptor& row_desc) = 0;
67
68
    virtual Status open(RuntimeState* state) = 0;
69
70
    virtual Status close(RuntimeState* state) = 0;
71
72
    virtual Status get_partitions(RuntimeState* state, Block* block, size_t partition_count,
73
                                  std::vector<HashValType>& partitions) const = 0;
74
75
    virtual HashValType partition_count() const = 0;
76
77
    virtual Status clone(RuntimeState* state,
78
                         std::unique_ptr<PartitionFunction>& function) const = 0;
79
};
80
81
enum class ShuffleHashMethod {
82
    CRC32,
83
    CRC32C,
84
};
85
86
template <typename ChannelIds>
87
class Crc32HashPartitioner : public PartitionerBase {
88
public:
89
475k
    Crc32HashPartitioner(int partition_count) : PartitionerBase(partition_count) {}
_ZN5doris20Crc32HashPartitionerINS_24SpillPartitionChannelIdsEEC2Ei
Line
Count
Source
89
23
    Crc32HashPartitioner(int partition_count) : PartitionerBase(partition_count) {}
_ZN5doris20Crc32HashPartitionerINS_17ShuffleChannelIdsEEC2Ei
Line
Count
Source
89
12.5k
    Crc32HashPartitioner(int partition_count) : PartitionerBase(partition_count) {}
_ZN5doris20Crc32HashPartitionerINS_26SpillRePartitionChannelIdsEEC2Ei
Line
Count
Source
89
10
    Crc32HashPartitioner(int partition_count) : PartitionerBase(partition_count) {}
_ZN5doris20Crc32HashPartitionerINS_15ShiftChannelIdsEEC2Ei
Line
Count
Source
89
462k
    Crc32HashPartitioner(int partition_count) : PartitionerBase(partition_count) {}
90
477k
    ~Crc32HashPartitioner() override = default;
_ZN5doris20Crc32HashPartitionerINS_24SpillPartitionChannelIdsEED2Ev
Line
Count
Source
90
23
    ~Crc32HashPartitioner() override = default;
_ZN5doris20Crc32HashPartitionerINS_15ShiftChannelIdsEED2Ev
Line
Count
Source
90
464k
    ~Crc32HashPartitioner() override = default;
_ZN5doris20Crc32HashPartitionerINS_17ShuffleChannelIdsEED2Ev
Line
Count
Source
90
12.5k
    ~Crc32HashPartitioner() override = default;
_ZN5doris20Crc32HashPartitionerINS_26SpillRePartitionChannelIdsEED2Ev
Line
Count
Source
90
10
    ~Crc32HashPartitioner() override = default;
91
92
294k
    Status init(const std::vector<TExpr>& texprs) override {
93
294k
        return VExpr::create_expr_trees(texprs, _partition_expr_ctxs);
94
294k
    }
_ZN5doris20Crc32HashPartitionerINS_15ShiftChannelIdsEE4initERKSt6vectorINS_5TExprESaIS4_EE
Line
Count
Source
92
286k
    Status init(const std::vector<TExpr>& texprs) override {
93
286k
        return VExpr::create_expr_trees(texprs, _partition_expr_ctxs);
94
286k
    }
_ZN5doris20Crc32HashPartitionerINS_24SpillPartitionChannelIdsEE4initERKSt6vectorINS_5TExprESaIS4_EE
Line
Count
Source
92
18
    Status init(const std::vector<TExpr>& texprs) override {
93
18
        return VExpr::create_expr_trees(texprs, _partition_expr_ctxs);
94
18
    }
_ZN5doris20Crc32HashPartitionerINS_17ShuffleChannelIdsEE4initERKSt6vectorINS_5TExprESaIS4_EE
Line
Count
Source
92
8.20k
    Status init(const std::vector<TExpr>& texprs) override {
93
8.20k
        return VExpr::create_expr_trees(texprs, _partition_expr_ctxs);
94
8.20k
    }
_ZN5doris20Crc32HashPartitionerINS_26SpillRePartitionChannelIdsEE4initERKSt6vectorINS_5TExprESaIS4_EE
Line
Count
Source
92
6
    Status init(const std::vector<TExpr>& texprs) override {
93
6
        return VExpr::create_expr_trees(texprs, _partition_expr_ctxs);
94
6
    }
95
96
294k
    Status prepare(RuntimeState* state, const RowDescriptor& row_desc) override {
97
294k
        return VExpr::prepare(_partition_expr_ctxs, state, row_desc);
98
294k
    }
_ZN5doris20Crc32HashPartitionerINS_15ShiftChannelIdsEE7prepareEPNS_12RuntimeStateERKNS_13RowDescriptorE
Line
Count
Source
96
285k
    Status prepare(RuntimeState* state, const RowDescriptor& row_desc) override {
97
285k
        return VExpr::prepare(_partition_expr_ctxs, state, row_desc);
98
285k
    }
_ZN5doris20Crc32HashPartitionerINS_24SpillPartitionChannelIdsEE7prepareEPNS_12RuntimeStateERKNS_13RowDescriptorE
Line
Count
Source
96
15
    Status prepare(RuntimeState* state, const RowDescriptor& row_desc) override {
97
15
        return VExpr::prepare(_partition_expr_ctxs, state, row_desc);
98
15
    }
_ZN5doris20Crc32HashPartitionerINS_17ShuffleChannelIdsEE7prepareEPNS_12RuntimeStateERKNS_13RowDescriptorE
Line
Count
Source
96
8.20k
    Status prepare(RuntimeState* state, const RowDescriptor& row_desc) override {
97
8.20k
        return VExpr::prepare(_partition_expr_ctxs, state, row_desc);
98
8.20k
    }
_ZN5doris20Crc32HashPartitionerINS_26SpillRePartitionChannelIdsEE7prepareEPNS_12RuntimeStateERKNS_13RowDescriptorE
Line
Count
Source
96
6
    Status prepare(RuntimeState* state, const RowDescriptor& row_desc) override {
97
6
        return VExpr::prepare(_partition_expr_ctxs, state, row_desc);
98
6
    }
99
100
295k
    Status open(RuntimeState* state) override { return VExpr::open(_partition_expr_ctxs, state); }
_ZN5doris20Crc32HashPartitionerINS_15ShiftChannelIdsEE4openEPNS_12RuntimeStateE
Line
Count
Source
100
287k
    Status open(RuntimeState* state) override { return VExpr::open(_partition_expr_ctxs, state); }
_ZN5doris20Crc32HashPartitionerINS_24SpillPartitionChannelIdsEE4openEPNS_12RuntimeStateE
Line
Count
Source
100
6
    Status open(RuntimeState* state) override { return VExpr::open(_partition_expr_ctxs, state); }
_ZN5doris20Crc32HashPartitionerINS_17ShuffleChannelIdsEE4openEPNS_12RuntimeStateE
Line
Count
Source
100
8.21k
    Status open(RuntimeState* state) override { return VExpr::open(_partition_expr_ctxs, state); }
_ZN5doris20Crc32HashPartitionerINS_26SpillRePartitionChannelIdsEE4openEPNS_12RuntimeStateE
Line
Count
Source
100
6
    Status open(RuntimeState* state) override { return VExpr::open(_partition_expr_ctxs, state); }
101
102
265k
    Status close(RuntimeState* state) override { return Status::OK(); }
_ZN5doris20Crc32HashPartitionerINS_15ShiftChannelIdsEE5closeEPNS_12RuntimeStateE
Line
Count
Source
102
258k
    Status close(RuntimeState* state) override { return Status::OK(); }
Unexecuted instantiation: _ZN5doris20Crc32HashPartitionerINS_24SpillPartitionChannelIdsEE5closeEPNS_12RuntimeStateE
_ZN5doris20Crc32HashPartitionerINS_17ShuffleChannelIdsEE5closeEPNS_12RuntimeStateE
Line
Count
Source
102
7.65k
    Status close(RuntimeState* state) override { return Status::OK(); }
Unexecuted instantiation: _ZN5doris20Crc32HashPartitionerINS_26SpillRePartitionChannelIdsEE5closeEPNS_12RuntimeStateE
103
104
    Status do_partitioning(RuntimeState* state, Block* block) const override;
105
106
298k
    const std::vector<HashValType>& get_channel_ids() const override { return _hash_vals; }
_ZNK5doris20Crc32HashPartitionerINS_15ShiftChannelIdsEE15get_channel_idsEv
Line
Count
Source
106
285k
    const std::vector<HashValType>& get_channel_ids() const override { return _hash_vals; }
_ZNK5doris20Crc32HashPartitionerINS_24SpillPartitionChannelIdsEE15get_channel_idsEv
Line
Count
Source
106
8
    const std::vector<HashValType>& get_channel_ids() const override { return _hash_vals; }
_ZNK5doris20Crc32HashPartitionerINS_17ShuffleChannelIdsEE15get_channel_idsEv
Line
Count
Source
106
12.5k
    const std::vector<HashValType>& get_channel_ids() const override { return _hash_vals; }
_ZNK5doris20Crc32HashPartitionerINS_26SpillRePartitionChannelIdsEE15get_channel_idsEv
Line
Count
Source
106
8
    const std::vector<HashValType>& get_channel_ids() const override { return _hash_vals; }
107
108
    Status clone(RuntimeState* state, std::unique_ptr<PartitionerBase>& partitioner) override;
109
110
protected:
111
58.0k
    Status _get_partition_column_result(Block* block, std::vector<int>& result) const {
112
58.0k
        int counter = 0;
113
94.5k
        for (auto ctx : _partition_expr_ctxs) {
114
94.5k
            RETURN_IF_ERROR(ctx->execute(block, &result[counter++]));
115
94.5k
        }
116
58.0k
        return Status::OK();
117
58.0k
    }
_ZNK5doris20Crc32HashPartitionerINS_17ShuffleChannelIdsEE28_get_partition_column_resultEPNS_5BlockERSt6vectorIiSaIiEE
Line
Count
Source
111
6.08k
    Status _get_partition_column_result(Block* block, std::vector<int>& result) const {
112
6.08k
        int counter = 0;
113
6.96k
        for (auto ctx : _partition_expr_ctxs) {
114
6.96k
            RETURN_IF_ERROR(ctx->execute(block, &result[counter++]));
115
6.96k
        }
116
6.08k
        return Status::OK();
117
6.08k
    }
_ZNK5doris20Crc32HashPartitionerINS_24SpillPartitionChannelIdsEE28_get_partition_column_resultEPNS_5BlockERSt6vectorIiSaIiEE
Line
Count
Source
111
8
    Status _get_partition_column_result(Block* block, std::vector<int>& result) const {
112
8
        int counter = 0;
113
8
        for (auto ctx : _partition_expr_ctxs) {
114
8
            RETURN_IF_ERROR(ctx->execute(block, &result[counter++]));
115
8
        }
116
8
        return Status::OK();
117
8
    }
_ZNK5doris20Crc32HashPartitionerINS_26SpillRePartitionChannelIdsEE28_get_partition_column_resultEPNS_5BlockERSt6vectorIiSaIiEE
Line
Count
Source
111
8
    Status _get_partition_column_result(Block* block, std::vector<int>& result) const {
112
8
        int counter = 0;
113
8
        for (auto ctx : _partition_expr_ctxs) {
114
8
            RETURN_IF_ERROR(ctx->execute(block, &result[counter++]));
115
8
        }
116
8
        return Status::OK();
117
8
    }
_ZNK5doris20Crc32HashPartitionerINS_15ShiftChannelIdsEE28_get_partition_column_resultEPNS_5BlockERSt6vectorIiSaIiEE
Line
Count
Source
111
51.9k
    Status _get_partition_column_result(Block* block, std::vector<int>& result) const {
112
51.9k
        int counter = 0;
113
87.5k
        for (auto ctx : _partition_expr_ctxs) {
114
87.5k
            RETURN_IF_ERROR(ctx->execute(block, &result[counter++]));
115
87.5k
        }
116
51.9k
        return Status::OK();
117
51.9k
    }
118
119
181k
    Status _clone_expr_ctxs(RuntimeState* state, VExprContextSPtrs& new_partition_expr_ctxs) const {
120
181k
        new_partition_expr_ctxs.resize(_partition_expr_ctxs.size());
121
454k
        for (size_t i = 0; i < _partition_expr_ctxs.size(); i++) {
122
273k
            RETURN_IF_ERROR(_partition_expr_ctxs[i]->clone(state, new_partition_expr_ctxs[i]));
123
273k
        }
124
181k
        return Status::OK();
125
181k
    }
_ZNK5doris20Crc32HashPartitionerINS_17ShuffleChannelIdsEE16_clone_expr_ctxsEPNS_12RuntimeStateERSt6vectorISt10shared_ptrINS_12VExprContextEESaIS8_EE
Line
Count
Source
119
4.34k
    Status _clone_expr_ctxs(RuntimeState* state, VExprContextSPtrs& new_partition_expr_ctxs) const {
120
4.34k
        new_partition_expr_ctxs.resize(_partition_expr_ctxs.size());
121
8.75k
        for (size_t i = 0; i < _partition_expr_ctxs.size(); i++) {
122
4.41k
            RETURN_IF_ERROR(_partition_expr_ctxs[i]->clone(state, new_partition_expr_ctxs[i]));
123
4.41k
        }
124
4.34k
        return Status::OK();
125
4.34k
    }
_ZNK5doris20Crc32HashPartitionerINS_24SpillPartitionChannelIdsEE16_clone_expr_ctxsEPNS_12RuntimeStateERSt6vectorISt10shared_ptrINS_12VExprContextEESaIS8_EE
Line
Count
Source
119
5
    Status _clone_expr_ctxs(RuntimeState* state, VExprContextSPtrs& new_partition_expr_ctxs) const {
120
5
        new_partition_expr_ctxs.resize(_partition_expr_ctxs.size());
121
10
        for (size_t i = 0; i < _partition_expr_ctxs.size(); i++) {
122
5
            RETURN_IF_ERROR(_partition_expr_ctxs[i]->clone(state, new_partition_expr_ctxs[i]));
123
5
        }
124
5
        return Status::OK();
125
5
    }
_ZNK5doris20Crc32HashPartitionerINS_26SpillRePartitionChannelIdsEE16_clone_expr_ctxsEPNS_12RuntimeStateERSt6vectorISt10shared_ptrINS_12VExprContextEESaIS8_EE
Line
Count
Source
119
4
    Status _clone_expr_ctxs(RuntimeState* state, VExprContextSPtrs& new_partition_expr_ctxs) const {
120
4
        new_partition_expr_ctxs.resize(_partition_expr_ctxs.size());
121
8
        for (size_t i = 0; i < _partition_expr_ctxs.size(); i++) {
122
4
            RETURN_IF_ERROR(_partition_expr_ctxs[i]->clone(state, new_partition_expr_ctxs[i]));
123
4
        }
124
4
        return Status::OK();
125
4
    }
_ZNK5doris20Crc32HashPartitionerINS_15ShiftChannelIdsEE16_clone_expr_ctxsEPNS_12RuntimeStateERSt6vectorISt10shared_ptrINS_12VExprContextEESaIS8_EE
Line
Count
Source
119
176k
    Status _clone_expr_ctxs(RuntimeState* state, VExprContextSPtrs& new_partition_expr_ctxs) const {
120
176k
        new_partition_expr_ctxs.resize(_partition_expr_ctxs.size());
121
446k
        for (size_t i = 0; i < _partition_expr_ctxs.size(); i++) {
122
269k
            RETURN_IF_ERROR(_partition_expr_ctxs[i]->clone(state, new_partition_expr_ctxs[i]));
123
269k
        }
124
176k
        return Status::OK();
125
176k
    }
126
127
    virtual void _do_hash(const ColumnPtr& column, HashValType* __restrict result, int idx) const;
128
6.10k
    virtual void _initialize_hash_vals(size_t rows) const {
129
6.10k
        _hash_vals.resize(rows);
130
6.10k
        std::ranges::fill(_hash_vals, 0);
131
6.10k
    }
Unexecuted instantiation: _ZNK5doris20Crc32HashPartitionerINS_15ShiftChannelIdsEE21_initialize_hash_valsEm
_ZNK5doris20Crc32HashPartitionerINS_24SpillPartitionChannelIdsEE21_initialize_hash_valsEm
Line
Count
Source
128
8
    virtual void _initialize_hash_vals(size_t rows) const {
129
8
        _hash_vals.resize(rows);
130
8
        std::ranges::fill(_hash_vals, 0);
131
8
    }
_ZNK5doris20Crc32HashPartitionerINS_17ShuffleChannelIdsEE21_initialize_hash_valsEm
Line
Count
Source
128
6.08k
    virtual void _initialize_hash_vals(size_t rows) const {
129
6.08k
        _hash_vals.resize(rows);
130
6.08k
        std::ranges::fill(_hash_vals, 0);
131
6.08k
    }
_ZNK5doris20Crc32HashPartitionerINS_26SpillRePartitionChannelIdsEE21_initialize_hash_valsEm
Line
Count
Source
128
8
    virtual void _initialize_hash_vals(size_t rows) const {
129
8
        _hash_vals.resize(rows);
130
8
        std::ranges::fill(_hash_vals, 0);
131
8
    }
132
133
    VExprContextSPtrs _partition_expr_ctxs;
134
    mutable std::vector<HashValType> _hash_vals;
135
};
136
137
struct ShuffleChannelIds {
138
    using HashValType = PartitionerBase::HashValType;
139
2.49M
    HashValType operator()(HashValType l, size_t r) { return l % r; }
140
};
141
142
struct SpillPartitionChannelIds {
143
    using HashValType = PartitionerBase::HashValType;
144
    // Default spill partition mapping used by level-0 partitioning:
145
    // rotate hash bits and apply modulo to get a channel id directly.
146
3.14M
    HashValType operator()(HashValType l, size_t r) { return ((l >> 16) | (l << 16)) % r; }
147
};
148
149
struct SpillRePartitionChannelIds {
150
    using HashValType = PartitionerBase::HashValType;
151
152
    // Repartition mode: return the raw hash value without modulo.
153
    // The caller (SpillRepartitioner) will apply level-aware hash mixing and
154
    // final channel mapping, so repartition behavior can vary by level.
155
20
    HashValType operator()(HashValType l, size_t /*r*/) { return l; }
156
};
157
158
31.6M
static inline PartitionerBase::HashValType crc32c_shuffle_mix(PartitionerBase::HashValType h) {
159
    // Step 1: fold high entropy into low bits
160
31.6M
    h ^= h >> 16;
161
    // Step 2: odd multiplicative scramble (cheap avalanche)
162
31.6M
    h *= 0xA5B35705U;
163
    // Step 3: final fold to break remaining linearity
164
31.6M
    h ^= h >> 13;
165
31.6M
    return h;
166
31.6M
}
Unexecuted instantiation: vexpr.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: vectorized_fn_call.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: vdata_stream_recvr.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: dependency.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: pipeline_task.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: pipeline_fragment_context.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: pipeline.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: hashjoin_build_sink.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: join_build_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: nested_loop_join_build_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: runtime_filter_producer_helper.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: partitioned_hash_join_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
partitioner.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Line
Count
Source
158
31.6M
static inline PartitionerBase::HashValType crc32c_shuffle_mix(PartitionerBase::HashValType h) {
159
    // Step 1: fold high entropy into low bits
160
31.6M
    h ^= h >> 16;
161
    // Step 2: odd multiplicative scramble (cheap avalanche)
162
31.6M
    h *= 0xA5B35705U;
163
    // Step 3: final fold to break remaining linearity
164
31.6M
    h ^= h >> 13;
165
31.6M
    return h;
166
31.6M
}
Unexecuted instantiation: result_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: rowid_fetcher.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: file_scanner.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: scanner.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: scan_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: scanner_context.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: scanner_scheduler.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: olap_scanner.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: olap_scan_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: parallel_scanner_builder.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: file_scan_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: es_scan_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: es_scanner.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: group_commit_scan_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: jdbc_scan_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: jdbc_scanner.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: meta_scan_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: vorc_reader.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: vparquet_reader.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: memory_scratch_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: result_file_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: vfile_result_writer.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: hive_table_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: tvf_table_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: iceberg_table_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: spill_iceberg_table_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: iceberg_delete_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: iceberg_merge_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: maxcompute_table_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: analytic_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: blackhole_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: sort_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: spill_sort_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: local_exchange_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: local_exchanger.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: local_exchange_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: aggregation_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: partitioned_aggregation_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: exchange_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: vdata_stream_sender.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: exchange_sink_buffer.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: tablet_sink_hash_partitioner.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: merge_partitioner.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
iceberg_partition_function.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Line
Count
Source
158
164
static inline PartitionerBase::HashValType crc32c_shuffle_mix(PartitionerBase::HashValType h) {
159
    // Step 1: fold high entropy into low bits
160
164
    h ^= h >> 16;
161
    // Step 2: odd multiplicative scramble (cheap avalanche)
162
164
    h *= 0xA5B35705U;
163
    // Step 3: final fold to break remaining linearity
164
164
    h ^= h >> 13;
165
164
    return h;
166
164
}
Unexecuted instantiation: exchange_writer.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: union_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: multi_cast_data_stream_sink.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: multi_cast_data_streamer.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: partition_sort_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: set_probe_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: set_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: group_commit_block_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: cache_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: dict_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: rec_cte_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: rec_cte_anchor_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: hashjoin_probe_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: join_probe_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: nested_loop_join_probe_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: partitioned_hash_join_probe_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
spill_repartitioner.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Line
Count
Source
158
650
static inline PartitionerBase::HashValType crc32c_shuffle_mix(PartitionerBase::HashValType h) {
159
    // Step 1: fold high entropy into low bits
160
650
    h ^= h >> 16;
161
    // Step 2: odd multiplicative scramble (cheap avalanche)
162
650
    h *= 0xA5B35705U;
163
    // Step 3: final fold to break remaining linearity
164
650
    h ^= h >> 13;
165
650
    return h;
166
650
}
Unexecuted instantiation: inner_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: left_semi_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: left_anti_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: left_outer_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: full_outer_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: right_outer_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: right_semi_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: right_anti_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: null_aware_left_anti_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: null_aware_left_semi_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: asof_left_inner_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: asof_left_outer_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: analytic_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: sort_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: spill_sort_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: local_merge_sort_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: aggregation_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: partitioned_aggregation_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: table_function_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: exchange_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: repeat_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: union_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: multi_cast_data_stream_source.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: partition_sort_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: set_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: datagen_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: schema_scan_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: cache_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: rec_cte_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: jdbc_table_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: streaming_aggregation_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: distinct_streaming_aggregation_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: materialization_opertor.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: assert_num_rows_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: empty_set_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: task_scheduler.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: task_queue.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: exec_env_init.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: fragment_mgr.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: query_context.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: runtime_state.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: workload_group.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: workload_group_manager.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: query_task_controller.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: pipeline_task_action.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
167
168
// bring the high 16 bits into the channel id to avoid conflicts with crc32c hash tables
169
// using the same shuffle hash function as a crc32c hash table (e.g. the join hash table) leads to bad performance
170
// hash tables often use the low 16 bits as the bucket index, so the high bits are folded into the low bits (see crc32c_shuffle_mix) to avoid that conflict
171
struct ShiftChannelIds {
172
    using HashValType = PartitionerBase::HashValType;
173
31.6M
    HashValType operator()(HashValType l, size_t r) { return crc32c_shuffle_mix(l) % r; }
174
};
175
176
class Crc32CHashPartitioner : public Crc32HashPartitioner<ShiftChannelIds> {
177
public:
178
    Crc32CHashPartitioner(int partition_count)
179
462k
            : Crc32HashPartitioner<ShiftChannelIds>(partition_count) {}
180
181
    Status clone(RuntimeState* state, std::unique_ptr<PartitionerBase>& partitioner) override;
182
183
private:
184
    void _do_hash(const ColumnPtr& column, HashValType* __restrict result, int idx) const override;
185
186
51.9k
    void _initialize_hash_vals(size_t rows) const override {
187
51.9k
        _hash_vals.resize(rows);
188
        // seed the hash values with the golden-ratio constant to avoid collisions with the hash table's hash function
189
51.9k
        constexpr HashValType CRC32C_SHUFFLE_SEED = 0x9E3779B9U;
190
51.9k
        std::ranges::fill(_hash_vals, CRC32C_SHUFFLE_SEED);
191
51.9k
    }
192
};
193
194
} // namespace doris