Coverage Report

Created: 2026-03-13 14:44

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/partitioner/partitioner.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <algorithm>
21
22
#include "core/block/block.h"
23
#include "exprs/vexpr.h"
24
#include "exprs/vexpr_context.h"
25
26
namespace doris {
27
#include "common/compile_check_begin.h"
28
29
class PartitionerBase {
30
public:
31
    using HashValType = uint32_t;
32
33
460k
    PartitionerBase(HashValType partition_count) : _partition_count(partition_count) {}
34
461k
    virtual ~PartitionerBase() = default;
35
36
    virtual Status init(const std::vector<TExpr>& texprs) = 0;
37
38
    virtual Status prepare(RuntimeState* state, const RowDescriptor& row_desc) = 0;
39
40
    virtual Status open(RuntimeState* state) = 0;
41
42
    virtual Status close(RuntimeState* state) = 0;
43
44
    virtual Status do_partitioning(RuntimeState* state, Block* block) const = 0;
45
46
    virtual const std::vector<HashValType>& get_channel_ids() const = 0;
47
48
    virtual Status clone(RuntimeState* state, std::unique_ptr<PartitionerBase>& partitioner) = 0;
49
50
    // use _partition_count as the invalid sentinel value, since a modulo result always falls in [0, partition_count-1]
51
14.4k
    HashValType partition_count() const { return _partition_count; }
52
    // use an individual function to highlight its special meaning
53
12.7k
    HashValType invalid_sentinel() const { return partition_count(); }
54
55
protected:
56
    const HashValType _partition_count;
57
};
58
59
template <typename ChannelIds>
60
class Crc32HashPartitioner : public PartitionerBase {
61
public:
62
454k
    Crc32HashPartitioner(int partition_count) : PartitionerBase(partition_count) {}
_ZN5doris20Crc32HashPartitionerINS_24SpillPartitionChannelIdsEEC2Ei
Line
Count
Source
62
23
    Crc32HashPartitioner(int partition_count) : PartitionerBase(partition_count) {}
_ZN5doris20Crc32HashPartitionerINS_17ShuffleChannelIdsEEC2Ei
Line
Count
Source
62
20.6k
    Crc32HashPartitioner(int partition_count) : PartitionerBase(partition_count) {}
_ZN5doris20Crc32HashPartitionerINS_26SpillRePartitionChannelIdsEEC2Ei
Line
Count
Source
62
10
    Crc32HashPartitioner(int partition_count) : PartitionerBase(partition_count) {}
_ZN5doris20Crc32HashPartitionerINS_15ShiftChannelIdsEEC2Ei
Line
Count
Source
62
433k
    Crc32HashPartitioner(int partition_count) : PartitionerBase(partition_count) {}
63
455k
    ~Crc32HashPartitioner() override = default;
_ZN5doris20Crc32HashPartitionerINS_24SpillPartitionChannelIdsEED2Ev
Line
Count
Source
63
23
    ~Crc32HashPartitioner() override = default;
_ZN5doris20Crc32HashPartitionerINS_15ShiftChannelIdsEED2Ev
Line
Count
Source
63
434k
    ~Crc32HashPartitioner() override = default;
_ZN5doris20Crc32HashPartitionerINS_17ShuffleChannelIdsEED2Ev
Line
Count
Source
63
20.7k
    ~Crc32HashPartitioner() override = default;
_ZN5doris20Crc32HashPartitionerINS_26SpillRePartitionChannelIdsEED2Ev
Line
Count
Source
63
10
    ~Crc32HashPartitioner() override = default;
64
65
309k
    Status init(const std::vector<TExpr>& texprs) override {
66
309k
        return VExpr::create_expr_trees(texprs, _partition_expr_ctxs);
67
309k
    }
_ZN5doris20Crc32HashPartitionerINS_15ShiftChannelIdsEE4initERKSt6vectorINS_5TExprESaIS4_EE
Line
Count
Source
65
294k
    Status init(const std::vector<TExpr>& texprs) override {
66
294k
        return VExpr::create_expr_trees(texprs, _partition_expr_ctxs);
67
294k
    }
_ZN5doris20Crc32HashPartitionerINS_24SpillPartitionChannelIdsEE4initERKSt6vectorINS_5TExprESaIS4_EE
Line
Count
Source
65
18
    Status init(const std::vector<TExpr>& texprs) override {
66
18
        return VExpr::create_expr_trees(texprs, _partition_expr_ctxs);
67
18
    }
_ZN5doris20Crc32HashPartitionerINS_17ShuffleChannelIdsEE4initERKSt6vectorINS_5TExprESaIS4_EE
Line
Count
Source
65
15.5k
    Status init(const std::vector<TExpr>& texprs) override {
66
15.5k
        return VExpr::create_expr_trees(texprs, _partition_expr_ctxs);
67
15.5k
    }
_ZN5doris20Crc32HashPartitionerINS_26SpillRePartitionChannelIdsEE4initERKSt6vectorINS_5TExprESaIS4_EE
Line
Count
Source
65
6
    Status init(const std::vector<TExpr>& texprs) override {
66
6
        return VExpr::create_expr_trees(texprs, _partition_expr_ctxs);
67
6
    }
68
69
308k
    Status prepare(RuntimeState* state, const RowDescriptor& row_desc) override {
70
308k
        return VExpr::prepare(_partition_expr_ctxs, state, row_desc);
71
308k
    }
_ZN5doris20Crc32HashPartitionerINS_15ShiftChannelIdsEE7prepareEPNS_12RuntimeStateERKNS_13RowDescriptorE
Line
Count
Source
69
293k
    Status prepare(RuntimeState* state, const RowDescriptor& row_desc) override {
70
293k
        return VExpr::prepare(_partition_expr_ctxs, state, row_desc);
71
293k
    }
_ZN5doris20Crc32HashPartitionerINS_24SpillPartitionChannelIdsEE7prepareEPNS_12RuntimeStateERKNS_13RowDescriptorE
Line
Count
Source
69
15
    Status prepare(RuntimeState* state, const RowDescriptor& row_desc) override {
70
15
        return VExpr::prepare(_partition_expr_ctxs, state, row_desc);
71
15
    }
_ZN5doris20Crc32HashPartitionerINS_17ShuffleChannelIdsEE7prepareEPNS_12RuntimeStateERKNS_13RowDescriptorE
Line
Count
Source
69
15.5k
    Status prepare(RuntimeState* state, const RowDescriptor& row_desc) override {
70
15.5k
        return VExpr::prepare(_partition_expr_ctxs, state, row_desc);
71
15.5k
    }
_ZN5doris20Crc32HashPartitionerINS_26SpillRePartitionChannelIdsEE7prepareEPNS_12RuntimeStateERKNS_13RowDescriptorE
Line
Count
Source
69
6
    Status prepare(RuntimeState* state, const RowDescriptor& row_desc) override {
70
6
        return VExpr::prepare(_partition_expr_ctxs, state, row_desc);
71
6
    }
72
73
309k
    Status open(RuntimeState* state) override { return VExpr::open(_partition_expr_ctxs, state); }
_ZN5doris20Crc32HashPartitionerINS_15ShiftChannelIdsEE4openEPNS_12RuntimeStateE
Line
Count
Source
73
294k
    Status open(RuntimeState* state) override { return VExpr::open(_partition_expr_ctxs, state); }
_ZN5doris20Crc32HashPartitionerINS_24SpillPartitionChannelIdsEE4openEPNS_12RuntimeStateE
Line
Count
Source
73
6
    Status open(RuntimeState* state) override { return VExpr::open(_partition_expr_ctxs, state); }
_ZN5doris20Crc32HashPartitionerINS_17ShuffleChannelIdsEE4openEPNS_12RuntimeStateE
Line
Count
Source
73
15.6k
    Status open(RuntimeState* state) override { return VExpr::open(_partition_expr_ctxs, state); }
_ZN5doris20Crc32HashPartitionerINS_26SpillRePartitionChannelIdsEE4openEPNS_12RuntimeStateE
Line
Count
Source
73
6
    Status open(RuntimeState* state) override { return VExpr::open(_partition_expr_ctxs, state); }
74
75
286k
    Status close(RuntimeState* state) override { return Status::OK(); }
_ZN5doris20Crc32HashPartitionerINS_15ShiftChannelIdsEE5closeEPNS_12RuntimeStateE
Line
Count
Source
75
271k
    Status close(RuntimeState* state) override { return Status::OK(); }
Unexecuted instantiation: _ZN5doris20Crc32HashPartitionerINS_24SpillPartitionChannelIdsEE5closeEPNS_12RuntimeStateE
_ZN5doris20Crc32HashPartitionerINS_17ShuffleChannelIdsEE5closeEPNS_12RuntimeStateE
Line
Count
Source
75
15.2k
    Status close(RuntimeState* state) override { return Status::OK(); }
Unexecuted instantiation: _ZN5doris20Crc32HashPartitionerINS_26SpillRePartitionChannelIdsEE5closeEPNS_12RuntimeStateE
76
77
    Status do_partitioning(RuntimeState* state, Block* block) const override;
78
79
319k
    const std::vector<HashValType>& get_channel_ids() const override { return _hash_vals; }
_ZNK5doris20Crc32HashPartitionerINS_15ShiftChannelIdsEE15get_channel_idsEv
Line
Count
Source
79
299k
    const std::vector<HashValType>& get_channel_ids() const override { return _hash_vals; }
_ZNK5doris20Crc32HashPartitionerINS_24SpillPartitionChannelIdsEE15get_channel_idsEv
Line
Count
Source
79
8
    const std::vector<HashValType>& get_channel_ids() const override { return _hash_vals; }
_ZNK5doris20Crc32HashPartitionerINS_17ShuffleChannelIdsEE15get_channel_idsEv
Line
Count
Source
79
19.6k
    const std::vector<HashValType>& get_channel_ids() const override { return _hash_vals; }
_ZNK5doris20Crc32HashPartitionerINS_26SpillRePartitionChannelIdsEE15get_channel_idsEv
Line
Count
Source
79
8
    const std::vector<HashValType>& get_channel_ids() const override { return _hash_vals; }
80
81
    Status clone(RuntimeState* state, std::unique_ptr<PartitionerBase>& partitioner) override;
82
83
protected:
84
59.5k
    Status _get_partition_column_result(Block* block, std::vector<int>& result) const {
85
59.5k
        int counter = 0;
86
88.3k
        for (auto ctx : _partition_expr_ctxs) {
87
88.3k
            RETURN_IF_ERROR(ctx->execute(block, &result[counter++]));
88
88.3k
        }
89
59.5k
        return Status::OK();
90
59.5k
    }
_ZNK5doris20Crc32HashPartitionerINS_17ShuffleChannelIdsEE28_get_partition_column_resultEPNS_5BlockERSt6vectorIiSaIiEE
Line
Count
Source
84
6.05k
    Status _get_partition_column_result(Block* block, std::vector<int>& result) const {
85
6.05k
        int counter = 0;
86
6.92k
        for (auto ctx : _partition_expr_ctxs) {
87
6.92k
            RETURN_IF_ERROR(ctx->execute(block, &result[counter++]));
88
6.92k
        }
89
6.05k
        return Status::OK();
90
6.05k
    }
_ZNK5doris20Crc32HashPartitionerINS_24SpillPartitionChannelIdsEE28_get_partition_column_resultEPNS_5BlockERSt6vectorIiSaIiEE
Line
Count
Source
84
8
    Status _get_partition_column_result(Block* block, std::vector<int>& result) const {
85
8
        int counter = 0;
86
8
        for (auto ctx : _partition_expr_ctxs) {
87
8
            RETURN_IF_ERROR(ctx->execute(block, &result[counter++]));
88
8
        }
89
8
        return Status::OK();
90
8
    }
_ZNK5doris20Crc32HashPartitionerINS_26SpillRePartitionChannelIdsEE28_get_partition_column_resultEPNS_5BlockERSt6vectorIiSaIiEE
Line
Count
Source
84
8
    Status _get_partition_column_result(Block* block, std::vector<int>& result) const {
85
8
        int counter = 0;
86
8
        for (auto ctx : _partition_expr_ctxs) {
87
8
            RETURN_IF_ERROR(ctx->execute(block, &result[counter++]));
88
8
        }
89
8
        return Status::OK();
90
8
    }
_ZNK5doris20Crc32HashPartitionerINS_15ShiftChannelIdsEE28_get_partition_column_resultEPNS_5BlockERSt6vectorIiSaIiEE
Line
Count
Source
84
53.4k
    Status _get_partition_column_result(Block* block, std::vector<int>& result) const {
85
53.4k
        int counter = 0;
86
81.4k
        for (auto ctx : _partition_expr_ctxs) {
87
81.4k
            RETURN_IF_ERROR(ctx->execute(block, &result[counter++]));
88
81.4k
        }
89
53.4k
        return Status::OK();
90
53.4k
    }
91
92
145k
    Status _clone_expr_ctxs(RuntimeState* state, VExprContextSPtrs& new_partition_expr_ctxs) const {
93
145k
        new_partition_expr_ctxs.resize(_partition_expr_ctxs.size());
94
381k
        for (size_t i = 0; i < _partition_expr_ctxs.size(); i++) {
95
236k
            RETURN_IF_ERROR(_partition_expr_ctxs[i]->clone(state, new_partition_expr_ctxs[i]));
96
236k
        }
97
145k
        return Status::OK();
98
145k
    }
_ZNK5doris20Crc32HashPartitionerINS_17ShuffleChannelIdsEE16_clone_expr_ctxsEPNS_12RuntimeStateERSt6vectorISt10shared_ptrINS_12VExprContextEESaIS8_EE
Line
Count
Source
92
5.14k
    Status _clone_expr_ctxs(RuntimeState* state, VExprContextSPtrs& new_partition_expr_ctxs) const {
93
5.14k
        new_partition_expr_ctxs.resize(_partition_expr_ctxs.size());
94
10.3k
        for (size_t i = 0; i < _partition_expr_ctxs.size(); i++) {
95
5.20k
            RETURN_IF_ERROR(_partition_expr_ctxs[i]->clone(state, new_partition_expr_ctxs[i]));
96
5.20k
        }
97
5.14k
        return Status::OK();
98
5.14k
    }
_ZNK5doris20Crc32HashPartitionerINS_24SpillPartitionChannelIdsEE16_clone_expr_ctxsEPNS_12RuntimeStateERSt6vectorISt10shared_ptrINS_12VExprContextEESaIS8_EE
Line
Count
Source
92
5
    Status _clone_expr_ctxs(RuntimeState* state, VExprContextSPtrs& new_partition_expr_ctxs) const {
93
5
        new_partition_expr_ctxs.resize(_partition_expr_ctxs.size());
94
10
        for (size_t i = 0; i < _partition_expr_ctxs.size(); i++) {
95
5
            RETURN_IF_ERROR(_partition_expr_ctxs[i]->clone(state, new_partition_expr_ctxs[i]));
96
5
        }
97
5
        return Status::OK();
98
5
    }
_ZNK5doris20Crc32HashPartitionerINS_26SpillRePartitionChannelIdsEE16_clone_expr_ctxsEPNS_12RuntimeStateERSt6vectorISt10shared_ptrINS_12VExprContextEESaIS8_EE
Line
Count
Source
92
4
    Status _clone_expr_ctxs(RuntimeState* state, VExprContextSPtrs& new_partition_expr_ctxs) const {
93
4
        new_partition_expr_ctxs.resize(_partition_expr_ctxs.size());
94
8
        for (size_t i = 0; i < _partition_expr_ctxs.size(); i++) {
95
4
            RETURN_IF_ERROR(_partition_expr_ctxs[i]->clone(state, new_partition_expr_ctxs[i]));
96
4
        }
97
4
        return Status::OK();
98
4
    }
_ZNK5doris20Crc32HashPartitionerINS_15ShiftChannelIdsEE16_clone_expr_ctxsEPNS_12RuntimeStateERSt6vectorISt10shared_ptrINS_12VExprContextEESaIS8_EE
Line
Count
Source
92
139k
    Status _clone_expr_ctxs(RuntimeState* state, VExprContextSPtrs& new_partition_expr_ctxs) const {
93
139k
        new_partition_expr_ctxs.resize(_partition_expr_ctxs.size());
94
371k
        for (size_t i = 0; i < _partition_expr_ctxs.size(); i++) {
95
231k
            RETURN_IF_ERROR(_partition_expr_ctxs[i]->clone(state, new_partition_expr_ctxs[i]));
96
231k
        }
97
139k
        return Status::OK();
98
139k
    }
99
100
    virtual void _do_hash(const ColumnPtr& column, HashValType* __restrict result, int idx) const;
101
6.06k
    virtual void _initialize_hash_vals(size_t rows) const {
102
6.06k
        _hash_vals.resize(rows);
103
6.06k
        std::ranges::fill(_hash_vals, 0);
104
6.06k
    }
Unexecuted instantiation: _ZNK5doris20Crc32HashPartitionerINS_15ShiftChannelIdsEE21_initialize_hash_valsEm
_ZNK5doris20Crc32HashPartitionerINS_24SpillPartitionChannelIdsEE21_initialize_hash_valsEm
Line
Count
Source
101
8
    virtual void _initialize_hash_vals(size_t rows) const {
102
8
        _hash_vals.resize(rows);
103
8
        std::ranges::fill(_hash_vals, 0);
104
8
    }
_ZNK5doris20Crc32HashPartitionerINS_17ShuffleChannelIdsEE21_initialize_hash_valsEm
Line
Count
Source
101
6.05k
    virtual void _initialize_hash_vals(size_t rows) const {
102
6.05k
        _hash_vals.resize(rows);
103
6.05k
        std::ranges::fill(_hash_vals, 0);
104
6.05k
    }
_ZNK5doris20Crc32HashPartitionerINS_26SpillRePartitionChannelIdsEE21_initialize_hash_valsEm
Line
Count
Source
101
8
    virtual void _initialize_hash_vals(size_t rows) const {
102
8
        _hash_vals.resize(rows);
103
8
        std::ranges::fill(_hash_vals, 0);
104
8
    }
105
106
    VExprContextSPtrs _partition_expr_ctxs;
107
    mutable std::vector<HashValType> _hash_vals;
108
};
109
110
struct ShuffleChannelIds {
111
    using HashValType = PartitionerBase::HashValType;
112
4.14M
    HashValType operator()(HashValType l, size_t r) { return l % r; }
113
};
114
115
struct SpillPartitionChannelIds {
116
    using HashValType = PartitionerBase::HashValType;
117
    // Default spill partition mapping used by level-0 partitioning:
118
    // rotate hash bits and apply modulo to get a channel id directly.
119
3.14M
    HashValType operator()(HashValType l, size_t r) { return ((l >> 16) | (l << 16)) % r; }
120
};
121
122
struct SpillRePartitionChannelIds {
123
    using HashValType = PartitionerBase::HashValType;
124
125
    // Repartition mode: return the raw hash value without modulo.
126
    // The caller (SpillRepartitioner) will apply level-aware hash mixing and
127
    // final channel mapping, so repartition behavior can vary by level.
128
20
    HashValType operator()(HashValType l, size_t /*r*/) { return l; }
129
};
130
131
47.7M
static inline PartitionerBase::HashValType crc32c_shuffle_mix(PartitionerBase::HashValType h) {
132
    // Step 1: fold high entropy into low bits
133
47.7M
    h ^= h >> 16;
134
    // Step 2: odd multiplicative scramble (cheap avalanche)
135
47.7M
    h *= 0xA5B35705U;
136
    // Step 3: final fold to break remaining linearity
137
47.7M
    h ^= h >> 13;
138
47.7M
    return h;
139
47.7M
}
Unexecuted instantiation: vexpr.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: vectorized_fn_call.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: vdata_stream_recvr.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: dependency.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: pipeline_task.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: pipeline_fragment_context.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: pipeline.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: hashjoin_build_sink.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: join_build_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: nested_loop_join_build_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: runtime_filter_producer_helper.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: partitioned_hash_join_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
partitioner.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Line
Count
Source
131
47.7M
static inline PartitionerBase::HashValType crc32c_shuffle_mix(PartitionerBase::HashValType h) {
132
    // Step 1: fold high entropy into low bits
133
47.7M
    h ^= h >> 16;
134
    // Step 2: odd multiplicative scramble (cheap avalanche)
135
47.7M
    h *= 0xA5B35705U;
136
    // Step 3: final fold to break remaining linearity
137
47.7M
    h ^= h >> 13;
138
47.7M
    return h;
139
47.7M
}
Unexecuted instantiation: result_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: rowid_fetcher.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: file_scanner.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: scanner.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: scan_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: scanner_context.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: scanner_scheduler.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: olap_scanner.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: olap_scan_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: parallel_scanner_builder.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: file_scan_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: es_scan_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: es_scanner.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: group_commit_scan_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: jdbc_scan_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: jdbc_scanner.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: meta_scan_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: vparquet_reader.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: vorc_reader.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: memory_scratch_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: result_file_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: vfile_result_writer.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: hive_table_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: tvf_table_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: iceberg_table_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: spill_iceberg_table_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: maxcompute_table_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: analytic_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: blackhole_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: sort_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: spill_sort_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: local_exchange_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: local_exchanger.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: local_exchange_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: aggregation_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: partitioned_aggregation_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: exchange_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: vdata_stream_sender.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: exchange_sink_buffer.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: tablet_sink_hash_partitioner.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: exchange_writer.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: union_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: multi_cast_data_stream_sink.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: multi_cast_data_streamer.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: partition_sort_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: set_probe_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: set_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: group_commit_block_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: cache_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: dict_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: rec_cte_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: rec_cte_anchor_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: hashjoin_probe_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: join_probe_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: nested_loop_join_probe_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: partitioned_hash_join_probe_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
spill_repartitioner.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Line
Count
Source
131
650
static inline PartitionerBase::HashValType crc32c_shuffle_mix(PartitionerBase::HashValType h) {
132
    // Step 1: fold high entropy into low bits
133
650
    h ^= h >> 16;
134
    // Step 2: odd multiplicative scramble (cheap avalanche)
135
650
    h *= 0xA5B35705U;
136
    // Step 3: final fold to break remaining linearity
137
650
    h ^= h >> 13;
138
650
    return h;
139
650
}
Unexecuted instantiation: inner_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: left_semi_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: left_anti_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: left_outer_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: full_outer_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: right_outer_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: right_semi_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: right_anti_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: null_aware_left_anti_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: null_aware_left_semi_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: asof_left_inner_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: asof_left_outer_join_impl.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: analytic_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: sort_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: spill_sort_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: local_merge_sort_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: aggregation_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: partitioned_aggregation_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: table_function_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: exchange_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: repeat_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: union_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: multi_cast_data_stream_source.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: partition_sort_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: set_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: datagen_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: schema_scan_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: cache_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: rec_cte_source_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: jdbc_table_sink_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: streaming_aggregation_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: distinct_streaming_aggregation_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: materialization_opertor.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: assert_num_rows_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: empty_set_operator.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: task_scheduler.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: task_queue.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: exec_env_init.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: fragment_mgr.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: query_context.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: runtime_state.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: workload_group.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: workload_group_manager.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: query_task_controller.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
Unexecuted instantiation: pipeline_task_action.cpp:_ZN5dorisL18crc32c_shuffle_mixEj
140
141
// use high 16 bits as channel id to avoid conflict with crc32c hash table
142
// using the same shuffle hash function as a crc32c hash table (e.g. the join hash table) leads to bad performance
143
// hash tables often use the low 16 bits as the bucket index, so we shift 16 bits to the high bits to avoid conflict
144
struct ShiftChannelIds {
145
    using HashValType = PartitionerBase::HashValType;
146
47.7M
    HashValType operator()(HashValType l, size_t r) { return crc32c_shuffle_mix(l) % r; }
147
};
148
149
class Crc32CHashPartitioner : public Crc32HashPartitioner<ShiftChannelIds> {
150
public:
151
    Crc32CHashPartitioner(int partition_count)
152
433k
            : Crc32HashPartitioner<ShiftChannelIds>(partition_count) {}
153
154
    Status clone(RuntimeState* state, std::unique_ptr<PartitionerBase>& partitioner) override;
155
156
private:
157
    void _do_hash(const ColumnPtr& column, HashValType* __restrict result, int idx) const override;
158
159
53.4k
    void _initialize_hash_vals(size_t rows) const override {
160
53.4k
        _hash_vals.resize(rows);
161
        // use golden ratio to initialize hash values to avoid collision with hash table's hash function
162
53.4k
        constexpr HashValType CRC32C_SHUFFLE_SEED = 0x9E3779B9U;
163
53.4k
        std::ranges::fill(_hash_vals, CRC32C_SHUFFLE_SEED);
164
53.4k
    }
165
};
166
167
#include "common/compile_check_end.h"
168
} // namespace doris