Coverage Report

Created: 2025-04-29 18:40

/root/doris/be/src/vec/functions/dictionary.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <memory>
21
#include <type_traits>
22
#include <unordered_map>
23
#include <utility>
24
#include <vector>
25
26
#include "vec/columns/column.h"
27
#include "vec/columns/column_string.h"
28
#include "vec/common/assert_cast.h"
29
#include "vec/core/types.h"
30
#include "vec/data_types/data_type.h"
31
#include "vec/data_types/data_type_date_time.h"
32
#include "vec/data_types/data_type_ipv4.h"
33
#include "vec/data_types/data_type_ipv6.h"
34
#include "vec/data_types/data_type_number.h"
35
#include "vec/data_types/data_type_string.h"
36
#include "vec/data_types/data_type_time_v2.h"
37
#include "vec/functions/cast_type_to_either.h"
38
39
namespace doris {
40
class MemTrackerLimiter;
41
}
42
class DictionaryFactory;
43
namespace doris::vectorized {
44
/*
45
 * Dictionary implementation in Doris that provides key-value mapping functionality
46
 * Currently only supports in-memory dictionary storage
47
 */
48
49
struct DictionaryAttribute {
50
    const std::string name; // value name
51
    const DataTypePtr type; // value type
52
};
53
54
// Abstract base class IDictionary that only stores values. Keys are maintained by specific derived classes
55
// IDictionary serves as the foundation for dictionary implementations where:
56
// - Only values are stored at the base level
57
// - Key management is delegated to derived classes
58
// - Provides interface for dictionary operations
59
class IDictionary {
60
public:
61
    IDictionary(std::string name, std::vector<DictionaryAttribute> values);
62
    virtual ~IDictionary();
63
5
    std::string dict_name() const { return _dict_name; }
64
65
    // Returns the result column, throws an exception if there is an issue
66
    // attribute_type , key_type must be no nullable type
67
    virtual ColumnPtr get_column(const std::string& attribute_name,
68
                                 const DataTypePtr& attribute_type, const ColumnPtr& key_column,
69
                                 const DataTypePtr& key_type) const = 0;
70
71
    // Returns multiple result columns, throws an exception if there is an issue
72
    // The default implementation calls get_column. If a more performant implementation is needed, this method can be overridden
73
    virtual ColumnPtrs get_columns(const std::vector<std::string>& attribute_names,
74
                                   const DataTypes& attribute_types, const ColumnPtr& key_column,
75
0
                                   const DataTypePtr& key_type) const {
76
0
        ColumnPtrs columns;
77
0
        for (size_t i = 0; i < attribute_names.size(); ++i) {
78
0
            columns.push_back(
79
0
                    get_column(attribute_names[i], attribute_types[i], key_column, key_type));
80
0
        }
81
0
        return columns;
82
0
    }
83
84
    // Compared to get_column and get_columns, supports multiple key columns and multiple value columns
85
    // The default implementation only supports one key column, such as IPAddressDictionary, HashMapDictionary
86
    // If support for multiple key columns is needed, this method can be overridden
87
    virtual ColumnPtrs get_tuple_columns(const std::vector<std::string>& attribute_names,
88
                                         const DataTypes& attribute_types,
89
                                         const ColumnPtrs& key_columns,
90
0
                                         const DataTypes& key_types) const {
91
0
        if (key_types.size() != 1) {
92
0
            throw doris::Exception(ErrorCode::INTERNAL_ERROR,
93
0
                                   "Dictionary {} does not support multiple key columns",
94
0
                                   dict_name());
95
0
        }
96
0
        return get_columns(attribute_names, attribute_types, key_columns[0], key_types[0]);
97
0
    }
98
99
    bool has_attribute(const std::string& name) const;
100
101
    // will return a non-nullable type
102
    DataTypePtr get_attribute_type(const std::string& name) const;
103
    size_t attribute_index(const std::string& name) const;
104
105
    bool attribute_is_nullable(size_t idx) const;
106
107
    std::variant<std::false_type, std::true_type> attribute_nullable_variant(size_t idx) const;
108
109
    template <typename F>
110
700
    static bool cast_type(const IDataType* type, F&& f) {
111
        // The data types supported by cast_type must be consistent with the AttributeData below.
112
700
        return cast_type_to_either<DataTypeUInt8, DataTypeInt8, DataTypeInt16, DataTypeInt32,
113
700
                                   DataTypeInt64, DataTypeInt128, DataTypeFloat32, DataTypeFloat64,
114
700
                                   DataTypeIPv4, DataTypeIPv6, DataTypeString, DataTypeDateV2,
115
700
                                   DataTypeDateTimeV2, DataTypeDecimal<Decimal32>,
116
700
                                   DataTypeDecimal<Decimal64>, DataTypeDecimal<Decimal128V3>,
117
700
                                   DataTypeDecimal<Decimal256>>(type, std::forward<F>(f));
118
700
    }
119
120
    virtual size_t allocated_bytes() const;
121
122
protected:
123
    friend class DictionaryFactory;
124
125
    // Only used to distinguish from DataTypeString, used for ColumnWithType
126
    struct DictDataTypeString64 {
127
        using ColumnType = ColumnString;
128
    };
129
130
    template <typename Type>
131
    struct ColumnWithType {
132
        // OutputColumnType is used as the result column type
133
        using OutputColumnType = Type::ColumnType;
134
        ColumnPtr column;
135
        ColumnPtr null_map;
136
        // RealColumnType is the real type of the column, as there may be ColumnString64, but the result column will not be ColumnString64
137
        using RealColumnType = std::conditional_t<std::is_same_v<DictDataTypeString64, Type>,
138
                                                  ColumnString64, OutputColumnType>;
139
1.04k
        const RealColumnType* get() const {
140
1.04k
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
141
1.04k
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIhEEE3getEv
Line
Count
Source
139
78
        const RealColumnType* get() const {
140
78
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
141
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIaEEE3getEv
Line
Count
Source
139
78
        const RealColumnType* get() const {
140
78
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
141
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIsEEE3getEv
Line
Count
Source
139
78
        const RealColumnType* get() const {
140
78
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
141
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIiEEE3getEv
Line
Count
Source
139
97
        const RealColumnType* get() const {
140
97
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
141
97
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIlEEE3getEv
Line
Count
Source
139
84
        const RealColumnType* get() const {
140
84
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
141
84
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberInEEE3getEv
Line
Count
Source
139
78
        const RealColumnType* get() const {
140
78
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
141
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIfEEE3getEv
Line
Count
Source
139
79
        const RealColumnType* get() const {
140
79
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
141
79
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIdEEE3getEv
Line
Count
Source
139
78
        const RealColumnType* get() const {
140
78
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
141
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_12DataTypeIPv4EE3getEv
Line
Count
Source
139
78
        const RealColumnType* get() const {
140
78
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
141
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_12DataTypeIPv6EE3getEv
Line
Count
Source
139
78
        const RealColumnType* get() const {
140
78
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
141
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeStringEE3getEv
Line
Count
Source
139
55
        const RealColumnType* get() const {
140
55
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
141
55
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS1_20DictDataTypeString64EE3getEv
Line
Count
Source
139
27
        const RealColumnType* get() const {
140
27
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
141
27
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeDateV2EE3getEv
Line
Count
Source
139
78
        const RealColumnType* get() const {
140
78
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
141
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_18DataTypeDateTimeV2EE3getEv
Line
Count
Source
139
78
        const RealColumnType* get() const {
140
78
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
141
78
        }
Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_7DecimalIiEEEEE3getEv
Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_7DecimalIlEEEEE3getEv
Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_12Decimal128V3EEEE3getEv
Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_7DecimalIN4wide7integerILm256EiEEEEEEE3getEv
142
143
1.04k
        const ColumnUInt8* get_null_map() const {
144
1.04k
            if (!null_map) {
145
1.03k
                return nullptr;
146
1.03k
            }
147
8
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
148
1.04k
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIhEEE12get_null_mapEv
Line
Count
Source
143
78
        const ColumnUInt8* get_null_map() const {
144
78
            if (!null_map) {
145
78
                return nullptr;
146
78
            }
147
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
148
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIaEEE12get_null_mapEv
Line
Count
Source
143
78
        const ColumnUInt8* get_null_map() const {
144
78
            if (!null_map) {
145
78
                return nullptr;
146
78
            }
147
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
148
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIsEEE12get_null_mapEv
Line
Count
Source
143
78
        const ColumnUInt8* get_null_map() const {
144
78
            if (!null_map) {
145
78
                return nullptr;
146
78
            }
147
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
148
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIiEEE12get_null_mapEv
Line
Count
Source
143
97
        const ColumnUInt8* get_null_map() const {
144
97
            if (!null_map) {
145
89
                return nullptr;
146
89
            }
147
8
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
148
97
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIlEEE12get_null_mapEv
Line
Count
Source
143
82
        const ColumnUInt8* get_null_map() const {
144
82
            if (!null_map) {
145
82
                return nullptr;
146
82
            }
147
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
148
82
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberInEEE12get_null_mapEv
Line
Count
Source
143
78
        const ColumnUInt8* get_null_map() const {
144
78
            if (!null_map) {
145
78
                return nullptr;
146
78
            }
147
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
148
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIfEEE12get_null_mapEv
Line
Count
Source
143
79
        const ColumnUInt8* get_null_map() const {
144
79
            if (!null_map) {
145
79
                return nullptr;
146
79
            }
147
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
148
79
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIdEEE12get_null_mapEv
Line
Count
Source
143
78
        const ColumnUInt8* get_null_map() const {
144
78
            if (!null_map) {
145
78
                return nullptr;
146
78
            }
147
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
148
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_12DataTypeIPv4EE12get_null_mapEv
Line
Count
Source
143
78
        const ColumnUInt8* get_null_map() const {
144
78
            if (!null_map) {
145
78
                return nullptr;
146
78
            }
147
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
148
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_12DataTypeIPv6EE12get_null_mapEv
Line
Count
Source
143
78
        const ColumnUInt8* get_null_map() const {
144
78
            if (!null_map) {
145
78
                return nullptr;
146
78
            }
147
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
148
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeStringEE12get_null_mapEv
Line
Count
Source
143
55
        const ColumnUInt8* get_null_map() const {
144
55
            if (!null_map) {
145
55
                return nullptr;
146
55
            }
147
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
148
55
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS1_20DictDataTypeString64EE12get_null_mapEv
Line
Count
Source
143
27
        const ColumnUInt8* get_null_map() const {
144
27
            if (!null_map) {
145
27
                return nullptr;
146
27
            }
147
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
148
27
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeDateV2EE12get_null_mapEv
Line
Count
Source
143
78
        const ColumnUInt8* get_null_map() const {
144
78
            if (!null_map) {
145
78
                return nullptr;
146
78
            }
147
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
148
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_18DataTypeDateTimeV2EE12get_null_mapEv
Line
Count
Source
143
78
        const ColumnUInt8* get_null_map() const {
144
78
            if (!null_map) {
145
78
                return nullptr;
146
78
            }
147
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
148
78
        }
Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_7DecimalIiEEEEE12get_null_mapEv
Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_7DecimalIlEEEEE12get_null_mapEv
Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_12Decimal128V3EEEE12get_null_mapEv
Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_7DecimalIN4wide7integerILm256EiEEEEEEE12get_null_mapEv
149
    };
150
151
    // res_real_column : result column (get_column result)
152
    // res_null : if value is null, will set res_null to true
153
    // value_column : corresponding value column, non-nullable
154
    // value_null_column : corresponding value null map, if the original value is non-nullable, it will be nullptr
155
    // value_idx : index in the value column
156
    template <bool value_is_nullable, typename ResultColumnType>
157
    ALWAYS_INLINE static void set_value_data(ResultColumnType* res_real_column, UInt8& res_null,
158
                                             const auto* value_column,
159
                                             const ColumnUInt8* value_null_column,
160
55.8k
                                             const size_t& value_idx) {
161
55.8k
        if constexpr (value_is_nullable) {
162
            // if the value is null, set the result column to null
163
19
            if (value_null_column->get_element(value_idx)) {
164
9
                res_null = true;
165
9
                res_real_column->insert_default();
166
9
                return;
167
9
            }
168
19
        }
169
55.8k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
170
            // If it is a string column, use get_data_at to avoid copying
171
4.31k
            StringRef str_ref = value_column->get_data_at(value_idx);
172
4.31k
            res_real_column->insert_data(str_ref.data, str_ref.size);
173
51.5k
        } else {
174
51.5k
            res_real_column->insert_value(value_column->get_element(value_idx));
175
51.5k
        }
176
10
    }
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIhEES4_EEvPT0_RhPKT1_PKS4_RKm
Line
Count
Source
160
4.29k
                                             const size_t& value_idx) {
161
4.29k
        if constexpr (value_is_nullable) {
162
            // if the value is null, set the result column to null
163
4.29k
            if (value_null_column->get_element(value_idx)) {
164
4.29k
                res_null = true;
165
4.29k
                res_real_column->insert_default();
166
4.29k
                return;
167
4.29k
            }
168
4.29k
        }
169
4.29k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
170
            // If it is a string column, use get_data_at to avoid copying
171
4.29k
            StringRef str_ref = value_column->get_data_at(value_idx);
172
4.29k
            res_real_column->insert_data(str_ref.data, str_ref.size);
173
4.29k
        } else {
174
4.29k
            res_real_column->insert_value(value_column->get_element(value_idx));
175
4.29k
        }
176
4.29k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIhEES4_EEvPT0_RhPKT1_PKS4_RKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIaEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
160
4.29k
                                             const size_t& value_idx) {
161
4.29k
        if constexpr (value_is_nullable) {
162
            // if the value is null, set the result column to null
163
4.29k
            if (value_null_column->get_element(value_idx)) {
164
4.29k
                res_null = true;
165
4.29k
                res_real_column->insert_default();
166
4.29k
                return;
167
4.29k
            }
168
4.29k
        }
169
4.29k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
170
            // If it is a string column, use get_data_at to avoid copying
171
4.29k
            StringRef str_ref = value_column->get_data_at(value_idx);
172
4.29k
            res_real_column->insert_data(str_ref.data, str_ref.size);
173
4.29k
        } else {
174
4.29k
            res_real_column->insert_value(value_column->get_element(value_idx));
175
4.29k
        }
176
4.29k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIaEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIsEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
160
4.29k
                                             const size_t& value_idx) {
161
4.29k
        if constexpr (value_is_nullable) {
162
            // if the value is null, set the result column to null
163
4.29k
            if (value_null_column->get_element(value_idx)) {
164
4.29k
                res_null = true;
165
4.29k
                res_real_column->insert_default();
166
4.29k
                return;
167
4.29k
            }
168
4.29k
        }
169
4.29k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
170
            // If it is a string column, use get_data_at to avoid copying
171
4.29k
            StringRef str_ref = value_column->get_data_at(value_idx);
172
4.29k
            res_real_column->insert_data(str_ref.data, str_ref.size);
173
4.29k
        } else {
174
4.29k
            res_real_column->insert_value(value_column->get_element(value_idx));
175
4.29k
        }
176
4.29k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIsEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIiEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
160
4.30k
                                             const size_t& value_idx) {
161
4.30k
        if constexpr (value_is_nullable) {
162
            // if the value is null, set the result column to null
163
4.30k
            if (value_null_column->get_element(value_idx)) {
164
4.30k
                res_null = true;
165
4.30k
                res_real_column->insert_default();
166
4.30k
                return;
167
4.30k
            }
168
4.30k
        }
169
4.30k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
170
            // If it is a string column, use get_data_at to avoid copying
171
4.30k
            StringRef str_ref = value_column->get_data_at(value_idx);
172
4.30k
            res_real_column->insert_data(str_ref.data, str_ref.size);
173
4.30k
        } else {
174
4.30k
            res_real_column->insert_value(value_column->get_element(value_idx));
175
4.30k
        }
176
4.30k
    }
_ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIiEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
160
19
                                             const size_t& value_idx) {
161
19
        if constexpr (value_is_nullable) {
162
            // if the value is null, set the result column to null
163
19
            if (value_null_column->get_element(value_idx)) {
164
9
                res_null = true;
165
9
                res_real_column->insert_default();
166
9
                return;
167
9
            }
168
19
        }
169
10
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
170
            // If it is a string column, use get_data_at to avoid copying
171
10
            StringRef str_ref = value_column->get_data_at(value_idx);
172
10
            res_real_column->insert_data(str_ref.data, str_ref.size);
173
10
        } else {
174
10
            res_real_column->insert_value(value_column->get_element(value_idx));
175
10
        }
176
10
    }
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIlEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
160
4.32k
                                             const size_t& value_idx) {
161
4.32k
        if constexpr (value_is_nullable) {
162
            // if the value is null, set the result column to null
163
4.32k
            if (value_null_column->get_element(value_idx)) {
164
4.32k
                res_null = true;
165
4.32k
                res_real_column->insert_default();
166
4.32k
                return;
167
4.32k
            }
168
4.32k
        }
169
4.32k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
170
            // If it is a string column, use get_data_at to avoid copying
171
4.32k
            StringRef str_ref = value_column->get_data_at(value_idx);
172
4.32k
            res_real_column->insert_data(str_ref.data, str_ref.size);
173
4.32k
        } else {
174
4.32k
            res_real_column->insert_value(value_column->get_element(value_idx));
175
4.32k
        }
176
4.32k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIlEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorInEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
160
4.29k
                                             const size_t& value_idx) {
161
4.29k
        if constexpr (value_is_nullable) {
162
            // if the value is null, set the result column to null
163
4.29k
            if (value_null_column->get_element(value_idx)) {
164
4.29k
                res_null = true;
165
4.29k
                res_real_column->insert_default();
166
4.29k
                return;
167
4.29k
            }
168
4.29k
        }
169
4.29k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
170
            // If it is a string column, use get_data_at to avoid copying
171
4.29k
            StringRef str_ref = value_column->get_data_at(value_idx);
172
4.29k
            res_real_column->insert_data(str_ref.data, str_ref.size);
173
4.29k
        } else {
174
4.29k
            res_real_column->insert_value(value_column->get_element(value_idx));
175
4.29k
        }
176
4.29k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorInEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIfEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
160
4.29k
                                             const size_t& value_idx) {
161
4.29k
        if constexpr (value_is_nullable) {
162
            // if the value is null, set the result column to null
163
4.29k
            if (value_null_column->get_element(value_idx)) {
164
4.29k
                res_null = true;
165
4.29k
                res_real_column->insert_default();
166
4.29k
                return;
167
4.29k
            }
168
4.29k
        }
169
4.29k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
170
            // If it is a string column, use get_data_at to avoid copying
171
4.29k
            StringRef str_ref = value_column->get_data_at(value_idx);
172
4.29k
            res_real_column->insert_data(str_ref.data, str_ref.size);
173
4.29k
        } else {
174
4.29k
            res_real_column->insert_value(value_column->get_element(value_idx));
175
4.29k
        }
176
4.29k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIfEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIdEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
160
4.29k
                                             const size_t& value_idx) {
161
4.29k
        if constexpr (value_is_nullable) {
162
            // if the value is null, set the result column to null
163
4.29k
            if (value_null_column->get_element(value_idx)) {
164
4.29k
                res_null = true;
165
4.29k
                res_real_column->insert_default();
166
4.29k
                return;
167
4.29k
            }
168
4.29k
        }
169
4.29k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
170
            // If it is a string column, use get_data_at to avoid copying
171
4.29k
            StringRef str_ref = value_column->get_data_at(value_idx);
172
4.29k
            res_real_column->insert_data(str_ref.data, str_ref.size);
173
4.29k
        } else {
174
4.29k
            res_real_column->insert_value(value_column->get_element(value_idx));
175
4.29k
        }
176
4.29k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIdEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIjEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
160
8.58k
                                             const size_t& value_idx) {
161
8.58k
        if constexpr (value_is_nullable) {
162
            // if the value is null, set the result column to null
163
8.58k
            if (value_null_column->get_element(value_idx)) {
164
8.58k
                res_null = true;
165
8.58k
                res_real_column->insert_default();
166
8.58k
                return;
167
8.58k
            }
168
8.58k
        }
169
8.58k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
170
            // If it is a string column, use get_data_at to avoid copying
171
8.58k
            StringRef str_ref = value_column->get_data_at(value_idx);
172
8.58k
            res_real_column->insert_data(str_ref.data, str_ref.size);
173
8.58k
        } else {
174
8.58k
            res_real_column->insert_value(value_column->get_element(value_idx));
175
8.58k
        }
176
8.58k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIjEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIoEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
160
4.29k
                                             const size_t& value_idx) {
161
4.29k
        if constexpr (value_is_nullable) {
162
            // if the value is null, set the result column to null
163
4.29k
            if (value_null_column->get_element(value_idx)) {
164
4.29k
                res_null = true;
165
4.29k
                res_real_column->insert_default();
166
4.29k
                return;
167
4.29k
            }
168
4.29k
        }
169
4.29k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
170
            // If it is a string column, use get_data_at to avoid copying
171
4.29k
            StringRef str_ref = value_column->get_data_at(value_idx);
172
4.29k
            res_real_column->insert_data(str_ref.data, str_ref.size);
173
4.29k
        } else {
174
4.29k
            res_real_column->insert_value(value_column->get_element(value_idx));
175
4.29k
        }
176
4.29k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIoEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_9ColumnStrIjEES4_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
Line
Count
Source
160
2.86k
                                             const size_t& value_idx) {
161
2.86k
        if constexpr (value_is_nullable) {
162
            // if the value is null, set the result column to null
163
2.86k
            if (value_null_column->get_element(value_idx)) {
164
2.86k
                res_null = true;
165
2.86k
                res_real_column->insert_default();
166
2.86k
                return;
167
2.86k
            }
168
2.86k
        }
169
2.86k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
170
            // If it is a string column, use get_data_at to avoid copying
171
2.86k
            StringRef str_ref = value_column->get_data_at(value_idx);
172
2.86k
            res_real_column->insert_data(str_ref.data, str_ref.size);
173
2.86k
        } else {
174
2.86k
            res_real_column->insert_value(value_column->get_element(value_idx));
175
2.86k
        }
176
2.86k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_9ColumnStrIjEES4_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_9ColumnStrIjEENS3_ImEEEEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
Line
Count
Source
160
1.44k
                                             const size_t& value_idx) {
161
1.44k
        if constexpr (value_is_nullable) {
162
            // if the value is null, set the result column to null
163
1.44k
            if (value_null_column->get_element(value_idx)) {
164
1.44k
                res_null = true;
165
1.44k
                res_real_column->insert_default();
166
1.44k
                return;
167
1.44k
            }
168
1.44k
        }
169
1.44k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
170
            // If it is a string column, use get_data_at to avoid copying
171
1.44k
            StringRef str_ref = value_column->get_data_at(value_idx);
172
1.44k
            res_real_column->insert_data(str_ref.data, str_ref.size);
173
1.44k
        } else {
174
1.44k
            res_real_column->insert_value(value_column->get_element(value_idx));
175
1.44k
        }
176
1.44k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_9ColumnStrIjEENS3_ImEEEEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorImEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
160
4.29k
                                             const size_t& value_idx) {
161
4.29k
        if constexpr (value_is_nullable) {
162
            // if the value is null, set the result column to null
163
4.29k
            if (value_null_column->get_element(value_idx)) {
164
4.29k
                res_null = true;
165
4.29k
                res_real_column->insert_default();
166
4.29k
                return;
167
4.29k
            }
168
4.29k
        }
169
4.29k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
170
            // If it is a string column, use get_data_at to avoid copying
171
4.29k
            StringRef str_ref = value_column->get_data_at(value_idx);
172
4.29k
            res_real_column->insert_data(str_ref.data, str_ref.size);
173
4.29k
        } else {
174
4.29k
            res_real_column->insert_value(value_column->get_element(value_idx));
175
4.29k
        }
176
4.29k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorImEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_13ColumnDecimalINS0_7DecimalIiEEEES6_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_13ColumnDecimalINS0_7DecimalIiEEEES6_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_13ColumnDecimalINS0_7DecimalIlEEEES6_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_13ColumnDecimalINS0_7DecimalIlEEEES6_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_13ColumnDecimalINS0_12Decimal128V3EEES5_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_13ColumnDecimalINS0_12Decimal128V3EEES5_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_13ColumnDecimalINS0_7DecimalIN4wide7integerILm256EiEEEEEES9_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_13ColumnDecimalINS0_7DecimalIN4wide7integerILm256EiEEEEEES9_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
177
178
    /// TODO: Add support for more data types ,such as Array, Map, etc.
179
    using ValueData =
180
            std::variant<ColumnWithType<DataTypeUInt8>, ColumnWithType<DataTypeInt8>,
181
                         ColumnWithType<DataTypeInt16>, ColumnWithType<DataTypeInt32>,
182
                         ColumnWithType<DataTypeInt64>, ColumnWithType<DataTypeInt128>,
183
184
                         ColumnWithType<DataTypeFloat32>, ColumnWithType<DataTypeFloat64>,
185
186
                         ColumnWithType<DataTypeIPv4>, ColumnWithType<DataTypeIPv6>,
187
188
                         ColumnWithType<DataTypeString>, ColumnWithType<DictDataTypeString64>,
189
190
                         ColumnWithType<DataTypeDateV2>, ColumnWithType<DataTypeDateTimeV2>,
191
192
                         ColumnWithType<DataTypeDecimal<Decimal32>>,
193
                         ColumnWithType<DataTypeDecimal<Decimal64>>,
194
                         ColumnWithType<DataTypeDecimal<Decimal128V3>>,
195
                         ColumnWithType<DataTypeDecimal<Decimal256>>>;
196
197
    void load_values(const std::vector<ColumnPtr>& values_column);
198
199
    // _value_data is used to store the data of value columns.
200
    std::vector<ValueData> _values_data;
201
    std::string _dict_name;
202
    std::vector<DictionaryAttribute> _attributes;
203
    // A mapping from attribute names to their corresponding indices.
204
    std::unordered_map<std::string, size_t> _name_to_attributes_index;
205
206
    // mem_tracker comes from DictionaryFactory. If _mem_tracker is nullptr, it means it is in UT.
207
    std::shared_ptr<MemTrackerLimiter> _mem_tracker;
208
};
209
210
using DictionaryPtr = std::shared_ptr<IDictionary>;
211
212
} // namespace doris::vectorized