Coverage Report

Created: 2025-05-21 13:26

/root/doris/be/src/vec/functions/dictionary.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <memory>
21
#include <type_traits>
22
#include <unordered_map>
23
#include <utility>
24
#include <vector>
25
26
#include "vec/columns/column.h"
27
#include "vec/columns/column_string.h"
28
#include "vec/common/assert_cast.h"
29
#include "vec/core/types.h"
30
#include "vec/data_types/data_type.h"
31
#include "vec/data_types/data_type_date_or_datetime_v2.h"
32
#include "vec/data_types/data_type_date_time.h"
33
#include "vec/data_types/data_type_ipv4.h"
34
#include "vec/data_types/data_type_ipv6.h"
35
#include "vec/data_types/data_type_number.h"
36
#include "vec/data_types/data_type_string.h"
37
#include "vec/functions/cast_type_to_either.h"
38
39
namespace doris {
40
class MemTrackerLimiter;
41
}
42
class DictionaryFactory;
43
namespace doris::vectorized {
44
/*
45
 * Dictionary implementation in Doris that provides key-value mapping functionality
46
 * Currently only supports in-memory dictionary storage
47
 */
48
49
const static std::string DICT_DATA_ERROR_TAG = "[INVALID_DICT_MARK]";
50
51
struct DictionaryAttribute {
52
    const std::string name; // value name
53
    const DataTypePtr type; // value type
54
};
55
56
// Abstract base class IDictionary that only stores values. Keys are maintained by specific derived classes
57
// IDictionary serves as the foundation for dictionary implementations where:
58
// - Only values are stored at the base level
59
// - Key management is delegated to derived classes
60
// - Provides interface for dictionary operations
61
class IDictionary {
62
public:
63
    IDictionary(std::string name, std::vector<DictionaryAttribute> values);
64
    virtual ~IDictionary();
65
5
    std::string dict_name() const { return _dict_name; }
66
67
    // Returns the result column, throws an exception if there is an issue
68
    // attribute_type , key_type must be no nullable type
69
    virtual ColumnPtr get_column(const std::string& attribute_name,
70
                                 const DataTypePtr& attribute_type, const ColumnPtr& key_column,
71
                                 const DataTypePtr& key_type) const = 0;
72
73
    // Returns multiple result columns, throws an exception if there is an issue
74
    // The default implementation calls get_column. If a more performant implementation is needed, this method can be overridden
75
    virtual ColumnPtrs get_columns(const std::vector<std::string>& attribute_names,
76
                                   const DataTypes& attribute_types, const ColumnPtr& key_column,
77
0
                                   const DataTypePtr& key_type) const {
78
0
        ColumnPtrs columns;
79
0
        for (size_t i = 0; i < attribute_names.size(); ++i) {
80
0
            columns.push_back(
81
0
                    get_column(attribute_names[i], attribute_types[i], key_column, key_type));
82
0
        }
83
0
        return columns;
84
0
    }
85
86
    // Compared to get_column and get_columns, supports multiple key columns and multiple value columns
87
    // The default implementation only supports one key column, such as IPAddressDictionary, HashMapDictionary
88
    // If support for multiple key columns is needed, this method can be overridden
89
    virtual ColumnPtrs get_tuple_columns(const std::vector<std::string>& attribute_names,
90
                                         const DataTypes& attribute_types,
91
                                         const ColumnPtrs& key_columns,
92
0
                                         const DataTypes& key_types) const {
93
0
        if (key_types.size() != 1) {
94
0
            throw doris::Exception(ErrorCode::INTERNAL_ERROR,
95
0
                                   "Dictionary {} does not support multiple key columns",
96
0
                                   dict_name());
97
0
        }
98
0
        return get_columns(attribute_names, attribute_types, key_columns[0], key_types[0]);
99
0
    }
100
101
    bool has_attribute(const std::string& name) const;
102
103
    // will return a non-nullable type
104
    DataTypePtr get_attribute_type(const std::string& name) const;
105
    size_t attribute_index(const std::string& name) const;
106
107
    bool attribute_is_nullable(size_t idx) const;
108
109
    std::variant<std::false_type, std::true_type> attribute_nullable_variant(size_t idx) const;
110
111
    template <typename F>
112
700
    static bool cast_type(const IDataType* type, F&& f) {
113
        // The data types supported by cast_type must be consistent with the AttributeData below.
114
700
        return cast_type_to_either<DataTypeUInt8, DataTypeInt8, DataTypeInt16, DataTypeInt32,
115
700
                                   DataTypeInt64, DataTypeInt128, DataTypeFloat32, DataTypeFloat64,
116
700
                                   DataTypeIPv4, DataTypeIPv6, DataTypeString, DataTypeDateV2,
117
700
                                   DataTypeDateTimeV2, DataTypeDecimal<Decimal32>,
118
700
                                   DataTypeDecimal<Decimal64>, DataTypeDecimal<Decimal128V3>,
119
700
                                   DataTypeDecimal<Decimal256>>(type, std::forward<F>(f));
120
700
    }
121
122
    virtual size_t allocated_bytes() const;
123
124
protected:
125
    friend class DictionaryFactory;
126
127
    // Only used to distinguish from DataTypeString, used for ColumnWithType
128
    struct DictDataTypeString64 {
129
        using ColumnType = ColumnString;
130
    };
131
132
    template <typename Type>
133
    struct ColumnWithType {
134
        // OutputColumnType is used as the result column type
135
        using OutputColumnType = Type::ColumnType;
136
        ColumnPtr column;
137
        ColumnPtr null_map;
138
        // RealColumnType is the real type of the column, as there may be ColumnString64, but the result column will not be ColumnString64
139
        using RealColumnType = std::conditional_t<std::is_same_v<DictDataTypeString64, Type>,
140
                                                  ColumnString64, OutputColumnType>;
141
1.04k
        const RealColumnType* get() const {
142
1.04k
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
143
1.04k
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIhEEE3getEv
Line
Count
Source
141
78
        const RealColumnType* get() const {
142
78
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
143
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIaEEE3getEv
Line
Count
Source
141
78
        const RealColumnType* get() const {
142
78
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
143
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIsEEE3getEv
Line
Count
Source
141
78
        const RealColumnType* get() const {
142
78
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
143
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIiEEE3getEv
Line
Count
Source
141
97
        const RealColumnType* get() const {
142
97
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
143
97
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIlEEE3getEv
Line
Count
Source
141
84
        const RealColumnType* get() const {
142
84
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
143
84
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberInEEE3getEv
Line
Count
Source
141
78
        const RealColumnType* get() const {
142
78
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
143
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIfEEE3getEv
Line
Count
Source
141
79
        const RealColumnType* get() const {
142
79
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
143
79
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIdEEE3getEv
Line
Count
Source
141
78
        const RealColumnType* get() const {
142
78
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
143
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_12DataTypeIPv4EE3getEv
Line
Count
Source
141
78
        const RealColumnType* get() const {
142
78
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
143
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_12DataTypeIPv6EE3getEv
Line
Count
Source
141
78
        const RealColumnType* get() const {
142
78
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
143
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeStringEE3getEv
Line
Count
Source
141
55
        const RealColumnType* get() const {
142
55
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
143
55
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS1_20DictDataTypeString64EE3getEv
Line
Count
Source
141
27
        const RealColumnType* get() const {
142
27
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
143
27
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeDateV2EE3getEv
Line
Count
Source
141
78
        const RealColumnType* get() const {
142
78
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
143
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_18DataTypeDateTimeV2EE3getEv
Line
Count
Source
141
78
        const RealColumnType* get() const {
142
78
            return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get());
143
78
        }
Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_7DecimalIiEEEEE3getEv
Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_7DecimalIlEEEEE3getEv
Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_12Decimal128V3EEEE3getEv
Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_7DecimalIN4wide7integerILm256EiEEEEEEE3getEv
144
145
1.04k
        const ColumnUInt8* get_null_map() const {
146
1.04k
            if (!null_map) {
147
1.03k
                return nullptr;
148
1.03k
            }
149
8
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
150
1.04k
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIhEEE12get_null_mapEv
Line
Count
Source
145
78
        const ColumnUInt8* get_null_map() const {
146
78
            if (!null_map) {
147
78
                return nullptr;
148
78
            }
149
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
150
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIaEEE12get_null_mapEv
Line
Count
Source
145
78
        const ColumnUInt8* get_null_map() const {
146
78
            if (!null_map) {
147
78
                return nullptr;
148
78
            }
149
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
150
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIsEEE12get_null_mapEv
Line
Count
Source
145
78
        const ColumnUInt8* get_null_map() const {
146
78
            if (!null_map) {
147
78
                return nullptr;
148
78
            }
149
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
150
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIiEEE12get_null_mapEv
Line
Count
Source
145
97
        const ColumnUInt8* get_null_map() const {
146
97
            if (!null_map) {
147
89
                return nullptr;
148
89
            }
149
8
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
150
97
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIlEEE12get_null_mapEv
Line
Count
Source
145
82
        const ColumnUInt8* get_null_map() const {
146
82
            if (!null_map) {
147
82
                return nullptr;
148
82
            }
149
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
150
82
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberInEEE12get_null_mapEv
Line
Count
Source
145
78
        const ColumnUInt8* get_null_map() const {
146
78
            if (!null_map) {
147
78
                return nullptr;
148
78
            }
149
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
150
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIfEEE12get_null_mapEv
Line
Count
Source
145
79
        const ColumnUInt8* get_null_map() const {
146
79
            if (!null_map) {
147
79
                return nullptr;
148
79
            }
149
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
150
79
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIdEEE12get_null_mapEv
Line
Count
Source
145
78
        const ColumnUInt8* get_null_map() const {
146
78
            if (!null_map) {
147
78
                return nullptr;
148
78
            }
149
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
150
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_12DataTypeIPv4EE12get_null_mapEv
Line
Count
Source
145
78
        const ColumnUInt8* get_null_map() const {
146
78
            if (!null_map) {
147
78
                return nullptr;
148
78
            }
149
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
150
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_12DataTypeIPv6EE12get_null_mapEv
Line
Count
Source
145
78
        const ColumnUInt8* get_null_map() const {
146
78
            if (!null_map) {
147
78
                return nullptr;
148
78
            }
149
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
150
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeStringEE12get_null_mapEv
Line
Count
Source
145
55
        const ColumnUInt8* get_null_map() const {
146
55
            if (!null_map) {
147
55
                return nullptr;
148
55
            }
149
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
150
55
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS1_20DictDataTypeString64EE12get_null_mapEv
Line
Count
Source
145
27
        const ColumnUInt8* get_null_map() const {
146
27
            if (!null_map) {
147
27
                return nullptr;
148
27
            }
149
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
150
27
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeDateV2EE12get_null_mapEv
Line
Count
Source
145
78
        const ColumnUInt8* get_null_map() const {
146
78
            if (!null_map) {
147
78
                return nullptr;
148
78
            }
149
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
150
78
        }
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_18DataTypeDateTimeV2EE12get_null_mapEv
Line
Count
Source
145
78
        const ColumnUInt8* get_null_map() const {
146
78
            if (!null_map) {
147
78
                return nullptr;
148
78
            }
149
0
            return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get());
150
78
        }
Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_7DecimalIiEEEEE12get_null_mapEv
Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_7DecimalIlEEEEE12get_null_mapEv
Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_12Decimal128V3EEEE12get_null_mapEv
Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_7DecimalIN4wide7integerILm256EiEEEEEEE12get_null_mapEv
151
    };
152
153
    // res_real_column : result column (get_column result)
154
    // res_null : if value is null, will set res_null to true
155
    // value_column : corresponding value column, non-nullable
156
    // value_null_column : corresponding value null map, if the original value is non-nullable, it will be nullptr
157
    // value_idx : index in the value column
158
    template <bool value_is_nullable, typename ResultColumnType>
159
    ALWAYS_INLINE static void set_value_data(ResultColumnType* res_real_column, UInt8& res_null,
160
                                             const auto* value_column,
161
                                             const ColumnUInt8* value_null_column,
162
55.8k
                                             const size_t& value_idx) {
163
55.8k
        if constexpr (value_is_nullable) {
164
            // if the value is null, set the result column to null
165
19
            if (value_null_column->get_element(value_idx)) {
166
9
                res_null = true;
167
9
                res_real_column->insert_default();
168
9
                return;
169
9
            }
170
19
        }
171
55.8k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
172
            // If it is a string column, use get_data_at to avoid copying
173
4.31k
            StringRef str_ref = value_column->get_data_at(value_idx);
174
4.31k
            res_real_column->insert_data(str_ref.data, str_ref.size);
175
51.5k
        } else {
176
51.5k
            res_real_column->insert_value(value_column->get_element(value_idx));
177
51.5k
        }
178
10
    }
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIhEES4_EEvPT0_RhPKT1_PKS4_RKm
Line
Count
Source
162
4.29k
                                             const size_t& value_idx) {
163
4.29k
        if constexpr (value_is_nullable) {
164
            // if the value is null, set the result column to null
165
4.29k
            if (value_null_column->get_element(value_idx)) {
166
4.29k
                res_null = true;
167
4.29k
                res_real_column->insert_default();
168
4.29k
                return;
169
4.29k
            }
170
4.29k
        }
171
4.29k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
172
            // If it is a string column, use get_data_at to avoid copying
173
4.29k
            StringRef str_ref = value_column->get_data_at(value_idx);
174
4.29k
            res_real_column->insert_data(str_ref.data, str_ref.size);
175
4.29k
        } else {
176
4.29k
            res_real_column->insert_value(value_column->get_element(value_idx));
177
4.29k
        }
178
4.29k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIhEES4_EEvPT0_RhPKT1_PKS4_RKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIaEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
162
4.29k
                                             const size_t& value_idx) {
163
4.29k
        if constexpr (value_is_nullable) {
164
            // if the value is null, set the result column to null
165
4.29k
            if (value_null_column->get_element(value_idx)) {
166
4.29k
                res_null = true;
167
4.29k
                res_real_column->insert_default();
168
4.29k
                return;
169
4.29k
            }
170
4.29k
        }
171
4.29k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
172
            // If it is a string column, use get_data_at to avoid copying
173
4.29k
            StringRef str_ref = value_column->get_data_at(value_idx);
174
4.29k
            res_real_column->insert_data(str_ref.data, str_ref.size);
175
4.29k
        } else {
176
4.29k
            res_real_column->insert_value(value_column->get_element(value_idx));
177
4.29k
        }
178
4.29k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIaEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIsEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
162
4.29k
                                             const size_t& value_idx) {
163
4.29k
        if constexpr (value_is_nullable) {
164
            // if the value is null, set the result column to null
165
4.29k
            if (value_null_column->get_element(value_idx)) {
166
4.29k
                res_null = true;
167
4.29k
                res_real_column->insert_default();
168
4.29k
                return;
169
4.29k
            }
170
4.29k
        }
171
4.29k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
172
            // If it is a string column, use get_data_at to avoid copying
173
4.29k
            StringRef str_ref = value_column->get_data_at(value_idx);
174
4.29k
            res_real_column->insert_data(str_ref.data, str_ref.size);
175
4.29k
        } else {
176
4.29k
            res_real_column->insert_value(value_column->get_element(value_idx));
177
4.29k
        }
178
4.29k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIsEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIiEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
162
4.30k
                                             const size_t& value_idx) {
163
4.30k
        if constexpr (value_is_nullable) {
164
            // if the value is null, set the result column to null
165
4.30k
            if (value_null_column->get_element(value_idx)) {
166
4.30k
                res_null = true;
167
4.30k
                res_real_column->insert_default();
168
4.30k
                return;
169
4.30k
            }
170
4.30k
        }
171
4.30k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
172
            // If it is a string column, use get_data_at to avoid copying
173
4.30k
            StringRef str_ref = value_column->get_data_at(value_idx);
174
4.30k
            res_real_column->insert_data(str_ref.data, str_ref.size);
175
4.30k
        } else {
176
4.30k
            res_real_column->insert_value(value_column->get_element(value_idx));
177
4.30k
        }
178
4.30k
    }
_ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIiEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
162
19
                                             const size_t& value_idx) {
163
19
        if constexpr (value_is_nullable) {
164
            // if the value is null, set the result column to null
165
19
            if (value_null_column->get_element(value_idx)) {
166
9
                res_null = true;
167
9
                res_real_column->insert_default();
168
9
                return;
169
9
            }
170
19
        }
171
10
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
172
            // If it is a string column, use get_data_at to avoid copying
173
10
            StringRef str_ref = value_column->get_data_at(value_idx);
174
10
            res_real_column->insert_data(str_ref.data, str_ref.size);
175
10
        } else {
176
10
            res_real_column->insert_value(value_column->get_element(value_idx));
177
10
        }
178
10
    }
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIlEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
162
4.32k
                                             const size_t& value_idx) {
163
4.32k
        if constexpr (value_is_nullable) {
164
            // if the value is null, set the result column to null
165
4.32k
            if (value_null_column->get_element(value_idx)) {
166
4.32k
                res_null = true;
167
4.32k
                res_real_column->insert_default();
168
4.32k
                return;
169
4.32k
            }
170
4.32k
        }
171
4.32k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
172
            // If it is a string column, use get_data_at to avoid copying
173
4.32k
            StringRef str_ref = value_column->get_data_at(value_idx);
174
4.32k
            res_real_column->insert_data(str_ref.data, str_ref.size);
175
4.32k
        } else {
176
4.32k
            res_real_column->insert_value(value_column->get_element(value_idx));
177
4.32k
        }
178
4.32k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIlEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorInEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
162
4.29k
                                             const size_t& value_idx) {
163
4.29k
        if constexpr (value_is_nullable) {
164
            // if the value is null, set the result column to null
165
4.29k
            if (value_null_column->get_element(value_idx)) {
166
4.29k
                res_null = true;
167
4.29k
                res_real_column->insert_default();
168
4.29k
                return;
169
4.29k
            }
170
4.29k
        }
171
4.29k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
172
            // If it is a string column, use get_data_at to avoid copying
173
4.29k
            StringRef str_ref = value_column->get_data_at(value_idx);
174
4.29k
            res_real_column->insert_data(str_ref.data, str_ref.size);
175
4.29k
        } else {
176
4.29k
            res_real_column->insert_value(value_column->get_element(value_idx));
177
4.29k
        }
178
4.29k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorInEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIfEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
162
4.29k
                                             const size_t& value_idx) {
163
4.29k
        if constexpr (value_is_nullable) {
164
            // if the value is null, set the result column to null
165
4.29k
            if (value_null_column->get_element(value_idx)) {
166
4.29k
                res_null = true;
167
4.29k
                res_real_column->insert_default();
168
4.29k
                return;
169
4.29k
            }
170
4.29k
        }
171
4.29k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
172
            // If it is a string column, use get_data_at to avoid copying
173
4.29k
            StringRef str_ref = value_column->get_data_at(value_idx);
174
4.29k
            res_real_column->insert_data(str_ref.data, str_ref.size);
175
4.29k
        } else {
176
4.29k
            res_real_column->insert_value(value_column->get_element(value_idx));
177
4.29k
        }
178
4.29k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIfEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIdEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
162
4.29k
                                             const size_t& value_idx) {
163
4.29k
        if constexpr (value_is_nullable) {
164
            // if the value is null, set the result column to null
165
4.29k
            if (value_null_column->get_element(value_idx)) {
166
4.29k
                res_null = true;
167
4.29k
                res_real_column->insert_default();
168
4.29k
                return;
169
4.29k
            }
170
4.29k
        }
171
4.29k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
172
            // If it is a string column, use get_data_at to avoid copying
173
4.29k
            StringRef str_ref = value_column->get_data_at(value_idx);
174
4.29k
            res_real_column->insert_data(str_ref.data, str_ref.size);
175
4.29k
        } else {
176
4.29k
            res_real_column->insert_value(value_column->get_element(value_idx));
177
4.29k
        }
178
4.29k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIdEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIjEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
162
8.58k
                                             const size_t& value_idx) {
163
8.58k
        if constexpr (value_is_nullable) {
164
            // if the value is null, set the result column to null
165
8.58k
            if (value_null_column->get_element(value_idx)) {
166
8.58k
                res_null = true;
167
8.58k
                res_real_column->insert_default();
168
8.58k
                return;
169
8.58k
            }
170
8.58k
        }
171
8.58k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
172
            // If it is a string column, use get_data_at to avoid copying
173
8.58k
            StringRef str_ref = value_column->get_data_at(value_idx);
174
8.58k
            res_real_column->insert_data(str_ref.data, str_ref.size);
175
8.58k
        } else {
176
8.58k
            res_real_column->insert_value(value_column->get_element(value_idx));
177
8.58k
        }
178
8.58k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIjEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIoEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
162
4.29k
                                             const size_t& value_idx) {
163
4.29k
        if constexpr (value_is_nullable) {
164
            // if the value is null, set the result column to null
165
4.29k
            if (value_null_column->get_element(value_idx)) {
166
4.29k
                res_null = true;
167
4.29k
                res_real_column->insert_default();
168
4.29k
                return;
169
4.29k
            }
170
4.29k
        }
171
4.29k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
172
            // If it is a string column, use get_data_at to avoid copying
173
4.29k
            StringRef str_ref = value_column->get_data_at(value_idx);
174
4.29k
            res_real_column->insert_data(str_ref.data, str_ref.size);
175
4.29k
        } else {
176
4.29k
            res_real_column->insert_value(value_column->get_element(value_idx));
177
4.29k
        }
178
4.29k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIoEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_9ColumnStrIjEES4_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
Line
Count
Source
162
2.86k
                                             const size_t& value_idx) {
163
2.86k
        if constexpr (value_is_nullable) {
164
            // if the value is null, set the result column to null
165
2.86k
            if (value_null_column->get_element(value_idx)) {
166
2.86k
                res_null = true;
167
2.86k
                res_real_column->insert_default();
168
2.86k
                return;
169
2.86k
            }
170
2.86k
        }
171
2.86k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
172
            // If it is a string column, use get_data_at to avoid copying
173
2.86k
            StringRef str_ref = value_column->get_data_at(value_idx);
174
2.86k
            res_real_column->insert_data(str_ref.data, str_ref.size);
175
2.86k
        } else {
176
2.86k
            res_real_column->insert_value(value_column->get_element(value_idx));
177
2.86k
        }
178
2.86k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_9ColumnStrIjEES4_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_9ColumnStrIjEENS3_ImEEEEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
Line
Count
Source
162
1.44k
                                             const size_t& value_idx) {
163
1.44k
        if constexpr (value_is_nullable) {
164
            // if the value is null, set the result column to null
165
1.44k
            if (value_null_column->get_element(value_idx)) {
166
1.44k
                res_null = true;
167
1.44k
                res_real_column->insert_default();
168
1.44k
                return;
169
1.44k
            }
170
1.44k
        }
171
1.44k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
172
            // If it is a string column, use get_data_at to avoid copying
173
1.44k
            StringRef str_ref = value_column->get_data_at(value_idx);
174
1.44k
            res_real_column->insert_data(str_ref.data, str_ref.size);
175
1.44k
        } else {
176
1.44k
            res_real_column->insert_value(value_column->get_element(value_idx));
177
1.44k
        }
178
1.44k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_9ColumnStrIjEENS3_ImEEEEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorImEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Line
Count
Source
162
4.29k
                                             const size_t& value_idx) {
163
4.29k
        if constexpr (value_is_nullable) {
164
            // if the value is null, set the result column to null
165
4.29k
            if (value_null_column->get_element(value_idx)) {
166
4.29k
                res_null = true;
167
4.29k
                res_real_column->insert_default();
168
4.29k
                return;
169
4.29k
            }
170
4.29k
        }
171
4.29k
        if constexpr (std::is_same_v<ResultColumnType, ColumnString>) {
172
            // If it is a string column, use get_data_at to avoid copying
173
4.29k
            StringRef str_ref = value_column->get_data_at(value_idx);
174
4.29k
            res_real_column->insert_data(str_ref.data, str_ref.size);
175
4.29k
        } else {
176
4.29k
            res_real_column->insert_value(value_column->get_element(value_idx));
177
4.29k
        }
178
4.29k
    }
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorImEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_13ColumnDecimalINS0_7DecimalIiEEEES6_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_13ColumnDecimalINS0_7DecimalIiEEEES6_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_13ColumnDecimalINS0_7DecimalIlEEEES6_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_13ColumnDecimalINS0_7DecimalIlEEEES6_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_13ColumnDecimalINS0_12Decimal128V3EEES5_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_13ColumnDecimalINS0_12Decimal128V3EEES5_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_13ColumnDecimalINS0_7DecimalIN4wide7integerILm256EiEEEEEES9_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_13ColumnDecimalINS0_7DecimalIN4wide7integerILm256EiEEEEEES9_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm
179
180
    /// TODO: Add support for more data types, such as Array, Map, etc.
181
    using ValueData =
182
            std::variant<ColumnWithType<DataTypeUInt8>, ColumnWithType<DataTypeInt8>,
183
                         ColumnWithType<DataTypeInt16>, ColumnWithType<DataTypeInt32>,
184
                         ColumnWithType<DataTypeInt64>, ColumnWithType<DataTypeInt128>,
185
186
                         ColumnWithType<DataTypeFloat32>, ColumnWithType<DataTypeFloat64>,
187
188
                         ColumnWithType<DataTypeIPv4>, ColumnWithType<DataTypeIPv6>,
189
190
                         ColumnWithType<DataTypeString>, ColumnWithType<DictDataTypeString64>,
191
192
                         ColumnWithType<DataTypeDateV2>, ColumnWithType<DataTypeDateTimeV2>,
193
194
                         ColumnWithType<DataTypeDecimal<Decimal32>>,
195
                         ColumnWithType<DataTypeDecimal<Decimal64>>,
196
                         ColumnWithType<DataTypeDecimal<Decimal128V3>>,
197
                         ColumnWithType<DataTypeDecimal<Decimal256>>>;
198
199
    void load_values(const std::vector<ColumnPtr>& values_column);
200
201
    // _values_data is used to store the data of value columns.
202
    std::vector<ValueData> _values_data;
203
    std::string _dict_name;
204
    std::vector<DictionaryAttribute> _attributes;
205
    // A mapping from attribute names to their corresponding indices.
206
    std::unordered_map<std::string, size_t> _name_to_attributes_index;
207
208
    // _mem_tracker comes from DictionaryFactory. If _mem_tracker is nullptr, the dictionary is running in a unit test (UT).
209
    std::shared_ptr<MemTrackerLimiter> _mem_tracker;
210
};
211
212
using DictionaryPtr = std::shared_ptr<IDictionary>;
213
214
} // namespace doris::vectorized