be/src/exprs/function/ip_address_dictionary.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Dictionaries/IPAddressDictionary.h |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #include <utility> |
24 | | #include <vector> |
25 | | |
26 | | #include "core/block/column_with_type_and_name.h" |
27 | | #include "core/block/columns_with_type_and_name.h" |
28 | | #include "core/column/column.h" |
29 | | #include "core/types.h" |
30 | | #include "exprs/function/dictionary.h" |
31 | | |
32 | | namespace doris { |
33 | | class IPAddressDictionary : public IDictionary { |
34 | | public: |
35 | | IPAddressDictionary(std::string name, std::vector<DictionaryAttribute> attributes) |
36 | 29 | : IDictionary(std::move(name), std::move(attributes)) {} |
37 | | |
38 | | ~IPAddressDictionary() override; |
39 | | |
40 | | ColumnPtr get_column(const std::string& attribute_name, const DataTypePtr& attribute_type, |
41 | | const ColumnPtr& key_column, const DataTypePtr& key_type) const override; |
42 | | |
43 | | static DictionaryPtr create_ip_trie_dict(const std::string& name, const ColumnPtr& key_column, |
44 | 29 | const ColumnsWithTypeAndName& values_data) { |
45 | 29 | std::vector<DictionaryAttribute> attributes; |
46 | 29 | std::vector<ColumnPtr> values_column; |
47 | 34 | for (const auto& att : values_data) { |
48 | 34 | attributes.push_back({att.name, att.type}); |
49 | 34 | values_column.push_back(att.column); |
50 | 34 | } |
51 | 29 | auto dict = std::make_shared<IPAddressDictionary>(name, attributes); |
52 | 29 | dict->load_data(key_column, values_column); |
53 | 29 | return dict; |
54 | 29 | } |
55 | | /* |
56 | | format_ipv6_cidr is used to standardize CIDR. |
57 | | For example, 192.1.1.1/24 is not a valid CIDR. |
58 | | It should ensure that all bits after the mask are set to 0, resulting in 192.1.1.0/24. |
59 | | */ |
60 | | static IPv6 format_ipv6_cidr(const uint8_t* addr, uint8_t prefix); |
61 | | |
62 | | size_t allocated_bytes() const override; |
63 | | |
64 | | private: |
65 | | using RowIdxConstIter = std::vector<size_t>::const_iterator; |
66 | | |
67 | 98 | RowIdxConstIter ip_not_found() const { return origin_row_idx_column.end(); } |
68 | | |
69 | | RowIdxConstIter look_up_IP(const IPv6& target) const; |
70 | | |
71 | | void load_data(const ColumnPtr& key_column, const std::vector<ColumnPtr>& values_column); |
72 | | |
73 | | std::vector<IPv6> ip_column; |
74 | | |
75 | | std::vector<UInt8> prefix_column; |
76 | | |
77 | | std::vector<size_t> origin_row_idx_column; |
78 | | |
79 | | std::vector<size_t> parent_subnet; |
80 | | }; |
81 | | |
82 | | inline DictionaryPtr create_ip_trie_dict_from_column(const std::string& name, |
83 | | const ColumnWithTypeAndName& key_data, |
84 | 30 | const ColumnsWithTypeAndName& values_data) { |
85 | 30 | auto key_column = key_data.column; |
86 | 30 | auto key_type = key_data.type; |
87 | 30 | if (!is_string_type(key_type->get_primitive_type())) { |
88 | 1 | throw doris::Exception( |
89 | 1 | ErrorCode::INVALID_ARGUMENT, |
90 | 1 | DICT_DATA_ERROR_TAG + |
91 | 1 | "IPAddressDictionary only support string in key , input key type is {} ", |
92 | 1 | key_type->get_name()); |
93 | 1 | } |
94 | | |
95 | 29 | DictionaryPtr dict = IPAddressDictionary::create_ip_trie_dict(name, key_column, values_data); |
96 | 29 | return dict; |
97 | 30 | } |
98 | | } // namespace doris |