be/src/core/string_view.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/facebookincubator/velox/blob/main/velox/type/StringView.h |
19 | | // And modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
23 | | #include <glog/logging.h> |
24 | | |
25 | | #include <cstddef> |
26 | | #include <cstdint> |
27 | | #include <cstring> |
28 | | #include <string> |
29 | | |
30 | | #include "core/string_ref.h" |
31 | | |
32 | | namespace doris { |
33 | | // Variable-length string or binary type for use in vectors. This has |
34 | | // semantics similar to std::string_view or folly::StringPiece and |
35 | | // exposes a subset of the interface. If the string is 12 bytes or |
36 | | // shorter, it is inlined and no reference is held. If it is longer, a |
37 | | // reference to the string is held and its first 4 bytes are |
38 | | // cached in the StringView. This allows failing comparisons early and |
39 | | // reduces the CPU cache working set when dealing with short strings. |
40 | | |
41 | | class StringView { |
42 | | #include "common/compile_check_begin.h" |
43 | | public: |
44 | | using value_type = char; |
45 | | static constexpr size_t kPrefixSize = 4 * sizeof(char); |
46 | | static constexpr size_t kInlineSize = 12; |
47 | | |
48 | 2.55k | StringView() { |
49 | 2.55k | static_assert(sizeof(StringView) == 16); |
50 | 2.55k | memset(this, 0, sizeof(StringView)); |
51 | 2.55k | } |
52 | | |
53 | 7.21k | StringView(const char* data, uint32_t len) : size_(len) { |
54 | 7.21k | DCHECK_GE(len, 0); |
55 | 7.21k | DCHECK(data || len == 0); |
56 | 7.21k | if (isInline()) { |
57 | | // Zero the inline part. |
58 | | // this makes sure that inline strings can be compared for equality with 2 |
59 | | // int64 compares. |
60 | 6.33k | memset(prefix_, 0, kPrefixSize); |
61 | 6.33k | if (size_ == 0) { |
62 | 2.06k | return; |
63 | 2.06k | } |
64 | | // small string: inlined. Zero the last 8 bytes first to allow for whole |
65 | | // word comparison. |
66 | 4.26k | value_.data = nullptr; |
67 | 4.26k | memcpy(prefix_, data, size_); |
68 | 4.26k | } else { |
69 | | // large string: store pointer |
70 | 876 | memcpy(prefix_, data, kPrefixSize); |
71 | 876 | value_.data = data; |
72 | 876 | } |
73 | 7.21k | } |
74 | | |
75 | | StringView(unsigned char* data, uint32_t len) |
76 | 1 | : StringView(reinterpret_cast<const char*>(data), len) {} |
77 | | |
78 | 14.2k | bool isInline() const { return isInline(size_); } |
79 | | |
80 | 14.3k | ALWAYS_INLINE static constexpr bool isInline(uint32_t size) { return size <= kInlineSize; } |
81 | | |
82 | | explicit StringView(std::string&& value) = delete; |
83 | | explicit StringView(const std::string& value) |
84 | 33 | : StringView(value.data(), cast_set<uint32_t>(value.size())) {} |
85 | | explicit StringView(std::string_view value) |
86 | 7 | : StringView(value.data(), cast_set<uint32_t>(value.size())) {} |
87 | | /* implicit */ StringView(const char* data) |
88 | 499 | : StringView(data, cast_set<uint32_t>(strlen(data))) {} |
89 | 2.47k | doris::StringRef to_string_ref() const { return {data(), size()}; } |
90 | | |
91 | | operator std::string_view() && = delete; |
92 | 1 | explicit operator std::string_view() const& { return {data(), size()}; } |
93 | 6 | operator std::string() const { return std::string(data(), size()); } |
94 | 2 | std::string str() const { return *this; } |
95 | | |
96 | | const char* data() && = delete; |
97 | 6.60k | const char* data() const& { return isInline() ? prefix_ : value_.data; } |
98 | | |
99 | 7.30k | uint32_t size() const { return size_; } |
100 | 3 | bool empty() const { return size() == 0; } |
101 | | |
102 | 14 | void set_size(uint32_t size) { size_ = size; } |
103 | | |
104 | | bool operator==(const StringView& other) const; |
105 | 2 | friend std::ostream& operator<<(std::ostream& os, const StringView& stringView) { |
106 | 2 | os.write(stringView.data(), stringView.size()); |
107 | 2 | return os; |
108 | 2 | } |
109 | 8 | auto operator<=>(const StringView& other) const { |
110 | 8 | const auto cmp = compare(other); |
111 | 8 | return cmp < 0 ? std::strong_ordering::less |
112 | 8 | : cmp > 0 ? std::strong_ordering::greater |
113 | 4 | : std::strong_ordering::equal; |
114 | 8 | } |
115 | | |
116 | | // Returns 0, if this == other |
117 | | // < 0, if this < other |
118 | | // > 0, if this > other |
119 | | int32_t compare(const StringView& other) const; |
120 | | |
121 | | const char* begin() && = delete; |
122 | 2 | const char* begin() const& { return data(); } |
123 | | const char* end() && = delete; |
124 | 2 | const char* end() const& { return data() + size(); } |
125 | | |
126 | 18 | std::string dump_hex() const { |
127 | 18 | static const char* kHex = "0123456789ABCDEF"; |
128 | 18 | std::string out; |
129 | 18 | out.reserve(size_ * 2 + 2); |
130 | 18 | out.push_back('0'); |
131 | 18 | out.push_back('x'); |
132 | 18 | const char* ptr = data(); |
133 | 163 | for (uint32_t i = 0; i < size_; ++i) { |
134 | 145 | auto c = static_cast<unsigned char>(ptr[i]); |
135 | 145 | out.push_back(kHex[c >> 4]); |
136 | 145 | out.push_back(kHex[c & 0x0F]); |
137 | 145 | } |
138 | 18 | return out; |
139 | 18 | } |
140 | | |
141 | | private: |
142 | 16 | inline int64_t size_and_prefix_as_int64() const { |
143 | 16 | return reinterpret_cast<const int64_t*>(this)[0]; |
144 | 16 | } |
145 | | |
146 | 4 | inline int64_t inlined_as_int64() const { return reinterpret_cast<const int64_t*>(this)[1]; } |
147 | | |
148 | 2.47k | int32_t prefix_as_int() const { return *reinterpret_cast<const int32_t*>(&prefix_); } |
149 | | |
150 | | uint32_t size_; |
151 | | char prefix_[4]; |
152 | | union { |
153 | | char inlined[8]; |
154 | | const char* data; |
155 | | } value_; |
156 | | }; |
157 | | #include "common/compile_check_end.h" |
158 | | } // namespace doris |