/root/doris/be/src/gutil/strings/charset.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2008 Google Inc. All Rights Reserved. |
2 | | |
3 | | #pragma once |
4 | | |
5 | | #include "gutil/integral_types.h" |
6 | | |
7 | | namespace strings { |
8 | | |
9 | | // A CharSet is a simple map from (1-byte) characters to Booleans. It simply |
10 | | // exposes the mechanism of checking if a given character is in the set, fairly |
11 | | // efficiently. Useful for string tokenizing routines. |
12 | | // |
13 | | // Run on asherah (2 X 2400 MHz CPUs); 2008/11/10-13:18:03 |
14 | | // CPU: Intel Core2 (2 cores) dL1:32KB dL2:4096KB |
15 | | // ***WARNING*** CPU scaling is enabled, the benchmark timings may be noisy, |
16 | | // Benchmark Time(ns) CPU(ns) Iterations |
17 | | // ------------------------------------------------------- |
18 | | // BM_CharSetTesting/1K 21 21 32563138 |
19 | | // BM_CharSetTesting/4K 21 21 31968433 |
20 | | // BM_CharSetTesting/32K 21 21 32114953 |
21 | | // BM_CharSetTesting/256K 22 22 31679082 |
22 | | // BM_CharSetTesting/1M 21 21 32563138 |
23 | | // |
24 | | // This class is thread-compatible. |
25 | | // |
26 | | // This class has an implicit constructor. |
27 | | // Style guide exception granted: |
28 | | // http://goto/style-guide-exception-20978288 |
29 | | |
30 | | class CharSet { |
31 | | public: |
32 | | // Initialize a CharSet containing no characters or the given set of |
33 | | // characters, respectively. |
34 | | CharSet(); |
35 | | // Deliberately an implicit constructor, so anything that takes a CharSet |
36 | | // can also take an explicit list of characters. |
37 | | CharSet(const char* characters); // NOLINT(runtime/explicit) |
38 | | explicit CharSet(const CharSet& other); |
39 | | |
40 | | // Add or remove a character from the set. |
41 | 0 | void Add(unsigned char c) { bits_[Word(c)] |= BitMask(c); } |
42 | 0 | void Remove(unsigned char c) { bits_[Word(c)] &= ~BitMask(c); } |
43 | | |
44 | | // Return true if this character is in the set |
45 | 0 | bool Test(unsigned char c) const { return bits_[Word(c)] & BitMask(c); } |
46 | | |
47 | | private: |
48 | | // The numbers below are optimized for 64-bit hardware. TODO(user): In the |
49 | | // future, we should change this to use uword_t and do various bits of magic |
50 | | // to calculate the numbers at compile time. |
51 | | |
52 | | // In general, |
53 | | // static const int kNumWords = max(32 / sizeof(uword_t), 1); |
54 | | uint64 bits_[4]; |
55 | | |
56 | | // 4 words => the high 2 bits of c are the word number. In general, |
57 | | // kShiftValue = 8 - log2(kNumWords) |
58 | 0 | static int Word(unsigned char c) { return c >> 6; } |
59 | | |
60 | | // And the value we AND with c is ((1 << shift value) - 1) |
61 | | // static const int kLowBitsMask = (256 / kNumWords) - 1; |
62 | 0 | static uint64 BitMask(unsigned char c) { |
63 | 0 | uint64 mask = 1; |
64 | 0 | return mask << (c & 0x3f); |
65 | 0 | } |
66 | | }; |
67 | | |
68 | | } // namespace strings |