Coverage Report

Created: 2026-03-12 14:13

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/json/path_in_data.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/Serializations/PathInData.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <gen_cpp/segment_v2.pb.h>
24
#include <stddef.h>
25
26
#include <algorithm>
27
#include <memory>
28
#include <string>
29
#include <string_view>
30
#include <vector>
31
32
#include "core/uint128.h"
33
34
namespace doris {
35
36
/// Class that represents a path in a document, e.g. JSON.
37
class PathInData;
38
using PathInDataPtr = std::shared_ptr<PathInData>;
39
40
class PathInData {
41
public:
42
    struct Part {
43
1.68M
        Part() = default;
44
        Part(std::string_view key_, bool is_nested_, UInt8 anonymous_array_level_)
45
88.0M
                : key(key_), is_nested(is_nested_), anonymous_array_level(anonymous_array_level_) {}
46
18.3M
        bool operator==(const Part& other) const {
47
18.3M
            return this->key == other.key && this->is_nested == other.is_nested &&
48
18.3M
                   this->anonymous_array_level == other.anonymous_array_level;
49
18.3M
        }
50
        /// Name of part of path.
51
        std::string_view key;
52
        /// True if this part is Nested, i.e. the element
53
        /// related to this key is an array of objects.
54
        bool is_nested = false;
55
        /// Number of array levels between current key and previous key.
56
        /// E.g. in JSON {"k1": [[[{"k2": 1, "k3": 2}]]]}
57
        /// "k1" is nested and has anonymous_array_level = 0.
58
        /// "k2" and "k3" are not nested and have anonymous_array_level = 2.
59
        UInt8 anonymous_array_level = 0;
60
61
        /// Get the total array depth for this part.
62
        /// Used for NestedGroup offset indexing.
63
        /// If is_nested (array<object>), the depth is anonymous_array_level + 1
64
        /// Otherwise it's just the anonymous_array_level.
65
0
        UInt8 get_array_depth() const {
66
0
            return is_nested ? (anonymous_array_level + 1) : anonymous_array_level;
67
0
        }
68
    };
69
    using Parts = std::vector<Part>;
70
46.2M
    PathInData() = default;
71
    explicit PathInData(std::string_view path_, bool is_typed_ = false);
72
    explicit PathInData(const Parts& parts_);
73
    explicit PathInData(std::string_view path_, const Parts& parts_, bool is_typed_ = false);
74
    explicit PathInData(const std::vector<std::string>& paths);
75
    explicit PathInData(const std::string& root, const std::vector<std::string>& paths);
76
    PathInData(const PathInData& other);
77
    PathInData& operator=(const PathInData& other);
78
    static UInt128 get_parts_hash(const Parts& parts_, bool is_typed_ = false);
79
18.9M
    bool empty() const { return parts.empty(); }
80
115M
    const String& get_path() const { return path; }
81
    // If the path is v.a.b, the relative path is a.b.
82
    // Make sure parts is not empty; a path with at most one part yields an empty view.
83
0
    std::string_view get_relative_path() const {
84
0
        if (parts.size() <= 1) {
85
0
            // return empty string
86
0
            return {};
87
0
        }
88
0
        return {path.begin() + parts[0].key.size() + 1, path.end()};
89
0
    }
90
126M
    const Parts& get_parts() const { return parts; }
91
0
    bool is_nested(size_t i) const { return parts[i].is_nested; }
92
44.0M
    bool has_nested_part() const { return has_nested; }
93
25.4M
    bool operator==(const PathInData& other) const {
94
25.4M
        return parts == other.parts && is_typed == other.is_typed;
95
25.4M
    }
96
106
    bool operator!=(const PathInData& other) const { return !(*this == other); }
97
    PathInData get_nested_prefix_path() const;
98
    struct Hash {
99
        size_t operator()(const PathInData& value) const;
100
    };
101
    std::string to_jsonpath() const;
102
    PathInData copy_pop_front() const;
103
    PathInData copy_pop_nfront(size_t n) const;
104
    PathInData copy_pop_back() const;
105
    static bool try_strip_prefix(const std::string& name, const std::string& prefix_dot,
106
                                 std::string* out);
107
    static PathInData append(const PathInData& base, std::string_view suffix);
108
    void to_protobuf(segment_v2::ColumnPathInfo* pb, int32_t parent_col_unique_id) const;
109
    void from_protobuf(const segment_v2::ColumnPathInfo& pb);
110
111
785k
    bool get_is_typed() const { return is_typed; }
112
113
    bool need_record_stats() const;
114
115
25.9M
    bool operator<(const PathInData& rhs) const {
116
25.9M
        return std::lexicographical_compare(
117
25.9M
                parts.begin(), parts.end(), rhs.parts.begin(), rhs.parts.end(),
118
90.6M
                [](const auto& a, const auto& b) { return a.key < b.key; });
119
25.9M
    }
120
121
private:
122
    /// Creates full path from parts.
123
    void build_path(const Parts& other_parts);
124
    /// Creates new parts from the full path with correct string pointers.
125
    void build_parts(const Parts& other_parts);
126
    /// The full path. Parts are separated by dots.
127
    String path;
128
    /// Parts of the path. All string_view-s in parts must point to the @path.
129
    Parts parts;
130
    /// True if at least one part is nested.
131
    /// Cached to avoid linear complexity in has_nested_part().
132
    bool has_nested = false;
133
134
    /// True if the path is typed, e.g. a.b: int
135
    bool is_typed = false;
136
};
137
138
class PathInDataBuilder {
139
public:
140
20.2M
    const PathInData::Parts& get_parts() const { return parts; }
141
    PathInDataBuilder& append(std::string_view key, bool is_array);
142
    PathInDataBuilder& append(const PathInData::Parts& path, bool is_array);
143
    PathInDataBuilder& append(const std::vector<std::string>& parts);
144
    void pop_back();
145
    void pop_back(size_t n);
146
12.6k
    PathInData build() { return PathInData(parts); }
147
148
private:
149
    PathInData::Parts parts;
150
    /// Number of array levels without key to which
151
    /// next non-empty key will be nested.
152
    /// Example: for JSON { "k1": [[{"k2": 1, "k3": 2}]] }
153
    /// `k2` and `k3` have anonymous_array_level = 1 in that case.
154
    size_t current_anonymous_array_level = 0;
155
};
156
using PathsInData = std::vector<PathInData>;
157
158
struct PathInDataRef {
159
    const PathInData* ref;
160
    struct Hash {
161
47.7k
        size_t operator()(const PathInDataRef& value) const {
162
47.7k
            return PathInData::Hash {}(*value.ref);
163
47.7k
        }
164
    };
165
47.7k
    PathInDataRef(const PathInData* ptr) : ref(ptr) {}
166
0
    PathInDataRef() : ref(nullptr) {}
167
9.41k
    bool operator==(const PathInDataRef& other) const {
168
9.41k
        return (this->ref != nullptr && other.ref != nullptr && *this->ref == *other.ref) ||
169
9.41k
               (this->ref == nullptr && other.ref == nullptr);
170
9.41k
    }
171
};
172
173
} // namespace doris
174
175
template <>
176
struct std::hash<doris::PathInData> {
177
55.2M
    size_t operator()(const doris::PathInData& value) const {
178
55.2M
        return doris::PathInData::Hash {}(value);
179
55.2M
    }
180
};