/root/doris/be/src/olap/olap_common.h

Source (jump to first uncovered line)
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <gen_cpp/Types_types.h>
#include <netinet/in.h>

#include <atomic>
#include <charconv>
#include <cstdint>
#include <functional>
#include <list>
#include <map>
#include <memory>
#include <ostream>
#include <sstream>
#include <string>
#include <typeinfo>
#include <unordered_map>
#include <unordered_set>
#include <utility>

#include "common/config.h"
#include "io/io_common.h"
#include "olap/olap_define.h"
#include "olap/rowset/rowset_fwd.h"
#include "util/hash_util.hpp"
#include "util/time.h"
#include "util/uid_util.h"

namespace doris {

// Exclusive upper bound of the 56-bit incrementing part of a rowset id (2^56).
// The shift is performed on an int64_t operand so it stays a 64-bit operation even on
// platforms where `long` is only 32 bits wide.
static constexpr int64_t MAX_ROWSET_ID = int64_t {1} << 56;
// Mask selecting the low 56 bits of a 64-bit word, i.e. every bit below MAX_ROWSET_ID.
static constexpr int64_t LOW_56_BITS = MAX_ROWSET_ID - 1;

// Schema hash value, stored as a signed 32-bit integer.
using SchemaHash = int32_t;
// 128-bit signed / unsigned integers, built on the compiler's __int128 extension.
using int128_t = __int128;
using uint128_t = unsigned __int128;

// A tablet uid is represented with the generic UniqueId type.
using TabletUid = UniqueId;

// Kinds of compaction. The numeric values are assigned explicitly and start at 1.
enum CompactionType { BASE_COMPACTION = 1, CUMULATIVE_COMPACTION = 2, FULL_COMPACTION = 3 };

// Role of a data directory. Values are implicit (0, 1, 2 in declaration order).
enum DataDirType {
    SPILL_DISK_DIR,
    OLAP_DATA_DIR,
    DATA_CACHE_DIR,
};

// Plain description of one data directory: where it is, how much space it has and uses,
// and how it is classified (storage medium and directory role).
struct DataDirInfo {
    std::string path;
    size_t path_hash = 0;
    // Actual disk capacity. Defaults to 1 rather than 0
    // (NOTE(review): presumably so that ratios computed against it never divide by zero — confirm).
    int64_t disk_capacity = 1; // actual disk capacity
    int64_t available = 0;     // available space, in bytes
    int64_t local_used_capacity = 0;
    int64_t remote_used_capacity = 0;
    int64_t trash_used_capacity = 0;
    bool is_used = false;                                      // whether the directory is marked as available
    TStorageMedium::type storage_medium = TStorageMedium::HDD; // storage medium type: SSD|HDD
    DataDirType data_dir_type = DataDirType::OLAP_DATA_DIR;
    std::string bvar_name;
};
// Per-predicate filtering counters: how many rows went in and how many were filtered out.
// NOTE(review): `type` looks like a predicate-type code — confirm against the code that fills it.
struct PredicateFilterInfo {
    int type = 0;
    uint64_t input_row = 0;
    uint64_t filtered_row = 0;
};
// Comparison functor ordering DataDirInfo entries by ascending `available` space.
struct DataDirInfoLessAvailability {
    // Returns true when `left` has strictly less available space than `right`.
    bool operator()(const DataDirInfo& left, const DataDirInfo& right) const {
        return right.available > left.available;
    }
};

// Identifies a tablet by the pair (tablet id, tablet uid).
struct TabletInfo {
    TTabletId tablet_id;
    UniqueId tablet_uid;

    TabletInfo(TTabletId in_tablet_id, UniqueId in_uid)
            : tablet_id(in_tablet_id), tablet_uid(in_uid) {}

    // Orders by tablet_id first; tablet_uid only breaks ties between equal ids.
    bool operator<(const TabletInfo& right) const {
        if (tablet_id == right.tablet_id) {
            return tablet_uid < right.tablet_uid;
        }
        return tablet_id < right.tablet_id;
    }

    // Renders the tablet as "<tablet_id>.<tablet_uid>".
    std::string to_string() const {
        std::stringstream out;
        out << tablet_id;
        out << ".";
        out << tablet_uid.to_string();
        return out.str();
    }
};

struct TabletSize {
    TabletSize(TTabletId in_tablet_id, size_t in_tablet_size)
            : tablet_id(in_tablet_id), tablet_size(in_tablet_size) {}

    TTabletId tablet_id;
    size_t tablet_size;
};

// All data types supported by Field.
// When a new field type is added, a matching TypeInfo may need to be defined as well,
// and functions such as get_type_info in types.cpp need to be updated.
// Every enumerator carries an explicit numeric value.
enum class FieldType {
    OLAP_FIELD_TYPE_TINYINT = 1, // MYSQL_TYPE_TINY
    OLAP_FIELD_TYPE_UNSIGNED_TINYINT = 2,
    OLAP_FIELD_TYPE_SMALLINT = 3, // MYSQL_TYPE_SHORT
    OLAP_FIELD_TYPE_UNSIGNED_SMALLINT = 4,
    OLAP_FIELD_TYPE_INT = 5, // MYSQL_TYPE_LONG
    OLAP_FIELD_TYPE_UNSIGNED_INT = 6,
    OLAP_FIELD_TYPE_BIGINT = 7, // MYSQL_TYPE_LONGLONG
    OLAP_FIELD_TYPE_UNSIGNED_BIGINT = 8,
    OLAP_FIELD_TYPE_LARGEINT = 9,
    OLAP_FIELD_TYPE_FLOAT = 10,  // MYSQL_TYPE_FLOAT
    OLAP_FIELD_TYPE_DOUBLE = 11, // MYSQL_TYPE_DOUBLE
    OLAP_FIELD_TYPE_DISCRETE_DOUBLE = 12,
    OLAP_FIELD_TYPE_CHAR = 13,     // MYSQL_TYPE_STRING
    OLAP_FIELD_TYPE_DATE = 14,     // MySQL_TYPE_NEWDATE
    OLAP_FIELD_TYPE_DATETIME = 15, // MySQL_TYPE_DATETIME
    OLAP_FIELD_TYPE_DECIMAL = 16,  // DECIMAL, stored in a different format than MySQL uses
    OLAP_FIELD_TYPE_VARCHAR = 17,

    OLAP_FIELD_TYPE_STRUCT = 18,  // Struct
    OLAP_FIELD_TYPE_ARRAY = 19,   // Array
    OLAP_FIELD_TYPE_MAP = 20,     // Map
    OLAP_FIELD_TYPE_UNKNOWN = 21, // unknown type
    OLAP_FIELD_TYPE_NONE = 22,
    OLAP_FIELD_TYPE_HLL = 23,
    OLAP_FIELD_TYPE_BOOL = 24,
    OLAP_FIELD_TYPE_OBJECT = 25,
    OLAP_FIELD_TYPE_STRING = 26,
    OLAP_FIELD_TYPE_QUANTILE_STATE = 27,
    OLAP_FIELD_TYPE_DATEV2 = 28,
    OLAP_FIELD_TYPE_DATETIMEV2 = 29,
    OLAP_FIELD_TYPE_TIMEV2 = 30,
    OLAP_FIELD_TYPE_DECIMAL32 = 31,
    OLAP_FIELD_TYPE_DECIMAL64 = 32,
    OLAP_FIELD_TYPE_DECIMAL128I = 33,
    OLAP_FIELD_TYPE_JSONB = 34,
    OLAP_FIELD_TYPE_VARIANT = 35,
    OLAP_FIELD_TYPE_AGG_STATE = 36,
    OLAP_FIELD_TYPE_DECIMAL256 = 37,
    OLAP_FIELD_TYPE_IPV4 = 38,
    OLAP_FIELD_TYPE_IPV6 = 39,
};

// All aggregation methods supported by Field.
// In practice not every type can use every aggregation method below: for example, SUM on a
// string type is meaningless (although it does not crash the program).
// The Field class does not enforce this itself; the restriction should be applied when the
// table is created.
enum class FieldAggregationMethod {
    OLAP_FIELD_AGGREGATION_NONE = 0,
    OLAP_FIELD_AGGREGATION_SUM = 1,
    OLAP_FIELD_AGGREGATION_MIN = 2,
    OLAP_FIELD_AGGREGATION_MAX = 3,
    OLAP_FIELD_AGGREGATION_REPLACE = 4,
    OLAP_FIELD_AGGREGATION_HLL_UNION = 5,
    OLAP_FIELD_AGGREGATION_UNKNOWN = 6,
    OLAP_FIELD_AGGREGATION_BITMAP_UNION = 7,
    // Replace if and only if the added value is not null
    OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL = 8,
    OLAP_FIELD_AGGREGATION_QUANTILE_UNION = 9,
    OLAP_FIELD_AGGREGATION_GENERIC = 10
};

// Kinds of push task. Values are explicit; two of them are kept although no longer used.
enum class PushType {
    PUSH_NORMAL = 1,          // for broker/hadoop load, no longer used
    PUSH_FOR_DELETE = 2,      // for delete
    PUSH_FOR_LOAD_DELETE = 3, // no longer used
    PUSH_NORMAL_V2 = 4,       // for spark load
};

// Returns true for the three string-like field types: VARCHAR, CHAR and STRING.
// Every other field type yields false.
constexpr bool field_is_slice_type(const FieldType& field_type) {
    switch (field_type) {
    case FieldType::OLAP_FIELD_TYPE_VARCHAR:
    case FieldType::OLAP_FIELD_TYPE_CHAR:
    case FieldType::OLAP_FIELD_TYPE_STRING:
        return true;
    default:
        return false;
    }
}

// Returns true when `field_type` belongs to the set this header treats as numeric:
// the signed integers, the unsigned integers up to UNSIGNED_INT, FLOAT/DOUBLE, the date and
// datetime types (both generations), every DECIMAL variant, BOOL, and IPV4/IPV6.
// Types not listed below (for example UNSIGNED_BIGINT, DISCRETE_DOUBLE and TIMEV2) yield false.
constexpr bool field_is_numeric_type(const FieldType& field_type) {
    switch (field_type) {
    // integers
    case FieldType::OLAP_FIELD_TYPE_TINYINT:
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT:
    case FieldType::OLAP_FIELD_TYPE_SMALLINT:
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT:
    case FieldType::OLAP_FIELD_TYPE_INT:
    case FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT:
    case FieldType::OLAP_FIELD_TYPE_BIGINT:
    case FieldType::OLAP_FIELD_TYPE_LARGEINT:
    // floating point
    case FieldType::OLAP_FIELD_TYPE_FLOAT:
    case FieldType::OLAP_FIELD_TYPE_DOUBLE:
    // dates and datetimes
    case FieldType::OLAP_FIELD_TYPE_DATE:
    case FieldType::OLAP_FIELD_TYPE_DATEV2:
    case FieldType::OLAP_FIELD_TYPE_DATETIME:
    case FieldType::OLAP_FIELD_TYPE_DATETIMEV2:
    // decimals
    case FieldType::OLAP_FIELD_TYPE_DECIMAL:
    case FieldType::OLAP_FIELD_TYPE_DECIMAL32:
    case FieldType::OLAP_FIELD_TYPE_DECIMAL64:
    case FieldType::OLAP_FIELD_TYPE_DECIMAL128I:
    case FieldType::OLAP_FIELD_TYPE_DECIMAL256:
    // boolean and IP addresses
    case FieldType::OLAP_FIELD_TYPE_BOOL:
    case FieldType::OLAP_FIELD_TYPE_IPV4:
    case FieldType::OLAP_FIELD_TYPE_IPV6:
        return true;
    default:
        return false;
    }
}

// <start_version_id, end_version_id>, such as <100, 110>
//using Version = std::pair<TupleVersion, TupleVersion>;

// A version range <first, second>, e.g. <100, 110>.
struct Version {
    int64_t first;
    int64_t second;

    // Default-constructed versions are <0, 0>.
    Version() : Version(0, 0) {}
    Version(int64_t first_, int64_t second_) : first(first_), second(second_) {}

    // Returns the fixed placeholder version <2^28, 2^29>.
    // According to the original note, schema change writes temporary versions and uses a
    // large number here so that those temporary versions do not collide in caches.
    static Version mock() {
        constexpr int64_t mock_first = 1 << 28;
        constexpr int64_t mock_second = 1 << 29;
        return Version(mock_first, mock_second);
    }

    friend std::ostream& operator<<(std::ostream& os, const Version& version);

    // Two versions are equal only when both endpoints match.
    bool operator==(const Version& rhs) const { return first == rhs.first && second == rhs.second; }

    bool operator!=(const Version& rhs) const { return !(*this == rhs); }

    // True when `other` lies entirely inside this range (endpoints included).
    bool contains(const Version& other) const {
        return other.first >= first && other.second <= second;
    }

    // Formats the range as "[first-second]".
    std::string to_string() const { return fmt::format("[{}-{}]", first, second); }
};

// A sequence of Version ranges.
using Versions = std::vector<Version>;

// Streams a single Version using its to_string() representation.
inline std::ostream& operator<<(std::ostream& os, const Version& version) {
    os << version.to_string();
    return os;
}

inline std::ostream& operator<<(std::ostream& os, const Versions& versions) {
    for (auto& version : versions) {
        os << version;
    }
    return os;
}

// Hash functor for containers keyed by Version (e.g. hash_map<Version, Rowset*>).
struct HashOfVersion {
    // Hashes `first` with a zero seed, then feeds that result in as the seed for `second`.
    size_t operator()(const Version& version) const {
        const size_t initial_seed = 0;
        const size_t first_hash =
                HashUtil::hash64(&version.first, sizeof(version.first), initial_seed);
        const size_t combined_hash =
                HashUtil::hash64(&version.second, sizeof(version.second), first_hash);
        return combined_hash;
    }
};

// Represents a graph vertex: its own value plus the list of edge values attached to it.
struct Vertex {
    int64_t value {0};
    std::list<int64_t> edges {};

    // Creates a vertex holding `v` with no edges. Intentionally left implicit, as before.
    Vertex(int64_t v) : value {v} {}
};

// Forward declarations; the full definitions live elsewhere.
class Field;
class WrapperField;
// A key range expressed as a pair of raw (non-owning as far as this alias is concerned)
// WrapperField pointers. NOTE(review): presumably <start key, end key> — confirm at use sites.
using KeyRange = std::pair<WrapperField*, WrapperField*>;

// ReaderStatistics used to collect statistics when scanning data from storage.
// Every counter is a plain int64_t that starts at zero; nothing in this struct synchronizes
// access. Fields suffixed with `_ns` or `_timer` are presumably elapsed-time accumulators and
// fields prefixed with `rows_` row counts — the unit is not fixed by this header.
struct OlapReaderStatistics {
    int64_t io_ns = 0;
    int64_t compressed_bytes_read = 0;

    int64_t decompress_ns = 0;
    int64_t uncompressed_bytes_read = 0;

    // total read bytes in memory
    int64_t bytes_read = 0;

    int64_t block_fetch_ns = 0; // time of rowset reader's `next_batch()` call
    int64_t block_load_ns = 0;
    int64_t blocks_load = 0;
    // Not used any more, will be removed after non-vectorized code is removed
    int64_t block_seek_num = 0;
    // Not used any more, will be removed after non-vectorized code is removed
    int64_t block_seek_ns = 0;

    // Nesting of the timers below (each inner entry is a part of its parent):
    // block_load_ns
    //      block_init_ns
    //          block_init_seek_ns
    //          generate_row_ranges_ns
    //      predicate_column_read_ns
    //          predicate_column_read_seek_ns
    //      lazy_read_ns
    //          block_lazy_read_seek_ns
    int64_t block_init_ns = 0;
    int64_t block_init_seek_num = 0;
    int64_t block_init_seek_ns = 0;
    int64_t predicate_column_read_ns = 0;
    int64_t non_predicate_read_ns = 0;
    int64_t predicate_column_read_seek_num = 0;
    int64_t predicate_column_read_seek_ns = 0;
    int64_t lazy_read_ns = 0;
    int64_t block_lazy_read_seek_num = 0;
    int64_t block_lazy_read_seek_ns = 0;

    int64_t raw_rows_read = 0;

    int64_t rows_vec_cond_filtered = 0;
    int64_t rows_short_circuit_cond_filtered = 0;
    int64_t vec_cond_input_rows = 0;
    int64_t short_circuit_cond_input_rows = 0;
    int64_t rows_vec_del_cond_filtered = 0;
    int64_t vec_cond_ns = 0;
    int64_t short_cond_ns = 0;
    int64_t expr_filter_ns = 0;
    int64_t output_col_ns = 0;

    // Per-predicate filter counters, keyed by an int.
    // NOTE(review): the meaning of the key is not visible here — confirm against the writer.
    std::map<int, PredicateFilterInfo> filter_info;

    int64_t rows_key_range_filtered = 0;
    int64_t rows_stats_filtered = 0;
    int64_t rows_stats_rp_filtered = 0;
    int64_t rows_bf_filtered = 0;
    int64_t rows_dict_filtered = 0;
    // Including the number of rows filtered out according to the Delete information in the Tablet,
    // and the number of rows filtered for marked deleted rows under the unique key model.
    // This metric is mainly used to record the number of rows filtered by the delete condition in Segment V1,
    // and it is also used to record the replaced rows in the Unique key model in the "Reader" class.
    // In segmentv2, if you want to get all filtered rows, you need the sum of "rows_del_filtered" and "rows_conditions_filtered".
    int64_t rows_del_filtered = 0;
    int64_t rows_del_by_bitmap = 0;
    // the number of rows filtered by various column indexes.
    int64_t rows_conditions_filtered = 0;
    int64_t generate_row_ranges_ns = 0;
    int64_t generate_row_ranges_by_bf_ns = 0;
    int64_t generate_row_ranges_by_zonemap_ns = 0;
    int64_t generate_row_ranges_by_dict_ns = 0;

    int64_t index_load_ns = 0;

    int64_t total_pages_num = 0;
    int64_t cached_pages_num = 0;

    int64_t rows_bitmap_index_filtered = 0;
    int64_t bitmap_index_filter_timer = 0;

    int64_t rows_inverted_index_filtered = 0;
    int64_t inverted_index_filter_timer = 0;
    int64_t inverted_index_query_timer = 0;
    int64_t inverted_index_query_cache_hit = 0;
    int64_t inverted_index_query_cache_miss = 0;
    int64_t inverted_index_query_null_bitmap_timer = 0;
    int64_t inverted_index_query_bitmap_copy_timer = 0;
    int64_t inverted_index_searcher_open_timer = 0;
    int64_t inverted_index_searcher_search_timer = 0;
    int64_t inverted_index_searcher_cache_hit = 0;
    int64_t inverted_index_searcher_cache_miss = 0;
    int64_t inverted_index_downgrade_count = 0;

    int64_t output_index_result_column_timer = 0;
    // number of segments filtered by column stats when creating the segment iterator
    int64_t filtered_segment_number = 0;
    // total number of segments
    int64_t total_segment_number = 0;

    io::FileCacheStatistics file_cache_stats;
    int64_t load_segments_timer = 0;

    int64_t collect_iterator_merge_next_timer = 0;
    int64_t collect_iterator_normal_next_timer = 0;
    int64_t delete_bitmap_get_agg_ns = 0;
};

// Column id, an unsigned 32-bit integer.
using ColumnId = uint32_t;
// Set of column unique ids
using UniqueIdSet = std::set<uint32_t>;
// Map from column unique id to column id (both sides use the ColumnId integer type)
using UniqueIdToColumnIdMap = std::map<ColumnId, ColumnId>;
// RowsetId is defined further down; it is forward-declared here so that next_rowset_id()
// can be declared before the struct body, which calls it.
struct RowsetId;
// Declared only; the definition is not part of this header section.
RowsetId next_rowset_id();

// Rowset identifier.
// `hi` packs two things: its top 8 bits carry the rowset id version and its low 56 bits
// carry an incrementing number that starts from 1.
// `mi` and `lo` carry a further 128 bits (described by the original note as the backend uid,
// which is a uuid).
struct RowsetId {
    int8_t version = 0;
    int64_t hi = 0;
    int64_t mi = 0;
    int64_t lo = 0;

    // Initializes this id from its textual form.
    // A string of at least 48 characters is read as the new format: three 16-character hex
    // words (hi, mi, lo). Anything shorter is read as the old format: a decimal number that
    // becomes the incrementing part of a version-1 id.
    // If the old-format number cannot be parsed, the outcome depends on
    // config::force_regenerate_rowsetid_on_start_error: either a warning is logged and the
    // `hi` of next_rowset_id() is used, or a FATAL message is logged.
    void init(std::string_view rowset_id_str) {
        if (rowset_id_str.length() >= 48) [[likely]] {
            int64_t hi_word = 0;
            int64_t mi_word = 0;
            int64_t lo_word = 0;
            from_hex(&hi_word, rowset_id_str.substr(0, 16));
            from_hex(&mi_word, rowset_id_str.substr(16, 16));
            from_hex(&lo_word, rowset_id_str.substr(32, 16));
            // Split the packed high word back into <version, incrementing number>.
            init(hi_word >> 56, hi_word & LOW_56_BITS, mi_word, lo_word);
            return;
        }

        // Old format: the whole string is the decimal incrementing number.
        int64_t high;
        const char* const text_begin = rowset_id_str.data();
        const char* const text_end = text_begin + rowset_id_str.length();
        auto [_, ec] = std::from_chars(text_begin, text_end, high);
        if (ec != std::errc {}) [[unlikely]] {
            if (!config::force_regenerate_rowsetid_on_start_error) {
                LOG(FATAL) << "failed to init rowset id: " << rowset_id_str;
            } else {
                LOG(WARNING) << "failed to init rowset id: " << rowset_id_str;
                high = next_rowset_id().hi;
            }
        }
        init(1, high, 0, 0);
    }

    // Kept for compatibility with the old format: a bare number is a version-1 id.
    void init(int64_t rowset_id) { init(1, rowset_id, 0, 0); }

    // Sets all fields. `high` must be below MAX_ROWSET_ID, otherwise a FATAL message is logged.
    // Only the low 56 bits of `high` are stored; `id_version` is placed above them in `hi`.
    void init(int64_t id_version, int64_t high, int64_t middle, int64_t low) {
        if (UNLIKELY(high >= MAX_ROWSET_ID)) {
            LOG(FATAL) << "inc rowsetid is too large:" << high;
        }
        version = id_version;
        const int64_t version_bits = id_version << 56;
        const int64_t increment_bits = high & LOW_56_BITS;
        hi = version_bits + increment_bits;
        mi = middle;
        lo = low;
    }

    // Versions below 2 print only the decimal incrementing number; later versions print the
    // full 48-character hex form (hi, mi, lo, 16 characters each).
    std::string to_string() const {
        if (version >= 2) {
            char hex_text[48];
            to_hex(hi, hex_text);
            to_hex(mi, hex_text + 16);
            to_hex(lo, hex_text + 32);
            return {hex_text, 48};
        }
        return std::to_string(hi & LOW_56_BITS);
    }

    // std::unordered_map needs this. `version` is not compared separately.
    bool operator==(const RowsetId& rhs) const {
        return lo == rhs.lo && mi == rhs.mi && hi == rhs.hi;
    }

    bool operator!=(const RowsetId& rhs) const { return !(*this == rhs); }

    // Lexicographic order over (hi, mi, lo).
    bool operator<(const RowsetId& rhs) const {
        if (hi != rhs.hi) {
            return hi < rhs.hi;
        }
        if (mi != rhs.mi) {
            return mi < rhs.mi;
        }
        return lo < rhs.lo;
    }

    // Streams the to_string() form.
    friend std::ostream& operator<<(std::ostream& out, const RowsetId& rowset_id) {
        return out << rowset_id.to_string();
    }
};

// Unordered set of rowset ids; relies on a std::hash<RowsetId> specialization being available.
using RowsetIdUnorderedSet = std::unordered_set<RowsetId>;

// Extracts the rowset id from a file name; returns a default (uninitialized) rowset id when
// the file name is not recognized.
// Recognized formats:
//   {rowset_id}_{segment_num}.dat
//   {rowset_id}_{segment_num}_{index_id}.idx
// In both, the rowset id is the text in front of the first '_'.
inline RowsetId extract_rowset_id(std::string_view filename) {
    RowsetId parsed_id;
    const bool has_known_suffix = filename.ends_with(".dat") || filename.ends_with(".idx");
    if (!has_known_suffix) {
        return parsed_id;
    }
    const auto first_underscore = filename.find('_');
    if (first_underscore != std::string::npos) {
        parsed_id.init(filename.substr(0, first_underscore));
    }
    return parsed_id;
}

class DeleteBitmap;
// Merge-on-write context: bundles a max version, a transaction id, the rowset ids and rowset
// pointers involved, and the delete bitmap.
struct MowContext {
    int64_t max_version;
    int64_t txn_id;
    // Stored as a reference to the set passed to the constructor, not as a copy.
    // NOTE(review): the referenced set must therefore outlive this context — confirm callers.
    const RowsetIdUnorderedSet& rowset_ids;
    std::vector<RowsetSharedPtr> rowset_ptrs;
    std::shared_ptr<DeleteBitmap> delete_bitmap;

    // `rowset_ptrs` and `db` are taken by value and moved into the members; `ids` is only bound.
    MowContext(int64_t version, int64_t txnid, const RowsetIdUnorderedSet& ids,
               std::vector<RowsetSharedPtr> rowset_ptrs, std::shared_ptr<DeleteBitmap> db)
            : max_version(version),
              txn_id(txnid),
              rowset_ids(ids),
              rowset_ptrs(std::move(rowset_ptrs)),
              delete_bitmap(std::move(db)) {}
};

// Used for controlling compaction: a version number plus the time it was last raised.
struct VersionWithTime {
    std::atomic<int64_t> version;
    // Plain (non-atomic) timestamp taken from MonotonicMillis().
    // NOTE(review): written without synchronization after a successful CAS — confirm that
    // concurrent readers/writers of this field are acceptable.
    int64_t update_ts;

    VersionWithTime() : version(0), update_ts(MonotonicMillis()) {}

    // Raises `version` to `new_version` if and only if that is an increase, and refreshes
    // `update_ts` when this call performed the raise. Smaller or equal values are ignored.
    void update_version_monoto(int64_t new_version) {
        int64_t observed = version.load(std::memory_order_relaxed);
        while (observed < new_version) {
            const bool swapped = version.compare_exchange_strong(
                    observed, new_version, std::memory_order_relaxed, std::memory_order_relaxed);
            if (!swapped) {
                // A failed exchange reloads `observed`; re-check it against new_version.
                continue;
            }
            update_ts = MonotonicMillis();
            return;
        }
    }
};

} // namespace doris

// This is intended to be a "good" hash function. It may change from time to time.
// The three 64-bit words of the id (hi, mi, lo — in that order) are hashed one after another,
// each step using the previous result as its seed.
template <>
struct std::hash<doris::RowsetId> {
    size_t operator()(const doris::RowsetId& rowset_id) const {
        size_t running_hash = 0;
        for (const int64_t word : {rowset_id.hi, rowset_id.mi, rowset_id.lo}) {
            running_hash = doris::HashUtil::xxHash64WithSeed(reinterpret_cast<const char*>(&word),
                                                             sizeof(word), running_hash);
        }
        return running_hash;
    }
};

Coverage Report

Created: 2024-11-22 12:31

Line	Count	Source (jump to first uncovered line)
1		// Licensed to the Apache Software Foundation (ASF) under one
2		// or more contributor license agreements. See the NOTICE file
3		// distributed with this work for additional information
4		// regarding copyright ownership. The ASF licenses this file
5		// to you under the Apache License, Version 2.0 (the
6		// "License"); you may not use this file except in compliance
7		// with the License. You may obtain a copy of the License at
8		//
9		// http://www.apache.org/licenses/LICENSE-2.0
10		//
11		// Unless required by applicable law or agreed to in writing,
12		// software distributed under the License is distributed on an
13		// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14		// KIND, either express or implied. See the License for the
15		// specific language governing permissions and limitations
16		// under the License.
17
18		#pragma once
19
20		#include <gen_cpp/Types_types.h>
21		#include <netinet/in.h>
22
23		#include <atomic>
24		#include <charconv>
25		#include <cstdint>
26		#include <functional>
27		#include <list>
28		#include <map>
29		#include <memory>
30		#include <ostream>
31		#include <sstream>
32		#include <string>
33		#include <typeinfo>
34		#include <unordered_map>
35		#include <unordered_set>
36		#include <utility>
37
38		#include "common/config.h"
39		#include "io/io_common.h"
40		#include "olap/olap_define.h"
41		#include "olap/rowset/rowset_fwd.h"
42		#include "util/hash_util.hpp"
43		#include "util/time.h"
44		#include "util/uid_util.h"
45
46		namespace doris {
47
48		static constexpr int64_t MAX_ROWSET_ID = 1L << 56;
49		static constexpr int64_t LOW_56_BITS = 0x00ffffffffffffff;
50
51		using SchemaHash = int32_t;
52		using int128_t = __int128;
53		using uint128_t = unsigned __int128;
54
55		using TabletUid = UniqueId;
56
57		enum CompactionType { BASE_COMPACTION = 1, CUMULATIVE_COMPACTION = 2, FULL_COMPACTION = 3 };
58
59		enum DataDirType {
60		SPILL_DISK_DIR,
61		OLAP_DATA_DIR,
62		DATA_CACHE_DIR,
63		};
64
65		struct DataDirInfo {
66		std::string path;
67		size_t path_hash = 0;
68		int64_t disk_capacity = 1; // actual disk capacity
69		int64_t available = 0; // available space, in bytes unit
70		int64_t local_used_capacity = 0;
71		int64_t remote_used_capacity = 0;
72		int64_t trash_used_capacity = 0;
73		bool is_used = false; // whether available mark
74		TStorageMedium::type storage_medium = TStorageMedium::HDD; // Storage medium type: SSD|HDD
75		DataDirType data_dir_type = DataDirType::OLAP_DATA_DIR;
76		std::string bvar_name;
77		};
78		struct PredicateFilterInfo {
79		int type = 0;
80		uint64_t input_row = 0;
81		uint64_t filtered_row = 0;
82		};
83		// Sort DataDirInfo by available space.
84		struct DataDirInfoLessAvailability {
85	7	bool operator()(const DataDirInfo& left, const DataDirInfo& right) const {
86	7	return left.available < right.available;
87	7	}
88		};
89
90		struct TabletInfo {
91		TabletInfo(TTabletId in_tablet_id, UniqueId in_uid)
92		: tablet_id(in_tablet_id), tablet_uid(in_uid) {}
93
94		bool operator<(const TabletInfo& right) const {
95		if (tablet_id != right.tablet_id) {
96		return tablet_id < right.tablet_id;
97		} else {
98		return tablet_uid < right.tablet_uid;
99		}
100		}
101
102		std::string to_string() const {
103		std::stringstream ss;
104		ss << tablet_id << "." << tablet_uid.to_string();
105		return ss.str();
106		}
107
108		TTabletId tablet_id;
109		UniqueId tablet_uid;
110		};
111
112		struct TabletSize {
113		TabletSize(TTabletId in_tablet_id, size_t in_tablet_size)
114	0	: tablet_id(in_tablet_id), tablet_size(in_tablet_size) {}
115
116		TTabletId tablet_id;
117		size_t tablet_size;
118		};
119
120		// Define all data types supported by Field.
121		// If new filed_type is defined, not only new TypeInfo may need be defined,
122		// but also some functions like get_type_info in types.cpp need to be changed.
123		enum class FieldType {
124		OLAP_FIELD_TYPE_TINYINT = 1, // MYSQL_TYPE_TINY
125		OLAP_FIELD_TYPE_UNSIGNED_TINYINT = 2,
126		OLAP_FIELD_TYPE_SMALLINT = 3, // MYSQL_TYPE_SHORT
127		OLAP_FIELD_TYPE_UNSIGNED_SMALLINT = 4,
128		OLAP_FIELD_TYPE_INT = 5, // MYSQL_TYPE_LONG
129		OLAP_FIELD_TYPE_UNSIGNED_INT = 6,
130		OLAP_FIELD_TYPE_BIGINT = 7, // MYSQL_TYPE_LONGLONG
131		OLAP_FIELD_TYPE_UNSIGNED_BIGINT = 8,
132		OLAP_FIELD_TYPE_LARGEINT = 9,
133		OLAP_FIELD_TYPE_FLOAT = 10, // MYSQL_TYPE_FLOAT
134		OLAP_FIELD_TYPE_DOUBLE = 11, // MYSQL_TYPE_DOUBLE
135		OLAP_FIELD_TYPE_DISCRETE_DOUBLE = 12,
136		OLAP_FIELD_TYPE_CHAR = 13, // MYSQL_TYPE_STRING
137		OLAP_FIELD_TYPE_DATE = 14, // MySQL_TYPE_NEWDATE
138		OLAP_FIELD_TYPE_DATETIME = 15, // MySQL_TYPE_DATETIME
139		OLAP_FIELD_TYPE_DECIMAL = 16, // DECIMAL, using different store format against MySQL
140		OLAP_FIELD_TYPE_VARCHAR = 17,
141
142		OLAP_FIELD_TYPE_STRUCT = 18, // Struct
143		OLAP_FIELD_TYPE_ARRAY = 19, // ARRAY
144		OLAP_FIELD_TYPE_MAP = 20, // Map
145		OLAP_FIELD_TYPE_UNKNOWN = 21, // UNKNOW OLAP_FIELD_TYPE_STRING
146		OLAP_FIELD_TYPE_NONE = 22,
147		OLAP_FIELD_TYPE_HLL = 23,
148		OLAP_FIELD_TYPE_BOOL = 24,
149		OLAP_FIELD_TYPE_OBJECT = 25,
150		OLAP_FIELD_TYPE_STRING = 26,
151		OLAP_FIELD_TYPE_QUANTILE_STATE = 27,
152		OLAP_FIELD_TYPE_DATEV2 = 28,
153		OLAP_FIELD_TYPE_DATETIMEV2 = 29,
154		OLAP_FIELD_TYPE_TIMEV2 = 30,
155		OLAP_FIELD_TYPE_DECIMAL32 = 31,
156		OLAP_FIELD_TYPE_DECIMAL64 = 32,
157		OLAP_FIELD_TYPE_DECIMAL128I = 33,
158		OLAP_FIELD_TYPE_JSONB = 34,
159		OLAP_FIELD_TYPE_VARIANT = 35,
160		OLAP_FIELD_TYPE_AGG_STATE = 36,
161		OLAP_FIELD_TYPE_DECIMAL256 = 37,
162		OLAP_FIELD_TYPE_IPV4 = 38,
163		OLAP_FIELD_TYPE_IPV6 = 39,
164		};
165
166		// Define all aggregation methods supported by Field
167		// Note that in practice, not all types can use all the following aggregation methods
168		// For example, it is meaningless to use SUM for the string type (but it will not cause the program to crash)
169		// The implementation of the Field class does not perform such checks, and should be constrained when creating the table
170		enum class FieldAggregationMethod {
171		OLAP_FIELD_AGGREGATION_NONE = 0,
172		OLAP_FIELD_AGGREGATION_SUM = 1,
173		OLAP_FIELD_AGGREGATION_MIN = 2,
174		OLAP_FIELD_AGGREGATION_MAX = 3,
175		OLAP_FIELD_AGGREGATION_REPLACE = 4,
176		OLAP_FIELD_AGGREGATION_HLL_UNION = 5,
177		OLAP_FIELD_AGGREGATION_UNKNOWN = 6,
178		OLAP_FIELD_AGGREGATION_BITMAP_UNION = 7,
179		// Replace if and only if added value is not null
180		OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL = 8,
181		OLAP_FIELD_AGGREGATION_QUANTILE_UNION = 9,
182		OLAP_FIELD_AGGREGATION_GENERIC = 10
183		};
184
185		enum class PushType {
186		PUSH_NORMAL = 1, // for broker/hadoop load, not used any more
187		PUSH_FOR_DELETE = 2, // for delete
188		PUSH_FOR_LOAD_DELETE = 3, // not used any more
189		PUSH_NORMAL_V2 = 4, // for spark load
190		};
191
192		constexpr bool field_is_slice_type(const FieldType& field_type) {
193		return field_type == FieldType::OLAP_FIELD_TYPE_VARCHAR ||
194		field_type == FieldType::OLAP_FIELD_TYPE_CHAR ||
195		field_type == FieldType::OLAP_FIELD_TYPE_STRING;
196		}
197
198	0	constexpr bool field_is_numeric_type(const FieldType& field_type) {
199	0	return field_type == FieldType::OLAP_FIELD_TYPE_INT ||
200	0	field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT ||
201	0	field_type == FieldType::OLAP_FIELD_TYPE_BIGINT ||
202	0	field_type == FieldType::OLAP_FIELD_TYPE_SMALLINT ||
203	0	field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT ||
204	0	field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT ||
205	0	field_type == FieldType::OLAP_FIELD_TYPE_TINYINT ||
206	0	field_type == FieldType::OLAP_FIELD_TYPE_DOUBLE ||
207	0	field_type == FieldType::OLAP_FIELD_TYPE_FLOAT ||
208	0	field_type == FieldType::OLAP_FIELD_TYPE_DATE ||
209	0	field_type == FieldType::OLAP_FIELD_TYPE_DATEV2 ||
210	0	field_type == FieldType::OLAP_FIELD_TYPE_DATETIME ||
211	0	field_type == FieldType::OLAP_FIELD_TYPE_DATETIMEV2 ||
212	0	field_type == FieldType::OLAP_FIELD_TYPE_LARGEINT ||
213	0	field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL ||
214	0	field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL32 ||
215	0	field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL64 ||
216	0	field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL128I ||
217	0	field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL256 ||
218	0	field_type == FieldType::OLAP_FIELD_TYPE_BOOL ||
219	0	field_type == FieldType::OLAP_FIELD_TYPE_IPV4 ||
220	0	field_type == FieldType::OLAP_FIELD_TYPE_IPV6;
221	0	}
222
223		// <start_version_id, end_version_id>, such as <100, 110>
224		//using Version = std::pair<TupleVersion, TupleVersion>;
225
226		struct Version {
227		int64_t first;
228		int64_t second;
229
230	1.06M	Version(int64_t first_, int64_t second_) : first(first_), second(second_) {}
231	6.23k	Version() : first(0), second(0) {}
232
233		static Version mock() {
234		// Every time SchemaChange is used for external rowing, some temporary versions (such as 999, 1000, 1001) will be written, in order to avoid Cache conflicts, temporary
235		// The version number takes a BIG NUMBER plus the version number of the current SchemaChange
236		return Version(1 << 28, 1 << 29);
237		}
238
239		friend std::ostream& operator<<(std::ostream& os, const Version& version);
240
241		bool operator!=(const Version& rhs) const { return first != rhs.first || second != rhs.second; }
242
243	4.62k	bool operator==(const Version& rhs) const { return first == rhs.first && second == rhs.second; }
244
245		bool contains(const Version& other) const {
246		return first <= other.first && second >= other.second;
247		}
248
249	481	std::string to_string() const { return fmt::format("[{}-{}]", first, second); }
250		};
251
252		using Versions = std::vector<Version>;
253
254	9	inline std::ostream& operator<<(std::ostream& os, const Version& version) {
255	9	return os << version.to_string();
256	9	}
257
258		inline std::ostream& operator<<(std::ostream& os, const Versions& versions) {
259		for (auto& version : versions) {
260		os << version;
261		}
262		return os;
263		}
264
265		// used for hash-struct of hash_map<Version, Rowset*>.
266		struct HashOfVersion {
267	1.13k	size_t operator()(const Version& version) const {
268	1.13k	size_t seed = 0;
269	1.13k	seed = HashUtil::hash64(&version.first, sizeof(version.first), seed);
270	1.13k	seed = HashUtil::hash64(&version.second, sizeof(version.second), seed);
271	1.13k	return seed;
272	1.13k	}
273		};
274
275		// It is used to represent Graph vertex.
276		struct Vertex {
277		int64_t value = 0;
278		std::list<int64_t> edges;
279
280		Vertex(int64_t v) : value(v) {}
281		};
282
283		class Field;
284		class WrapperField;
285		using KeyRange = std::pair<WrapperField*, WrapperField*>;
286
287		// ReaderStatistics used to collect statistics when scan data from storage
288		struct OlapReaderStatistics {
289		int64_t io_ns = 0;
290		int64_t compressed_bytes_read = 0;
291
292		int64_t decompress_ns = 0;
293		int64_t uncompressed_bytes_read = 0;
294
295		// total read bytes in memory
296		int64_t bytes_read = 0;
297
298		int64_t block_fetch_ns = 0; // time of rowset reader's `next_batch()` call
299		int64_t block_load_ns = 0;
300		int64_t blocks_load = 0;
301		// Not used any more, will be removed after non-vectorized code is removed
302		int64_t block_seek_num = 0;
303		// Not used any more, will be removed after non-vectorized code is removed
304		int64_t block_seek_ns = 0;
305
306		// block_load_ns
307		// block_init_ns
308		// block_init_seek_ns
309		// generate_row_ranges_ns
310		// predicate_column_read_ns
311		// predicate_column_read_seek_ns
312		// lazy_read_ns
313		// block_lazy_read_seek_ns
314		int64_t block_init_ns = 0;
315		int64_t block_init_seek_num = 0;
316		int64_t block_init_seek_ns = 0;
317		int64_t predicate_column_read_ns = 0;
318		int64_t non_predicate_read_ns = 0;
319		int64_t predicate_column_read_seek_num = 0;
320		int64_t predicate_column_read_seek_ns = 0;
321		int64_t lazy_read_ns = 0;
322		int64_t block_lazy_read_seek_num = 0;
323		int64_t block_lazy_read_seek_ns = 0;
324
325		int64_t raw_rows_read = 0;
326
327		int64_t rows_vec_cond_filtered = 0;
328		int64_t rows_short_circuit_cond_filtered = 0;
329		int64_t vec_cond_input_rows = 0;
330		int64_t short_circuit_cond_input_rows = 0;
331		int64_t rows_vec_del_cond_filtered = 0;
332		int64_t vec_cond_ns = 0;
333		int64_t short_cond_ns = 0;
334		int64_t expr_filter_ns = 0;
335		int64_t output_col_ns = 0;
336
337		std::map<int, PredicateFilterInfo> filter_info;
338
339		int64_t rows_key_range_filtered = 0;
340		int64_t rows_stats_filtered = 0;
341		int64_t rows_stats_rp_filtered = 0;
342		int64_t rows_bf_filtered = 0;
343		int64_t rows_dict_filtered = 0;
344		// Including the number of rows filtered out according to the Delete information in the Tablet,
345		// and the number of rows filtered for marked deleted rows under the unique key model.
346		// This metric is mainly used to record the number of rows filtered by the delete condition in Segment V1,
347		// and it is also used to record the replaced rows in the Unique key model in the "Reader" class.
348		// In segmentv2, if you want to get all filtered rows, you need the sum of "rows_del_filtered" and "rows_conditions_filtered".
349		int64_t rows_del_filtered = 0;
350		int64_t rows_del_by_bitmap = 0;
351		// the number of rows filtered by various column indexes.
352		int64_t rows_conditions_filtered = 0;
353		int64_t generate_row_ranges_ns = 0;
354		int64_t generate_row_ranges_by_bf_ns = 0;
355		int64_t generate_row_ranges_by_zonemap_ns = 0;
356		int64_t generate_row_ranges_by_dict_ns = 0;
357
358		int64_t index_load_ns = 0;
359
360		int64_t total_pages_num = 0;
361		int64_t cached_pages_num = 0;
362
363		int64_t rows_bitmap_index_filtered = 0;
364		int64_t bitmap_index_filter_timer = 0;
365
366		int64_t rows_inverted_index_filtered = 0;
367		int64_t inverted_index_filter_timer = 0;
368		int64_t inverted_index_query_timer = 0;
369		int64_t inverted_index_query_cache_hit = 0;
370		int64_t inverted_index_query_cache_miss = 0;
371		int64_t inverted_index_query_null_bitmap_timer = 0;
372		int64_t inverted_index_query_bitmap_copy_timer = 0;
373		int64_t inverted_index_searcher_open_timer = 0;
374		int64_t inverted_index_searcher_search_timer = 0;
375		int64_t inverted_index_searcher_cache_hit = 0;
376		int64_t inverted_index_searcher_cache_miss = 0;
377		int64_t inverted_index_downgrade_count = 0;
378
379		int64_t output_index_result_column_timer = 0;
380		// number of segment filtered by column stat when creating seg iterator
381		int64_t filtered_segment_number = 0;
382		// total number of segment
383		int64_t total_segment_number = 0;
384
385		io::FileCacheStatistics file_cache_stats;
386		int64_t load_segments_timer = 0;
387
388		int64_t collect_iterator_merge_next_timer = 0;
389		int64_t collect_iterator_normal_next_timer = 0;
390		int64_t delete_bitmap_get_agg_ns = 0;
391		};
392
393		using ColumnId = uint32_t;
394		// Column unique id set
395		using UniqueIdSet = std::set<uint32_t>;
396		// Column unique Id -> column id map
397		using UniqueIdToColumnIdMap = std::map<ColumnId, ColumnId>;
398		struct RowsetId;
399		RowsetId next_rowset_id();
400
401		// 8 bit rowset id version
402		// 56 bit, inc number from 1
403		// 128 bit backend uid, it is a uuid bit, id version
404		struct RowsetId {
405		int8_t version = 0;
406		int64_t hi = 0;
407		int64_t mi = 0;
408		int64_t lo = 0;
409
410	896	void init(std::string_view rowset_id_str) {
411		// for new rowsetid its a 48 hex string
412		// if the len < 48, then it is an old format rowset id
413	896	if (rowset_id_str.length() < 48) [[unlikely]] {
414	65	int64_t high;
415	65	auto [_, ec] = std::from_chars(rowset_id_str.data(),
416	65	rowset_id_str.data() + rowset_id_str.length(), high);
417	65	if (ec != std::errc {}) [[unlikely]] {
418	0	if (config::force_regenerate_rowsetid_on_start_error) {
419	0	LOG(WARNING) << "failed to init rowset id: " << rowset_id_str;
420	0	high = next_rowset_id().hi;
421	0	} else {
422	0	LOG(FATAL) << "failed to init rowset id: " << rowset_id_str;
423	0	}
424	0	}
425	65	init(1, high, 0, 0);
426	831	} else {
427	831	int64_t high = 0;
428	831	int64_t middle = 0;
429	831	int64_t low = 0;
430	831	from_hex(&high, rowset_id_str.substr(0, 16));
431	831	from_hex(&middle, rowset_id_str.substr(16, 16));
432	831	from_hex(&low, rowset_id_str.substr(32, 16));
433	831	init(high >> 56, high & LOW_56_BITS, middle, low);
434	831	}
435	896	}
436
437		// to compatible with old version
438	1.01k	void init(int64_t rowset_id) { init(1, rowset_id, 0, 0); }
439
440	7.99k	void init(int64_t id_version, int64_t high, int64_t middle, int64_t low) {
441	7.99k	version = id_version;
442	7.99k	if (UNLIKELY(high >= MAX_ROWSET_ID)) {
443	0	LOG(FATAL) << "inc rowsetid is too large:" << high;
444	0	}
445	7.99k	hi = (id_version << 56) + (high & LOW_56_BITS);
446	7.99k	mi = middle;
447	7.99k	lo = low;
448	7.99k	}
449
450	29.4k	std::string to_string() const {
451	29.4k	if (version < 2) {
452	20.5k	return std::to_string(hi & LOW_56_BITS);
453	20.5k	} else {
454	8.88k	char buf[48];
455	8.88k	to_hex(hi, buf);
456	8.88k	to_hex(mi, buf + 16);
457	8.88k	to_hex(lo, buf + 32);
458	8.88k	return {buf, 48};
459	8.88k	}
460	29.4k	}
461
462		// std::unordered_map need this api
463	4.14k	bool operator==(const RowsetId& rhs) const {
464	4.14k	return hi == rhs.hi && mi == rhs.mi && lo == rhs.lo;
465	4.14k	}
466
467		bool operator!=(const RowsetId& rhs) const {
468		return hi != rhs.hi \|\| mi != rhs.mi \|\| lo != rhs.lo;
469		}
470
471	44.6M	bool operator<(const RowsetId& rhs) const {
472	44.6M	if (hi != rhs.hi) {
473	4.54M	return hi < rhs.hi;
474	40.1M	} else if (mi != rhs.mi) {
475	0	return mi < rhs.mi;
476	40.1M	} else {
477	40.1M	return lo < rhs.lo;
478	40.1M	}
479	44.6M	}
480
481	848	friend std::ostream& operator<<(std::ostream& out, const RowsetId& rowset_id) {
482	848	out << rowset_id.to_string();
483	848	return out;
484	848	}
485		};
486
487		using RowsetIdUnorderedSet = std::unordered_set<RowsetId>;
488
489		// Extract rowset id from filename, return uninitialized rowset id if filename is invalid
490	0	inline RowsetId extract_rowset_id(std::string_view filename) {
491	0	RowsetId rowset_id;
492	0	if (filename.ends_with(".dat")) {
493	0	// filename format: {rowset_id}_{segment_num}.dat
494	0	auto end = filename.find('_');
495	0	if (end == std::string::npos) {
496	0	return rowset_id;
497	0	}
498	0	rowset_id.init(filename.substr(0, end));
499	0	return rowset_id;
500	0	}
501	0	if (filename.ends_with(".idx")) {
502	0	// filename format: {rowset_id}_{segment_num}_{index_id}.idx
503	0	auto end = filename.find('_');
504	0	if (end == std::string::npos) {
505	0	return rowset_id;
506	0	}
507	0	rowset_id.init(filename.substr(0, end));
508	0	return rowset_id;
509	0	}
510	0	return rowset_id;
511	0	}
512
513		class DeleteBitmap;
514		// merge on write context
515		struct MowContext {
516		MowContext(int64_t version, int64_t txnid, const RowsetIdUnorderedSet& ids,
517		std::vector<RowsetSharedPtr> rowset_ptrs, std::shared_ptr<DeleteBitmap> db)
518		: max_version(version),
519		txn_id(txnid),
520		rowset_ids(ids),
521		rowset_ptrs(std::move(rowset_ptrs)),
522	8	delete_bitmap(std::move(db)) {}
523		int64_t max_version;
524		int64_t txn_id;
525		const RowsetIdUnorderedSet& rowset_ids;
526		std::vector<RowsetSharedPtr> rowset_ptrs;
527		std::shared_ptr<DeleteBitmap> delete_bitmap;
528		};
529
530		// used for controll compaction
531		struct VersionWithTime {
532		std::atomic<int64_t> version;
533		int64_t update_ts;
534
535		VersionWithTime() : version(0), update_ts(MonotonicMillis()) {}
536
537		void update_version_monoto(int64_t new_version) {
538		int64_t cur_version = version.load(std::memory_order_relaxed);
539		while (cur_version < new_version) {
540		if (version.compare_exchange_strong(cur_version, new_version, std::memory_order_relaxed,
541		std::memory_order_relaxed)) {
542		update_ts = MonotonicMillis();
543		break;
544		}
545		}
546		}
547		};
548
549		} // namespace doris
550
551		// This intended to be a "good" hash function. It may change from time to time.
552		template <>
553		struct std::hash<doris::RowsetId> {
554	1.47k	size_t operator()(const doris::RowsetId& rowset_id) const {
555	1.47k	size_t seed = 0;
556	1.47k	seed = doris::HashUtil::xxHash64WithSeed((const char*)&rowset_id.hi, sizeof(rowset_id.hi),
557	1.47k	seed);
558	1.47k	seed = doris::HashUtil::xxHash64WithSeed((const char*)&rowset_id.mi, sizeof(rowset_id.mi),
559	1.47k	seed);
560	1.47k	seed = doris::HashUtil::xxHash64WithSeed((const char*)&rowset_id.lo, sizeof(rowset_id.lo),
561	1.47k	seed);
562	1.47k	return seed;
563	1.47k	}
564		};