be/src/storage/rowset/beta_rowset_reader.cpp

Source
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#include "storage/rowset/beta_rowset_reader.h"

#include <stddef.h>

#include <algorithm>
#include <memory>
#include <ostream>
#include <roaring/roaring.hh>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>

#include "common/logging.h"
#include "common/status.h"
#include "core/block/block.h"
#include "io/io_common.h"
#include "runtime/descriptors.h"
#include "runtime/runtime_profile.h"
#include "storage/cache/schema_cache.h"
#include "storage/delete/delete_handler.h"
#include "storage/iterator/vgeneric_iterators.h"
#include "storage/olap_define.h"
#include "storage/predicate/block_column_predicate.h"
#include "storage/predicate/column_predicate.h"
#include "storage/row_cursor.h"
#include "storage/rowset/rowset_meta.h"
#include "storage/rowset/rowset_reader_context.h"
#include "storage/schema.h"
#include "storage/segment/lazy_init_segment_iterator.h"
#include "storage/segment/segment.h"
#include "storage/tablet/tablet_meta.h"
#include "storage/tablet/tablet_schema.h"

namespace doris {
#include "common/compile_check_begin.h"
using namespace ErrorCode;

BetaRowsetReader::BetaRowsetReader(BetaRowsetSharedPtr rowset)
        : _read_context(nullptr), _rowset(std::move(rowset)), _stats(&_owned_stats) {
    _rowset->acquire();
}

/// Drops the predicate and key-range state held in `_read_options`.
///
/// The key ranges, the flat column predicate list, the per-column predicate map and the
/// zone-map delete predicates are emptied, and the delete condition predicate is replaced
/// with a freshly created AndBlockColumnPredicate. Other fields of `_read_options` are
/// left untouched.
void BetaRowsetReader::reset_read_options() {
    _read_options.key_ranges.clear();
    _read_options.column_predicates.clear();
    _read_options.col_id_to_predicates.clear();
    _read_options.del_predicates_for_zone_map.clear();
    _read_options.delete_condition_predicates = AndBlockColumnPredicate::create_shared();
}

/// Returns a new BetaRowsetReader over the same rowset.
///
/// Only the rowset is handed to the new reader; no other member of this reader is copied.
RowsetReaderSharedPtr BetaRowsetReader::clone() {
    RowsetReaderSharedPtr cloned_reader(new BetaRowsetReader(_rowset));
    return cloned_reader;
}

/// Forwards `profile` to the underlying iterator's update_profile().
/// Does nothing when no iterator has been created yet.
void BetaRowsetReader::update_profile(RuntimeProfile* profile) {
    if (_iterator == nullptr) {
        return;
    }
    _iterator->update_profile(profile);
}

/// Converts `read_context` into `_read_options` and creates one LazyInitSegmentIterator
/// per segment in this reader's segment range.
///
/// @param read_context  Reader context; stored in `_read_context`. Its
///                      `condition_cache_digest` is modified in place (zeroed when delete
///                      conditions exist, otherwise folded with the key ranges).
/// @param out_iters     Receives the created iterators, in segment order. Iterators that
///                      report `empty()` are skipped.
/// @param use_cache     When true the segment cache is always used; otherwise it is used
///                      only for READER_QUERY and only if `enable_segment_cache` allows it.
/// @return Status::OK() on success, or the first error returned while loading the rowset
///         or initializing the rowid conversion segment map.
Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context,
                                               std::vector<RowwiseIteratorUPtr>* out_iters,
                                               bool use_cache) {
    _read_context = read_context;
    // The member variable '_stats' is initialized by '_stats(&_owned_stats)'.
    // The choice of statistics used depends on the workload of the rowset reader.
    // For instance, if it's for query, this function receives a read_context with
    // valid statistics, and we use those statistics.
    // However, for compaction or schema change workloads, the read_context passed in
    // has null statistics, and in such cases '_stats' is left unchanged (the reader's
    // own statistics unless an earlier call replaced the pointer).
    if (_read_context->stats != nullptr) {
        _stats = _read_context->stats;
    }
    SCOPED_RAW_TIMER(&_stats->rowset_reader_get_segment_iterators_timer_ns);

    RETURN_IF_ERROR(_rowset->load());

    // convert RowsetReaderContext to StorageReadOptions
    _read_options.block_row_max = _read_context->batch_size;
    _read_options.stats = _stats;
    _read_options.push_down_agg_type_opt = _read_context->push_down_agg_type_opt;
    _read_options.remaining_conjunct_roots = _read_context->remaining_conjunct_roots;
    _read_options.common_expr_ctxs_push_down = _read_context->common_expr_ctxs_push_down;
    _read_options.virtual_column_exprs = _read_context->virtual_column_exprs;

    _read_options.all_access_paths = _read_context->all_access_paths;
    _read_options.predicate_access_paths = _read_context->predicate_access_paths;

    _read_options.ann_topn_runtime = _read_context->ann_topn_runtime;
    _read_options.vir_cid_to_idx_in_block = _read_context->vir_cid_to_idx_in_block;
    _read_options.vir_col_idx_to_type = _read_context->vir_col_idx_to_type;
    _read_options.score_runtime = _read_context->score_runtime;
    _read_options.collection_statistics = _read_context->collection_statistics;
    _read_options.rowset_id = _rowset->rowset_id();
    _read_options.version = _rowset->version();
    _read_options.tablet_id = _rowset->rowset_meta()->tablet_id();
    _read_options.topn_limit = _topn_limit;
    if (_read_context->lower_bound_keys != nullptr) {
        // The four key-range vectors are indexed in lockstep; `at()` bounds-checks each one.
        const size_t num_key_ranges = _read_context->lower_bound_keys->size();
        for (size_t i = 0; i < num_key_ranges; ++i) {
            _read_options.key_ranges.emplace_back(&_read_context->lower_bound_keys->at(i),
                                                  _read_context->is_lower_keys_included->at(i),
                                                  &_read_context->upper_bound_keys->at(i),
                                                  _read_context->is_upper_keys_included->at(i));
        }
    }

    // delete_handler is always set, but it may not be initialized, in which case it returns
    // empty conditions or predicates.
    if (_read_context->delete_handler != nullptr) {
        _read_context->delete_handler->get_delete_conditions_after_version(
                _rowset->end_version(), _read_options.delete_condition_predicates.get(),
                &_read_options.del_predicates_for_zone_map);
    }

    // Columns to read: the return columns first, followed by any column that is only
    // referenced by a delete condition.
    const auto& return_columns = *(_read_context->return_columns);
    std::vector<uint32_t> read_columns(return_columns.begin(), return_columns.end());
    std::set<uint32_t> read_columns_set(return_columns.begin(), return_columns.end());
    std::set<uint32_t> delete_columns_set;
    _read_options.delete_condition_predicates->get_all_column_ids(delete_columns_set);
    for (auto cid : delete_columns_set) {
        if (read_columns_set.count(cid) == 0) {
            read_columns.push_back(cid);
        }
    }
    // disable condition cache if you have delete condition
    if (!delete_columns_set.empty()) {
        _read_context->condition_cache_digest = 0;
    }
    // create segment iterators
    VLOG_NOTICE << "read columns size: " << read_columns.size();
    _input_schema = std::make_shared<Schema>(_read_context->tablet_schema->columns(), read_columns);
    // output_schema only contains return_columns (excludes extra columns like delete-predicate columns).
    // It is used by merge/union iterators to determine how many columns to copy to the output block.
    _output_schema =
            std::make_shared<Schema>(_read_context->tablet_schema->columns(), return_columns);

    // Appends `preds` to the flat predicate list and registers each of them in the
    // per-column AND-predicate, creating that per-column entry on first use.
    auto push_down_predicates = [this](const auto& preds) {
        _read_options.column_predicates.insert(_read_options.column_predicates.end(),
                                               preds.begin(), preds.end());
        auto& per_column = _read_options.col_id_to_predicates;
        for (const auto& pred : preds) {
            const auto cid = pred->column_id();
            auto it = per_column.find(cid);
            if (it == per_column.end()) {
                it = per_column.emplace(cid, AndBlockColumnPredicate::create_shared()).first;
            }
            it->second->add_column_predicate(SingleColumnBlockPredicate::create_unique(pred));
        }
    };

    if (_read_context->predicates != nullptr) {
        push_down_predicates(*(_read_context->predicates));
    }

    // Take a delete-bitmap for each segment, the bitmap contains all deletes
    // until the max read version, which is read_context->version.second
    if (_read_context->delete_bitmap != nullptr) {
        SCOPED_RAW_TIMER(&_stats->delete_bitmap_get_agg_ns);
        RowsetId rowset_id = rowset()->rowset_id();
        for (uint32_t seg_id = 0; seg_id < rowset()->num_segments(); ++seg_id) {
            auto d = _read_context->delete_bitmap->get_agg(
                    {rowset_id, seg_id, _read_context->version.second});
            if (d->isEmpty()) {
                continue; // Empty delete bitmap for the segment
            }
            VLOG_TRACE << "Get the delete bitmap for rowset: " << rowset_id.to_string()
                       << ", segment id:" << seg_id << ", size:" << d->cardinality();
            _read_options.delete_bitmap.emplace(seg_id, std::move(d));
        }
    }

    // sequence mapping currently only support merge on read, so can not push down value predicates
    if (_should_push_down_value_predicates() && _read_context->value_predicates != nullptr &&
        !_read_context->tablet_schema->has_seq_map()) {
        push_down_predicates(*(_read_context->value_predicates));
    }

    _read_options.use_page_cache = _read_context->use_page_cache;
    _read_options.tablet_schema = _read_context->tablet_schema;
    _read_options.enable_unique_key_merge_on_write =
            _read_context->enable_unique_key_merge_on_write;
    _read_options.record_rowids = _read_context->record_rowids;
    _read_options.topn_filter_source_node_ids = _read_context->topn_filter_source_node_ids;
    _read_options.topn_filter_target_node_id = _read_context->topn_filter_target_node_id;
    _read_options.read_orderby_key_reverse = _read_context->read_orderby_key_reverse;
    _read_options.read_orderby_key_columns = _read_context->read_orderby_key_columns;
    _read_options.io_ctx.reader_type = _read_context->reader_type;
    _read_options.io_ctx.file_cache_stats = &_stats->file_cache_stats;
    _read_options.runtime_state = _read_context->runtime_state;
    _read_options.output_columns = _read_context->output_columns;
    // Default: every non-query reader is disposable. When a runtime state is available the
    // query option below overrides this value.
    _read_options.io_ctx.is_disposable = _read_context->reader_type != ReaderType::READER_QUERY;
    _read_options.target_cast_type_for_variants = _read_context->target_cast_type_for_variants;
    if (_read_context->runtime_state != nullptr) {
        _read_options.io_ctx.query_id = &_read_context->runtime_state->query_id();
        _read_options.io_ctx.read_file_cache =
                _read_context->runtime_state->query_options().enable_file_cache;
        _read_options.io_ctx.is_disposable =
                _read_context->runtime_state->query_options().disable_file_cache;
    }

    // A non-zero digest means the condition cache is in use: fold every key range into it
    // and publish the result to the read options. The context's digest is updated in place.
    if (_read_context->condition_cache_digest) {
        for (const auto& key_range : _read_options.key_ranges) {
            _read_context->condition_cache_digest =
                    key_range.get_digest(_read_context->condition_cache_digest);
        }
        _read_options.condition_cache_digest = _read_context->condition_cache_digest;
    }

    _read_options.io_ctx.expiration_time = _read_context->ttl_seconds;

    // The segment cache is enabled unless the query options explicitly set
    // `enable_segment_cache` to false.
    bool enable_segment_cache = true;
    auto* state = _read_context->runtime_state;
    if (state != nullptr && state->query_options().__isset.enable_segment_cache) {
        enable_segment_cache = state->query_options().enable_segment_cache;
    }
    // When reader type is for query, session variable `enable_segment_cache` should be respected.
    bool should_use_cache = use_cache || (_read_context->reader_type == ReaderType::READER_QUERY &&
                                          enable_segment_cache);

    auto segment_count = _rowset->num_segments();
    auto [seg_start, seg_end] = _segment_offsets;
    // If seg_start == seg_end, it means that the segments of a rowset is not
    // split scanned by multiple scanners, and the rowset reader is used to read the whole rowset.
    if (seg_start == seg_end) {
        seg_start = 0;
        seg_end = segment_count;
    }
    if (_read_context->record_rowids && _read_context->rowid_conversion) {
        // init segment rowid map for rowid conversion
        std::vector<uint32_t> segment_rows;
        RETURN_IF_ERROR(_rowset->get_segment_num_rows(&segment_rows, should_use_cache, _stats));
        RETURN_IF_ERROR(_read_context->rowid_conversion->init_segment_map(rowset()->rowset_id(),
                                                                          segment_rows));
    }

    for (int64_t i = seg_start; i < seg_end; i++) {
        SCOPED_RAW_TIMER(&_stats->rowset_reader_create_iterators_timer_ns);
        std::unique_ptr<RowwiseIterator> iter;

        /// For iterators, we don't need to initialize them all at once when creating them.
        /// Instead, we should initialize each iterator separately when really using them.
        /// This optimization minimizes the lifecycle of resources like column readers
        /// and prevents excessive memory consumption, especially for wide tables.
        if (_segment_row_ranges.empty()) {
            _read_options.row_ranges.clear();
            iter = std::make_unique<LazyInitSegmentIterator>(_rowset, i, should_use_cache,
                                                             _input_schema, _read_options);
        } else {
            // Row ranges are supplied per segment, indexed relative to seg_start. They go
            // into a per-segment copy of the options so `_read_options` stays unchanged.
            DCHECK_EQ(seg_end - seg_start, _segment_row_ranges.size());
            auto local_options = _read_options;
            local_options.row_ranges = _segment_row_ranges[i - seg_start];
            if (local_options.condition_cache_digest) {
                local_options.condition_cache_digest =
                        local_options.row_ranges.get_digest(local_options.condition_cache_digest);
            }
            iter = std::make_unique<LazyInitSegmentIterator>(_rowset, i, should_use_cache,
                                                             _input_schema, local_options);
        }

        if (iter->empty()) {
            continue;
        }
        out_iters->push_back(std::move(iter));
    }

    return Status::OK();
}

/// Binds this reader to `read_context` and records which part of the rowset to scan.
///
/// @param read_context  Reader context to use; its `rowset_id` is set to this rowset's id.
/// @param rs_splits     Supplies the segment offsets and per-segment row ranges, which are
///                      copied into the reader.
/// @return Always Status::OK().
Status BetaRowsetReader::init(RowsetReaderContext* read_context, const RowSetSplits& rs_splits) {
    _segment_row_ranges = rs_splits.segment_row_ranges;
    _segment_offsets = rs_splits.segment_offsets;

    read_context->rowset_id = _rowset->rowset_id();
    _read_context = read_context;
    return Status::OK();
}

/// Runs `_init_iterator()` through `_init_iter_once` and returns the status reported by
/// that call.
Status BetaRowsetReader::_init_iterator_once() {
    auto run_init = [this] { return _init_iterator(); };
    return _init_iter_once.call(std::move(run_init));
}

/// Builds the segment iterators for the current read context, combines them into a single
/// merge or union iterator stored in `_iterator`, and initializes it.
///
/// @return Status::OK() on success; the error from get_segment_iterators() if that fails;
///         or a ROWSET_READER_INIT error (with `_iterator` reset) if the combined iterator
///         fails to initialize.
Status BetaRowsetReader::_init_iterator() {
    std::vector<RowwiseIteratorUPtr> segment_iters;
    RETURN_IF_ERROR(get_segment_iterators(_read_context, &segment_iters));

    SCOPED_RAW_TIMER(&_stats->rowset_reader_init_iterators_timer_ns);

    // Fall back to the reader-owned counter when the context did not supply one.
    if (_read_context->merged_rows == nullptr) {
        _read_context->merged_rows = &_merged_rows;
    }

    if (!is_merge_iterator()) {
        // Union: segments are read one after another.
        if (_read_context->read_orderby_key_reverse) {
            // reverse iterators to read backward for ORDER BY key DESC
            std::reverse(segment_iters.begin(), segment_iters.end());
        }
        _iterator = new_union_iterator(std::move(segment_iters), _output_schema);
    } else {
        // Merge: find the position of the sequence column among the return columns
        // (-1 when there is no sequence column or it is not returned).
        auto seq_pos = -1;
        if (_read_context->sequence_id_idx != -1) {
            for (int pos = 0; pos < _read_context->return_columns->size(); pos++) {
                if (_read_context->return_columns->at(pos) == _read_context->sequence_id_idx) {
                    seq_pos = pos;
                    break;
                }
            }
        }
        _iterator = new_merge_iterator(std::move(segment_iters), seq_pos, _read_context->is_unique,
                                       _read_context->read_orderby_key_reverse,
                                       _read_context->merged_rows, _output_schema);
    }

    Status init_status = _iterator->init(_read_options);
    if (init_status.ok()) {
        return Status::OK();
    }
    LOG(WARNING) << "failed to init iterator: " << init_status.to_string();
    _iterator.reset();
    return Status::Error<ROWSET_READER_INIT>(init_status.to_string());
}

/// Tells whether value-column predicates may be pushed down for this rowset.
///
/// Only UNIQUE_KEYS rowsets qualify, and then only if either
///   - the rowset starts at version 0 or 2, its segments do not overlap and the read
///     context has no sequence column (sequence_id_idx == -1), or
///   - the read context has unique-key merge-on-write enabled.
bool BetaRowsetReader::_should_push_down_value_predicates() const {
    if (_rowset->keys_type() != UNIQUE_KEYS) {
        return false;
    }

    // if unique table with rowset [0-x] or [0-1] [2-y] [...],
    // value column predicates can be pushdown on rowset [0-x] or [2-y], [2-y]
    // must be compaction, not overlapping and don't have sequence column
    const auto first_version = _rowset->start_version();
    const bool starts_at_base = first_version == 0 || first_version == 2;
    if (starts_at_base && !_rowset->_rowset_meta->is_segments_overlapping() &&
        _read_context->sequence_id_idx == -1) {
        return true;
    }

    return _read_context->enable_unique_key_merge_on_write;
}
#include "common/compile_check_end.h"
} // namespace doris

Coverage Report

Created: 2026-03-14 06:50

Line	Count	Source
1		// Licensed to the Apache Software Foundation (ASF) under one
2		// or more contributor license agreements. See the NOTICE file
3		// distributed with this work for additional information
4		// regarding copyright ownership. The ASF licenses this file
5		// to you under the Apache License, Version 2.0 (the
6		// "License"); you may not use this file except in compliance
7		// with the License. You may obtain a copy of the License at
8		//
9		// http://www.apache.org/licenses/LICENSE-2.0
10		//
11		// Unless required by applicable law or agreed to in writing,
12		// software distributed under the License is distributed on an
13		// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14		// KIND, either express or implied. See the License for the
15		// specific language governing permissions and limitations
16		// under the License.
17
18		#include "storage/rowset/beta_rowset_reader.h"
19
20		#include <stddef.h>
21
22		#include <algorithm>
23		#include <memory>
24		#include <ostream>
25		#include <roaring/roaring.hh>
26		#include <set>
27		#include <string>
28		#include <unordered_map>
29		#include <utility>
30
31		#include "common/logging.h"
32		#include "common/status.h"
33		#include "core/block/block.h"
34		#include "io/io_common.h"
35		#include "runtime/descriptors.h"
36		#include "runtime/runtime_profile.h"
37		#include "storage/cache/schema_cache.h"
38		#include "storage/delete/delete_handler.h"
39		#include "storage/iterator/vgeneric_iterators.h"
40		#include "storage/olap_define.h"
41		#include "storage/predicate/block_column_predicate.h"
42		#include "storage/predicate/column_predicate.h"
43		#include "storage/row_cursor.h"
44		#include "storage/rowset/rowset_meta.h"
45		#include "storage/rowset/rowset_reader_context.h"
46		#include "storage/schema.h"
47		#include "storage/segment/lazy_init_segment_iterator.h"
48		#include "storage/segment/segment.h"
49		#include "storage/tablet/tablet_meta.h"
50		#include "storage/tablet/tablet_schema.h"
51
52		namespace doris {
53		#include "common/compile_check_begin.h"
54		using namespace ErrorCode;
55
56		BetaRowsetReader::BetaRowsetReader(BetaRowsetSharedPtr rowset)
57	884	: _read_context(nullptr), _rowset(std::move(rowset)), _stats(&_owned_stats) {
58	884	_rowset->acquire();
59	884	}
60
61	939	void BetaRowsetReader::reset_read_options() {
62	939	_read_options.delete_condition_predicates = AndBlockColumnPredicate::create_shared();
63	939	_read_options.column_predicates.clear();
64	939	_read_options.col_id_to_predicates.clear();
65	939	_read_options.del_predicates_for_zone_map.clear();
66	939	_read_options.key_ranges.clear();
67	939	}
68
69	0	RowsetReaderSharedPtr BetaRowsetReader::clone() {
70	0	return RowsetReaderSharedPtr(new BetaRowsetReader(_rowset));
71	0	}
72
73	0	void BetaRowsetReader::update_profile(RuntimeProfile* profile) {
74	0	if (_iterator != nullptr) {
75	0	_iterator->update_profile(profile);
76	0	}
77	0	}
78
79		Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context,
80		std::vector<RowwiseIteratorUPtr>* out_iters,
81	1.18k	bool use_cache) {
82	1.18k	_read_context = read_context;
83		// The segment iterator is created with its own statistics,
84		// and the member variable '_stats' is initialized by '_stats(&owned_stats)'.
85		// The choice of statistics used depends on the workload of the rowset reader.
86		// For instance, if it's for query, the get_segment_iterators function
87		// will receive one valid read_context with corresponding valid statistics,
88		// and we will use those statistics.
89		// However, for compaction or schema change workloads,
90		// the read_context passed to the function will have null statistics,
91		// and in such cases we will try to use the beta rowset reader's own statistics.
92	1.18k	if (_read_context->stats != nullptr) {
93	1.09k	_stats = _read_context->stats;
94	1.09k	}
95	1.18k	SCOPED_RAW_TIMER(&_stats->rowset_reader_get_segment_iterators_timer_ns);
96
97	1.18k	RETURN_IF_ERROR(_rowset->load());
98
99		// convert RowsetReaderContext to StorageReadOptions
100	1.18k	_read_options.block_row_max = read_context->batch_size;
101	1.18k	_read_options.stats = _stats;
102	1.18k	_read_options.push_down_agg_type_opt = _read_context->push_down_agg_type_opt;
103	1.18k	_read_options.remaining_conjunct_roots = _read_context->remaining_conjunct_roots;
104	1.18k	_read_options.common_expr_ctxs_push_down = _read_context->common_expr_ctxs_push_down;
105	1.18k	_read_options.virtual_column_exprs = _read_context->virtual_column_exprs;
106
107	1.18k	_read_options.all_access_paths = _read_context->all_access_paths;
108	1.18k	_read_options.predicate_access_paths = _read_context->predicate_access_paths;
109
110	1.18k	_read_options.ann_topn_runtime = _read_context->ann_topn_runtime;
111	1.18k	_read_options.vir_cid_to_idx_in_block = _read_context->vir_cid_to_idx_in_block;
112	1.18k	_read_options.vir_col_idx_to_type = _read_context->vir_col_idx_to_type;
113	1.18k	_read_options.score_runtime = _read_context->score_runtime;
114	1.18k	_read_options.collection_statistics = _read_context->collection_statistics;
115	1.18k	_read_options.rowset_id = _rowset->rowset_id();
116	1.18k	_read_options.version = _rowset->version();
117	1.18k	_read_options.tablet_id = _rowset->rowset_meta()->tablet_id();
118	1.18k	_read_options.topn_limit = _topn_limit;
119	1.18k	if (_read_context->lower_bound_keys != nullptr) {
120	1.08k	for (int i = 0; i < _read_context->lower_bound_keys->size(); ++i) {
121	0	_read_options.key_ranges.emplace_back(&_read_context->lower_bound_keys->at(i),
122	0	_read_context->is_lower_keys_included->at(i),
123	0	&_read_context->upper_bound_keys->at(i),
124	0	_read_context->is_upper_keys_included->at(i));
125	0	}
126	1.08k	}
127
128		// delete_hanlder is always set, but it maybe not init, so that it will return empty conditions
129		// or predicates when it is not inited.
130	1.18k	if (_read_context->delete_handler != nullptr) {
131	1.08k	_read_context->delete_handler->get_delete_conditions_after_version(
132	1.08k	_rowset->end_version(), _read_options.delete_condition_predicates.get(),
133	1.08k	&_read_options.del_predicates_for_zone_map);
134	1.08k	}
135
136	1.18k	std::vector<uint32_t> read_columns;
137	1.18k	std::set<uint32_t> read_columns_set;
138	1.18k	std::set<uint32_t> delete_columns_set;
139	4.89k	for (int i = 0; i < _read_context->return_columns->size(); ++i) {
140	3.70k	read_columns.push_back(_read_context->return_columns->at(i));
141	3.70k	read_columns_set.insert(_read_context->return_columns->at(i));
142	3.70k	}
143	1.18k	_read_options.delete_condition_predicates->get_all_column_ids(delete_columns_set);
144	1.18k	for (auto cid : delete_columns_set) {
145	406	if (read_columns_set.find(cid) == read_columns_set.end()) {
146	254	read_columns.push_back(cid);
147	254	}
148	406	}
149		// disable condition cache if you have delete condition
150	1.18k	_read_context->condition_cache_digest =
151	1.18k	delete_columns_set.empty() ? _read_context->condition_cache_digest : 0;
152		// create segment iterators
153	1.18k	VLOG_NOTICE << "read columns size: " << read_columns.size();
154	1.18k	_input_schema = std::make_shared<Schema>(_read_context->tablet_schema->columns(), read_columns);
155		// output_schema only contains return_columns (excludes extra columns like delete-predicate columns).
156		// It is used by merge/union iterators to determine how many columns to copy to the output block.
157	1.18k	_output_schema = std::make_shared<Schema>(_read_context->tablet_schema->columns(),
158	1.18k	*(_read_context->return_columns));
159	1.18k	if (_read_context->predicates != nullptr) {
160	1.08k	_read_options.column_predicates.insert(_read_options.column_predicates.end(),
161	1.08k	_read_context->predicates->begin(),
162	1.08k	_read_context->predicates->end());
163	1.08k	for (auto pred : *(_read_context->predicates)) {
164	0	if (_read_options.col_id_to_predicates.count(pred->column_id()) < 1) {
165	0	_read_options.col_id_to_predicates.insert(
166	0	{pred->column_id(), AndBlockColumnPredicate::create_shared()});
167	0	}
168	0	_read_options.col_id_to_predicates[pred->column_id()]->add_column_predicate(
169	0	SingleColumnBlockPredicate::create_unique(pred));
170	0	}
171	1.08k	}
172
173		// Take a delete-bitmap for each segment, the bitmap contains all deletes
174		// until the max read version, which is read_context->version.second
175	1.18k	if (_read_context->delete_bitmap != nullptr) {
176	5	{
177	5	SCOPED_RAW_TIMER(&_stats->delete_bitmap_get_agg_ns);
178	5	RowsetId rowset_id = rowset()->rowset_id();
179	39	for (uint32_t seg_id = 0; seg_id < rowset()->num_segments(); ++seg_id) {
180	34	auto d = _read_context->delete_bitmap->get_agg(
181	34	{rowset_id, seg_id, _read_context->version.second});
182	34	if (d->isEmpty()) {
183	11	continue; // Empty delete bitmap for the segment
184	11	}
185	23	VLOG_TRACE << "Get the delete bitmap for rowset: " << rowset_id.to_string()
186	0	<< ", segment id:" << seg_id << ", size:" << d->cardinality();
187	23	_read_options.delete_bitmap.emplace(seg_id, std::move(d));
188	23	}
189	5	}
190	5	}
191
192	1.18k	if (_should_push_down_value_predicates()) {
193		// sequence mapping currently only support merge on read, so can not push down value predicates
194	603	if (_read_context->value_predicates != nullptr &&
195	603	!read_context->tablet_schema->has_seq_map()) {
196	538	_read_options.column_predicates.insert(_read_options.column_predicates.end(),
197	538	_read_context->value_predicates->begin(),
198	538	_read_context->value_predicates->end());
199	538	for (auto pred : *(_read_context->value_predicates)) {
200	0	if (_read_options.col_id_to_predicates.count(pred->column_id()) < 1) {
201	0	_read_options.col_id_to_predicates.insert(
202	0	{pred->column_id(), AndBlockColumnPredicate::create_shared()});
203	0	}
204	0	_read_options.col_id_to_predicates[pred->column_id()]->add_column_predicate(
205	0	SingleColumnBlockPredicate::create_unique(pred));
206	0	}
207	538	}
208	603	}
209	1.18k	_read_options.use_page_cache = _read_context->use_page_cache;
210	1.18k	_read_options.tablet_schema = _read_context->tablet_schema;
211	1.18k	_read_options.enable_unique_key_merge_on_write =
212	1.18k	_read_context->enable_unique_key_merge_on_write;
213	1.18k	_read_options.record_rowids = _read_context->record_rowids;
214	1.18k	_read_options.topn_filter_source_node_ids = _read_context->topn_filter_source_node_ids;
215	1.18k	_read_options.topn_filter_target_node_id = _read_context->topn_filter_target_node_id;
216	1.18k	_read_options.read_orderby_key_reverse = _read_context->read_orderby_key_reverse;
217	1.18k	_read_options.read_orderby_key_columns = _read_context->read_orderby_key_columns;
218	1.18k	_read_options.io_ctx.reader_type = _read_context->reader_type;
219	1.18k	_read_options.io_ctx.file_cache_stats = &_stats->file_cache_stats;
220	1.18k	_read_options.runtime_state = _read_context->runtime_state;
221	1.18k	_read_options.output_columns = _read_context->output_columns;
222	1.18k	_read_options.io_ctx.reader_type = _read_context->reader_type;
223	1.18k	_read_options.io_ctx.is_disposable = _read_context->reader_type != ReaderType::READER_QUERY;
224	1.18k	_read_options.target_cast_type_for_variants = _read_context->target_cast_type_for_variants;
225	1.18k	if (_read_context->runtime_state != nullptr) {
226	0	_read_options.io_ctx.query_id = &_read_context->runtime_state->query_id();
227	0	_read_options.io_ctx.read_file_cache =
228	0	_read_context->runtime_state->query_options().enable_file_cache;
229	0	_read_options.io_ctx.is_disposable =
230	0	_read_context->runtime_state->query_options().disable_file_cache;
231	0	}
232
233	1.18k	if (_read_context->condition_cache_digest) {
234	0	for (const auto& key_range : _read_options.key_ranges) {
235	0	_read_context->condition_cache_digest =
236	0	key_range.get_digest(_read_context->condition_cache_digest);
237	0	}
238	0	_read_options.condition_cache_digest = _read_context->condition_cache_digest;
239	0	}
240
241	1.18k	_read_options.io_ctx.expiration_time = read_context->ttl_seconds;
242
243	1.18k	bool enable_segment_cache = true;
244	1.18k	auto* state = read_context->runtime_state;
245	1.18k	if (state != nullptr) {
246	0	enable_segment_cache = state->query_options().__isset.enable_segment_cache
247	0	? state->query_options().enable_segment_cache
248	0	: true;
249	0	}
250		// When reader type is for query, session variable `enable_segment_cache` should be respected.
251	1.18k	bool should_use_cache = use_cache || (_read_context->reader_type == ReaderType::READER_QUERY &&
252	1.18k	enable_segment_cache);
253
254	1.18k	auto segment_count = _rowset->num_segments();
255	1.18k	auto [seg_start, seg_end] = _segment_offsets;
256		// If seg_start == seg_end, it means that the segments of a rowset is not
257		// split scanned by multiple scanners, and the rowset reader is used to read the whole rowset.
258	1.18k	if (seg_start == seg_end) {
259	1.18k	seg_start = 0;
260	1.18k	seg_end = segment_count;
261	1.18k	}
262	1.18k	if (_read_context->record_rowids && _read_context->rowid_conversion) {
263		// init segment rowid map for rowid conversion
264	394	std::vector<uint32_t> segment_rows;
265	394	RETURN_IF_ERROR(_rowset->get_segment_num_rows(&segment_rows, should_use_cache, _stats));
266	394	RETURN_IF_ERROR(_read_context->rowid_conversion->init_segment_map(rowset()->rowset_id(),
267	394	segment_rows));
268	394	}
269
270	6.77k	for (int64_t i = seg_start; i < seg_end; i++) {
271	5.59k	SCOPED_RAW_TIMER(&_stats->rowset_reader_create_iterators_timer_ns);
272	5.59k	std::unique_ptr<RowwiseIterator> iter;
273
274		/// For iterators, we don't need to initialize them all at once when creating them.
275		/// Instead, we should initialize each iterator separately when really using them.
276		/// This optimization minimizes the lifecycle of resources like column readers
277		/// and prevents excessive memory consumption, especially for wide tables.
278	5.59k	if (_segment_row_ranges.empty()) {
279	5.59k	_read_options.row_ranges.clear();
280	5.59k	iter = std::make_unique<LazyInitSegmentIterator>(_rowset, i, should_use_cache,
281	5.59k	_input_schema, _read_options);
282	5.59k	} else {
283	0	DCHECK_EQ(seg_end - seg_start, _segment_row_ranges.size());
284	0	auto local_options = _read_options;
285	0	local_options.row_ranges = _segment_row_ranges[i - seg_start];
286	0	if (local_options.condition_cache_digest) {
287	0	local_options.condition_cache_digest =
288	0	local_options.row_ranges.get_digest(local_options.condition_cache_digest);
289	0	}
290	0	iter = std::make_unique<LazyInitSegmentIterator>(_rowset, i, should_use_cache,
291	0	_input_schema, local_options);
292	0	}
293
294	5.59k	if (iter->empty()) {
295	0	continue;
296	0	}
297	5.59k	out_iters->push_back(std::move(iter));
298	5.59k	}
299
300	1.18k	return Status::OK();
301	1.18k	}
302
303	247	Status BetaRowsetReader::init(RowsetReaderContext* read_context, const RowSetSplits& rs_splits) {
304	247	_read_context = read_context;
305	247	_read_context->rowset_id = _rowset->rowset_id();
306	247	_segment_offsets = rs_splits.segment_offsets;
307	247	_segment_row_ranges = rs_splits.segment_row_ranges;
308	247	return Status::OK();
309	247	}
310
311	6.74k	Status BetaRowsetReader::_init_iterator_once() {
312	6.74k	return _init_iter_once.call([this] { return _init_iterator(); });
313	6.74k	}
314
315	247	Status BetaRowsetReader::_init_iterator() {
316	247	std::vector<RowwiseIteratorUPtr> iterators;
317	247	RETURN_IF_ERROR(get_segment_iterators(_read_context, &iterators));
318
319	247	SCOPED_RAW_TIMER(&_stats->rowset_reader_init_iterators_timer_ns);
320
321	247	if (_read_context->merged_rows == nullptr) {
322	103	_read_context->merged_rows = &_merged_rows;
323	103	}
324		// merge or union segment iterator
325	247	if (is_merge_iterator()) {
326	8	auto sequence_loc = -1;
327	8	if (_read_context->sequence_id_idx != -1) {
328	0	for (int loc = 0; loc < _read_context->return_columns->size(); loc++) {
329	0	if (_read_context->return_columns->at(loc) == _read_context->sequence_id_idx) {
330	0	sequence_loc = loc;
331	0	break;
332	0	}
333	0	}
334	0	}
335	8	_iterator = new_merge_iterator(std::move(iterators), sequence_loc, _read_context->is_unique,
336	8	_read_context->read_orderby_key_reverse,
337	8	_read_context->merged_rows, _output_schema);
338	239	} else {
339	239	if (_read_context->read_orderby_key_reverse) {
340		// reverse iterators to read backward for ORDER BY key DESC
341	0	std::reverse(iterators.begin(), iterators.end());
342	0	}
343	239	_iterator = new_union_iterator(std::move(iterators), _output_schema);
344	239	}
345
346	247	auto s = _iterator->init(_read_options);
347	247	if (!s.ok()) {
348	0	LOG(WARNING) << "failed to init iterator: " << s.to_string();
349	0	_iterator.reset();
350	0	return Status::Error<ROWSET_READER_INIT>(s.to_string());
351	0	}
352	247	return Status::OK();
353	247	}
354
355	1.18k	bool BetaRowsetReader::_should_push_down_value_predicates() const {
356		// if unique table with rowset [0-x] or [0-1] [2-y] [...],
357		// value column predicates can be pushdown on rowset [0-x] or [2-y], [2-y]
358		// must be compaction, not overlapping and don't have sequence column
359	1.18k	return _rowset->keys_type() == UNIQUE_KEYS &&
360	1.18k	(((_rowset->start_version() == 0 || _rowset->start_version() == 2) &&
361	673	!_rowset->_rowset_meta->is_segments_overlapping() &&
362	673	_read_context->sequence_id_idx == -1) ||
363	673	_read_context->enable_unique_key_merge_on_write);
364	1.18k	}
365		#include "common/compile_check_end.h"
366		} // namespace doris