From d4919972b6349db7f994e945aa3927da2b40c4be Mon Sep 17 00:00:00 2001 From: Wojtek Date: Tue, 2 Jun 2026 10:47:50 +0200 Subject: [PATCH] bugfixes --- python_tests/test_atomic.py | 52 ++++++++++ .../collections/full_text/FT_BaseIndex.cpp | 6 +- .../collections/full_text/FT_BaseIndex.hpp | 6 +- .../full_text/FT_IndexIterator.hpp | 96 ++++++++++++------- .../range_tree/FT_BoundIterator.hpp | 5 +- 5 files changed, 123 insertions(+), 42 deletions(-) diff --git a/python_tests/test_atomic.py b/python_tests/test_atomic.py index b3e5bba6..aa12477a 100644 --- a/python_tests/test_atomic.py +++ b/python_tests/test_atomic.py @@ -1113,7 +1113,9 @@ def test_leaked_find_iterator_advanced_inside_canceled_atomic_repositions_after_ run_pytest_child( "python_tests/test_atomic.py::test_leaked_find_iterator_advanced_inside_canceled_atomic_repositions_after_rollback_child", env_flag="DB0_ATOMIC_LEAKED_FIND_ITERATOR_CHILD", + timeout=30, failure_label="atomic leaked find iterator child", + pytest_args=("-o", "faulthandler_timeout=10"), ) @@ -1122,6 +1124,24 @@ def test_leaked_find_iterator_advanced_inside_canceled_atomic_repositions_after_ reason="executed by test_leaked_find_iterator_advanced_inside_canceled_atomic_repositions_after_rollback", ) def test_leaked_find_iterator_advanced_inside_canceled_atomic_repositions_after_rollback_child(db0_no_autocommit): + def advance(iterator, count): + values = [] + for _ in range(count): + try: + values.append(next(iterator).value) + except StopIteration: + break + return values + + def assert_no_rolled_back_values(values, iteration): + assert not any( + isinstance(value, tuple) + and len(value) >= 2 + and value[0] == "rolled-back" + and value[1] == iteration + for value in values + ) + first = MemoTestClass("first") db0.commit() @@ -1143,6 +1163,38 @@ def test_leaked_find_iterator_advanced_inside_canceled_atomic_repositions_after_ with pytest.raises(StopIteration): next(iterator) + rng = random.Random(0xDB0F17D) + leaked_iterators = [iterator] + seeds = [MemoTestClass(("seed", index)) for index in range(32)] + db0.commit() + + iteration_count = int(os.environ.get("DB0_ATOMIC_LEAKED_FIND_ITERATOR_ITERATIONS", "100")) + for iteration in range(iteration_count): + iterator = iter(db0.find(MemoTestClass)) + advance(iterator, rng.randrange(1, len(seeds) + 5)) + + with db0.atomic() as atomic: + for offset in range(rng.randrange(1, 8)): + MemoTestClass(("rolled-back", iteration, offset)) + + values = advance(iterator, rng.randrange(1, len(seeds) + 8)) + assert_no_rolled_back_values(values, iteration) + + if rng.random() < 0.5: + list(db0.find(MemoTestClass)) + + atomic.cancel() + + values = advance(iterator, rng.randrange(1, 16)) + assert_no_rolled_back_values(values, iteration) + + leaked_iterators.append(iterator) + if len(leaked_iterators) > 64: + del leaked_iterators[:32] + + if iteration % 25 == 0: + gc.collect() + @pytest.mark.skip(reason=ATOMIC_LEAKED_ITERATOR_REPRO_SKIP) def test_leaked_iterator_advanced_inside_canceled_atomic_repositions_after_rollback(run_pytest_child): diff --git a/src/dbzero/core/collections/full_text/FT_BaseIndex.cpp b/src/dbzero/core/collections/full_text/FT_BaseIndex.cpp index 85ded1ae..bb77830e 100644 --- a/src/dbzero/core/collections/full_text/FT_BaseIndex.cpp +++ b/src/dbzero/core/collections/full_text/FT_BaseIndex.cpp @@ -56,7 +56,8 @@ namespace db0 } return std::unique_ptr >( new FT_IndexIterator( - *inverted_list_ptr, direction, key, std::move(index_key_sequence)) + *inverted_list_ptr, direction, key, std::move(index_key_sequence), + [this, key]() { return this->tryGetExistingInvertedList(key); }) ); } @@ -79,7 +80,8 @@ namespace db0 // key inverted index factory.add(std::unique_ptr >( new FT_IndexIterator( - *inverted_list_ptr, -1, key, std::move(index_key_sequence))) + *inverted_list_ptr, -1, key, std::move(index_key_sequence), + [this, key]() { return this->tryGetExistingInvertedList(key); })) ); return true; } diff --git a/src/dbzero/core/collections/full_text/FT_BaseIndex.hpp b/src/dbzero/core/collections/full_text/FT_BaseIndex.hpp index 4dff7527..87eb6372 100644 --- a/src/dbzero/core/collections/full_text/FT_BaseIndex.hpp +++ b/src/dbzero/core/collections/full_text/FT_BaseIndex.hpp @@ -77,9 +77,11 @@ namespace db0 // build query tree for (const auto &inverted_list: inverted_lists) { // key inverted index + auto key = inverted_list.first; factory.add(std::unique_ptr >( - new FT_IndexIterator( - *inverted_list.second, -1, inverted_list.first, {})) + new FT_IndexIterator( + *inverted_list.second, -1, key, {}, + [this, key]() { return this->tryGetExistingInvertedList(key); })) ); } return result; diff --git a/src/dbzero/core/collections/full_text/FT_IndexIterator.hpp b/src/dbzero/core/collections/full_text/FT_IndexIterator.hpp index 9b2ef37b..73ce7f13 100644 --- a/src/dbzero/core/collections/full_text/FT_IndexIterator.hpp +++ b/src/dbzero/core/collections/full_text/FT_IndexIterator.hpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace db0 @@ -27,6 +28,8 @@ namespace db0 using self_t = FT_IndexIterator; using super_t = FT_Iterator; using iterator = typename bindex_t::joinable_const_iterator; + using data_ptr = std::shared_ptr; + using data_resolver = std::function; // data is the live b-index view used to build the native iterator. // detach/reattach rebuilds only the native iterator state from this view, @@ -38,7 +41,7 @@ namespace db0 // tag iterators it stores the root-to-leaf tag key path so deserialize // can traverse from the root TagIndex instead of persisting child index addresses. FT_IndexIterator(const bindex_t &data, int direction, std::optional index_key = {}, - std::vector &&index_key_sequence = {}); + std::vector &&index_key_sequence = {}, data_resolver resolver = {}); FT_IndexIterator(bindex_t &&data, int direction, std::optional index_key = {}, std::vector &&index_key_sequence = {}); @@ -47,7 +50,8 @@ namespace db0 * Construct over already initialized simple iterator */ FT_IndexIterator(const bindex_t &data, int direction, const iterator &it, - std::optional index_key = {}, std::vector &&index_key_sequence = {}); + std::optional index_key = {}, std::vector &&index_key_sequence = {}, + data_resolver resolver = {}); virtual ~FT_IndexIterator() = default; @@ -93,8 +97,15 @@ namespace db0 void getSignature(std::vector &) const override; protected: - std::unique_ptr m_owned_data; - const bindex_t *m_data = nullptr; + // Current b-index view used by m_iterator. Resolver-backed iterators clear it + // on detach so reattachment cannot accidentally reuse a stale MorphingBIndex + // view. Temporary in-memory indexes own the moved b-index through this pointer. + data_ptr m_data; + // Optional reattachment hook. Persistent tag iterators use it to re-resolve + // the current inverted-list b-index after rollback/morphing may have changed + // the underlying MorphingBIndex address/type. Temporary indexes resolve to + // their owned m_data; plain borrowed indexes leave it empty. + data_resolver m_data_resolver; const int m_direction; // underlying native iterator (joinable_const_iterator) mutable iterator m_iterator; @@ -108,11 +119,14 @@ namespace db0 const std::vector m_index_key_sequence; FT_IndexIterator(std::uint64_t uid, const bindex_t &data, int direction, - std::optional index_key = {}, std::vector &&index_key_sequence = {}); + std::optional index_key = {}, std::vector &&index_key_sequence = {}, + data_resolver resolver = {}); FT_IndexIterator(std::uint64_t uid, bindex_t &&data, int direction, std::optional index_key = {}, std::vector &&index_key_sequence = {}); + static data_ptr borrowData(const bindex_t &data); + /** * Get valid iterator after detach * @return @@ -132,10 +146,12 @@ namespace db0 template FT_IndexIterator::FT_IndexIterator(const bindex_t &data, int direction, - std::optional index_key, std::vector &&index_key_sequence) - : m_data(&data) + std::optional index_key, std::vector &&index_key_sequence, + data_resolver resolver) + : m_data(borrowData(data)) + , m_data_resolver(std::move(resolver)) , m_direction(direction) - , m_iterator(m_data->beginJoin(direction)) + , m_iterator(data.beginJoin(direction)) , m_index_key(index_key) , m_index_key_sequence(std::move(index_key_sequence)) { @@ -144,8 +160,8 @@ namespace db0 template FT_IndexIterator::FT_IndexIterator(bindex_t &&data, int direction, std::optional index_key, std::vector &&index_key_sequence) - : m_owned_data(std::make_unique(std::move(data))) - , m_data(m_owned_data.get()) + : m_data(std::make_shared(std::move(data))) + , m_data_resolver([data = m_data]() { return data; }) , m_direction(direction) , m_iterator(m_data->beginJoin(direction)) , m_index_key(index_key) @@ -155,8 +171,10 @@ namespace db0 template FT_IndexIterator::FT_IndexIterator(const bindex_t &data, int direction, const iterator &it, - std::optional index_key, std::vector &&index_key_sequence) - : m_data(&data) + std::optional index_key, std::vector &&index_key_sequence, + data_resolver resolver) + : m_data(borrowData(data)) + , m_data_resolver(std::move(resolver)) , m_direction(direction) , m_iterator(it) , m_index_key(index_key) @@ -166,11 +184,13 @@ namespace db0 template FT_IndexIterator::FT_IndexIterator(std::uint64_t uid, const bindex_t &data, int direction, - std::optional index_key, std::vector &&index_key_sequence) + std::optional index_key, std::vector &&index_key_sequence, + data_resolver resolver) : FT_Iterator(uid) - , m_data(&data) + , m_data(borrowData(data)) + , m_data_resolver(std::move(resolver)) , m_direction(direction) - , m_iterator(m_data->beginJoin(direction)) + , m_iterator(data.beginJoin(direction)) , m_index_key(index_key) , m_index_key_sequence(std::move(index_key_sequence)) { @@ -180,8 +200,8 @@ namespace db0 FT_IndexIterator::FT_IndexIterator(std::uint64_t uid, bindex_t &&data, int direction, std::optional index_key, std::vector &&index_key_sequence) : FT_Iterator(uid) - , m_owned_data(std::make_unique(std::move(data))) - , m_data(m_owned_data.get()) + , m_data(std::make_shared(std::move(data))) + , m_data_resolver([data = m_data]() { return data; }) , m_direction(direction) , m_iterator(m_data->beginJoin(direction)) , m_index_key(index_key) @@ -248,15 +268,9 @@ namespace db0 template std::unique_ptr > FT_IndexIterator::beginTyped(int direction) const { - if (m_owned_data) { - return std::unique_ptr >( - new FT_IndexIterator(this->m_uid, bindex_t(*m_data), direction, this->m_index_key, - std::vector(this->m_index_key_sequence)) - ); - } return std::unique_ptr >( new FT_IndexIterator(this->m_uid, *m_data, direction, this->m_index_key, - std::vector(this->m_index_key_sequence)) + std::vector(this->m_index_key_sequence), this->m_data_resolver) ); } @@ -298,6 +312,9 @@ namespace db0 this->m_detach_key = *(this->m_iterator); } this->m_iterator.reset(); + if (this->m_data_resolver) { + this->m_data.reset(); + } this->m_is_detached = true; } } @@ -306,19 +323,23 @@ namespace db0 typename FT_IndexIterator::iterator &FT_IndexIterator::getIterator() { if (m_is_detached) { - try { - m_force_end = false; - m_iterator = m_data->beginJoin(m_direction); - if (m_detach_key) { - if (!m_iterator.join(*m_detach_key, m_direction)) { - m_iterator.stop(); - m_force_end = true; - } - } else { + m_force_end = false; + if (m_data_resolver) { + m_data = m_data_resolver(); + } + if (!m_data) { + m_force_end = true; + m_is_detached = false; + return m_iterator; + } + m_iterator = m_data->beginJoin(m_direction); + if (m_detach_key) { + if (!m_iterator.join(*m_detach_key, m_direction)) { m_iterator.stop(); m_force_end = true; } - } catch (...) { + } else { + m_iterator.stop(); m_force_end = true; } m_is_detached = false; @@ -326,6 +347,13 @@ namespace db0 return m_iterator; } + template + typename FT_IndexIterator::data_ptr + FT_IndexIterator::borrowData(const bindex_t &data) + { + return data_ptr(&data, [](const bindex_t *) {}); + } + template const typename FT_IndexIterator::iterator &FT_IndexIterator::getIterator() const { diff --git a/src/dbzero/core/collections/range_tree/FT_BoundIterator.hpp b/src/dbzero/core/collections/range_tree/FT_BoundIterator.hpp index 6f459c47..7d843ba3 100644 --- a/src/dbzero/core/collections/range_tree/FT_BoundIterator.hpp +++ b/src/dbzero/core/collections/range_tree/FT_BoundIterator.hpp @@ -132,10 +132,7 @@ namespace db0 template std::unique_ptr > FT_BoundIterator::beginTyped(int direction) const { - if (super_t::m_owned_data) { - return std::unique_ptr >(new self_t(IndexT(*super_t::m_data), direction, m_key_range)); - } - return std::unique_ptr >(new self_t(*super_t::m_data, direction, m_key_range)); + return std::unique_ptr >(new self_t(IndexT(*super_t::m_data), direction, m_key_range)); } }