-
Notifications
You must be signed in to change notification settings - Fork 3.8k
[opt](partial update) Remove unnecessary lock and refactor some code for partial update #40062
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -462,6 +462,70 @@ void SegmentWriter::_serialize_block_to_row_column(vectorized::Block& block) { | |
| << watch.elapsed_time() / 1000; | ||
| } | ||
|
|
||
| Status SegmentWriter::probe_key_for_mow( | ||
|
bobhan1 marked this conversation as resolved.
|
||
| std::string key, std::size_t segment_pos, bool have_input_seq_column, bool have_delete_sign, | ||
| PartialUpdateReadPlan& read_plan, const std::vector<RowsetSharedPtr>& specified_rowsets, | ||
| std::vector<std::unique_ptr<SegmentCacheHandle>>& segment_caches, | ||
| bool& has_default_or_nullable, std::vector<bool>& use_default_or_null_flag, | ||
| PartialUpdateStats& stats) { | ||
| RowLocation loc; | ||
| // save rowset shared ptr so this rowset wouldn't delete | ||
| RowsetSharedPtr rowset; | ||
| auto st = _tablet->lookup_row_key(key, _tablet_schema.get(), have_input_seq_column, | ||
| specified_rowsets, &loc, _mow_context->max_version, | ||
| segment_caches, &rowset); | ||
| if (st.is<KEY_NOT_FOUND>()) { | ||
| if (_opts.rowset_ctx->partial_update_info->is_strict_mode) { | ||
| ++stats.num_rows_filtered; | ||
| // delete the invalid newly inserted row | ||
| _mow_context->delete_bitmap->add( | ||
| {_opts.rowset_ctx->rowset_id, _segment_id, DeleteBitmap::TEMP_VERSION_COMMON}, | ||
| segment_pos); | ||
| } else if (!have_delete_sign) { | ||
| RETURN_IF_ERROR( | ||
| _opts.rowset_ctx->partial_update_info->handle_non_strict_mode_not_found_error( | ||
| *_tablet_schema)); | ||
| } | ||
| ++stats.num_rows_new_added; | ||
| has_default_or_nullable = true; | ||
| use_default_or_null_flag.emplace_back(true); | ||
| return Status::OK(); | ||
| } | ||
| if (!st.ok() && !st.is<KEY_ALREADY_EXISTS>()) { | ||
| LOG(WARNING) << "failed to lookup row key, error: " << st; | ||
| return st; | ||
| } | ||
|
|
||
| // 1. if the delete sign is marked, it means that the value columns of the row will not | ||
| // be read. So we don't need to read the missing values from the previous rows. | ||
| // 2. the one exception is when there are sequence columns in the table, we need to read | ||
| // the sequence columns, otherwise it may cause the merge-on-read based compaction | ||
| // policy to produce incorrect results | ||
| if (have_delete_sign && !_tablet_schema->has_sequence_col()) { | ||
| has_default_or_nullable = true; | ||
| use_default_or_null_flag.emplace_back(true); | ||
| } else { | ||
| // partial update should not contain invisible columns | ||
| use_default_or_null_flag.emplace_back(false); | ||
| _rsid_to_rowset.emplace(rowset->rowset_id(), rowset); | ||
| read_plan.prepare_to_read(loc, segment_pos); | ||
| } | ||
|
|
||
| if (st.is<KEY_ALREADY_EXISTS>()) { | ||
| // although we need to mark delete current row, we still need to read missing columns | ||
| // for this row, we need to ensure that each column is aligned | ||
| _mow_context->delete_bitmap->add( | ||
| {_opts.rowset_ctx->rowset_id, _segment_id, DeleteBitmap::TEMP_VERSION_COMMON}, | ||
| segment_pos); | ||
| ++stats.num_rows_deleted; | ||
| } else { | ||
| _mow_context->delete_bitmap->add( | ||
| {loc.rowset_id, loc.segment_id, DeleteBitmap::TEMP_VERSION_COMMON}, loc.row_id); | ||
| ++stats.num_rows_updated; | ||
| } | ||
| return Status::OK(); | ||
| } | ||
|
|
||
| // for partial update, we should do following steps to fill content of block: | ||
| // 1. set block data to data convertor, and get all key_column's converted slice | ||
| // 2. get pk of input block, and read missing columns | ||
|
|
@@ -482,6 +546,8 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block* | |
| DCHECK(_is_mow()); | ||
|
|
||
| DCHECK(_opts.rowset_ctx->partial_update_info); | ||
| DCHECK(row_pos == 0); | ||
|
|
||
| // find missing column cids | ||
| const auto& missing_cids = _opts.rowset_ctx->partial_update_info->missing_cids; | ||
| const auto& including_cids = _opts.rowset_ctx->partial_update_info->update_cids; | ||
|
|
@@ -526,35 +592,13 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block* | |
| const auto* delete_sign_column_data = | ||
| BaseTablet::get_delete_sign_column_data(full_block, row_pos + num_rows); | ||
|
|
||
| std::vector<RowsetSharedPtr> specified_rowsets; | ||
| { | ||
| std::shared_lock rlock(_tablet->get_header_lock()); | ||
| specified_rowsets = _mow_context->rowset_ptrs; | ||
| if (specified_rowsets.size() != _mow_context->rowset_ids.size()) { | ||
| // Only when this is a strict mode partial update that missing rowsets here will lead to problems. | ||
| // In other case, the missing rowsets will be calculated in later phases(commit phase/publish phase) | ||
| LOG(WARNING) << fmt::format( | ||
| "[Memtable Flush] some rowsets have been deleted due to " | ||
| "compaction(specified_rowsets.size()={}, but rowset_ids.size()={}) in " | ||
| "partial update. tablet_id: {}, cur max_version: {}, transaction_id: {}", | ||
| specified_rowsets.size(), _mow_context->rowset_ids.size(), _tablet->tablet_id(), | ||
| _mow_context->max_version, _mow_context->txn_id); | ||
| if (_opts.rowset_ctx->partial_update_info->is_strict_mode) { | ||
| return Status::InternalError<false>( | ||
| "[Memtable Flush] some rowsets have been deleted due to " | ||
| "compaction in strict mode partial update"); | ||
| } | ||
| } | ||
| } | ||
| const std::vector<RowsetSharedPtr>& specified_rowsets = _mow_context->rowset_ptrs; | ||
| std::vector<std::unique_ptr<SegmentCacheHandle>> segment_caches(specified_rowsets.size()); | ||
|
|
||
| PartialUpdateReadPlan read_plan; | ||
|
|
||
| // locate rows in base data | ||
| int64_t num_rows_updated = 0; | ||
| int64_t num_rows_new_added = 0; | ||
| int64_t num_rows_deleted = 0; | ||
| int64_t num_rows_filtered = 0; | ||
| PartialUpdateStats stats; | ||
|
|
||
| for (size_t block_pos = row_pos; block_pos < row_pos + num_rows; block_pos++) { | ||
| // block segment | ||
|
|
@@ -581,76 +625,10 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block* | |
| bool have_delete_sign = | ||
| (delete_sign_column_data != nullptr && delete_sign_column_data[block_pos] != 0); | ||
|
|
||
| RowLocation loc; | ||
| // save rowset shared ptr so this rowset wouldn't delete | ||
| RowsetSharedPtr rowset; | ||
| auto st = _tablet->lookup_row_key(key, _tablet_schema.get(), have_input_seq_column, | ||
| specified_rowsets, &loc, _mow_context->max_version, | ||
| segment_caches, &rowset); | ||
| if (st.is<KEY_NOT_FOUND>()) { | ||
| if (_opts.rowset_ctx->partial_update_info->is_strict_mode) { | ||
| ++num_rows_filtered; | ||
| // delete the invalid newly inserted row | ||
| _mow_context->delete_bitmap->add({_opts.rowset_ctx->rowset_id, _segment_id, | ||
| DeleteBitmap::TEMP_VERSION_COMMON}, | ||
| segment_pos); | ||
|
|
||
| } else { | ||
| if (!_opts.rowset_ctx->partial_update_info->can_insert_new_rows_in_partial_update && | ||
| !have_delete_sign) { | ||
| std::string error_column; | ||
| for (auto cid : _opts.rowset_ctx->partial_update_info->missing_cids) { | ||
| const TabletColumn& col = _tablet_schema->column(cid); | ||
| if (!col.has_default_value() && !col.is_nullable() && | ||
| _tablet_schema->auto_increment_column() != col.name()) { | ||
| error_column = col.name(); | ||
| break; | ||
| } | ||
| } | ||
| return Status::Error<INVALID_SCHEMA, false>( | ||
| "the unmentioned column `{}` should have default value or be nullable " | ||
| "for " | ||
| "newly inserted rows in non-strict mode partial update", | ||
| error_column); | ||
| } | ||
| } | ||
| ++num_rows_new_added; | ||
| has_default_or_nullable = true; | ||
| use_default_or_null_flag.emplace_back(true); | ||
| continue; | ||
| } | ||
| if (!st.ok() && !st.is<KEY_ALREADY_EXISTS>()) { | ||
| LOG(WARNING) << "failed to lookup row key, error: " << st; | ||
| return st; | ||
| } | ||
|
|
||
| // 1. if the delete sign is marked, it means that the value columns of the row will not | ||
| // be read. So we don't need to read the missing values from the previous rows. | ||
| // 2. the one exception is when there is sequence column in the table, we need to read | ||
| // the sequence columns, otherwise it may cause the merge-on-read based compaction | ||
| // policy to produce incorrect results | ||
| if (have_delete_sign && !_tablet_schema->has_sequence_col()) { | ||
| has_default_or_nullable = true; | ||
| use_default_or_null_flag.emplace_back(true); | ||
| } else { | ||
| // partial update should not contain invisible columns | ||
| use_default_or_null_flag.emplace_back(false); | ||
| _rsid_to_rowset.emplace(rowset->rowset_id(), rowset); | ||
| read_plan.prepare_to_read(loc, segment_pos); | ||
| } | ||
|
|
||
| if (st.is<KEY_ALREADY_EXISTS>()) { | ||
| // although we need to mark delete current row, we still need to read missing columns | ||
| // for this row, we need to ensure that each column is aligned | ||
| _mow_context->delete_bitmap->add( | ||
| {_opts.rowset_ctx->rowset_id, _segment_id, DeleteBitmap::TEMP_VERSION_COMMON}, | ||
| segment_pos); | ||
| ++num_rows_deleted; | ||
| } else { | ||
| _mow_context->delete_bitmap->add( | ||
| {loc.rowset_id, loc.segment_id, DeleteBitmap::TEMP_VERSION_COMMON}, loc.row_id); | ||
| ++num_rows_updated; | ||
| } | ||
| RETURN_IF_ERROR(probe_key_for_mow(key, segment_pos, have_input_seq_column, have_delete_sign, | ||
| read_plan, specified_rowsets, segment_caches, | ||
| has_default_or_nullable, use_default_or_null_flag, | ||
| stats)); | ||
| } | ||
| CHECK_EQ(use_default_or_null_flag.size(), num_rows); | ||
|
|
||
|
|
@@ -684,29 +662,21 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block* | |
| converted_result.second->get_data(), | ||
| num_rows)); | ||
| } | ||
| _num_rows_updated += num_rows_updated; | ||
| _num_rows_deleted += num_rows_deleted; | ||
| _num_rows_new_added += num_rows_new_added; | ||
| _num_rows_filtered += num_rows_filtered; | ||
| _num_rows_updated += stats.num_rows_updated; | ||
| _num_rows_deleted += stats.num_rows_deleted; | ||
| _num_rows_new_added += stats.num_rows_new_added; | ||
| _num_rows_filtered += stats.num_rows_filtered; | ||
| if (_tablet_schema->has_sequence_col() && !have_input_seq_column) { | ||
| DCHECK_NE(seq_column, nullptr); | ||
| DCHECK_EQ(_num_rows_written, row_pos) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why were these DCHECKs removed?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @zhannngchen These DCHECKs are checked explicitly below |
||
| << "_num_rows_written: " << _num_rows_written << ", row_pos" << row_pos; | ||
| DCHECK_EQ(_primary_key_index_builder->num_rows(), _num_rows_written) | ||
| << "primary key index builder num rows(" << _primary_key_index_builder->num_rows() | ||
| << ") not equal to segment writer's num rows written(" << _num_rows_written << ")"; | ||
| if (_num_rows_written != row_pos || | ||
| _primary_key_index_builder->num_rows() != _num_rows_written) { | ||
| return Status::InternalError( | ||
| "Correctness check failed, _num_rows_written: {}, row_pos: {}, primary key " | ||
| "index builder num rows: {}", | ||
| _num_rows_written, row_pos, _primary_key_index_builder->num_rows()); | ||
| } | ||
| for (size_t block_pos = row_pos; block_pos < row_pos + num_rows; block_pos++) { | ||
| std::string key = _full_encode_keys(key_columns, block_pos - row_pos); | ||
| _encode_seq_column(seq_column, block_pos - row_pos, &key); | ||
| RETURN_IF_ERROR(_primary_key_index_builder->add_item(key)); | ||
| } | ||
| RETURN_IF_ERROR( | ||
| _generate_primary_key_index(_key_coders, key_columns, seq_column, num_rows, false)); | ||
| } | ||
|
|
||
| _num_rows_written += num_rows; | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.