[opt](hash_join) Merging all build blocks at once will cause performance issue#37471
[opt](hash_join) Merging all build blocks at once will cause performance issue#37471mrhhsg merged 1 commit intoapache:masterfrom
Conversation
|
Thank you for your contribution to Apache Doris. Since 2024-03-18, the Document has been moved to doris-website. |
|
run buildall |
| @@ -100,30 +100,33 @@ size_t PartitionedHashJoinSinkLocalState::revocable_mem_size(RuntimeState* state | |||
| } | |||
|
|
|||
| Status PartitionedHashJoinSinkLocalState::_revoke_unpartitioned_block(RuntimeState* state) { | |||
There was a problem hiding this comment.
warning: function '_revoke_unpartitioned_block' has cognitive complexity of 52 (threshold 50) [readability-function-cognitive-complexity]
Status PartitionedHashJoinSinkLocalState::_revoke_unpartitioned_block(RuntimeState* state) {
^Additional context
be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:108: +1, including nesting penalty of 0, nesting level increased to 1
if (inner_sink_state_) {
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:113: +1, including nesting penalty of 0, nesting level increased to 1
if (build_block.rows() <= 1) {
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:119: +1, including nesting penalty of 0, nesting level increased to 1
if (build_block.columns() > num_slots) {
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:123: nesting level increased to 1
auto spill_func = [build_block = std::move(build_block), state, this]() mutable {
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:130: nesting level increased to 2
[](std::vector<uint32_t>& indices) { indices.reserve(reserved_size); });
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:132: nesting level increased to 2
auto flush_rows = [&state, this](std::unique_ptr<vectorized::MutableBlock>& partition_block,
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:137: +3, including nesting penalty of 2, nesting level increased to 3
if (!status.ok()) {
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:149: +2, including nesting penalty of 1, nesting level increased to 2
while (offset < total_rows) {
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:153: +3, including nesting penalty of 2, nesting level increased to 3
for (size_t i = 0; i != build_block.columns(); ++i) {
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:167: +3, including nesting penalty of 2, nesting level increased to 3
for (size_t i = 0; i != sub_block.rows(); ++i) {
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:171: +3, including nesting penalty of 2, nesting level increased to 3
for (uint32_t partition_idx = 0; partition_idx != p._partition_count; ++partition_idx) {
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:177: +4, including nesting penalty of 3, nesting level increased to 4
if (UNLIKELY(!partition_block)) {
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:185: +4, including nesting penalty of 3, nesting level increased to 4
if (!st.ok()) {
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:195: +4, including nesting penalty of 3, nesting level increased to 4
if (partition_block->rows() >= reserved_size || is_last_block) {
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:195: +1
if (partition_block->rows() >= reserved_size || is_last_block) {
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:196: +5, including nesting penalty of 4, nesting level increased to 5
if (!flush_rows(partition_block, spilling_stream)) {
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:208: nesting level increased to 1
auto exception_catch_func = [spill_func, this]() mutable {
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:209: nesting level increased to 2
auto status = [&]() {
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:210: +3, including nesting penalty of 2, nesting level increased to 3
RETURN_IF_CATCH_EXCEPTION(spill_func());
^be/src/common/exception.h:89: expanded from macro 'RETURN_IF_CATCH_EXCEPTION'
do { \
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:210: +4, including nesting penalty of 3, nesting level increased to 4
RETURN_IF_CATCH_EXCEPTION(spill_func());
^be/src/common/exception.h:94: expanded from macro 'RETURN_IF_CATCH_EXCEPTION'
} catch (const doris::Exception& e) { \
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:210: +5, including nesting penalty of 4, nesting level increased to 5
RETURN_IF_CATCH_EXCEPTION(spill_func());
^be/src/common/exception.h:95: expanded from macro 'RETURN_IF_CATCH_EXCEPTION'
if (e.code() == doris::ErrorCode::MEM_ALLOC_FAILED) { \
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:214: +2, including nesting penalty of 1, nesting level increased to 2
if (!status.ok()) {
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:228: +1, including nesting penalty of 0, nesting level increased to 1
DBUG_EXECUTE_IF(
^be/src/util/debug_points.h:36: expanded from macro 'DBUG_EXECUTE_IF'
if (UNLIKELY(config::enable_debug_points)) { \
^be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:228: +2, including nesting penalty of 1, nesting level increased to 2
DBUG_EXECUTE_IF(
^be/src/util/debug_points.h:38: expanded from macro 'DBUG_EXECUTE_IF'
if (dp) { \
^
TPC-H: Total hot run time: 40248 ms |
TPC-DS: Total hot run time: 174236 ms |
ClickBench: Total hot run time: 30.31 s |
|
run buildall |
TPC-H: Total hot run time: 39759 ms |
TPC-DS: Total hot run time: 173918 ms |
ClickBench: Total hot run time: 29.82 s |
|
run buildall |
TPC-H: Total hot run time: 39702 ms |
TPC-DS: Total hot run time: 173814 ms |
ClickBench: Total hot run time: 30.66 s |
|
run buildall |
TPC-H: Total hot run time: 40510 ms |
TPC-DS: Total hot run time: 174220 ms |
ClickBench: Total hot run time: 30.4 s |
|
PR approved by at least one committer and no changes requested. |
|
PR approved by anyone and no changes requested. |
…nce issue (apache#37471) ## Proposed changes For better performance, blocks should be merged as soon as possible in the sink operator, allowing parallel execution with the upstream pipeline.
…nce issue (#37471) ## Proposed changes For better performance, blocks should be merged as soon as possible in the sink operator, allowing parallel execution with the upstream pipeline.
Proposed changes
For better performance, blocks should be merged as soon as possible in the sink operator, allowing parallel execution with the upstream pipeline.