From a4fde629624478ce159101089c15ced16b36eb56 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 7 Jun 2026 00:26:53 +0200 Subject: [PATCH 1/4] Remove PHASE_MERGE_THROWS --- src/coreclr/jit/compiler.cpp | 4 - src/coreclr/jit/compiler.h | 2 - src/coreclr/jit/compmemkind.h | 1 - src/coreclr/jit/compphases.h | 1 - src/coreclr/jit/fgehopt.cpp | 241 ---------------------------------- src/coreclr/jit/fgopt.cpp | 31 ++++- 6 files changed, 25 insertions(+), 255 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index a786daa966f13b..f78d4d33e60457 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4518,10 +4518,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl return fgHeadTailMerge(true); }); - // Merge common throw blocks - // - DoPhase(this, PHASE_MERGE_THROWS, &Compiler::fgTailMergeThrows); - // Run an early flow graph simplification pass // DoPhase(this, PHASE_EARLY_UPDATE_FLOW_GRAPH, &Compiler::fgUpdateFlowGraphPhase); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index b25ce125243fa1..23f0851ef28e95 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5955,8 +5955,6 @@ class Compiler void fgUpdateACDsBeforeEHTableEntryRemoval(unsigned XTnum); - PhaseStatus fgTailMergeThrows(); - bool fgRetargetBranchesToCanonicalCallFinally(BasicBlock* block, BasicBlock* handler, BlockToBlockMap& continuationMap); diff --git a/src/coreclr/jit/compmemkind.h b/src/coreclr/jit/compmemkind.h index b6cc1a7cfc251b..8996a1fe31210d 100644 --- a/src/coreclr/jit/compmemkind.h +++ b/src/coreclr/jit/compmemkind.h @@ -62,7 +62,6 @@ CompMemKindMacro(SideEffects) CompMemKindMacro(ObjectAllocator) CompMemKindMacro(VariableLiveRanges) CompMemKindMacro(ClassLayout) -CompMemKindMacro(TailMergeThrows) CompMemKindMacro(EarlyProp) CompMemKindMacro(ZeroInit) CompMemKindMacro(Pgo) diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index d49a6cafbb063d..a600209aa390f9 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -65,7 +65,6 @@ CompPhaseNameMacro(PHASE_COMPUTE_BLOCK_WEIGHTS, "Compute block weights", CompPhaseNameMacro(PHASE_CREATE_FUNCLETS, "Create EH funclets", false, -1, false) CompPhaseNameMacro(PHASE_HEAD_TAIL_MERGE, "Head and tail merge", false, -1, false) CompPhaseNameMacro(PHASE_EARLY_QMARK_EXPANSION, "Early QMARK expansion", false, -1, false) -CompPhaseNameMacro(PHASE_MERGE_THROWS, "Merge throw blocks", false, -1, false) CompPhaseNameMacro(PHASE_INVERT_LOOPS, "Invert loops", false, -1, false) CompPhaseNameMacro(PHASE_HEAD_TAIL_MERGE2, "Post-morph head and tail merge", false, -1, false) CompPhaseNameMacro(PHASE_OPTIMIZE_FLOW, "Optimize control flow", false, -1, false) diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index 4a83c80dd02137..2897b888f659bc 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -2077,247 +2077,6 @@ bool Compiler::fgRetargetBranchesToCanonicalCallFinally(BasicBlock* block, return true; } -//------------------------------------------------------------------------ -// fgTailMergeThrows: Tail merge throw blocks and blocks with no return calls. -// -// Returns: -// PhaseStatus indicating what, if anything, was changed. -// -// Notes: -// Scans the flow graph for throw blocks and blocks with no return calls -// that can be merged, and opportunistically merges them. -// -// Does not handle throws yet as the analysis is more costly and less -// likely to pay off. So analysis is restricted to blocks with just one -// statement. -// -// For throw helper call merging, we are looking for examples like -// the below. Here BB17 and BB21 have identical trees that call noreturn -// methods, so we can modify BB16 to branch to BB21 and delete BB17. -// -// Also note as a quirk of how we model flow that both BB17 and BB21 -// have successor blocks. We don't turn these into true throw blocks -// until morph. -// -// BB16 [005..006) -> BB18 (cond), preds={} succs={BB17,BB18} -// -// * JTRUE void -// \--* NE int -// ... -// -// BB17 [005..006), preds={} succs={BB19} -// -// * CALL void ThrowHelper.ThrowArgumentOutOfRangeException -// \--* CNS_INT int 33 -// -// BB20 [005..006) -> BB22 (cond), preds={} succs={BB21,BB22} -// -// * JTRUE void -// \--* LE int -// ... -// -// BB21 [005..006), preds={} succs={BB22} -// -// * CALL void ThrowHelper.ThrowArgumentOutOfRangeException -// \--* CNS_INT int 33 -// -PhaseStatus Compiler::fgTailMergeThrows() -{ - noway_assert(opts.OptimizationEnabled()); - - JITDUMP("\n*************** In fgTailMergeThrows\n"); - - // Early out case for most methods. Throw helpers are rare. - if (optNoReturnCallCount < 2) - { - JITDUMP("Method does not have multiple noreturn calls.\n"); - return PhaseStatus::MODIFIED_NOTHING; - } - else - { - JITDUMP("Scanning the %u candidates\n", optNoReturnCallCount); - } - - // This transformation requires block pred lists to be built - // so that flow can be safely updated. - assert(fgPredsComputed); - - struct ThrowHelper - { - BasicBlock* m_block; - GenTreeCall* m_call; - - ThrowHelper() - : m_block(nullptr) - , m_call(nullptr) - { - } - - ThrowHelper(BasicBlock* block, GenTreeCall* call) - : m_block(block) - , m_call(call) - { - } - - static bool Equals(const ThrowHelper& x, const ThrowHelper& y) - { - return BasicBlock::sameEHRegion(x.m_block, y.m_block) && GenTreeCall::Equals(x.m_call, y.m_call); - } - - static unsigned GetHashCode(const ThrowHelper& x) - { - return static_cast(reinterpret_cast(x.m_call->gtCallMethHnd)); - } - }; - - typedef JitHashTable CallToBlockMap; - - CompAllocator allocator(getAllocator(CMK_TailMergeThrows)); - CallToBlockMap callMap(allocator); - BlockToBlockMap blockMap(allocator); - - // We run two passes here. - // - // The first pass finds candidate blocks. The first candidate for - // each unique kind of throw is chosen as the canonical example of - // that kind of throw. Subsequent matching candidates are mapped - // to that throw. - // - // The second pass modifies flow so that predecessors of - // non-canonical throw blocks now transfer control to the - // appropriate canonical block. - - // First pass - // - // Scan for THROW blocks. Note early on in compilation (before morph) - // noreturn blocks are not marked as BBJ_THROW. - // - // Walk blocks from last to first so that any branches we - // introduce to the canonical blocks end up lexically forward - // and there is less jumbled flow to sort out later. - for (BasicBlock* block = fgLastBB; block != nullptr; block = block->Prev()) - { - // Workaround: don't consider try entry blocks as candidates - // for merging; if the canonical throw is later in the same try, - // we'll create invalid flow. - if (bbIsTryBeg(block)) - { - continue; - } - - // We only look at the first statement for throw helper calls. - // Remainder of the block will be dead code. - // - // Throw helper calls could show up later in the block; we - // won't try merging those as we'd need to match up all the - // prior statements or split the block at this point, etc. - // - Statement* const stmt = block->firstStmt(); - - if (stmt == nullptr) - { - continue; - } - - // ...that is a call - GenTree* const tree = stmt->GetRootNode(); - - if (!tree->IsCall()) - { - continue; - } - - // ...that does not return - GenTreeCall* const call = tree->AsCall(); - - if (!call->IsNoReturn()) - { - continue; - } - - // Ok, we've found a suitable call. See if this is one we know - // about already, or something new. - BasicBlock* canonicalBlock = nullptr; - - JITDUMP("\n*** Does not return call\n"); - DISPTREE(call); - - // Have we found an equivalent call already? - ThrowHelper key(block, call); - if (callMap.Lookup(key, &canonicalBlock)) - { - // Yes, this one can be optimized away... - JITDUMP(" in " FMT_BB " can be dup'd to canonical " FMT_BB "\n", block->bbNum, canonicalBlock->bbNum); - blockMap.Set(block, canonicalBlock); - } - else - { - // No, add this as the canonical example - JITDUMP(" in " FMT_BB " is unique, marking it as canonical\n", block->bbNum); - callMap.Set(key, block); - } - } - - // Bail if no candidates were found - const unsigned numCandidates = blockMap.GetCount(); - if (numCandidates == 0) - { - JITDUMP("\n*************** no throws can be tail merged, sorry\n"); - return PhaseStatus::MODIFIED_NOTHING; - } - - JITDUMP("\n*** found %d merge candidates, rewriting flow\n\n", numCandidates); - bool modifiedProfile = false; - - // Second pass. - // - // We walk the map rather than the block list, to save a bit of time. - for (BlockToBlockMap::Node* const iter : BlockToBlockMap::KeyValueIteration(&blockMap)) - { - BasicBlock* const nonCanonicalBlock = iter->GetKey(); - BasicBlock* const canonicalBlock = iter->GetValue(); - weight_t removedWeight = BB_ZERO_WEIGHT; - - // Walk pred list of the non canonical block, updating flow to target - // the canonical block instead. - for (FlowEdge* const predEdge : nonCanonicalBlock->PredEdgesEditing()) - { - removedWeight += predEdge->getLikelyWeight(); - BasicBlock* const predBlock = predEdge->getSourceBlock(); - JITDUMP("*** " FMT_BB " now branching to " FMT_BB "\n", predBlock->bbNum, canonicalBlock->bbNum); - fgReplaceJumpTarget(predBlock, nonCanonicalBlock, canonicalBlock); - } - - if (canonicalBlock->hasProfileWeight()) - { - canonicalBlock->increaseBBProfileWeight(removedWeight); - modifiedProfile = true; - - // Don't bother updating flow into nonCanonicalBlock, since it is now unreachable - } - } - - // In practice, when we have true profile data, we can repair it locally above, since the no-return - // calls mean that there is no contribution from the throw blocks to any of their successors. - // However, these blocks won't be morphed into BBJ_THROW blocks until later, - // so mark profile data as inconsistent for now. - if (modifiedProfile) - { - JITDUMP( - "fgTailMergeThrows: Modified flow into no-return blocks that still have successors. Data %s inconsistent.\n", - fgPgoConsistent ? "is now" : "was already"); - fgPgoConsistent = false; - } - - // Update the count of noreturn call sites - // - JITDUMP("Made %u updates\n", numCandidates); - assert(numCandidates < optNoReturnCallCount); - optNoReturnCallCount -= numCandidates; - - return PhaseStatus::MODIFIED_EVERYTHING; -} - //------------------------------------------------------------------------ // fgCloneTryRegion: clone a try region // diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index ffa3d88cba33eb..d550c2de1b65d6 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5146,7 +5146,12 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) // Note we check this rather than countOfInEdges because we don't care // about dups, just the number of unique pred blocks. // - if (predInfo.Height() > mergeLimit) + // This cap only applies to common-successor tail merging. The terminal + // block (return/throw) merging below subsumes the old fgTailMergeThrows + // phase, which merged all throw blocks without a cap, so we do not limit + // it here (throw-heavy methods routinely exceed the cap). + // + if ((commSucc != nullptr) && (predInfo.Height() > mergeLimit)) { // Too many preds to consider return false; @@ -5492,13 +5497,27 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) } } - predInfo.Reset(); - for (BasicBlock* const block : retOrThrowBlocks.BottomUpOrder()) + // tailMergePreds merges at most one group of matching blocks per call, so + // keep rebuilding the candidate list and retrying until no more merges are + // found. This ensures distinct groups (e.g. multiple throw helpers, or a + // mix of return and throw blocks) all get merged. Blocks that were merged + // become BBJ_ALWAYS and are skipped on subsequent passes. + // + bool retryRetOrThrowMerge = true; + while (retryRetOrThrowMerge) { - predInfo.Push(PredInfo(block, block->lastStmt())); - } + predInfo.Reset(); + for (BasicBlock* const block : retOrThrowBlocks.BottomUpOrder()) + { + if (!block->KindIs(BBJ_RETURN, BBJ_THROW) || block->isEmpty()) + { + continue; + } + predInfo.Push(PredInfo(block, block->lastStmt())); + } - tailMergePreds(nullptr); + retryRetOrThrowMerge = tailMergePreds(nullptr); + } // Work through any retries // From 323a3af038a47f4d67f17f72d3837c3c21eea703 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 7 Jun 2026 01:11:12 +0200 Subject: [PATCH 2/4] JIT: cap return/throw tail merge at 2x mergeLimit Bounds the terminal-block merge work instead of leaving it unlimited. 2x (100) is the minimal multiple that avoids code-size regressions vs the fgTailMergeThrows baseline (SPMI asmdiffs over libraries.pmi/benchmarks/aspnet); 1x regresses +5,982 bytes on libraries.pmi. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/fgopt.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 3eae3705a2c61b..bcd5a59602f809 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5148,10 +5148,12 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) // // This cap only applies to common-successor tail merging. The terminal // block (return/throw) merging below subsumes the old fgTailMergeThrows - // phase, which merged all throw blocks without a cap, so we do not limit - // it here (throw-heavy methods routinely exceed the cap). + // phase and routinely sees throw-heavy methods that exceed the per-block + // cap, so it uses a larger limit. 2x was empirically the smallest + // multiple that avoids code-size regressions (measured via SPMI asmdiffs). // - if ((commSucc != nullptr) && (predInfo.Height() > mergeLimit)) + const int effectiveLimit = (commSucc != nullptr) ? mergeLimit : (2 * mergeLimit); + if (predInfo.Height() > effectiveLimit) { // Too many preds to consider return false; From 313c96ac87f8843e7717df27ffa86ead275ded3c Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 7 Jun 2026 01:22:31 +0200 Subject: [PATCH 3/4] Update fgopt.cpp --- src/coreclr/jit/fgopt.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index bcd5a59602f809..6b2f8ea0376356 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5149,10 +5149,10 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) // This cap only applies to common-successor tail merging. The terminal // block (return/throw) merging below subsumes the old fgTailMergeThrows // phase and routinely sees throw-heavy methods that exceed the per-block - // cap, so it uses a larger limit. 2x was empirically the smallest + // cap, so it uses a larger limit. 3x was empirically the smallest // multiple that avoids code-size regressions (measured via SPMI asmdiffs). // - const int effectiveLimit = (commSucc != nullptr) ? mergeLimit : (2 * mergeLimit); + const int effectiveLimit = (commSucc != nullptr) ? mergeLimit : (3 * mergeLimit); if (predInfo.Height() > effectiveLimit) { // Too many preds to consider From a4c574e9248512a894265da062eb4c932772f2b8 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 7 Jun 2026 01:34:17 +0200 Subject: [PATCH 4/4] Update fgopt.cpp --- src/coreclr/jit/fgopt.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 6b2f8ea0376356..a219ffd5e66733 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5149,10 +5149,10 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) // This cap only applies to common-successor tail merging. The terminal // block (return/throw) merging below subsumes the old fgTailMergeThrows // phase and routinely sees throw-heavy methods that exceed the per-block - // cap, so it uses a larger limit. 3x was empirically the smallest + // cap, so it uses a larger limit. 4x was empirically the smallest // multiple that avoids code-size regressions (measured via SPMI asmdiffs). // - const int effectiveLimit = (commSucc != nullptr) ? mergeLimit : (3 * mergeLimit); + const int effectiveLimit = (commSucc != nullptr) ? mergeLimit : (4 * mergeLimit); if (predInfo.Height() > effectiveLimit) { // Too many preds to consider