From eb30313bfb143942ce293f3629ca2e271bec835f Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 22 Jun 2022 20:34:54 -0700 Subject: [PATCH 1/8] JIT: strengthen checking of the loop table Add loop table checking to the post-phase list, conditional on whether the table is expected to be valid. Declare that the table is valid from the end of the find loops phase to the end of the optimization phases. Add checks that sibling loops are fully disjoint, no child shares top with its parent, and all top-entry loops have at most one non-loop backedge. Closes #71084. --- src/coreclr/jit/compiler.cpp | 4 +- src/coreclr/jit/compiler.h | 1 + src/coreclr/jit/fgdiagnostic.cpp | 66 +++++++++++++++++++++++++++++--- src/coreclr/jit/optimizer.cpp | 14 +++---- src/coreclr/jit/phase.cpp | 1 + 5 files changed, 72 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 460412d84597e2..a663d60d8ab065 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4983,7 +4983,9 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl #endif // Dominator and reachability sets are no longer valid. - fgDomsComputed = false; + // The loop table is no longer valid. + fgDomsComputed = false; + optLoopTableValid = false; // Insert GC Polls DoPhase(this, PHASE_INSERT_GC_POLLS, &Compiler::fgInsertGCPolls); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index b5bfdf926f2126..c3ad702d5c7a4e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6273,6 +6273,7 @@ class Compiler public: LoopDsc* optLoopTable; // loop descriptor table + bool optLoopTableValid; // info in loop table should be valid unsigned char optLoopCount; // number of tracked loops unsigned char loopAlignCandidates; // number of loops identified for alignment diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index abb60b810da03a..efa7a97bc6ae7d 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -3576,14 +3576,19 @@ void Compiler::fgDebugCheckNodesUniqueness() // - All parents of the loop with the block contain that block // - If the loop has a pre-header, it is valid // - The loop flags are valid +// - no loop shares `top` with any of its children +// - no loop shares `bottom` with the header of any of its siblings +// - no top-entry loop has an that comes from outside the loop other than from lpHead // void Compiler::fgDebugCheckLoopTable() { #ifdef DEBUG - if (verbose) + if (!optLoopTableValid) { - printf("*************** In fgDebugCheckLoopTable\n"); + JITDUMP("*************** In fgDebugCheckLoopTable: not checking, loop table no longer maintained\n"); + return; } + JITDUMP("*************** In fgDebugCheckLoopTable\n"); #endif // DEBUG if (optLoopCount > 0) @@ -3668,6 +3673,41 @@ void Compiler::fgDebugCheckLoopTable() { return lpDisjoint(blockNumMap, loop, lp2.lpTop, lp2.lpBottom); } + + // Like Disjoint, but also checks lpHead + // + static bool lpFullyDisjoint(const unsigned* blockNumMap, const LoopDsc* loop, const LoopDsc& lp2) + { + return lpDisjoint(blockNumMap, loop, lp2.lpTop, lp2.lpBottom) && + !lpContains(blockNumMap, loop, lp2.lpHead) && !lpContains(blockNumMap, &lp2, loop->lpHead); + } + + // If a top-entry loop, lpHead must be only non-loop pred of lpTop + static bool lpHasWellFormedBackedges(const unsigned* blockNumMap, const LoopDsc* loop) + { + if (loop->lpTop != loop->lpEntry) + { + // not top-entry, assume ok for now + return true; + } + + bool foundHead = false; + for (BasicBlock* const pred : loop->lpTop->PredBlocks()) + { + if (pred == loop->lpHead) + { + foundHead = true; + continue; + } + + if (!lpContains(blockNumMap, loop, pred)) + { + return false; + } + } + + return foundHead; + } }; // Check the loop table itself. @@ -3690,6 +3730,7 @@ void Compiler::fgDebugCheckLoopTable() assert(loop.lpBottom != nullptr); assert(MappedChecks::lpWellFormed(blockNumMap, &loop)); + assert(MappedChecks::lpHasWellFormedBackedges(blockNumMap, &loop)); if (loop.lpExitCnt == 1) { @@ -3705,7 +3746,7 @@ void Compiler::fgDebugCheckLoopTable() { // This is a top-level loop. - // Verify all top-level loops are disjoint. We don't have a list of just these (such as a + // Verify all top-level loops are fully disjoint. We don't have a list of just these (such as a // top-level pseudo-loop entry with a list of all top-level lists), so we have to iterate // over the entire loop table. for (unsigned j = 0; j < optLoopCount; j++) @@ -3725,7 +3766,7 @@ void Compiler::fgDebugCheckLoopTable() // Only consider top-level loops continue; } - assert(MappedChecks::lpDisjoint(blockNumMap, &loop, otherLoop)); + assert(MappedChecks::lpFullyDisjoint(blockNumMap, &loop, otherLoop)); } } else @@ -3758,7 +3799,7 @@ void Compiler::fgDebugCheckLoopTable() assert(childLoop.lpParent == i); } - // Verify all child loops are disjoint. + // Verify all child loops are fully disjoint. for (unsigned child = loop.lpChild; // child != BasicBlock::NOT_IN_LOOP; // child = optLoopTable[child].lpSibling) @@ -3777,8 +3818,21 @@ void Compiler::fgDebugCheckLoopTable() { continue; } - assert(MappedChecks::lpDisjoint(blockNumMap, &childLoop, child2Loop)); + assert(MappedChecks::lpFullyDisjoint(blockNumMap, &childLoop, child2Loop)); + } + } + + // Verify no child shares lpTop with its parent. + for (unsigned child = loop.lpChild; // + child != BasicBlock::NOT_IN_LOOP; // + child = optLoopTable[child].lpSibling) + { + const LoopDsc& childLoop = optLoopTable[child]; + if (childLoop.lpFlags & LPFLG_REMOVED) + { + continue; } + assert(loop.lpTop != childLoop.lpTop); } } diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 315b19caf7ddf8..6a129d0cedb161 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -24,9 +24,10 @@ void Compiler::optInit() loopAlignCandidates = 0; /* Initialize the # of tracked loops to 0 */ - optLoopCount = 0; - optLoopTable = nullptr; - optCurLoopEpoch = 0; + optLoopCount = 0; + optLoopTable = nullptr; + optLoopTableValid = false; + optCurLoopEpoch = 0; #ifdef DEBUG loopsAligned = 0; @@ -4557,7 +4558,6 @@ PhaseStatus Compiler::optUnrollLoops() #ifdef DEBUG fgDebugCheckBBlist(true); - fgDebugCheckLoopTable(); #endif // DEBUG return PhaseStatus::MODIFIED_EVERYTHING; @@ -5466,11 +5466,11 @@ void Compiler::optFindLoops() optIdentifyLoopsForAlignment(); // Check if any of the loops need alignment } + optLoopsMarked = true; + #ifdef DEBUG - fgDebugCheckLoopTable(); + optLoopTableValid = true; #endif - - optLoopsMarked = true; } //----------------------------------------------------------------------------- diff --git a/src/coreclr/jit/phase.cpp b/src/coreclr/jit/phase.cpp index 8d002dfe08c614..0ba2650452016f 100644 --- a/src/coreclr/jit/phase.cpp +++ b/src/coreclr/jit/phase.cpp @@ -232,6 +232,7 @@ void Phase::PostPhase(PhaseStatus status) comp->fgDebugCheckLinks(); comp->fgDebugCheckNodesUniqueness(); comp->fgVerifyHandlerTab(); + comp->fgDebugCheckLoopTable(); } } From 6cd576190725464784b49418b62d2379f37883c7 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 23 Jun 2022 13:52:30 -0700 Subject: [PATCH 2/8] opt more phases into post phase checks --- src/coreclr/jit/assertionprop.cpp | 26 +++++++++-------- src/coreclr/jit/compiler.cpp | 4 +++ src/coreclr/jit/compiler.h | 21 ++++++-------- src/coreclr/jit/compphases.h | 1 + src/coreclr/jit/earlyprop.cpp | 47 +++++++++++++++++-------------- src/coreclr/jit/fgdiagnostic.cpp | 4 +-- src/coreclr/jit/jiteh.cpp | 2 +- src/coreclr/jit/lclvars.cpp | 21 ++++++++++---- src/coreclr/jit/loopcloning.cpp | 9 ------ src/coreclr/jit/lower.cpp | 3 +- src/coreclr/jit/optimizer.cpp | 33 +++++++++++++--------- src/coreclr/jit/phase.cpp | 11 +++++++- src/coreclr/jit/ssabuilder.cpp | 10 ++----- src/coreclr/jit/valuenum.cpp | 12 ++++++-- 14 files changed, 116 insertions(+), 88 deletions(-) diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp index ccade20df0fbc7..86b6a434e50940 100644 --- a/src/coreclr/jit/assertionprop.cpp +++ b/src/coreclr/jit/assertionprop.cpp @@ -462,12 +462,13 @@ Compiler::fgWalkResult Compiler::optAddCopiesCallback(GenTree** pTree, fgWalkDat return WALK_CONTINUE; } -/***************************************************************************** - * - * Add new copies before Assertion Prop. - */ - -void Compiler::optAddCopies() +//------------------------------------------------------------------------------ +// optAddCopies: Add new copies before Assertion Prop. +// +// Returns: +// suitable phase satus +// +PhaseStatus Compiler::optAddCopies() { unsigned lclNum; LclVarDsc* varDsc; @@ -477,19 +478,16 @@ void Compiler::optAddCopies() { printf("\n*************** In optAddCopies()\n\n"); } - if (verboseTrees) - { - printf("Blocks/Trees at start of phase\n"); - fgDispBasicBlocks(true); - } #endif // Don't add any copies if we have reached the tracking limit. if (lvaHaveManyLocals()) { - return; + return PhaseStatus::MODIFIED_NOTHING; } + bool modified = false; + for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++) { var_types typ = varDsc->TypeGet(); @@ -893,6 +891,8 @@ void Compiler::optAddCopies() tree->gtFlags |= (copyAsgn->gtFlags & GTF_ALL_EFFECT); } + modified = true; + #ifdef DEBUG if (verbose) { @@ -902,6 +902,8 @@ void Compiler::optAddCopies() } #endif } + + return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } //------------------------------------------------------------------------------ diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index a663d60d8ab065..5daa7cb15ef698 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4840,6 +4840,10 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl if (opts.OptimizationEnabled()) { + // xxx + // + DoPhase(this, PHASE_OPTIMIZE_ADD_COPIES, &Compiler::optAddCopies); + // Optimize boolean conditions // DoPhase(this, PHASE_OPTIMIZE_BOOLS, &Compiler::optOptimizeBools); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index c3ad702d5c7a4e..2d7513c05eaf7d 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3245,7 +3245,7 @@ class Compiler void lvaSortByRefCount(); - void lvaMarkLocalVars(); // Local variable ref-counting + PhaseStatus lvaMarkLocalVars(); // Local variable ref-counting void lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers); void lvaMarkLocalVars(BasicBlock* block, bool isRecompute); @@ -4713,7 +4713,7 @@ class Compiler inline bool PreciseRefCountsRequired(); // Performs SSA conversion. - void fgSsaBuild(); + PhaseStatus fgSsaBuild(); // Reset any data structures to the state expected by "fgSsaBuild", so it can be run again. void fgResetForSsa(); @@ -4737,7 +4737,7 @@ class Compiler // Do value numbering (assign a value number to each // tree node). - void fgValueNumber(); + PhaseStatus fgValueNumber(); void fgValueNumberLocalStore(GenTree* storeNode, GenTreeLclVarCommon* lclDefNode, @@ -6018,7 +6018,7 @@ class Compiler void optPerformHoistExpr(GenTree* expr, BasicBlock* exprBb, unsigned lnum); public: - void optOptimizeBools(); + PhaseStatus optOptimizeBools(); public: PhaseStatus optInvertLoops(); // Invert loops so they're entered at top and tested at bottom. @@ -6299,7 +6299,7 @@ class Compiler BasicBlock* exit, unsigned char exitCnt); - void optClearLoopIterInfo(); + PhaseStatus optClearLoopIterInfo(); #ifdef DEBUG void optPrintLoopInfo(unsigned lnum, bool printVerbose = false); @@ -6910,9 +6910,9 @@ class Compiler GenTree* optPropGetValue(unsigned lclNum, unsigned ssaNum, optPropKind valueKind); GenTree* optEarlyPropRewriteTree(GenTree* tree, LocalNumberToNullCheckTreeMap* nullCheckMap); bool optDoEarlyPropForBlock(BasicBlock* block); - bool optDoEarlyPropForFunc(); - void optEarlyProp(); - void optFoldNullCheck(GenTree* tree, LocalNumberToNullCheckTreeMap* nullCheckMap); + bool optDoEarlyPropForFunc(); + PhaseStatus optEarlyProp(); + bool optFoldNullCheck(GenTree* tree, LocalNumberToNullCheckTreeMap* nullCheckMap); GenTree* optFindNullCheckToFold(GenTree* tree, LocalNumberToNullCheckTreeMap* nullCheckMap); bool optIsNullCheckFoldingLegal(GenTree* tree, GenTree* nullCheckTree, @@ -7311,7 +7311,7 @@ class Compiler static void optDumpAssertionIndices(const char* header, ASSERT_TP assertions, const char* footer = nullptr); static void optDumpAssertionIndices(ASSERT_TP assertions, const char* footer = nullptr); - void optAddCopies(); + PhaseStatus optAddCopies(); /************************************************************************** * Range checks @@ -7367,9 +7367,6 @@ class Compiler bool optReachWithoutCall(BasicBlock* srcBB, BasicBlock* dstBB); -protected: - bool optLoopsMarked; - /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index cb0d284ff56add..455ea7e2242a32 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -66,6 +66,7 @@ CompPhaseNameMacro(PHASE_UNROLL_LOOPS, "Unroll loops", CompPhaseNameMacro(PHASE_CLEAR_LOOP_INFO, "Clear loop info", "LP-CLEAR", false, -1, false) CompPhaseNameMacro(PHASE_HOIST_LOOP_CODE, "Hoist loop code", "LP-HOIST", false, -1, false) CompPhaseNameMacro(PHASE_MARK_LOCAL_VARS, "Mark local vars", "MARK-LCL", false, -1, false) +CompPhaseNameMacro(PHASE_OPTIMIZE_ADD_COPIES, "Opt add copies", "OPT-ADD-CP", false, -1, false) CompPhaseNameMacro(PHASE_OPTIMIZE_BOOLS, "Optimize bools", "OPT-BOOL", false, -1, false) CompPhaseNameMacro(PHASE_FIND_OPER_ORDER, "Find oper order", "OPER-ORD", false, -1, false) CompPhaseNameMacro(PHASE_SET_BLOCK_ORDER, "Set block order", "BLK-ORD", false, -1, true) diff --git a/src/coreclr/jit/earlyprop.cpp b/src/coreclr/jit/earlyprop.cpp index e9b9eabd6d6c50..20245b8d2d9af8 100644 --- a/src/coreclr/jit/earlyprop.cpp +++ b/src/coreclr/jit/earlyprop.cpp @@ -68,6 +68,9 @@ void Compiler::optCheckFlagsAreSet(unsigned methodFlag, //------------------------------------------------------------------------------------------ // optEarlyProp: The entry point of the early value propagation. // +// Returns: +// suitable phase status +// // Notes: // This phase performs an SSA-based value propagation, including // 1. Array length propagation. @@ -90,21 +93,18 @@ void Compiler::optCheckFlagsAreSet(unsigned methodFlag, // Null check folding tries to find GT_INDIR(obj + const) that GT_NULLCHECK(obj) can be folded into // and removed. Currently, the algorithm only matches GT_INDIR and GT_NULLCHECK in the same basic block. -void Compiler::optEarlyProp() +PhaseStatus Compiler::optEarlyProp() { -#ifdef DEBUG - if (verbose) - { - printf("*************** In optEarlyProp()\n"); - } -#else if (!optDoEarlyPropForFunc()) { - return; + // We perhaps should verify the OMF are set properly + // + JITDUMP("no arrays or null checks in the method\n"); + return PhaseStatus::MODIFIED_NOTHING; } -#endif assert(fgSsaPassesCompleted == 1); + unsigned numChanges = 0; for (BasicBlock* const block : Blocks()) { @@ -148,19 +148,15 @@ void Compiler::optEarlyProp() assert(optDoEarlyPropForFunc() && optDoEarlyPropForBlock(block)); gtSetStmtInfo(stmt); fgSetStmtSeq(stmt); + numChanges++; } stmt = next; } } -#ifdef DEBUG - if (verbose) - { - JITDUMP("\nAfter optEarlyProp:\n"); - fgDispBasicBlocks(/*dumpTrees*/ true); - } -#endif + JITDUMP("\nOptimized %u trees\n", numChanges); + return numChanges > 0 ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } //---------------------------------------------------------------- @@ -178,11 +174,12 @@ GenTree* Compiler::optEarlyPropRewriteTree(GenTree* tree, LocalNumberToNullCheck { GenTree* objectRefPtr = nullptr; optPropKind propKind = optPropKind::OPK_INVALID; + bool folded = false; if (tree->OperIsIndirOrArrLength()) { // optFoldNullCheck takes care of updating statement info if a null check is removed. - optFoldNullCheck(tree, nullCheckMap); + folded = optFoldNullCheck(tree, nullCheckMap); } else { @@ -196,12 +193,12 @@ GenTree* Compiler::optEarlyPropRewriteTree(GenTree* tree, LocalNumberToNullCheck } else { - return nullptr; + return folded ? tree : nullptr; } if (!objectRefPtr->OperIsScalarLocal() || !lvaInSsa(objectRefPtr->AsLclVarCommon()->GetLclNum())) { - return nullptr; + return folded ? tree : nullptr; } #ifdef DEBUG else @@ -415,13 +412,16 @@ GenTree* Compiler::optPropGetValueRec(unsigned lclNum, unsigned ssaNum, optPropK // tree - The input indirection tree. // nullCheckMap - Map of the local numbers to the latest NULLCHECKs on those locals in the current basic block // +// Returns: +// true if a null check was folded +// // Notes: // If a GT_NULLCHECK node is post-dominated by an indirection node on the same local and the trees between // the GT_NULLCHECK and the indirection don't have unsafe side effects, the GT_NULLCHECK can be removed. // The indir will cause a NullReferenceException if and only if GT_NULLCHECK will cause the same // NullReferenceException. -void Compiler::optFoldNullCheck(GenTree* tree, LocalNumberToNullCheckTreeMap* nullCheckMap) +bool Compiler::optFoldNullCheck(GenTree* tree, LocalNumberToNullCheckTreeMap* nullCheckMap) { #ifdef DEBUG if (tree->OperGet() == GT_NULLCHECK) @@ -432,13 +432,14 @@ void Compiler::optFoldNullCheck(GenTree* tree, LocalNumberToNullCheckTreeMap* nu #else if ((compCurBB->bbFlags & BBF_HAS_NULLCHECK) == 0) { - return; + return false; } #endif GenTree* nullCheckTree = optFindNullCheckToFold(tree, nullCheckMap); GenTree* nullCheckParent = nullptr; Statement* nullCheckStmt = nullptr; + bool folded = false; if ((nullCheckTree != nullptr) && optIsNullCheckFoldingLegal(tree, nullCheckTree, &nullCheckParent, &nullCheckStmt)) { #ifdef DEBUG @@ -470,6 +471,8 @@ void Compiler::optFoldNullCheck(GenTree* tree, LocalNumberToNullCheckTreeMap* nu Statement* curStmt = compCurStmt; fgMorphBlockStmt(compCurBB, nullCheckStmt DEBUGARG("optFoldNullCheck")); compCurStmt = curStmt; + + folded = true; } if ((tree->OperGet() == GT_NULLCHECK) && (tree->gtGetOp1()->OperGet() == GT_LCL_VAR)) @@ -477,6 +480,8 @@ void Compiler::optFoldNullCheck(GenTree* tree, LocalNumberToNullCheckTreeMap* nu nullCheckMap->Set(tree->gtGetOp1()->AsLclVarCommon()->GetLclNum(), tree, LocalNumberToNullCheckTreeMap::SetKind::Overwrite); } + + return folded; } //---------------------------------------------------------------- diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index efa7a97bc6ae7d..55869a29026aab 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -3578,14 +3578,14 @@ void Compiler::fgDebugCheckNodesUniqueness() // - The loop flags are valid // - no loop shares `top` with any of its children // - no loop shares `bottom` with the header of any of its siblings -// - no top-entry loop has an that comes from outside the loop other than from lpHead +// - no top-entry loop has a predecessor that comes from outside the loop other than from lpHead // void Compiler::fgDebugCheckLoopTable() { #ifdef DEBUG if (!optLoopTableValid) { - JITDUMP("*************** In fgDebugCheckLoopTable: not checking, loop table no longer maintained\n"); + JITDUMP("*************** In fgDebugCheckLoopTable: loop table not valid\n"); return; } JITDUMP("*************** In fgDebugCheckLoopTable\n"); diff --git a/src/coreclr/jit/jiteh.cpp b/src/coreclr/jit/jiteh.cpp index 7a5297ad61b26f..cccf9a72022e2a 100644 --- a/src/coreclr/jit/jiteh.cpp +++ b/src/coreclr/jit/jiteh.cpp @@ -1663,7 +1663,7 @@ void Compiler::fgRemoveEH() assert(!fgDomsComputed); assert(!fgFuncletsCreated); assert(fgFirstFuncletBB == nullptr); // this should follow from "!fgFuncletsCreated" - assert(!optLoopsMarked); + assert(!optLoopTableValid); unsigned XTnum; EHblkDsc* HBtab; diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index a1439c7e4bd5c3..e958bb95f6a4e7 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -4567,6 +4567,9 @@ void Compiler::lvaMarkLocalVars(BasicBlock* block, bool isRecompute) //------------------------------------------------------------------------ // lvaMarkLocalVars: enable normal ref counting, compute initial counts, sort locals table // +// Returns: +// suitable phase status +// // Notes: // Now behaves differently in minopts / debug. Instead of actually inspecting // the IR and counting references, the jit assumes all locals are referenced @@ -4575,9 +4578,8 @@ void Compiler::lvaMarkLocalVars(BasicBlock* block, bool isRecompute) // Also, when optimizing, lays the groundwork for assertion prop and more. // See details in lvaMarkLclRefs. -void Compiler::lvaMarkLocalVars() +PhaseStatus Compiler::lvaMarkLocalVars() { - JITDUMP("\n*************** In lvaMarkLocalVars()"); // If we have direct pinvokes, verify the frame list root local was set up properly @@ -4590,6 +4592,8 @@ void Compiler::lvaMarkLocalVars() } } + bool addedLocal = false; + #if !defined(FEATURE_EH_FUNCLETS) // Grab space for exception handling @@ -4616,6 +4620,8 @@ void Compiler::lvaMarkLocalVars() LclVarDsc* shadowSPslotsVar = lvaGetDesc(lvaShadowSPslotsVar); shadowSPslotsVar->lvType = TYP_BLK; shadowSPslotsVar->lvExactSize = (slotsNeeded * TARGET_POINTER_SIZE); + + addedLocal = true; } #endif // !FEATURE_EH_FUNCLETS @@ -4630,6 +4636,7 @@ void Compiler::lvaMarkLocalVars() LclVarDsc* lclPSPSym = lvaGetDesc(lvaPSPSym); lclPSPSym->lvType = TYP_I_IMPL; lvaSetVarDoNotEnregister(lvaPSPSym DEBUGARG(DoNotEnregisterReason::VMNeedsStackAddr)); + addedLocal = true; } #endif // FEATURE_EH_FUNCLETS @@ -4652,6 +4659,7 @@ void Compiler::lvaMarkLocalVars() lvaLocAllocSPvar = lvaGrabTempWithImplicitUse(false DEBUGARG("LocAllocSPvar")); LclVarDsc* locAllocSPvar = lvaGetDesc(lvaLocAllocSPvar); locAllocSPvar->lvType = TYP_I_IMPL; + addedLocal = true; } #endif // JIT32_GCENCODER } @@ -4671,7 +4679,9 @@ void Compiler::lvaMarkLocalVars() // If we don't need precise reference counts, e.g. we're not optimizing, we're done. if (!PreciseRefCountsRequired()) { - return; + // This phase may add new locals + // + return addedLocal ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } const bool reportParamTypeArg = lvaReportParamTypeArg(); @@ -4690,8 +4700,9 @@ void Compiler::lvaMarkLocalVars() assert(PreciseRefCountsRequired()); - // Note: optAddCopies() depends on lvaRefBlks, which is set in lvaMarkLocalVars(BasicBlock*), called above. - optAddCopies(); + // This phase may add new locals. + // + return addedLocal ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 05c96837bc4ef0..4f03d781439187 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -3003,14 +3003,6 @@ PhaseStatus Compiler::optCloneLoops() return PhaseStatus::MODIFIED_NOTHING; } -#ifdef DEBUG - if (verbose) - { - printf("\nBefore loop cloning:\n"); - fgDispBasicBlocks(/*dumpTrees*/ true); - } -#endif - LoopCloneContext context(optLoopCount, getAllocator(CMK_LoopClone)); // Obtain array optimization candidates in the context. @@ -3116,7 +3108,6 @@ PhaseStatus Compiler::optCloneLoops() fgDispBasicBlocks(/*dumpTrees*/ true); } - fgDebugCheckLoopTable(); #endif return PhaseStatus::MODIFIED_EVERYTHING; diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 0450beb2b26073..93c8f89adbe0d5 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -6407,8 +6407,7 @@ PhaseStatus Lowering::DoPhase() // local var liveness can delete code, which may create empty blocks if (comp->opts.OptimizationEnabled()) { - comp->optLoopsMarked = false; - bool modified = comp->fgUpdateFlowGraph(); + bool modified = comp->fgUpdateFlowGraph(); modified |= comp->fgRemoveDeadBlocks(); if (modified) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 6a129d0cedb161..e30a636fa832a0 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -19,7 +19,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void Compiler::optInit() { - optLoopsMarked = false; fgHasLoops = false; loopAlignCandidates = 0; @@ -393,7 +392,7 @@ void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk) void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop) { - if (!optLoopsMarked) + if (!optLoopTableValid) { return; } @@ -592,7 +591,7 @@ void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmar // but becomes invalid afterwards. Clear the info that might be used incorrectly afterwards // in JitDump or by subsequent phases. // -void Compiler::optClearLoopIterInfo() +PhaseStatus Compiler::optClearLoopIterInfo() { for (unsigned lnum = 0; lnum < optLoopCount; lnum++) { @@ -605,6 +604,8 @@ void Compiler::optClearLoopIterInfo() loop.lpConstInit = -1; loop.lpTestTree = nullptr; } + + return PhaseStatus::MODIFIED_NOTHING; } #ifdef DEBUG @@ -5323,7 +5324,8 @@ void Compiler::optResetLoopInfo() // TODO: the loop table is always allocated as the same (maximum) size, so this is wasteful. // We could zero it out (possibly only in DEBUG) to be paranoid, but there's no reason to // force it to be re-allocated. - optLoopTable = nullptr; + optLoopTable = nullptr; + optLoopTableValid = false; for (BasicBlock* const block : Blocks()) { @@ -5466,11 +5468,7 @@ void Compiler::optFindLoops() optIdentifyLoopsForAlignment(); // Check if any of the loops need alignment } - optLoopsMarked = true; - -#ifdef DEBUG optLoopTableValid = true; -#endif } //----------------------------------------------------------------------------- @@ -9701,6 +9699,9 @@ GenTree* OptBoolsDsc::optIsBoolComp(OptTestInfo* pOptTest) //----------------------------------------------------------------------------- // optOptimizeBools: Folds boolean conditionals for GT_JTRUE/GT_RETURN nodes // +// Returns: +// suitable phase status +// // Notes: // If the operand of GT_JTRUE/GT_RETURN node is GT_EQ/GT_NE of the form // "if (boolVal ==/!= 0/1)", the GT_EQ/GT_NE nodes are translated into a @@ -9754,7 +9755,7 @@ GenTree* OptBoolsDsc::optIsBoolComp(OptTestInfo* pOptTest) // - x == 1 || y == 1 ==> (x|y)!=0: Skip cases where either x or y is greater than 1, e.g., x=2, y=0 // - x == 0 || y == 0 ==> (x&y)==0: Skip cases where x and y have opposite bits set, e.g., x=2, y=1 // -void Compiler::optOptimizeBools() +PhaseStatus Compiler::optOptimizeBools() { #ifdef DEBUG if (verbose) @@ -9767,10 +9768,14 @@ void Compiler::optOptimizeBools() } } #endif - bool change; + bool change = false; + unsigned numCond = 0; + unsigned numReturn = 0; + unsigned numPasses = 0; do { + numPasses++; change = false; for (BasicBlock* const b1 : Blocks()) @@ -9812,6 +9817,7 @@ void Compiler::optOptimizeBools() if (optBoolsDsc.optOptimizeBoolsCondBlock()) { change = true; + numCond++; } } else if (b2->bbJumpKind == BBJ_RETURN) @@ -9836,6 +9842,7 @@ void Compiler::optOptimizeBools() if (optBoolsDsc.optOptimizeBoolsReturnBlock(b3)) { change = true; + numReturn++; } } else @@ -9847,9 +9854,9 @@ void Compiler::optOptimizeBools() } } while (change); -#ifdef DEBUG - fgDebugCheckBBlist(); -#endif + JITDUMP("\noptimized %u BBJ_COND cases, %u BBJ_RETURN cases in %u passes\n", numCond, numReturn, numPasses); + + return ((numCond + numReturn) == 0) ? PhaseStatus::MODIFIED_NOTHING : PhaseStatus::MODIFIED_EVERYTHING; } typedef JitHashTable, unsigned> LclVarRefCounts; diff --git a/src/coreclr/jit/phase.cpp b/src/coreclr/jit/phase.cpp index 0ba2650452016f..81f6fe6ac52043 100644 --- a/src/coreclr/jit/phase.cpp +++ b/src/coreclr/jit/phase.cpp @@ -183,10 +183,19 @@ void Phase::PostPhase(PhaseStatus status) PHASE_FWD_SUB, PHASE_MORPH_GLOBAL, PHASE_INVERT_LOOPS, - PHASE_OPTIMIZE_LAYOUT, + PHASE_OPTIMIZE_FLOW, PHASE_FIND_LOOPS, + PHASE_CLONE_LOOPS, + PHASE_UNROLL_LOOPS, + PHASE_CLEAR_LOOP_INFO, + PHASE_MARK_LOCAL_VARS, + PHASE_OPTIMIZE_ADD_COPIES, + PHASE_OPTIMIZE_BOOLS, PHASE_BUILD_SSA, + PHASE_EARLY_PROP, + PHASE_VALUE_NUMBER, PHASE_INSERT_GC_POLLS, + PHASE_OPTIMIZE_LAYOUT, PHASE_RATIONALIZE, PHASE_LOWERING, PHASE_STACK_LEVEL_SETTER}; diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 06c9fb322efa24..7f62e8c38c7627 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -53,7 +53,7 @@ static inline BasicBlock* IntersectDom(BasicBlock* finger1, BasicBlock* finger2) // SSA // ================================================================================= -void Compiler::fgSsaBuild() +PhaseStatus Compiler::fgSsaBuild() { // If this is not the first invocation, reset data structures for SSA. if (fgSsaPassesCompleted > 0) @@ -68,13 +68,7 @@ void Compiler::fgSsaBuild() JitTestCheckSSA(); #endif // DEBUG -#ifdef DEBUG - if (verbose) - { - JITDUMP("\nAfter fgSsaBuild:\n"); - fgDispBasicBlocks(/*dumpTrees*/ true); - } -#endif // DEBUG + return PhaseStatus::MODIFIED_EVERYTHING; } void Compiler::fgResetForSsa() diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 3ec54f47e886f6..72bd80eb3b1e5e 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -7279,7 +7279,13 @@ struct ValueNumberState } }; -void Compiler::fgValueNumber() +//------------------------------------------------------------------------ +// fgValueNumber: Run value numbering for the entire method +// +// Returns: +// suitable phase status +// +PhaseStatus Compiler::fgValueNumber() { #ifdef DEBUG // This could be a JITDUMP, but some people find it convenient to set a breakpoint on the printf. @@ -7292,7 +7298,7 @@ void Compiler::fgValueNumber() // If we skipped SSA, skip VN as well. if (fgSsaPassesCompleted == 0) { - return; + return PhaseStatus::MODIFIED_NOTHING; } // Allocate the value number store. @@ -7475,6 +7481,8 @@ void Compiler::fgValueNumber() #endif // DEBUG fgVNPassesCompleted++; + + return PhaseStatus::MODIFIED_EVERYTHING; } void Compiler::fgValueNumberBlock(BasicBlock* blk) From 09d47cc620e38ccf2618237187dd09974f58b0d1 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 23 Jun 2022 15:32:17 -0700 Subject: [PATCH 3/8] even more phases --- src/coreclr/jit/compiler.cpp | 3 --- src/coreclr/jit/compiler.h | 4 ++-- src/coreclr/jit/flowgraph.cpp | 9 ++++++--- src/coreclr/jit/phase.cpp | 2 ++ 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 5daa7cb15ef698..8aed0b68a8b918 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4847,8 +4847,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // Optimize boolean conditions // DoPhase(this, PHASE_OPTIMIZE_BOOLS, &Compiler::optOptimizeBools); - - // optOptimizeBools() might have changed the number of blocks; the dominators/reachability might be bad. } // Figure out the order in which operators are to be evaluated @@ -4858,7 +4856,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // Weave the tree lists. Anyone who modifies the tree shapes after // this point is responsible for calling fgSetStmtSeq() to keep the // nodes properly linked. - // This can create GC poll calls, and create new BasicBlocks (without updating dominators/reachability). // DoPhase(this, PHASE_SET_BLOCK_ORDER, &Compiler::fgSetBlockOrder); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 2d7513c05eaf7d..cc87219923bed7 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5263,12 +5263,12 @@ class Compiler bool fgUpdateFlowGraph(bool doTailDup = false); - void fgFindOperOrder(); + PhaseStatus fgFindOperOrder(); // method that returns if you should split here typedef bool(fgSplitPredicate)(GenTree* tree, GenTree* parent, fgWalkData* data); - void fgSetBlockOrder(); + PhaseStatus fgSetBlockOrder(); void fgRemoveReturnBlock(BasicBlock* block); diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 33d6fab81e5aef..0f0cc1273ef98e 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -2885,7 +2885,7 @@ void Compiler::fgAddInternal() /*****************************************************************************/ /*****************************************************************************/ -void Compiler::fgFindOperOrder() +PhaseStatus Compiler::fgFindOperOrder() { #ifdef DEBUG if (verbose) @@ -2908,6 +2908,8 @@ void Compiler::fgFindOperOrder() gtSetStmtInfo(stmt); } } + + return PhaseStatus::MODIFIED_EVERYTHING; } //------------------------------------------------------------------------ @@ -3944,7 +3946,7 @@ GenTree* Compiler::fgSetTreeSeq(GenTree* tree, bool isLIR) * Also finds blocks that need GC polls and inserts them as needed. */ -void Compiler::fgSetBlockOrder() +PhaseStatus Compiler::fgSetBlockOrder() { #ifdef DEBUG if (verbose) @@ -4047,8 +4049,9 @@ void Compiler::fgSetBlockOrder() { printf("The biggest BB has %4u tree nodes\n", BasicBlock::s_nMaxTrees); } - fgDebugCheckLinks(); #endif // DEBUG + + return PhaseStatus::MODIFIED_NOTHING; } /*****************************************************************************/ diff --git a/src/coreclr/jit/phase.cpp b/src/coreclr/jit/phase.cpp index 81f6fe6ac52043..306cc9c00c4f02 100644 --- a/src/coreclr/jit/phase.cpp +++ b/src/coreclr/jit/phase.cpp @@ -191,6 +191,8 @@ void Phase::PostPhase(PhaseStatus status) PHASE_MARK_LOCAL_VARS, PHASE_OPTIMIZE_ADD_COPIES, PHASE_OPTIMIZE_BOOLS, + PHASE_FIND_OPER_ORDER, + PHASE_SET_BLOCK_ORDER, PHASE_BUILD_SSA, PHASE_EARLY_PROP, PHASE_VALUE_NUMBER, From 0b9f8b5a729291879e57bc0922954f450deec177 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 23 Jun 2022 19:47:04 -0700 Subject: [PATCH 4/8] add hoisting, placeholders for missing phases --- src/coreclr/jit/compiler.h | 13 +--- src/coreclr/jit/optimizer.cpp | 140 +++++++++++++++++++--------------- src/coreclr/jit/phase.cpp | 61 +++++++++++---- 3 files changed, 130 insertions(+), 84 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index cc87219923bed7..3c84f8e94c348c 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5917,7 +5917,7 @@ class Compiler protected: // Do hoisting for all loops. - void optHoistLoopCode(); + PhaseStatus optHoistLoopCode(); // To represent sets of VN's that have already been hoisted in outer loops. typedef JitHashTable, bool> VNSet; @@ -5958,17 +5958,10 @@ class Compiler // Do hoisting of all loops nested within loop "lnum" (an index into the optLoopTable), followed // by the loop "lnum" itself. - // - // "m_pHoistedInCurLoop" helps a lot in eliminating duplicate expressions getting hoisted - // and reducing the count of total expressions hoisted out of loop. When calculating the - // profitability, we compare this with number of registers and hence, lower the number of expressions - // getting hoisted, better chances that they will get enregistered and CSE considering them. - // - void optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt); + bool optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt); // Do hoisting for a particular loop ("lnum" is an index into the optLoopTable.) - // Returns the new preheaders created. - void optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, BasicBlockList* existingPreHeaders); + bool optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, BasicBlockList* existingPreHeaders); // Hoist all expressions in "blocks" that are invariant in loop "loopNum" (an index into the optLoopTable) // outside of that loop. diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index e30a636fa832a0..4fcadcf30ccb53 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6410,13 +6410,19 @@ void Compiler::optPerformHoistExpr(GenTree* origExpr, BasicBlock* exprBb, unsign #endif // LOOP_HOIST_STATS } -void Compiler::optHoistLoopCode() +//------------------------------------------------------------------------ +// optHoistLoopNest: run loop hoisting for indicated loop and all contained loops +// +// Returns: +// suitable phase status +// +PhaseStatus Compiler::optHoistLoopCode() { // If we don't have any loops in the method then take an early out now. if (optLoopCount == 0) { JITDUMP("\nNo loops; no hoisting\n"); - return; + return PhaseStatus::MODIFIED_NOTHING; } #ifdef DEBUG @@ -6424,7 +6430,7 @@ void Compiler::optHoistLoopCode() if (jitNoHoist > 0) { JITDUMP("\nJitNoHoist set; no hoisting\n"); - return; + return PhaseStatus::MODIFIED_NOTHING; } #endif @@ -6449,7 +6455,9 @@ void Compiler::optHoistLoopCode() // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers. } if (methHash < methHashLo || methHash > methHashHi) - return; + { + return PhaseStatus::MODIFIED_NOTHING; + } printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash); #endif // DEBUG #endif // 0 -- debugging loop hoisting issues @@ -6458,15 +6466,14 @@ void Compiler::optHoistLoopCode() if (verbose) { printf("\n*************** In optHoistLoopCode()\n"); - printf("Blocks/Trees before phase\n"); - fgDispBasicBlocks(true); fgDispHandlerTab(); optPrintLoopTable(); } #endif - // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which - // they are invariant.) + // Consider all the loop nests, in inner-to-outer order + // + bool modified = false; LoopHoistContext hoistCtxt(this); for (unsigned lnum = 0; lnum < optLoopCount; lnum++) { @@ -6478,57 +6485,53 @@ void Compiler::optHoistLoopCode() if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP) { - optHoistLoopNest(lnum, &hoistCtxt); - } - } - -#if DEBUG - if (fgModified) - { - if (verbose) - { - printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n"); - fgDispBasicBlocks(true); - printf(""); + modified |= optHoistLoopNest(lnum, &hoistCtxt); } - - // Make sure that the predecessor lists are accurate - fgDebugCheckBBlist(); } -#endif #ifdef DEBUG // Test Data stuff.. - // If we have no test data, early out. - if (m_nodeTestData == nullptr) - { - return; - } - NodeToTestDataMap* testData = GetNodeTestData(); - for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki) + // + if (m_nodeTestData = nullptr) { - TestLabelAndNum tlAndN; - GenTree* node = ki.Get(); - bool b = testData->Lookup(node, &tlAndN); - assert(b); - if (tlAndN.m_tl != TL_LoopHoist) - { - continue; - } - // Otherwise, it is a loop hoist annotation. - assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved. - if (tlAndN.m_num >= 0) + NodeToTestDataMap* testData = GetNodeTestData(); + for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki) { - printf("Node "); - printTreeID(node); - printf(" was declared 'must hoist', but has not been hoisted.\n"); - assert(false); + TestLabelAndNum tlAndN; + GenTree* node = ki.Get(); + bool b = testData->Lookup(node, &tlAndN); + assert(b); + if (tlAndN.m_tl != TL_LoopHoist) + { + continue; + } + // Otherwise, it is a loop hoist annotation. + assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved. + if (tlAndN.m_num >= 0) + { + printf("Node "); + printTreeID(node); + printf(" was declared 'must hoist', but has not been hoisted.\n"); + assert(false); + } } } #endif // DEBUG + + return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } -void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) +//------------------------------------------------------------------------ +// optHoistLoopNest: run loop hoisting for indicated loop and all contained loops +// +// Arguments: +// lnum - loop to process +// hoistCtxt - context for the hoisting +// +// Returns: +// true if any hoisting was done +// +bool Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) { // Do this loop, then recursively do all nested loops. JITDUMP("\n%s " FMT_LP "\n", optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP ? "Loop Nest" : "Nested Loop", @@ -6543,6 +6546,8 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) BasicBlockList* preHeadersOfChildLoops = nullptr; BasicBlockList* firstPreHeader = nullptr; + bool modified = false; + if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP) { BitVecTraits m_visitedTraits(fgBBNumMax * 2, this); @@ -6551,7 +6556,7 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP; child = optLoopTable[child].lpSibling) { - optHoistLoopNest(child, hoistCtxt); + modified |= optHoistLoopNest(child, hoistCtxt); if (optLoopTable[child].lpFlags & LPFLG_HAS_PREHEAD) { @@ -6581,10 +6586,23 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) } } - optHoistThisLoop(lnum, hoistCtxt, firstPreHeader); + modified |= optHoistThisLoop(lnum, hoistCtxt, firstPreHeader); + + return modified; } -void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, BasicBlockList* existingPreHeaders) +//------------------------------------------------------------------------ +// optHoistThisLoop: run loop hoisting for the indicated loop +// +// Arguments: +// lnum - loop to process +// hoistCtxt - context for the hoisting +// existingPreHeaders - list of preheaders created for child loops +// +// Returns: +// true if any hoisting was done +// +bool Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, BasicBlockList* existingPreHeaders) { LoopDsc* pLoopDsc = &optLoopTable[lnum]; @@ -6593,7 +6611,7 @@ void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, Basi if (pLoopDsc->lpFlags & LPFLG_REMOVED) { JITDUMP(" ... not hoisting " FMT_LP ": removed\n", lnum); - return; + return false; } // Ensure the per-loop sets/tables are empty. @@ -6790,6 +6808,10 @@ void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, Basi defExec.Push(pLoopDsc->lpEntry); optHoistLoopBlocks(lnum, &defExec, hoistCtxt); + + const unsigned numHoisted = pLoopDsc->lpHoistedFPExprCount + pLoopDsc->lpHoistedExprCount; + + return numHoisted > 0; } bool Compiler::optIsProfitableToHoistTree(GenTree* tree, unsigned lnum) @@ -7643,9 +7665,6 @@ void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnu return; } - // Create a loop pre-header in which to put the hoisted code. - fgCreateLoopPreHeader(lnum); - // If the block we're hoisting from and the pre-header are in different EH regions, don't hoist. // TODO: we could probably hoist things that won't raise exceptions, such as constants. if (!BasicBlock::sameTryRegion(optLoopTable[lnum].lpHead, treeBb)) @@ -7656,6 +7675,9 @@ void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnu return; } + // Create a loop pre-header in which to put the hoisted code. + fgCreateLoopPreHeader(lnum); + // Expression can be hoisted optPerformHoistExpr(tree, treeBb, lnum); @@ -9761,17 +9783,13 @@ PhaseStatus Compiler::optOptimizeBools() if (verbose) { printf("*************** In optOptimizeBools()\n"); - if (verboseTrees) - { - printf("Blocks/Trees before phase\n"); - fgDispBasicBlocks(true); - } } #endif bool change = false; unsigned numCond = 0; unsigned numReturn = 0; unsigned numPasses = 0; + unsigned stress = false; do { @@ -9849,6 +9867,7 @@ PhaseStatus Compiler::optOptimizeBools() { #ifdef DEBUG optBoolsDsc.optOptimizeBoolsGcStress(); + stress = true; #endif } } @@ -9856,7 +9875,8 @@ PhaseStatus Compiler::optOptimizeBools() JITDUMP("\noptimized %u BBJ_COND cases, %u BBJ_RETURN cases in %u passes\n", numCond, numReturn, numPasses); - return ((numCond + numReturn) == 0) ? PhaseStatus::MODIFIED_NOTHING : PhaseStatus::MODIFIED_EVERYTHING; + const bool modified = stress || ((numCond + numReturn) > 0); + return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } typedef JitHashTable, unsigned> LclVarRefCounts; diff --git a/src/coreclr/jit/phase.cpp b/src/coreclr/jit/phase.cpp index 306cc9c00c4f02..67fe1f46eda4dc 100644 --- a/src/coreclr/jit/phase.cpp +++ b/src/coreclr/jit/phase.cpp @@ -167,23 +167,38 @@ void Phase::PostPhase(PhaseStatus status) // clang-format off static Phases s_allowlist[] = { + // pre import PHASE_INCPROFILE, PHASE_IBCPREP, PHASE_IMPORTATION, PHASE_PATCHPOINTS, PHASE_IBCINSTR, PHASE_INDXCALL, + // post import + // morph init PHASE_MORPH_INLINE, PHASE_ALLOCATE_OBJECTS, + // add internal PHASE_EMPTY_TRY, PHASE_EMPTY_FINALLY, PHASE_MERGE_FINALLY_CHAINS, PHASE_CLONE_FINALLY, + // finally flags + // compute preds PHASE_MERGE_THROWS, + // early fg update + // promote structs + // mark addr exposed locals PHASE_FWD_SUB, + // morph implicit byref PHASE_MORPH_GLOBAL, + // gs cookie + // compute ege weights + // create funclets PHASE_INVERT_LOOPS, PHASE_OPTIMIZE_FLOW, + // reachability + // block weights PHASE_FIND_LOOPS, PHASE_CLONE_LOOPS, PHASE_UNROLL_LOOPS, @@ -194,23 +209,44 @@ void Phase::PostPhase(PhaseStatus status) PHASE_FIND_OPER_ORDER, PHASE_SET_BLOCK_ORDER, PHASE_BUILD_SSA, + // (ssa subphases) PHASE_EARLY_PROP, PHASE_VALUE_NUMBER, + PHASE_HOIST_LOOP_CODE, + // copy prop + // PHASE_OPTIMIZE_BRANCHES, + // cse + // assertion prop + // range check + // update flow + // edge weights 2 PHASE_INSERT_GC_POLLS, PHASE_OPTIMIZE_LAYOUT, + // first cold block PHASE_RATIONALIZE, PHASE_LOWERING, - PHASE_STACK_LEVEL_SETTER}; + // lsra + PHASE_STACK_LEVEL_SETTER + // align loops + // codegen + }; // clang-format on + // Also note when this phase has not opted into the active post phase checks. + // + const bool doPostPhaseChecks = comp->activePhaseChecks == PhaseChecks::CHECK_ALL; + const char* checkMessage = + madeChanges && doPostPhaseChecks ? " [phase has not yet enabled common post phase checks]" : ""; + if (madeChanges) { for (size_t i = 0; i < sizeof(s_allowlist) / sizeof(Phases); i++) { if (m_phase == s_allowlist[i]) { - doPostPhase = true; + doPostPhase = true; + checkMessage = ""; break; } } @@ -220,12 +256,12 @@ void Phase::PostPhase(PhaseStatus status) { if (comp->compIsForInlining()) { - printf("\n*************** Inline @[%06u] Finishing PHASE %s%s\n", - Compiler::dspTreeID(comp->impInlineInfo->iciCall), m_name, statusMessage); + printf("\n*************** Inline @[%06u] Finishing PHASE %s%s%s%s\n", + Compiler::dspTreeID(comp->impInlineInfo->iciCall), m_name, statusMessage, checkMessage); } else { - printf("\n*************** Finishing PHASE %s%s\n", m_name, statusMessage); + printf("\n*************** Finishing PHASE %s%s%s\n", m_name, statusMessage, checkMessage); } if (doPostPhase) @@ -235,16 +271,13 @@ void Phase::PostPhase(PhaseStatus status) } } - if (doPostPhase) + if (doPostPhase && doPostPhaseChecks) { - if (comp->activePhaseChecks == PhaseChecks::CHECK_ALL) - { - comp->fgDebugCheckBBlist(); - comp->fgDebugCheckLinks(); - comp->fgDebugCheckNodesUniqueness(); - comp->fgVerifyHandlerTab(); - comp->fgDebugCheckLoopTable(); - } + comp->fgDebugCheckBBlist(); + comp->fgDebugCheckLinks(); + comp->fgDebugCheckNodesUniqueness(); + comp->fgVerifyHandlerTab(); + comp->fgDebugCheckLoopTable(); } // Optionally check profile data, if we have any. From ffe8911d311fa794dcbb11f3cab2dbd8907a567f Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 24 Jun 2022 18:57:02 -0700 Subject: [PATCH 5/8] add comment showing when phase checks turn on --- src/coreclr/jit/phase.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/coreclr/jit/phase.cpp b/src/coreclr/jit/phase.cpp index 67fe1f46eda4dc..bfe4f079c2e1da 100644 --- a/src/coreclr/jit/phase.cpp +++ b/src/coreclr/jit/phase.cpp @@ -191,6 +191,9 @@ void Phase::PostPhase(PhaseStatus status) // mark addr exposed locals PHASE_FWD_SUB, // morph implicit byref + // + // (enable all phase checks) + // PHASE_MORPH_GLOBAL, // gs cookie // compute ege weights From 9a584687a22869016f055d0e42d1d92493536fbf Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 24 Jun 2022 19:37:01 -0700 Subject: [PATCH 6/8] fix typo --- src/coreclr/jit/optimizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 4fcadcf30ccb53..101c076ad7c972 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6492,7 +6492,7 @@ PhaseStatus Compiler::optHoistLoopCode() #ifdef DEBUG // Test Data stuff.. // - if (m_nodeTestData = nullptr) + if (m_nodeTestData == nullptr) { NodeToTestDataMap* testData = GetNodeTestData(); for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki) From 60029122c65b0a9b72ad770eea0e770cf39a9f21 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 27 Jun 2022 12:33:43 -0700 Subject: [PATCH 7/8] fix printf arg --- src/coreclr/jit/phase.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/phase.cpp b/src/coreclr/jit/phase.cpp index bfe4f079c2e1da..5c28f0decf9a3a 100644 --- a/src/coreclr/jit/phase.cpp +++ b/src/coreclr/jit/phase.cpp @@ -259,7 +259,7 @@ void Phase::PostPhase(PhaseStatus status) { if (comp->compIsForInlining()) { - printf("\n*************** Inline @[%06u] Finishing PHASE %s%s%s%s\n", + printf("\n*************** Inline @[%06u] Finishing PHASE %s%s%s\n", Compiler::dspTreeID(comp->impInlineInfo->iciCall), m_name, statusMessage, checkMessage); } else From a6f7a8a23979605963c22f79f3e74d154d6242af Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 27 Jun 2022 20:15:04 -0700 Subject: [PATCH 8/8] revise per feedback --- src/coreclr/jit/compiler.cpp | 3 ++- src/coreclr/jit/compiler.h | 10 +++++++--- src/coreclr/jit/flowgraph.cpp | 4 +++- src/coreclr/jit/lclvars.cpp | 10 +++------- src/coreclr/jit/optimizer.cpp | 28 +++++++++++++++++----------- 5 files changed, 32 insertions(+), 23 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 8aed0b68a8b918..e57a0182a123b9 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4840,7 +4840,8 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl if (opts.OptimizationEnabled()) { - // xxx + // Introduce copies for some single-def locals to make them more + // amenable to optimization // DoPhase(this, PHASE_OPTIMIZE_ADD_COPIES, &Compiler::optAddCopies); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 3c84f8e94c348c..595080b2683d8f 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1415,8 +1415,10 @@ enum class PhaseChecks // Specify compiler data that a phase might modify enum class PhaseStatus : unsigned { - MODIFIED_NOTHING, - MODIFIED_EVERYTHING + MODIFIED_NOTHING, // Phase did not make any changes that warrant running post-phase checks or dumping + // the main jit data strutures. + MODIFIED_EVERYTHING, // Phase made changes that warrant running post-phase checks or dumping + // the main jit data strutures. }; // The following enum provides a simple 1:1 mapping to CLR API's @@ -5180,7 +5182,7 @@ class Compiler bool fgCheckRemoveStmt(BasicBlock* block, Statement* stmt); - void fgCreateLoopPreHeader(unsigned lnum); + bool fgCreateLoopPreHeader(unsigned lnum); void fgUnreachableBlock(BasicBlock* block); @@ -6100,6 +6102,8 @@ class Compiler int lpLoopVarFPCount; // The register count for the FP LclVars that are read/written inside this loop int lpVarInOutFPCount; // The register count for the FP LclVars that are alive inside or across this loop + bool lpHoistAddedPreheader; // The loop preheader was added during hoisting + typedef JitHashTable, FieldKindForVN> FieldHandleSet; FieldHandleSet* lpFieldsModified; // This has entries for all static field and object instance fields modified diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 0f0cc1273ef98e..1ffae6d4e023d1 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -4051,7 +4051,9 @@ PhaseStatus Compiler::fgSetBlockOrder() } #endif // DEBUG - return PhaseStatus::MODIFIED_NOTHING; + // Return "everything" to enable consistency checking of the statement links during post phase. + // + return PhaseStatus::MODIFIED_EVERYTHING; } /*****************************************************************************/ diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index e958bb95f6a4e7..ef78c31f80007d 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -4592,7 +4592,7 @@ PhaseStatus Compiler::lvaMarkLocalVars() } } - bool addedLocal = false; + unsigned const lvaCountOrig = lvaCount; #if !defined(FEATURE_EH_FUNCLETS) @@ -4620,8 +4620,6 @@ PhaseStatus Compiler::lvaMarkLocalVars() LclVarDsc* shadowSPslotsVar = lvaGetDesc(lvaShadowSPslotsVar); shadowSPslotsVar->lvType = TYP_BLK; shadowSPslotsVar->lvExactSize = (slotsNeeded * TARGET_POINTER_SIZE); - - addedLocal = true; } #endif // !FEATURE_EH_FUNCLETS @@ -4636,7 +4634,6 @@ PhaseStatus Compiler::lvaMarkLocalVars() LclVarDsc* lclPSPSym = lvaGetDesc(lvaPSPSym); lclPSPSym->lvType = TYP_I_IMPL; lvaSetVarDoNotEnregister(lvaPSPSym DEBUGARG(DoNotEnregisterReason::VMNeedsStackAddr)); - addedLocal = true; } #endif // FEATURE_EH_FUNCLETS @@ -4659,7 +4656,6 @@ PhaseStatus Compiler::lvaMarkLocalVars() lvaLocAllocSPvar = lvaGrabTempWithImplicitUse(false DEBUGARG("LocAllocSPvar")); LclVarDsc* locAllocSPvar = lvaGetDesc(lvaLocAllocSPvar); locAllocSPvar->lvType = TYP_I_IMPL; - addedLocal = true; } #endif // JIT32_GCENCODER } @@ -4681,7 +4677,7 @@ PhaseStatus Compiler::lvaMarkLocalVars() { // This phase may add new locals // - return addedLocal ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; + return (lvaCount != lvaCountOrig) ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } const bool reportParamTypeArg = lvaReportParamTypeArg(); @@ -4702,7 +4698,7 @@ PhaseStatus Compiler::lvaMarkLocalVars() // This phase may add new locals. // - return addedLocal ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; + return (lvaCount != lvaCountOrig) ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 101c076ad7c972..bb7910ef4d38f7 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6629,9 +6629,10 @@ bool Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, Basi VARSET_TP loopVars(VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef)); - pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut); - pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars); - pLoopDsc->lpHoistedExprCount = 0; + pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut); + pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars); + pLoopDsc->lpHoistedExprCount = 0; + pLoopDsc->lpHoistAddedPreheader = false; #ifndef TARGET_64BIT unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars); @@ -6810,8 +6811,7 @@ bool Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, Basi optHoistLoopBlocks(lnum, &defExec, hoistCtxt); const unsigned numHoisted = pLoopDsc->lpHoistedFPExprCount + pLoopDsc->lpHoistedExprCount; - - return numHoisted > 0; + return pLoopDsc->lpHoistAddedPreheader || (numHoisted > 0); } bool Compiler::optIsProfitableToHoistTree(GenTree* tree, unsigned lnum) @@ -7665,6 +7665,10 @@ void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnu return; } + // Create a loop pre-header in which to put the hoisted code. + const bool newPreheader = fgCreateLoopPreHeader(lnum); + optLoopTable[lnum].lpHoistAddedPreheader |= newPreheader; + // If the block we're hoisting from and the pre-header are in different EH regions, don't hoist. // TODO: we could probably hoist things that won't raise exceptions, such as constants. if (!BasicBlock::sameTryRegion(optLoopTable[lnum].lpHead, treeBb)) @@ -7675,9 +7679,6 @@ void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnu return; } - // Create a loop pre-header in which to put the hoisted code. - fgCreateLoopPreHeader(lnum); - // Expression can be hoisted optPerformHoistExpr(tree, treeBb, lnum); @@ -7824,7 +7825,10 @@ bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNSet* loopVnInv // Arguments: // lnum - loop index // -void Compiler::fgCreateLoopPreHeader(unsigned lnum) +// Returns: +// true if new preheader was created +// +bool Compiler::fgCreateLoopPreHeader(unsigned lnum) { #ifdef DEBUG if (verbose) @@ -7841,7 +7845,7 @@ void Compiler::fgCreateLoopPreHeader(unsigned lnum) { JITDUMP(" pre-header already exists\n"); INDEBUG(loop.lpValidatePreHeader()); - return; + return false; } BasicBlock* head = loop.lpHead; @@ -7895,7 +7899,7 @@ void Compiler::fgCreateLoopPreHeader(unsigned lnum) head->bbFlags |= BBF_LOOP_PREHEADER; INDEBUG(loop.lpValidatePreHeader()); INDEBUG(fgDebugCheckLoopTable()); - return; + return false; } else { @@ -8264,6 +8268,8 @@ void Compiler::fgCreateLoopPreHeader(unsigned lnum) optPrintLoopTable(); } #endif + + return true; } bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)