From a7d647e54e8e1f97d4a7163f49c165b98e747b4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kendall=20Gonz=C3=A1lez=20Le=C3=B3n?= Date: Wed, 9 Apr 2025 20:43:56 -0700 Subject: [PATCH 1/9] JIT: Enhance range checking for SIMD element node creation for non constant index --- src/coreclr/jit/gentree.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index ff2c9583ea8af1..fb8c8ff991cc05 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -27021,14 +27021,8 @@ GenTree* Compiler::gtNewSimdWithElementNode( var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); assert(varTypeIsArithmetic(simdBaseType)); - assert(op2->IsCnsIntOrI()); assert(varTypeIsArithmetic(op3)); - ssize_t imm8 = op2->AsIntCon()->IconValue(); - ssize_t count = simdSize / genTypeSize(simdBaseType); - - assert((0 <= imm8) && (imm8 < count)); - #if defined(TARGET_XARCH) switch (simdBaseType) { @@ -27094,6 +27088,20 @@ GenTree* Compiler::gtNewSimdWithElementNode( #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 + int immUpperBound = getSIMDVectorLength(simdSize, simdBaseType) - 1; + bool rangeCheckNeeded = !op2->OperIsConst(); + + if (!rangeCheckNeeded) + { + ssize_t imm8 = op2->AsIntCon()->IconValue(); + rangeCheckNeeded = (imm8 < 0) || (imm8 > immUpperBound); + } + + if (rangeCheckNeeded) + { + op2 = addRangeCheckForHWIntrinsic(op2, 0, immUpperBound); + } + return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, hwIntrinsicID, simdBaseJitType, simdSize); } From a6cbb1cce6fc8c6f93a630928116c16a9dea1730 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kendall=20Gonz=C3=A1lez=20Le=C3=B3n?= Date: Wed, 9 Apr 2025 21:39:40 -0700 Subject: [PATCH 2/9] JIT: Enhance handling of non-constant index in LowerHWIntrinsicWithElement --- src/coreclr/jit/lowerxarch.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 1a7b0e708eb6c3..741f1b0da48128 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -5728,7 +5728,14 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) GenTree* op2 = node->Op(2); GenTree* op3 = node->Op(3); - assert(op2->OperIsConst()); + if (!op2->OperIsConst()) + { + comp->getSIMDInitTempVarNum(simdType); + + // We will specially handle WithElement in codegen when op2 isn't a constant + ContainCheckHWIntrinsic(node); + return node->gtNext; + } ssize_t count = simdSize / genTypeSize(simdBaseType); ssize_t imm8 = op2->AsIntCon()->IconValue(); From 65b4dc818907b271da2d210536a3e45800aa2b37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kendall=20Gonz=C3=A1lez=20Le=C3=B3n?= Date: Wed, 9 Apr 2025 21:47:41 -0700 Subject: [PATCH 3/9] JIT: Remove conditional handling for non-constant index that caused a user call. --- src/coreclr/jit/hwintrinsicxarch.cpp | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 33278184d02d29..da6e8ed3642ae1 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -3887,34 +3887,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, assert(sig->numArgs == 3); GenTree* indexOp = impStackTop(1).val; - if (!indexOp->OperIsConst()) - { - if (!opts.OptimizationEnabled()) - { - // Only enable late stage rewriting if optimizations are enabled - // as we won't otherwise encounter a constant at the later point - return nullptr; - } - - op3 = impPopStack().val; - op2 = impPopStack().val; - op1 = impSIMDPopStack(); - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); - - retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint)); - break; - } - - ssize_t imm8 = indexOp->AsIntCon()->IconValue(); - ssize_t count = simdSize / genTypeSize(simdBaseType); - - if ((imm8 >= count) || (imm8 < 0)) - { - // Using software fallback if index is out of range (throw exception) - return nullptr; - } - switch (simdBaseType) { // Using software fallback if simdBaseType is not supported by hardware From 7179450824bd5937c6734999164d81a396670610 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kendall=20Gonz=C3=A1lez=20Le=C3=B3n?= Date: Wed, 9 Apr 2025 22:07:35 -0700 Subject: [PATCH 4/9] JIT: Optimize handling of non-constant index for SIMD WithElement --- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 54 +++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 4a73297d3d1e18..c58ba1a21494a1 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -1570,6 +1570,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) GenTree* op1 = (node->GetOperandCount() >= 1) ? node->Op(1) : nullptr; GenTree* op2 = (node->GetOperandCount() >= 2) ? node->Op(2) : nullptr; + GenTree* op3 = (node->GetOperandCount() >= 3) ? node->Op(3) : nullptr; genConsumeMultiOpOperands(node); regNumber op1Reg = (op1 == nullptr) ? REG_NA : op1->GetRegNum(); @@ -1609,6 +1610,59 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) break; } + case NI_Vector128_WithElement: + case NI_Vector256_WithElement: + case NI_Vector512_WithElement: + { + // Optimize the case where op2 is not a constant. + if (!op2->OperIsConst()) + { + // We don't have an instruction to implement this intrinsic if the index is not a constant. + // So we will use the SIMD temp location to store the vector, set the value and then reload it. + // The range check will already have been performed, so at this point we know we have an index + // within the bounds of the vector. + + unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum; + noway_assert(simdInitTempVarNum != BAD_VAR_NUM); + + bool isEBPbased; + unsigned offs = compiler->lvaFrameAddress(simdInitTempVarNum, &isEBPbased); + +#if !FEATURE_FIXED_OUT_ARGS + if (!isEBPbased) + { + // Adjust the offset by the amount currently pushed on the CPU stack + offs += genStackLevel; + } +#else + assert(genStackLevel == 0); +#endif // !FEATURE_FIXED_OUT_ARGS + + regNumber indexReg = op2->GetRegNum(); + regNumber valueReg = op3->GetRegNum(); // New element value to be stored + + // Store the vector to the temp location. + GetEmitter()->emitIns_S_R(ins_Store(simdType, compiler->isSIMDTypeLocalAligned(simdInitTempVarNum)), + emitTypeSize(simdType), op1Reg, simdInitTempVarNum, 0); + + // Set the desired element. + GetEmitter()->emitIns_ARX_R(ins_Store(baseType, false), // Store + emitTypeSize(baseType), // Of the vector baseType + valueReg, // From valueReg + (isEBPbased) ? REG_EBP : REG_ESP, // Stack-based + indexReg, // Indexed + genTypeSize(baseType), // by the size of the baseType + offs); // Offset + + // Write back the modified vector to the original location. + GetEmitter()->emitIns_R_S(ins_Store(simdType, compiler->isSIMDTypeLocalAligned(simdInitTempVarNum)), + emitTypeSize(simdType), targetReg, simdInitTempVarNum, 0); + + } + break; + } + + case NI_Vector128_GetElement: case NI_Vector256_GetElement: case NI_Vector512_GetElement: From c55a16091d013684eff9ebe9b7954ef408cb136a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kendall=20Gonz=C3=A1lez=20Le=C3=B3n?= Date: Tue, 22 Apr 2025 09:36:32 -0700 Subject: [PATCH 5/9] JIT: Comment out handling for NI_Vector128_WithElement in RewriteHWIntrinsicAsUserCall --- src/coreclr/jit/rationalize.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp index de23305c145b7f..57b6bef003d0f3 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -365,7 +365,7 @@ void Rationalizer::RewriteHWIntrinsicAsUserCall(GenTree** use, ArrayStack Date: Tue, 22 Apr 2025 09:36:51 -0700 Subject: [PATCH 6/9] JIT: Update instruction for storing SIMD values to use emitIns_ARX_R with ins_Move_Extend --- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index c58ba1a21494a1..95a486e1721639 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -1646,13 +1646,13 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) emitTypeSize(simdType), op1Reg, simdInitTempVarNum, 0); // Set the desired element. - GetEmitter()->emitIns_ARX_R(ins_Store(baseType, false), // Store - emitTypeSize(baseType), // Of the vector baseType - valueReg, // From valueReg - (isEBPbased) ? REG_EBP : REG_ESP, // Stack-based - indexReg, // Indexed - genTypeSize(baseType), // by the size of the baseType - offs); // Offset + GetEmitter()->emitIns_ARX_R(ins_Move_Extend(op3->TypeGet(), false), // Store + emitTypeSize(baseType), // Of the vector baseType + valueReg, // From valueReg + (isEBPbased) ? REG_EBP : REG_ESP, // Stack-based + indexReg, // Indexed + genTypeSize(baseType), // by the size of the baseType + offs); // Offset // Write back the modified vector to the original location. GetEmitter()->emitIns_R_S(ins_Store(simdType, compiler->isSIMDTypeLocalAligned(simdInitTempVarNum)), From b2bd49e7470c801f60e319307662bd8ec540b250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kendall=20Gonz=C3=A1lez=20Le=C3=B3n?= Date: Sun, 4 May 2025 22:14:41 -0700 Subject: [PATCH 7/9] JIT: Remove handling for NI_Vector*_WithElement and related SIMD cases in RewriteHWIntrinsicAsUserCall --- src/coreclr/jit/rationalize.cpp | 53 +-------------------------------- 1 file changed, 1 insertion(+), 52 deletions(-) diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp index 57b6bef003d0f3..c829242cc85c7c 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -365,57 +365,6 @@ void Rationalizer::RewriteHWIntrinsicAsUserCall(GenTree** use, ArrayStackOperIsConst()) - { - ssize_t imm8 = op2->AsIntCon()->IconValue(); - ssize_t count = simdSize / genTypeSize(simdBaseType); - - if ((imm8 >= count) || (imm8 < 0)) - { - // Using software fallback if index is out of range (throw exception) - break; - } - -#if defined(TARGET_XARCH) - if (varTypeIsIntegral(simdBaseType)) - { - if (varTypeIsLong(simdBaseType)) - { - if (!comp->compOpportunisticallyDependsOn(InstructionSet_SSE41_X64)) - { - break; - } - } - else if (!varTypeIsShort(simdBaseType)) - { - if (!comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) - { - break; - } - } - } -#endif // TARGET_XARCH - - result = comp->gtNewSimdWithElementNode(retType, op1, op2, op3, simdBaseJitType, simdSize); - break; - } - break; - } - default: { if (sigInfo.numArgs == 0) @@ -528,7 +477,7 @@ void Rationalizer::RewriteHWIntrinsicAsUserCall(GenTree** use, ArrayStack Date: Wed, 7 May 2025 14:05:31 -0700 Subject: [PATCH 8/9] JIT: Assert op2 is not a constant for Vector*_WithElement and fix store to load instruction --- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 0876e779c49134..cf721daf2d7df2 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -1973,6 +1973,8 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) case NI_Vector256_WithElement: case NI_Vector512_WithElement: { + assert(!op2->OperIsConst()); + // Optimize the case where op2 is not a constant. if (!op2->OperIsConst()) { @@ -2014,7 +2016,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) offs); // Offset // Write back the modified vector to the original location. - GetEmitter()->emitIns_R_S(ins_Store(simdType, compiler->isSIMDTypeLocalAligned(simdInitTempVarNum)), + GetEmitter()->emitIns_R_S(ins_Load(simdType, compiler->isSIMDTypeLocalAligned(simdInitTempVarNum)), emitTypeSize(simdType), targetReg, simdInitTempVarNum, 0); } From e2e118e07dd8c282fdf4d33a1be905c2844df350 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kendall=20Gonz=C3=A1lez=20Le=C3=B3n?= Date: Wed, 7 May 2025 15:17:02 -0700 Subject: [PATCH 9/9] JIT: Remove if checking because of the previous assert --- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 72 ++++++++++----------- 1 file changed, 34 insertions(+), 38 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index cf721daf2d7df2..3ebd0218fbf9c9 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -1973,53 +1973,49 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) case NI_Vector256_WithElement: case NI_Vector512_WithElement: { + // Optimize the case where op2 is not a constant. assert(!op2->OperIsConst()); - // Optimize the case where op2 is not a constant. - if (!op2->OperIsConst()) - { - // We don't have an instruction to implement this intrinsic if the index is not a constant. - // So we will use the SIMD temp location to store the vector, set the value and then reload it. - // The range check will already have been performed, so at this point we know we have an index - // within the bounds of the vector. + // We don't have an instruction to implement this intrinsic if the index is not a constant. + // So we will use the SIMD temp location to store the vector, set the value and then reload it. + // The range check will already have been performed, so at this point we know we have an index + // within the bounds of the vector. - unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum; - noway_assert(simdInitTempVarNum != BAD_VAR_NUM); + unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum; + noway_assert(simdInitTempVarNum != BAD_VAR_NUM); - bool isEBPbased; - unsigned offs = compiler->lvaFrameAddress(simdInitTempVarNum, &isEBPbased); + bool isEBPbased; + unsigned offs = compiler->lvaFrameAddress(simdInitTempVarNum, &isEBPbased); #if !FEATURE_FIXED_OUT_ARGS - if (!isEBPbased) - { - // Adjust the offset by the amount currently pushed on the CPU stack - offs += genStackLevel; - } + if (!isEBPbased) + { + // Adjust the offset by the amount currently pushed on the CPU stack + offs += genStackLevel; + } #else - assert(genStackLevel == 0); + assert(genStackLevel == 0); #endif // !FEATURE_FIXED_OUT_ARGS - regNumber indexReg = op2->GetRegNum(); - regNumber valueReg = op3->GetRegNum(); // New element value to be stored - - // Store the vector to the temp location. - GetEmitter()->emitIns_S_R(ins_Store(simdType, compiler->isSIMDTypeLocalAligned(simdInitTempVarNum)), - emitTypeSize(simdType), op1Reg, simdInitTempVarNum, 0); - - // Set the desired element. - GetEmitter()->emitIns_ARX_R(ins_Move_Extend(op3->TypeGet(), false), // Store - emitTypeSize(baseType), // Of the vector baseType - valueReg, // From valueReg - (isEBPbased) ? REG_EBP : REG_ESP, // Stack-based - indexReg, // Indexed - genTypeSize(baseType), // by the size of the baseType - offs); // Offset - - // Write back the modified vector to the original location. - GetEmitter()->emitIns_R_S(ins_Load(simdType, compiler->isSIMDTypeLocalAligned(simdInitTempVarNum)), - emitTypeSize(simdType), targetReg, simdInitTempVarNum, 0); - - } + regNumber indexReg = op2->GetRegNum(); + regNumber valueReg = op3->GetRegNum(); // New element value to be stored + + // Store the vector to the temp location. + GetEmitter()->emitIns_S_R(ins_Store(simdType, compiler->isSIMDTypeLocalAligned(simdInitTempVarNum)), + emitTypeSize(simdType), op1Reg, simdInitTempVarNum, 0); + + // Set the desired element. + GetEmitter()->emitIns_ARX_R(ins_Move_Extend(op3->TypeGet(), false), // Store + emitTypeSize(baseType), // Of the vector baseType + valueReg, // From valueReg + (isEBPbased) ? REG_EBP : REG_ESP, // Stack-based + indexReg, // Indexed + genTypeSize(baseType), // by the size of the baseType + offs); // Offset + + // Write back the modified vector to the original location. + GetEmitter()->emitIns_R_S(ins_Load(simdType, compiler->isSIMDTypeLocalAligned(simdInitTempVarNum)), + emitTypeSize(simdType), targetReg, simdInitTempVarNum, 0); break; }