diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 9deffd0883a5e5..a2c0133c534ef3 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1455,8 +1455,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX instruction ins_Copy(var_types dstType); instruction ins_Copy(regNumber srcReg, var_types dstType); - instruction ins_CopyIntToFloat(var_types srcType, var_types dstTyp); - instruction ins_CopyFloatToInt(var_types srcType, var_types dstTyp); static instruction ins_FloatStore(var_types type = TYP_DOUBLE); static instruction ins_FloatCopy(var_types type = TYP_DOUBLE); instruction ins_FloatConv(var_types to, var_types from); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 90c75009fc085b..c6fc52a101ffa9 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -4437,17 +4437,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere } #endif instruction copyIns = ins_Copy(regNum, destMemType); -#if defined(TARGET_XARCH) - // For INS_mov_xmm2i, the source xmm reg comes first. - if (copyIns == INS_mov_xmm2i) - { - GetEmitter()->emitIns_R_R(copyIns, size, regNum, destRegNum); - } - else -#endif // TARGET_XARCH - { - GetEmitter()->emitIns_R_R(copyIns, size, destRegNum, regNum); - } + GetEmitter()->emitIns_R_R(copyIns, size, destRegNum, regNum); #ifdef USING_SCOPE_INFO psiMoveToReg(varNum); #endif // USING_SCOPE_INFO @@ -12067,42 +12057,15 @@ void CodeGen::genRegCopy(GenTree* treeNode) } return; } + + regNumber srcReg = genConsumeReg(op1); var_types targetType = treeNode->TypeGet(); regNumber targetReg = treeNode->GetRegNum(); + assert(srcReg != REG_NA); assert(targetReg != REG_NA); assert(targetType != TYP_STRUCT); - // Check whether this node and the node from which we're copying the value have - // different register types. 
This can happen if (currently iff) we have a SIMD - // vector type that fits in an integer register, in which case it is passed as - // an argument, or returned from a call, in an integer register and must be - // copied if it's in an xmm register. - - bool srcFltReg = (varTypeUsesFloatReg(op1)); - bool tgtFltReg = (varTypeUsesFloatReg(treeNode)); - if (srcFltReg != tgtFltReg) - { - instruction ins; - regNumber fpReg; - regNumber intReg; - if (tgtFltReg) - { - ins = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet()); - fpReg = targetReg; - intReg = op1->GetRegNum(); - } - else - { - ins = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet()); - intReg = targetReg; - fpReg = op1->GetRegNum(); - } - inst_RV_RV(ins, fpReg, intReg, targetType); - } - else - { - inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType); - } + inst_RV_RV(ins_Copy(srcReg, targetType), targetReg, srcReg, targetType); if (op1->IsLocal()) { diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index c4e184b836a9a0..f4467fecac0dab 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2743,7 +2743,7 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) } else { - emit->emitIns_R_R(INS_mov_i2xmm, EA_PTRSIZE, srcXmmReg, srcIntReg); + emit->emitIns_R_R(INS_movd, EA_PTRSIZE, srcXmmReg, srcIntReg); emit->emitIns_R_R(INS_punpckldq, EA_16BYTE, srcXmmReg, srcXmmReg); #ifdef TARGET_X86 // For x86, we need one more to convert it from 8 bytes to 16 bytes. @@ -5039,9 +5039,9 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // integer and floating point registers so, let's do that. 
if (call->IsVarargs() && varTypeIsFloating(argNode)) { - regNumber targetReg = compiler->getCallArgIntRegister(argNode->GetRegNum()); - instruction ins = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG); - inst_RV_RV(ins, argNode->GetRegNum(), targetReg); + regNumber srcReg = argNode->GetRegNum(); + regNumber targetReg = compiler->getCallArgIntRegister(argNode->GetRegNum()); + inst_RV_RV(ins_Copy(srcReg, TYP_LONG), targetReg, srcReg); } #endif // FEATURE_VARARG } @@ -5783,9 +5783,8 @@ void CodeGen::genJmpMethod(GenTree* jmp) if (varTypeIsFloating(loadType)) { - intArgReg = compiler->getCallArgIntRegister(argReg); - instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG); - inst_RV_RV(ins, argReg, intArgReg, loadType); + intArgReg = compiler->getCallArgIntRegister(argReg); + inst_RV_RV(ins_Copy(argReg, TYP_LONG), intArgReg, argReg, loadType); } else { @@ -5824,7 +5823,6 @@ void CodeGen::genJmpMethod(GenTree* jmp) regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask; if (remainingIntArgMask != RBM_NONE) { - instruction insCopyIntToFloat = ins_CopyIntToFloat(TYP_LONG, TYP_DOUBLE); GetEmitter()->emitDisableGC(); for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum) { @@ -5838,7 +5836,7 @@ void CodeGen::genJmpMethod(GenTree* jmp) // also load it in corresponding float arg reg regNumber floatReg = compiler->getCallArgFloatRegister(argReg); - inst_RV_RV(insCopyIntToFloat, floatReg, argReg); + inst_RV_RV(ins_Copy(argReg, TYP_DOUBLE), floatReg, argReg); } argOffset += REGSIZE_BYTES; @@ -6591,8 +6589,9 @@ void CodeGen::genCkfinite(GenTree* treeNode) // Copy the floating-point value to an integer register. If we copied a float to a long, then // right-shift the value so the high 32 bits of the floating-point value sit in the low 32 // bits of the integer register. - instruction ins = ins_CopyFloatToInt(targetType, (targetType == TYP_FLOAT) ? 
TYP_INT : TYP_LONG); - inst_RV_RV(ins, op1->GetRegNum(), tmpReg, targetType); + regNumber srcReg = op1->GetRegNum(); + var_types targetIntType = ((targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG); + inst_RV_RV(ins_Copy(srcReg, targetIntType), tmpReg, srcReg, targetType); if (targetType == TYP_DOUBLE) { // right shift by 32 bits to get to exponent. @@ -6661,7 +6660,7 @@ void CodeGen::genCkfinite(GenTree* treeNode) // Copy only the low 32 bits. This will be the high order 32 bits of the floating-point // value, no matter the floating-point type. - inst_RV_RV(ins_CopyFloatToInt(TYP_FLOAT, TYP_INT), copyToTmpSrcReg, tmpReg, TYP_FLOAT); + inst_RV_RV(ins_Copy(copyToTmpSrcReg, TYP_INT), tmpReg, copyToTmpSrcReg, TYP_FLOAT); // Mask exponent with all 1's and check if the exponent is all 1's inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE); @@ -7082,22 +7081,7 @@ void CodeGen::genBitCast(var_types targetType, regNumber targetReg, var_types sr assert(dstFltReg == genIsValidFloatReg(targetReg)); if (srcFltReg != dstFltReg) { - instruction ins; - regNumber fltReg; - regNumber intReg; - if (dstFltReg) - { - ins = ins_CopyIntToFloat(srcType, targetType); - fltReg = targetReg; - intReg = srcReg; - } - else - { - ins = ins_CopyFloatToInt(srcType, targetType); - intReg = targetReg; - fltReg = srcReg; - } - inst_RV_RV(ins, fltReg, intReg, targetType); + inst_RV_RV(ins_Copy(srcReg, targetType), targetReg, srcReg, targetType); } else if (targetReg != srcReg) { @@ -8760,9 +8744,8 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) #if FEATURE_VARARG if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType)) { - regNumber intArgReg = compiler->getCallArgIntRegister(argReg); - instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG); - inst_RV_RV(ins, argReg, intArgReg, loadType); + regNumber intArgReg = compiler->getCallArgIntRegister(argReg); + inst_RV_RV(ins_Copy(argReg, TYP_LONG), intArgReg, argReg, loadType); } #endif // FEATURE_VARARG } diff --git 
a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 0e78dac0fe0322..a6b3d4a14c6fd4 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -506,6 +506,7 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr) { switch (ins) { + case INS_movd: // TODO-Cleanup: replace with movq, https://github.com/dotnet/runtime/issues/47943. case INS_andn: case INS_bextr: case INS_blsi: @@ -518,8 +519,6 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr) case INS_cvtss2si: case INS_cvtsi2sd: case INS_cvtsi2ss: - case INS_mov_xmm2i: - case INS_mov_i2xmm: case INS_movnti: case INS_mulx: case INS_pdep: @@ -1239,7 +1238,7 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id) case INS_cvtsd2si: case INS_cvtss2si: case INS_extractps: - case INS_mov_xmm2i: + case INS_movd: case INS_movmskpd: case INS_movmskps: case INS_mulx: @@ -8837,15 +8836,7 @@ void emitter::emitDispIns( case IF_RRD_RRD: case IF_RWR_RRD: case IF_RRW_RRD: - if (ins == INS_mov_i2xmm) - { - printf("%s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr)); - } - else if (ins == INS_mov_xmm2i) - { - printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE)); - } - else if (ins == INS_pmovmskb) + if (ins == INS_pmovmskb) { printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr)); } @@ -11447,11 +11438,19 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) regNumber reg2 = id->idReg2(); emitAttr size = id->idOpSize(); - // Get the 'base' opcode - code = insCodeRM(ins); - code = AddVexPrefixIfNeeded(ins, code, size); if (IsSSEOrAVXInstruction(ins)) { + assert((ins != INS_movd) || (isFloatReg(reg1) != isFloatReg(reg2))); + + if ((ins != INS_movd) || isFloatReg(reg1)) + { + code = insCodeRM(ins); + } + else + { + code = insCodeMR(ins); + } + code = AddVexPrefixIfNeeded(ins, code, size); code = insEncodeRMreg(ins, code); if (TakesRexWPrefix(ins, size)) @@ -11461,6 +11460,9 @@ BYTE* 
emitter::emitOutputRR(BYTE* dst, instrDesc* id) } else if ((ins == INS_movsx) || (ins == INS_movzx) || (insIsCMOV(ins))) { + assert(hasCodeRM(ins) && !hasCodeMI(ins) && !hasCodeMR(ins)); + code = insCodeRM(ins); + code = AddVexPrefixIfNeeded(ins, code, size); code = insEncodeRMreg(ins, code) | (int)(size == EA_2BYTE); #ifdef TARGET_AMD64 @@ -11472,6 +11474,9 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) } else if (ins == INS_movsxd) { + assert(hasCodeRM(ins) && !hasCodeMI(ins) && !hasCodeMR(ins)); + code = insCodeRM(ins); + code = AddVexPrefixIfNeeded(ins, code, size); code = insEncodeRMreg(ins, code); #endif // TARGET_AMD64 @@ -11480,6 +11485,9 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) else if ((ins == INS_bsf) || (ins == INS_bsr) || (ins == INS_crc32) || (ins == INS_lzcnt) || (ins == INS_popcnt) || (ins == INS_tzcnt)) { + assert(hasCodeRM(ins) && !hasCodeMI(ins) && !hasCodeMR(ins)); + code = insCodeRM(ins); + code = AddVexPrefixIfNeeded(ins, code, size); code = insEncodeRMreg(ins, code); if ((ins == INS_crc32) && (size > EA_1BYTE)) { @@ -11499,7 +11507,9 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) #endif // FEATURE_HW_INTRINSICS else { - code = insEncodeMRreg(ins, insCodeMR(ins)); + assert(!TakesVexPrefix(ins)); + code = insCodeMR(ins); + code = insEncodeMRreg(ins, code); if (ins != INS_test) { @@ -11543,17 +11553,27 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) } } - regNumber reg345 = REG_NA; + regNumber regFor012Bits = reg2; + regNumber regFor345Bits = REG_NA; if (IsBMIInstruction(ins)) { - reg345 = getBmiRegNumber(ins); + regFor345Bits = getBmiRegNumber(ins); + } + if (regFor345Bits == REG_NA) + { + regFor345Bits = reg1; } - if (reg345 == REG_NA) + if (ins == INS_movd) { - reg345 = id->idReg1(); + assert(isFloatReg(reg1) != isFloatReg(reg2)); + if (isFloatReg(reg2)) + { + std::swap(regFor012Bits, regFor345Bits); + } } - unsigned regCode = insEncodeReg345(ins, reg345, size, &code); - regCode |= 
insEncodeReg012(ins, reg2, size, &code); + + unsigned regCode = insEncodeReg345(ins, regFor345Bits, size, &code); + regCode |= insEncodeReg012(ins, regFor012Bits, size, &code); if (TakesVexPrefix(ins)) { @@ -11648,7 +11668,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) } } - emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); + emitGCregLiveUpd(id->idGCref(), reg1, dst); break; case IF_RRW_RRD: @@ -11668,13 +11688,13 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) */ case INS_xor: - assert(id->idReg1() == id->idReg2()); - emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); + assert(reg1 == reg2); + emitGCregLiveUpd(id->idGCref(), reg1, dst); break; case INS_or: case INS_and: - emitGCregDeadUpd(id->idReg1(), dst); + emitGCregDeadUpd(reg1, dst); break; case INS_add: @@ -11691,7 +11711,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) ((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub))); #endif // Mark r1 as holding a byref - emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst); + emitGCregLiveUpd(GCT_BYREF, reg1, dst); break; default: @@ -11773,15 +11793,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) case IF_RWR_RRD: case IF_RRW_RRD: case IF_RWR_RRD_RRD: - // INS_movxmm2i writes to reg2. 
- if (ins == INS_mov_xmm2i) - { - emitGCregDeadUpd(id->idReg2(), dst); - } - else - { - emitGCregDeadUpd(id->idReg1(), dst); - } + emitGCregDeadUpd(reg1, dst); break; default: @@ -14681,18 +14693,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insThroughput = PERFSCORE_THROUGHPUT_25C; break; - case INS_mov_xmm2i: - // movd reg, xmm - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - - case INS_mov_i2xmm: - // movd xmm, reg - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_1C; - break; - case INS_movd: if (memAccessKind == PERFSCORE_MEMORY_NONE) { diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index ddca579f34ab11..a805128ab950cc 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -1451,7 +1451,7 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) { assert(baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG); op1Reg = op1->GetRegNum(); - emit->emitIns_R_R(ins, emitActualTypeSize(baseType), op1Reg, targetReg); + emit->emitIns_R_R(ins, emitActualTypeSize(baseType), targetReg, op1Reg); } else { @@ -1562,7 +1562,7 @@ void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node) { // extract instructions return to GP-registers, so it needs int size as the emitsize inst_RV_TT_IV(ins, emitTypeSize(TYP_INT), tmpTargetReg, op1, i); - emit->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, targetReg, tmpTargetReg); + emit->emitIns_R_R(INS_movd, EA_4BYTE, targetReg, tmpTargetReg); } else { @@ -1688,7 +1688,7 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node) assert(numArgs == 1); assert((baseType == TYP_INT) || (baseType == TYP_UINT)); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - emit->emitIns_R_R(ins, emitActualTypeSize(baseType), op1Reg, targetReg); + 
emit->emitIns_R_R(ins, emitActualTypeSize(baseType), targetReg, op1Reg); break; } diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index dde116ccfac725..a9c4696d511d3b 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -43,7 +43,7 @@ HARDWARE_INTRINSIC(Vector128, AsVector3, HARDWARE_INTRINSIC(Vector128, AsVector4, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128, AsVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128, Dot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen) // The instruction 
generated for float/double depends on which ISAs are supported HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_cmpps, INS_cmppd}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) @@ -80,7 +80,7 @@ HARDWARE_INTRINSIC(Vector256, get_AllBitsSet, HARDWARE_INTRINSIC(Vector256, get_Count, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector256, get_Zero, 32, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector256, Create, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, CreateScalarUnsafe, 32, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Vector256, CreateScalarUnsafe, 32, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector256, Dot, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector256, GetElement, 32, 2, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector256, GetLower, 32, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) @@ -251,17 +251,17 @@ HARDWARE_INTRINSIC(SSE2, CompareOrdered, HARDWARE_INTRINSIC(SSE2, CompareScalarOrdered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2, CompareUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2, CompareScalarUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, ConvertToInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2, ConvertToInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, ConvertToInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttsd2si}, 
HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2sd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(SSE2, ConvertToVector128Int32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128Int32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_i2xmm, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128Int32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, 
ConvertToVector128Int32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, ConvertToVector128Single, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128UInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_i2xmm, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128UInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, Divide, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2, DivideScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2, Extract, 16, 2, {INS_invalid, INS_invalid, INS_pextrw, INS_pextrw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) @@ -318,12 +318,12 @@ HARDWARE_INTRINSIC(SSE2, 
Xor, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // SSE2 64-bit-only Intrinsics -HARDWARE_INTRINSIC(SSE2_X64, ConvertToInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_X64, ConvertToInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_X64, ConvertToInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2_X64, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_X64, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_X64, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2_X64, ConvertScalarToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_i2xmm, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(SSE2_X64, ConvertScalarToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_i2xmm, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(SSE2_X64, ConvertScalarToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(SSE2_X64, ConvertScalarToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(SSE2_X64, StoreNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti, INS_movnti, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** @@ -519,8 +519,8 @@ HARDWARE_INTRINSIC(AVX2, CompareEqual, HARDWARE_INTRINSIC(AVX2, CompareGreaterThan, 32, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2, CompareLessThan, 32, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2, ExtractVector128, 32, 2, {INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, ConvertToInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(AVX2, ConvertToUInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(AVX2, ConvertToInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(AVX2, ConvertToUInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int16, 32, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int32, 32, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int64, 32, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 06ef501a5220ac..a1135a347f1b22 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -1836,14 +1836,7 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType) return ins_Copy(dstType); } #if defined(TARGET_XARCH) - if (dstIsFloatReg) - { - return INS_mov_i2xmm; - } - else - { - return INS_mov_xmm2i; - } + return INS_movd; #elif defined(TARGET_ARM64) if (dstIsFloatReg) { @@ -2021,32 +2014,6 @@ instruction CodeGen::ins_FloatCompare(var_types type) return (type == TYP_FLOAT) ? INS_ucomiss : INS_ucomisd; } -instruction CodeGen::ins_CopyIntToFloat(var_types srcType, var_types dstType) -{ - // On SSE2/AVX - the same instruction is used for moving double/quad word to XMM/YMM register. 
- assert((srcType == TYP_INT) || (srcType == TYP_UINT) || (srcType == TYP_LONG) || (srcType == TYP_ULONG)); - -#if !defined(TARGET_64BIT) - // No 64-bit registers on x86. - assert((srcType != TYP_LONG) && (srcType != TYP_ULONG)); -#endif // !defined(TARGET_64BIT) - - return INS_mov_i2xmm; -} - -instruction CodeGen::ins_CopyFloatToInt(var_types srcType, var_types dstType) -{ - // On SSE2/AVX - the same instruction is used for moving double/quad word of XMM/YMM to an integer register. - assert((dstType == TYP_INT) || (dstType == TYP_UINT) || (dstType == TYP_LONG) || (dstType == TYP_ULONG)); - -#if !defined(TARGET_64BIT) - // No 64-bit registers on x86. - assert((dstType != TYP_LONG) && (dstType != TYP_ULONG)); -#endif // !defined(TARGET_64BIT) - - return INS_mov_xmm2i; -} - instruction CodeGen::ins_MathOp(genTreeOps oper, var_types type) { switch (oper) @@ -2166,36 +2133,6 @@ instruction CodeGen::ins_FloatCopy(var_types type) return INS_vmov; } -instruction CodeGen::ins_CopyIntToFloat(var_types srcType, var_types dstType) -{ - assert((dstType == TYP_FLOAT) || (dstType == TYP_DOUBLE)); - assert((srcType == TYP_INT) || (srcType == TYP_UINT) || (srcType == TYP_LONG) || (srcType == TYP_ULONG)); - - if ((srcType == TYP_LONG) || (srcType == TYP_ULONG)) - { - return INS_vmov_i2d; - } - else - { - return INS_vmov_i2f; - } -} - -instruction CodeGen::ins_CopyFloatToInt(var_types srcType, var_types dstType) -{ - assert((srcType == TYP_FLOAT) || (srcType == TYP_DOUBLE)); - assert((dstType == TYP_INT) || (dstType == TYP_UINT) || (dstType == TYP_LONG) || (dstType == TYP_ULONG)); - - if ((dstType == TYP_LONG) || (dstType == TYP_ULONG)) - { - return INS_vmov_d2i; - } - else - { - return INS_vmov_f2i; - } -} - instruction CodeGen::ins_FloatCompare(var_types type) { // Not used and not implemented @@ -2308,24 +2245,7 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from) unreached(); } -#elif defined(TARGET_ARM64) -instruction CodeGen::ins_CopyIntToFloat(var_types 
srcType, var_types dstType) -{ - assert((dstType == TYP_FLOAT) || (dstType == TYP_DOUBLE)); - assert((srcType == TYP_INT) || (srcType == TYP_UINT) || (srcType == TYP_LONG) || (srcType == TYP_ULONG)); - - return INS_mov; -} - -instruction CodeGen::ins_CopyFloatToInt(var_types srcType, var_types dstType) -{ - assert((srcType == TYP_FLOAT) || (srcType == TYP_DOUBLE)); - assert((dstType == TYP_INT) || (dstType == TYP_UINT) || (dstType == TYP_LONG) || (dstType == TYP_ULONG)); - - return INS_mov; -} - -#endif // TARGET_ARM64 +#endif // TARGET_ARM /***************************************************************************** * diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index c6a50690f70290..25e5de73caf746 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -181,12 +181,10 @@ INSTMUL(imul_15, "imul", IUM_RD, BAD_CODE, 0x4400003868, INST3(FIRST_SSE_INSTRUCTION, "FIRST_SSE_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) // These are the SSE instructions used on x86 -INST3(mov_i2xmm, "movd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6E), INS_FLAGS_None) // Move int reg to a xmm reg. reg1=xmm reg, reg2=int reg -INST3(mov_xmm2i, "movd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7E), INS_FLAGS_None) // Move xmm reg to an int reg. reg1=xmm reg, reg2=int reg INST3(pmovmskb, "pmovmskb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD7), INS_FLAGS_None) // Move the MSB bits of all bytes in a xmm reg to an int reg INST3(movmskpd, "movmskpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x50), INS_FLAGS_None) // Extract 2-bit sign mask from xmm and store in reg. The upper bits of r32 or r64 are filled with zeros. 
-INST3(movd, "movd", IUM_WR, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E), INS_FLAGS_None) -INST3(movq, "movq", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), INS_FLAGS_None) +INST3(movd, "movd", IUM_WR, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E), INS_FLAGS_None) // Move Doubleword/Quadword between xmm regs <-> memory/r32/r64 regs, cleanup https://github.com/dotnet/runtime/issues/47943 +INST3(movq, "movq", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), INS_FLAGS_None) // Move Quadword between memory/xmm <-> xmm regs, cleanup https://github.com/dotnet/runtime/issues/47943 INST3(movsdsse2, "movsd", IUM_WR, SSEDBL(0x11), BAD_CODE, SSEDBL(0x10), INS_Flags_IsDstSrcSrcAVXInstruction) INST3(punpckldq, "punpckldq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x62), INS_Flags_IsDstDstSrcAVXInstruction) diff --git a/src/coreclr/jit/simdcodegenxarch.cpp b/src/coreclr/jit/simdcodegenxarch.cpp index f1bafb9991d803..6897dd01954f60 100644 --- a/src/coreclr/jit/simdcodegenxarch.cpp +++ b/src/coreclr/jit/simdcodegenxarch.cpp @@ -531,17 +531,13 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) // For AVX2, move it to all 4 of the 64-bit lanes using: // vpbroadcastq targetReg, targetReg - instruction ins; - regNumber op1loReg = genConsumeReg(op1lo); - ins = ins_CopyIntToFloat(TYP_INT, TYP_FLOAT); - inst_RV_RV(ins, targetReg, op1loReg, TYP_INT, emitTypeSize(TYP_INT)); + inst_RV_RV(ins_Copy(op1loReg, TYP_FLOAT), targetReg, op1loReg, TYP_INT); regNumber tmpReg = simdNode->GetSingleTempReg(); regNumber op1hiReg = genConsumeReg(op1hi); - ins = ins_CopyIntToFloat(TYP_INT, TYP_FLOAT); - inst_RV_RV(ins, tmpReg, op1hiReg, TYP_INT, emitTypeSize(TYP_INT)); + inst_RV_RV(ins_Copy(op1hiReg, TYP_FLOAT), tmpReg, op1hiReg, TYP_INT); ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, TYP_SIMD16); GetEmitter()->emitIns_R_I(ins, EA_16BYTE, tmpReg, 4); // shift left by 4 bytes @@ -600,9 +596,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) regNumber srcReg = genConsumeReg(op1); if (baseType == TYP_INT || 
baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG) { - ins = ins_CopyIntToFloat(baseType, TYP_FLOAT); - assert(ins != INS_invalid); - inst_RV_RV(ins, targetReg, srcReg, baseType, emitTypeSize(baseType)); + inst_RV_RV(ins_Copy(srcReg, TYP_FLOAT), targetReg, srcReg, baseType, emitTypeSize(baseType)); srcReg = targetReg; } @@ -653,17 +647,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) { if (op1Reg != targetReg) { - if (varTypeIsFloating(baseType)) - { - ins = ins_Copy(targetType); - } - else if (baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG) - { - ins = ins_CopyIntToFloat(baseType, TYP_FLOAT); - } - - assert(ins != INS_invalid); - inst_RV_RV(ins, targetReg, op1Reg, baseType, emitTypeSize(baseType)); + inst_RV_RV(ins_Copy(op1Reg, TYP_FLOAT), targetReg, op1Reg, baseType, emitTypeSize(baseType)); } } @@ -845,12 +829,12 @@ void CodeGen::genSIMDIntrinsic32BitConvert(GenTreeSIMD* simdNode) // prepare mask #ifdef TARGET_AMD64 GetEmitter()->emitIns_R_I(INS_mov, EA_8BYTE, tmpIntReg, (ssize_t)0X5300000053000000); - inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_ULONG); + inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_ULONG); #else if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) { GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X53000000); - inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT); + inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_UINT); } else { @@ -910,14 +894,13 @@ void CodeGen::genSIMDLo64BitConvert(SIMDIntrinsicID intrinsicID, instruction ins = getOpForSIMDIntrinsic(intrinsicID, baseType); if (intrinsicID == SIMDIntrinsicConvertToDouble) { - // Note that for mov_xmm2i, the int register is always in the reg2 position - inst_RV_RV(INS_mov_xmm2i, tmpReg, tmpIntReg, TYP_LONG); + inst_RV_RV(INS_movd, tmpIntReg, tmpReg, TYP_LONG); inst_RV_RV(ins, targetReg, tmpIntReg, baseType, emitActualTypeSize(baseType)); } else { inst_RV_RV(ins, tmpIntReg, tmpReg, 
baseType, emitActualTypeSize(baseType)); - inst_RV_RV(INS_mov_i2xmm, targetReg, tmpIntReg, TYP_LONG); + inst_RV_RV(INS_movd, targetReg, tmpIntReg, TYP_LONG); } } @@ -1008,10 +991,10 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) // prepare mask for converting upper 32 bits #ifdef TARGET_AMD64 GetEmitter()->emitIns_R_I(INS_mov, EA_8BYTE, tmpIntReg, (ssize_t)0X4530000000000000); - inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_ULONG); + inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_ULONG); #else GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X45300000); - inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT); + inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_UINT); GetEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4); #endif if (level == SIMD_AVX2_Supported) @@ -1030,10 +1013,10 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) // prepare mask for converting lower 32 bits #ifdef TARGET_AMD64 GetEmitter()->emitIns_R_I(INS_mov, EA_8BYTE, tmpIntReg, (ssize_t)0X4330000000000000); - inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_ULONG); + inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_ULONG); #else GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X43300000); - inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT); + inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_UINT); GetEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4); #endif if (level == SIMD_AVX2_Supported) @@ -1123,7 +1106,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) // prepare mask for converting upper 32 bits GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X45300000); - inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT); + inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_UINT); GetEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4); if (level == SIMD_AVX2_Supported) @@ -1141,7 +1124,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) // prepare mask for converting 
lower 32 bits GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X43300000); - inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT); + inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_UINT); GetEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4); if (level == SIMD_AVX2_Supported) @@ -1883,8 +1866,6 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) { // We need a temp xmm register if the baseType is not floating point and // accessing non-zero'th element. - instruction ins; - if (byteShiftCnt != 0) { assert(tmpReg != REG_NA); @@ -1895,7 +1876,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) } assert((byteShiftCnt > 0) && (byteShiftCnt <= 32)); - ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16); + instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16); GetEmitter()->emitIns_R_I(ins, emitActualTypeSize(simdType), tmpReg, byteShiftCnt); } else @@ -1904,9 +1885,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) } assert(tmpReg != REG_NA); - ins = ins_CopyFloatToInt(TYP_FLOAT, baseType); - // (Note that for mov_xmm2i, the int register is always in the reg2 position.) - inst_RV_RV(ins, tmpReg, targetReg, baseType); + inst_RV_RV(ins_Copy(tmpReg, baseType), targetReg, tmpReg, baseType); } } @@ -1987,9 +1966,7 @@ void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) assert(genIsValidIntReg(tmpReg)); // Move the value from xmm reg to an int reg - instruction ins = ins_CopyFloatToInt(TYP_FLOAT, TYP_INT); - // (Note that for mov_xmm2i, the int register is always in the reg2 position. - inst_RV_RV(ins, op2Reg, tmpReg, baseType); + inst_RV_RV(ins_Copy(op2Reg, TYP_INT), tmpReg, op2Reg, baseType); assert((index >= 0) && (index <= 15));