Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1455,8 +1455,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

instruction ins_Copy(var_types dstType);
instruction ins_Copy(regNumber srcReg, var_types dstType);
instruction ins_CopyIntToFloat(var_types srcType, var_types dstTyp);
instruction ins_CopyFloatToInt(var_types srcType, var_types dstTyp);
static instruction ins_FloatStore(var_types type = TYP_DOUBLE);
static instruction ins_FloatCopy(var_types type = TYP_DOUBLE);
instruction ins_FloatConv(var_types to, var_types from);
Expand Down
47 changes: 5 additions & 42 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4437,17 +4437,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
}
#endif
instruction copyIns = ins_Copy(regNum, destMemType);
#if defined(TARGET_XARCH)
// For INS_mov_xmm2i, the source xmm reg comes first.
if (copyIns == INS_mov_xmm2i)
{
GetEmitter()->emitIns_R_R(copyIns, size, regNum, destRegNum);
}
else
#endif // TARGET_XARCH
{
GetEmitter()->emitIns_R_R(copyIns, size, destRegNum, regNum);
}
GetEmitter()->emitIns_R_R(copyIns, size, destRegNum, regNum);
#ifdef USING_SCOPE_INFO
psiMoveToReg(varNum);
#endif // USING_SCOPE_INFO
Expand Down Expand Up @@ -12067,42 +12057,15 @@ void CodeGen::genRegCopy(GenTree* treeNode)
}
return;
}

regNumber srcReg = genConsumeReg(op1);
var_types targetType = treeNode->TypeGet();
regNumber targetReg = treeNode->GetRegNum();
assert(srcReg != REG_NA);
assert(targetReg != REG_NA);
assert(targetType != TYP_STRUCT);

// Check whether this node and the node from which we're copying the value have
// different register types. This can happen if (currently iff) we have a SIMD
// vector type that fits in an integer register, in which case it is passed as
// an argument, or returned from a call, in an integer register and must be
// copied if it's in an xmm register.

bool srcFltReg = (varTypeUsesFloatReg(op1));
bool tgtFltReg = (varTypeUsesFloatReg(treeNode));
if (srcFltReg != tgtFltReg)
{
instruction ins;
regNumber fpReg;
regNumber intReg;
if (tgtFltReg)
{
ins = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet());
fpReg = targetReg;
intReg = op1->GetRegNum();
}
else
{
ins = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet());
intReg = targetReg;
fpReg = op1->GetRegNum();
}
inst_RV_RV(ins, fpReg, intReg, targetType);
}
else
{
inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
}
inst_RV_RV(ins_Copy(srcReg, targetType), targetReg, srcReg, targetType);

if (op1->IsLocal())
{
Expand Down
45 changes: 14 additions & 31 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2743,7 +2743,7 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
}
else
{
emit->emitIns_R_R(INS_mov_i2xmm, EA_PTRSIZE, srcXmmReg, srcIntReg);
emit->emitIns_R_R(INS_movd, EA_PTRSIZE, srcXmmReg, srcIntReg);
emit->emitIns_R_R(INS_punpckldq, EA_16BYTE, srcXmmReg, srcXmmReg);
#ifdef TARGET_X86
// For x86, we need one more to convert it from 8 bytes to 16 bytes.
Expand Down Expand Up @@ -5039,9 +5039,9 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
// integer and floating point registers so, let's do that.
if (call->IsVarargs() && varTypeIsFloating(argNode))
{
regNumber targetReg = compiler->getCallArgIntRegister(argNode->GetRegNum());
instruction ins = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG);
inst_RV_RV(ins, argNode->GetRegNum(), targetReg);
regNumber srcReg = argNode->GetRegNum();
regNumber targetReg = compiler->getCallArgIntRegister(argNode->GetRegNum());
inst_RV_RV(ins_Copy(srcReg, TYP_LONG), targetReg, srcReg);
}
#endif // FEATURE_VARARG
}
Expand Down Expand Up @@ -5783,9 +5783,8 @@ void CodeGen::genJmpMethod(GenTree* jmp)

if (varTypeIsFloating(loadType))
{
intArgReg = compiler->getCallArgIntRegister(argReg);
instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
inst_RV_RV(ins, argReg, intArgReg, loadType);
intArgReg = compiler->getCallArgIntRegister(argReg);
inst_RV_RV(ins_Copy(argReg, TYP_LONG), intArgReg, argReg, loadType);
}
else
{
Expand Down Expand Up @@ -5824,7 +5823,6 @@ void CodeGen::genJmpMethod(GenTree* jmp)
regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
if (remainingIntArgMask != RBM_NONE)
{
instruction insCopyIntToFloat = ins_CopyIntToFloat(TYP_LONG, TYP_DOUBLE);
GetEmitter()->emitDisableGC();
for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
{
Expand All @@ -5838,7 +5836,7 @@ void CodeGen::genJmpMethod(GenTree* jmp)

// also load it in corresponding float arg reg
regNumber floatReg = compiler->getCallArgFloatRegister(argReg);
inst_RV_RV(insCopyIntToFloat, floatReg, argReg);
inst_RV_RV(ins_Copy(argReg, TYP_DOUBLE), floatReg, argReg);
}

argOffset += REGSIZE_BYTES;
Expand Down Expand Up @@ -6591,8 +6589,9 @@ void CodeGen::genCkfinite(GenTree* treeNode)
// Copy the floating-point value to an integer register. If we copied a float to a long, then
// right-shift the value so the high 32 bits of the floating-point value sit in the low 32
// bits of the integer register.
instruction ins = ins_CopyFloatToInt(targetType, (targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG);
inst_RV_RV(ins, op1->GetRegNum(), tmpReg, targetType);
regNumber srcReg = op1->GetRegNum();
var_types targetIntType = ((targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG);
inst_RV_RV(ins_Copy(srcReg, targetIntType), tmpReg, srcReg, targetType);
Comment thread
tannergooding marked this conversation as resolved.
Outdated
if (targetType == TYP_DOUBLE)
{
// right shift by 32 bits to get to exponent.
Expand Down Expand Up @@ -6661,7 +6660,7 @@ void CodeGen::genCkfinite(GenTree* treeNode)

// Copy only the low 32 bits. This will be the high order 32 bits of the floating-point
// value, no matter the floating-point type.
inst_RV_RV(ins_CopyFloatToInt(TYP_FLOAT, TYP_INT), copyToTmpSrcReg, tmpReg, TYP_FLOAT);
inst_RV_RV(ins_Copy(copyToTmpSrcReg, TYP_INT), tmpReg, copyToTmpSrcReg, TYP_FLOAT);

// Mask exponent with all 1's and check if the exponent is all 1's
inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
Expand Down Expand Up @@ -7082,22 +7081,7 @@ void CodeGen::genBitCast(var_types targetType, regNumber targetReg, var_types sr
assert(dstFltReg == genIsValidFloatReg(targetReg));
if (srcFltReg != dstFltReg)
{
instruction ins;
regNumber fltReg;
regNumber intReg;
if (dstFltReg)
{
ins = ins_CopyIntToFloat(srcType, targetType);
fltReg = targetReg;
intReg = srcReg;
}
else
{
ins = ins_CopyFloatToInt(srcType, targetType);
intReg = targetReg;
fltReg = srcReg;
}
inst_RV_RV(ins, fltReg, intReg, targetType);
inst_RV_RV(ins_Copy(srcReg, targetType), targetReg, srcReg, targetType);
}
else if (targetReg != srcReg)
{
Expand Down Expand Up @@ -8760,9 +8744,8 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
#if FEATURE_VARARG
if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
{
regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
inst_RV_RV(ins, argReg, intArgReg, loadType);
regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
inst_RV_RV(ins_Copy(argReg, TYP_LONG), intArgReg, argReg, loadType);
}
#endif // FEATURE_VARARG
}
Expand Down
96 changes: 48 additions & 48 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,7 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr)
{
switch (ins)
{
case INS_movd: // TODO-Cleanup: replace with movq, https://github.com/dotnet/runtime/issues/47943.
case INS_andn:
case INS_bextr:
case INS_blsi:
Expand All @@ -518,8 +519,6 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr)
case INS_cvtss2si:
case INS_cvtsi2sd:
case INS_cvtsi2ss:
case INS_mov_xmm2i:
case INS_mov_i2xmm:
case INS_movnti:
case INS_mulx:
case INS_pdep:
Expand Down Expand Up @@ -1239,7 +1238,7 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
case INS_cvtsd2si:
case INS_cvtss2si:
case INS_extractps:
case INS_mov_xmm2i:
case INS_movd:
case INS_movmskpd:
case INS_movmskps:
case INS_mulx:
Expand Down Expand Up @@ -8837,15 +8836,7 @@ void emitter::emitDispIns(
case IF_RRD_RRD:
case IF_RWR_RRD:
case IF_RRW_RRD:
if (ins == INS_mov_i2xmm)
{
printf("%s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr));
}
else if (ins == INS_mov_xmm2i)
{
printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
}
else if (ins == INS_pmovmskb)
if (ins == INS_pmovmskb)
{
printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
}
Expand Down Expand Up @@ -11447,11 +11438,19 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
regNumber reg2 = id->idReg2();
emitAttr size = id->idOpSize();

// Get the 'base' opcode
code = insCodeRM(ins);
code = AddVexPrefixIfNeeded(ins, code, size);
if (IsSSEOrAVXInstruction(ins))
{
assert((ins != INS_movd) || (isFloatReg(reg1) != isFloatReg(reg2)));

if ((ins != INS_movd) || isFloatReg(reg1))
{
code = insCodeRM(ins);
}
else
{
code = insCodeMR(ins);
}
code = AddVexPrefixIfNeeded(ins, code, size);
code = insEncodeRMreg(ins, code);

if (TakesRexWPrefix(ins, size))
Expand All @@ -11461,6 +11460,9 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
}
else if ((ins == INS_movsx) || (ins == INS_movzx) || (insIsCMOV(ins)))
{
assert(hasCodeRM(ins) && !hasCodeMI(ins) && !hasCodeMR(ins));
code = insCodeRM(ins);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you confirm why we added insCodeRM() and AddVexPrefixIfNeeded() here and below?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was it missing currently?

@sandreenko sandreenko Feb 8, 2021

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we had logic like:

    // Get the 'base' opcode	
    code = insCodeRM(ins);	
    code = AddVexPrefixIfNeeded(ins, code, size);
    if ()
    { 
        code = insEncodeRMreg(ins, code); // use the existing value.
    } 
    else if ()
    {
       code = insCodeMR(ins);	//rewrite the existing value without reading
    }

so it was hard to follow, so I moved this

    code = insCodeRM(ins);	
    code = AddVexPrefixIfNeeded(ins, code, size);

under conditions where the result code is used.

see 11451 in the base.

code = AddVexPrefixIfNeeded(ins, code, size);
code = insEncodeRMreg(ins, code) | (int)(size == EA_2BYTE);
#ifdef TARGET_AMD64

Expand All @@ -11472,6 +11474,9 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
}
else if (ins == INS_movsxd)
{
assert(hasCodeRM(ins) && !hasCodeMI(ins) && !hasCodeMR(ins));
code = insCodeRM(ins);
code = AddVexPrefixIfNeeded(ins, code, size);
code = insEncodeRMreg(ins, code);

#endif // TARGET_AMD64
Expand All @@ -11480,6 +11485,9 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
else if ((ins == INS_bsf) || (ins == INS_bsr) || (ins == INS_crc32) || (ins == INS_lzcnt) || (ins == INS_popcnt) ||
(ins == INS_tzcnt))
{
assert(hasCodeRM(ins) && !hasCodeMI(ins) && !hasCodeMR(ins));
code = insCodeRM(ins);
code = AddVexPrefixIfNeeded(ins, code, size);
code = insEncodeRMreg(ins, code);
if ((ins == INS_crc32) && (size > EA_1BYTE))
{
Expand All @@ -11499,7 +11507,9 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
#endif // FEATURE_HW_INTRINSICS
else
{
code = insEncodeMRreg(ins, insCodeMR(ins));
assert(!TakesVexPrefix(ins));
code = insCodeMR(ins);
code = insEncodeMRreg(ins, code);

if (ins != INS_test)
{
Expand Down Expand Up @@ -11543,17 +11553,27 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
}
}

regNumber reg345 = REG_NA;
regNumber regFor012Bits = reg2;
regNumber regFor345Bits = REG_NA;
if (IsBMIInstruction(ins))
{
reg345 = getBmiRegNumber(ins);
regFor345Bits = getBmiRegNumber(ins);
}
if (regFor345Bits == REG_NA)
{
regFor345Bits = reg1;
}
if (reg345 == REG_NA)
if (ins == INS_movd)
{
reg345 = id->idReg1();
assert(isFloatReg(reg1) != isFloatReg(reg2));
if (isFloatReg(reg2))
{
std::swap(regFor012Bits, regFor345Bits);
}
}
unsigned regCode = insEncodeReg345(ins, reg345, size, &code);
regCode |= insEncodeReg012(ins, reg2, size, &code);

unsigned regCode = insEncodeReg345(ins, regFor345Bits, size, &code);
regCode |= insEncodeReg012(ins, regFor012Bits, size, &code);

if (TakesVexPrefix(ins))
{
Expand Down Expand Up @@ -11648,7 +11668,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
}
}

emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
emitGCregLiveUpd(id->idGCref(), reg1, dst);
break;

case IF_RRW_RRD:
Expand All @@ -11668,13 +11688,13 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)

*/
case INS_xor:
assert(id->idReg1() == id->idReg2());
emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
assert(reg1 == reg2);
emitGCregLiveUpd(id->idGCref(), reg1, dst);
break;

case INS_or:
case INS_and:
emitGCregDeadUpd(id->idReg1(), dst);
emitGCregDeadUpd(reg1, dst);
break;

case INS_add:
Expand All @@ -11691,7 +11711,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub)));
#endif
// Mark r1 as holding a byref
emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
emitGCregLiveUpd(GCT_BYREF, reg1, dst);
break;

default:
Expand Down Expand Up @@ -11773,15 +11793,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
case IF_RWR_RRD:
case IF_RRW_RRD:
case IF_RWR_RRD_RRD:
// INS_movxmm2i writes to reg2.
if (ins == INS_mov_xmm2i)
{
emitGCregDeadUpd(id->idReg2(), dst);
}
else
{
emitGCregDeadUpd(id->idReg1(), dst);
}
emitGCregDeadUpd(reg1, dst);
break;

default:
Expand Down Expand Up @@ -14681,18 +14693,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
result.insThroughput = PERFSCORE_THROUGHPUT_25C;
break;

case INS_mov_xmm2i:

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like these perfscores and the ones used for INS_movd are tracked slightly differently?

// movd reg, xmm
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency = PERFSCORE_LATENCY_2C;
break;

case INS_mov_i2xmm:
// movd xmm, reg
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency = PERFSCORE_LATENCY_1C;
break;

case INS_movd:
if (memAccessKind == PERFSCORE_MEMORY_NONE)
{
Expand Down
Loading