Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
04875d6
Add AvxVnni.V512 hardware intrinsics
May 19, 2026
0d45e79
Address AvxVnni.V512 review feedback
May 19, 2026
a15d56a
Address remaining AvxVnni.V512 review suggestions
May 19, 2026
cb29856
Address AvxVnni.V512 refinements
May 19, 2026
29530fd
Use unaligned copy for AvxVnni.V512 test vectors
May 19, 2026
218e9e9
Use Vector512.Create in AvxVnni.V512 sample
May 19, 2026
4e83c38
Clarify AvxVnni.V512 config and feature bits
May 19, 2026
dfd9b4b
Allow EVEX VNNI with AvxVnni.V512
May 20, 2026
ab02890
Avoid reverse implication for AvxVnni.V512
May 20, 2026
2c67a32
Address AvxVnni.V512 review refinements
May 20, 2026
d0975f4
Address AvxVnni.V512 review comments
May 21, 2026
1692c7c
Merge branch 'main' into feature/avxvnni.v512
jamesburton May 21, 2026
3dd8111
Address AvxVnni.V512 review feedback
May 21, 2026
d7e6efb
Merge remote-tracking branch 'upstream/main' into feature/avxvnni.v512
May 25, 2026
48d000f
@
May 26, 2026
e554e99
Merge remote-tracking branch 'upstream/main' into feature/avxvnni.v512
May 29, 2026
beac7ea
Address post-merge AvxVnni.V512 review feedback
May 29, 2026
560dfd1
Merge remote-tracking branch 'upstream/main' into feature/avxvnni.v512
May 29, 2026
156f34f
Address Copilot review feedback (post-3rd-merge)
Jun 2, 2026
7347487
Merge remote-tracking branch 'upstream/main' into feature/avxvnni.v512
Jun 2, 2026
ebddfff
Fold AvxVnni.V512 under AVX512v3 grouping per maintainer review
Jun 2, 2026
91cdc6c
Address review nits: restore VNNI in config comments, drop unused NoR…
Jun 8, 2026
e4a25f1
Cover AvxVnni.V512 in CpuId + SmokeTests, enable AVXVNNI via AVX512v3
Jun 8, 2026
fdbb545
Re-comment Avx512Bitalg / Avx512Vpopcntdq checks — types don't exist …
Jun 8, 2026
30bb860
Respect AVX512v3 config when enabling AVXVNNI via the v3 fallback
Jun 8, 2026
d322db3
Move AvxVnni v3-fallback to lookupInstructionSet per maintainer review
Jun 9, 2026
85e6adf
Merge remote-tracking branch 'upstream/main' into feature/avxvnni.v512
jamesburton Jun 10, 2026
1c62727
Merge remote-tracking branch 'upstream/main' into feature/avxvnni.v512
jamesburton Jun 15, 2026
2cb06c7
Wire V512VersionOfIsa for AvxVnni / AVX512v3 with defensive assert
jamesburton Jun 15, 2026
700e933
Remove over-aggressive V512 dispatch assert
jamesburton Jun 15, 2026
80cb14c
Extend NativeAOT lowering assert for NI_AVX512v3_MultiplyWideningAndAdd
jamesburton Jun 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -675,13 +675,13 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntri
#if defined(TARGET_AMD64) || defined(TARGET_X86)
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX, W("EnableAVX"), 1, "Allows AVX and dependent hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX2, W("EnableAVX2"), 1, "Allows AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE and dependent hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512, W("EnableAVX512"), 1, "Allows AVX512 F+BW+CD+DQ+VL and depdendent hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512BMM, W("EnableAVX512BMM"), 1, "Allows AVX512BMM and depdendent hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512, W("EnableAVX512"), 1, "Allows AVX512 F+BW+CD+DQ+VL and dependent hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512BMM, W("EnableAVX512BMM"), 1, "Allows AVX512BMM and dependent hardware intrinsics to be disabled")

RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512v2, W("EnableAVX512v2"), 1, "Allows AVX512 IFMA+VBMI and depdendent hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512v3, W("EnableAVX512v3"), 1, "Allows AVX512 BITALG+VBMI2+VNNI+VPOPCNTDQ and depdendent hardware intrinsics to be disabled")
Comment thread
tannergooding marked this conversation as resolved.
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX10v1, W("EnableAVX10v1"), 1, "Allows AVX10v1 and depdendent hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX10v2, W("EnableAVX10v2"), 0, "Allows AVX10v2 and depdendent hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512v2, W("EnableAVX512v2"), 1, "Allows AVX512 IFMA+VBMI and dependent hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512v3, W("EnableAVX512v3"), 1, "Allows AVX512 BITALG+VBMI2+VPOPCNTDQ+VNNI and dependent hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX10v1, W("EnableAVX10v1"), 1, "Allows AVX10v1 and dependent hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX10v2, W("EnableAVX10v2"), 0, "Allows AVX10v2 and dependent hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAPX, W("EnableAPX"), 0, "Allows APX and dependent features to be disabled")

RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAES, W("EnableAES"), 1, "Allows AES, PCLMULQDQ, and dependent hardware intrinsics to be disabled")
Expand Down
10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@

#include <minipal/guid.h>

constexpr GUID JITEEVersionIdentifier = { /* 2fa4c0dd-1b2b-4c3f-8a88-cc7b79c4667f */
0x2fa4c0dd,
0x1b2b,
0x4c3f,
{0x8a, 0x88, 0xcc, 0x7b, 0x79, 0xc4, 0x66, 0x7f}
constexpr GUID JITEEVersionIdentifier = { /* 21bf6983-fc9b-4d33-8583-d5b90a7ea60b */
0x21bf6983,
0xfc9b,
0x4d33,
{0x85, 0x83, 0xd5, 0xb9, 0x0a, 0x7e, 0xa6, 0x0b}
};

#endif // JIT_EE_VERSIONING_GUID_H
2 changes: 2 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -902,6 +902,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)

case NI_AVXVNNI_MultiplyWideningAndAdd:
case NI_AVXVNNI_MultiplyWideningAndAddSaturate:
case NI_AVX512v3_MultiplyWideningAndAdd:
case NI_AVX512v3_MultiplyWideningAndAddSaturate:
case NI_AVX512BMM_BitMultiplyMatrix16x16WithOrReduction:
case NI_AVX512BMM_BitMultiplyMatrix16x16WithXorReduction:
{
Expand Down
6 changes: 4 additions & 2 deletions src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -1066,13 +1066,15 @@ HARDWARE_INTRINSIC(AVX512v2, PermuteVar64x8x2,
// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// Intrinsics for AVX512-BITALG, AVX512-VBMI2, AVX512-VPOPCNTDQ
// Intrinsics for AVX512-BITALG, AVX512-VBMI2, AVX512-VPOPCNTDQ, AVX512-VNNI
#define FIRST_NI_AVX512v3 NI_AVX512v3_Compress
HARDWARE_INTRINSIC(AVX512v3, Compress, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(AVX512v3, CompressStore, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryStore, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(AVX512v3, Expand, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(AVX512v3, ExpandLoad, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_MemoryLoad, HW_Flag_InvalidNodeId)
#define LAST_NI_AVX512v3 NI_AVX512v3_ExpandLoad
HARDWARE_INTRINSIC(AVX512v3, MultiplyWideningAndAdd, 64, 3, {INS_invalid, INS_vpdpbusd, INS_vpdpwssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(AVX512v3, MultiplyWideningAndAddSaturate, 64, 3, {INS_invalid, INS_vpdpbusds, INS_vpdpwssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 5, -1, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg)
#define LAST_NI_AVX512v3 NI_AVX512v3_MultiplyWideningAndAddSaturate
Comment thread
jamesburton marked this conversation as resolved.

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions IntCost FltCost Category Flags
Expand Down
22 changes: 21 additions & 1 deletion src/coreclr/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,19 @@ static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa)
return InstructionSet_AVXVNNIINT_V512;
}

case InstructionSet_AVXVNNI:
case InstructionSet_AVX512v3:
{
// AvxVnni.V512 lifts under AVX512v3, which carries the EVEX-encoded
// VPDPBUSD / VPDPWSSD on ZMM. The class-name dispatch in
// lookupInstructionSet has already chosen between AVXVNNI and AVX512v3
// based on the available CPUID bits, and the caller's downstream
// compSupportsHWIntrinsic(InstructionSet_AVX512v3) check gates the
// result correctly: on machines that have VEX-encoded AVX-VNNI but no
// AVX-512 (e.g. Alder Lake) AVX512v3 isn't supported and IsSupported
// returns false.
return InstructionSet_AVX512v3;
}

default:
{
return InstructionSet_NONE;
Expand Down Expand Up @@ -269,7 +282,14 @@ CORINFO_InstructionSet Compiler::lookupInstructionSet(const char* className)
{
if (className[7] == '\0')
{
return InstructionSet_AVXVNNI;
if (compSupportsHWIntrinsic(InstructionSet_AVXVNNI))
{
return InstructionSet_AVXVNNI;
}
else
{
return InstructionSet_AVX512v3;
}
Comment thread
jamesburton marked this conversation as resolved.
}
Comment on lines 282 to 293
else if (strncmp(className + 7, "Int", 3) == 0)
{
Expand Down
12 changes: 6 additions & 6 deletions src/coreclr/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -414,13 +414,13 @@ RELEASE_CONFIG_INTEGER(EnableHWIntrinsic, "EnableHWIntrinsic",
#if defined(TARGET_AMD64) || defined(TARGET_X86)
RELEASE_CONFIG_INTEGER(EnableAVX, "EnableAVX", 1) // Allows AVX and dependent hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableAVX2, "EnableAVX2", 1) // Allows AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE and dependent hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableAVX512, "EnableAVX512", 1) // Allows AVX512 F+BW+CD+DQ+VL and depdendent hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableAVX512BMM, "EnableAVX512BMM", 1) // Allows AVX10v2 and depdendent hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableAVX512, "EnableAVX512", 1) // Allows AVX512 F+BW+CD+DQ+VL and dependent hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableAVX512BMM, "EnableAVX512BMM", 1) // Allows AVX512BMM and dependent hardware intrinsics to be disabled

RELEASE_CONFIG_INTEGER(EnableAVX512v2, "EnableAVX512v2", 1) // Allows AVX512 IFMA+VBMI and depdendent hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableAVX512v3, "EnableAVX512v3", 1) // Allows AVX512 BITALG+VBMI2+VNNI+VPOPCNTDQ and depdendent hardware intrinsics to be disabled
Comment thread
tannergooding marked this conversation as resolved.
RELEASE_CONFIG_INTEGER(EnableAVX10v1, "EnableAVX10v1", 1) // Allows AVX10v1 and depdendent hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableAVX10v2, "EnableAVX10v2", 0) // Allows AVX10v2 and depdendent hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableAVX512v2, "EnableAVX512v2", 1) // Allows AVX512 IFMA+VBMI and dependent hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableAVX512v3, "EnableAVX512v3", 1) // Allows AVX512 BITALG+VBMI2+VPOPCNTDQ+VNNI and dependent hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableAVX10v1, "EnableAVX10v1", 1) // Allows AVX10v1 and dependent hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableAVX10v2, "EnableAVX10v2", 0) // Allows AVX10v2 and dependent hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableAPX, "EnableAPX", 0) // Allows APX and dependent features to be disabled

RELEASE_CONFIG_INTEGER(EnableAES, "EnableAES", 1) // Allows AES, PCLMULQDQ, and dependent hardware intrinsics to be disabled
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10247,6 +10247,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
default:
{
assert((intrinsicId == NI_X86Base_DivRem) || (intrinsicId == NI_X86Base_X64_DivRem) ||
(intrinsicId == NI_AVX512v3_MultiplyWideningAndAdd) ||
(intrinsicId == NI_AVX512v3_MultiplyWideningAndAddSaturate) ||
(intrinsicId >= FIRST_NI_AVXVNNI && intrinsicId <= LAST_NI_AVXVNNIINT_V512));
TryMakeSrcContainedOrRegOptional(node, op3);
break;
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2748,6 +2748,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou

case NI_AVXVNNI_MultiplyWideningAndAdd:
case NI_AVXVNNI_MultiplyWideningAndAddSaturate:
case NI_AVX512v3_MultiplyWideningAndAdd:
case NI_AVX512v3_MultiplyWideningAndAddSaturate:
case NI_AVXVNNIINT_MultiplyWideningAndAdd:
case NI_AVXVNNIINT_MultiplyWideningAndAddSaturate:
case NI_AVXVNNIINT_V512_MultiplyWideningAndAdd:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ private static class XArchIntrinsicConstants
public const int Vaes = (1 << 15);
public const int WaitPkg = (1 << 16);
public const int X86Serialize = (1 << 17);
Comment thread
jamesburton marked this conversation as resolved.
public const int Avx512Bmm = (1 << 18); // NativeAOT does not currently consume this here.

public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags)
{
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/tools/Common/InstructionSetHelpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v1");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v2");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnniint_v512");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avxvnni_v512");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512vp2intersect");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("aes_v512");
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni_v512");
Expand Down
Loading
Loading