x86/cpuid: Add AVX512_4VNNIW and AVX512_4FMAPS support
author: He Chen <he.chen@linux.intel.com>
Mon, 21 Nov 2016 06:01:14 +0000 (14:01 +0800)
committer: Andrew Cooper <andrew.cooper3@citrix.com>
Fri, 2 Dec 2016 12:56:38 +0000 (12:56 +0000)
Add support for two new AVX512 subfeatures for guests.

AVX512_4VNNIW:
Vector instructions for deep learning enhanced word variable precision.

AVX512_4FMAPS:
Vector instructions for deep learning floating-point single precision.

Signed-off-by: Luwei Kang <luwei.kang@intel.com>
Signed-off-by: He Chen <he.chen@linux.intel.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Wei Liu <wei.liu2@citrix.com>
tools/libxc/xc_cpuid_x86.c
tools/misc/xen-cpuid.c
xen/arch/x86/cpu/common.c
xen/arch/x86/cpuid.c
xen/arch/x86/hvm/hvm.c
xen/arch/x86/traps.c
xen/include/asm-x86/cpuid.h
xen/include/public/arch-x86/cpufeatureset.h
xen/tools/gen-cpuid.py

index 2ad9aebfce66689369215174c1579264c951132e..e9e3691366e313a3cfd7cb5b99b8a9a88e8c389f 100644 (file)
@@ -547,13 +547,15 @@ static void xc_cpuid_hvm_policy(xc_interface *xch,
         {
             regs[1] = info->featureset[featureword_of(X86_FEATURE_FSGSBASE)];
             regs[2] = info->featureset[featureword_of(X86_FEATURE_PREFETCHWT1)];
+            regs[3] = info->featureset[featureword_of(X86_FEATURE_AVX512_4VNNIW)];
         }
         else
         {
             regs[1] = 0;
             regs[2] = 0;
+            regs[3] = 0;
         }
-        regs[0] = regs[3] = 0;
+        regs[0] = 0;
         break;
 
     case 0x0000000d:
@@ -638,13 +640,15 @@ static void xc_cpuid_pv_policy(xc_interface *xch,
         {
             regs[1] = info->featureset[featureword_of(X86_FEATURE_FSGSBASE)];
             regs[2] = info->featureset[featureword_of(X86_FEATURE_PREFETCHWT1)];
+            regs[3] = info->featureset[featureword_of(X86_FEATURE_AVX512_4VNNIW)];
         }
         else
         {
             regs[1] = 0;
             regs[2] = 0;
+            regs[3] = 0;
         }
-        regs[0] = regs[3] = 0;
+        regs[0] = 0;
         break;
 
     case 0x0000000d:
index 44991f6329d49c165373f4157e7eda664484e354..5d66e9496da43a62cea6dda66953086d15c983b4 100644 (file)
@@ -143,6 +143,15 @@ static const char *str_e8b[32] =
     [1 ... 31] = "REZ",
 };
 
+static const char *str_7d0[32] =
+{
+    [0 ... 1] = "REZ",
+
+    [ 2] = "avx512_4vnniw", [ 3] = "avx512_4fmaps",
+
+    [4 ... 31] = "REZ",
+};
+
 static struct {
     const char *name;
     const char *abbr;
@@ -158,6 +167,7 @@ static struct {
     { "0x00000007:0.ecx", "7c0", str_7c0 },
     { "0x80000007.edx",   "e7d", str_e7d },
     { "0x80000008.ebx",   "e8b", str_e8b },
+    { "0x00000007:0.edx", "7d0", str_7d0 },
 };
 
 #define COL_ALIGN "18"
index 1d78ab4d8468ef5a8102e0a60c15170d4c658c41..f771287d51b4b89722c4fa4eacfb9e4a9a6e92b0 100644 (file)
@@ -325,7 +325,7 @@ static void generic_identify(struct cpuinfo_x86 *c)
                cpuid_count(0x00000007, 0, &tmp,
                            &c->x86_capability[cpufeat_word(X86_FEATURE_FSGSBASE)],
                            &c->x86_capability[cpufeat_word(X86_FEATURE_PKU)],
-                           &tmp);
+                           &c->x86_capability[cpufeat_word(X86_FEATURE_AVX512_4VNNIW)]);
 }
 
 /*
index 63b2db99b86475deac6822946c2c1356d36d9d74..3e85a63c4132f66b7ac16b69cc0c615ba22f277d 100644 (file)
@@ -78,7 +78,7 @@ static void __init calculate_raw_featureset(void)
         cpuid_count(0x7, 0, &tmp,
                     &raw_featureset[FEATURESET_7b0],
                     &raw_featureset[FEATURESET_7c0],
-                    &tmp);
+                    &raw_featureset[FEATURESET_7d0]);
     if ( max >= 0xd )
         cpuid_count(0xd, 1,
                     &raw_featureset[FEATURESET_Da1],
index 25dc759ca9774e3b60e798fdcde24360db751207..9dace73c775ac0381249ccb085ef78cda9ad9c2f 100644 (file)
@@ -3525,6 +3525,7 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
                      special_features[FEATURESET_7b0]);
 
             *ecx &= hvm_featureset[FEATURESET_7c0];
+            *edx &= hvm_featureset[FEATURESET_7d0];
 
             /* Don't expose HAP-only features to non-hap guests. */
             if ( !hap_enabled(d) )
index b4642118287363d89c051d02a78c264a4605314a..2121474004265ad68ccdcafe1def289d570e616c 100644 (file)
@@ -1133,6 +1133,7 @@ void pv_cpuid(struct cpu_user_regs *regs)
                   special_features[FEATURESET_7b0]);
 
             c &= pv_featureset[FEATURESET_7c0];
+            d &= pv_featureset[FEATURESET_7d0];
 
             if ( !is_pvh_domain(currd) )
             {
@@ -1147,8 +1148,8 @@ void pv_cpuid(struct cpu_user_regs *regs)
             }
         }
         else
-            b = c = 0;
-        a = d = 0;
+            b = c = d = 0;
+        a = 0;
         break;
 
     case XSTATE_CPUID:
index 2372474791bdc9881a6bf0d254ace206ce560b7d..ec8bbb5378e1d322c0ca863592dcf7c11211444a 100644 (file)
@@ -17,6 +17,7 @@
 #define FEATURESET_7c0    6 /* 0x00000007:0.ecx    */
 #define FEATURESET_e7d    7 /* 0x80000007.edx      */
 #define FEATURESET_e8b    8 /* 0x80000008.ebx      */
+#define FEATURESET_7d0    9 /* 0x00000007:0.edx    */
 
 #ifndef __ASSEMBLY__
 #include <xen/types.h>
index 9320c9e97c77afd00960d208d988934dd1d5770c..565ccd55a98a378c307b69184919be924e8e94ee 100644 (file)
@@ -234,6 +234,10 @@ XEN_CPUFEATURE(EFRO,          7*32+10) /*   APERF/MPERF Read Only interface */
 /* AMD-defined CPU features, CPUID level 0x80000008.ebx, word 8 */
 XEN_CPUFEATURE(CLZERO,        8*32+ 0) /*A  CLZERO instruction */
 
+/* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */
+XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */
+XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single Precision */
+
 #endif /* XEN_CPUFEATURE */
 
 /* Clean up from a default include.  Close the enum (for C). */
index 005cad9b43029ec3a5345c2738bdd5031dd18195..c29f1d3a5e4063a1531bd8c1a3fe3681865b3502 100755 (executable)
@@ -253,7 +253,7 @@ def crunch_numbers(state):
         # 512bit registers, and the instructions themselves. All further AVX512 features
         # are built on top of AVX512F
         AVX512F: [AVX512DQ, AVX512IFMA, AVX512PF, AVX512ER, AVX512CD,
-                  AVX512BW, AVX512VL, AVX512VBMI],
+                  AVX512BW, AVX512VL, AVX512VBMI, AVX512_4VNNIW, AVX512_4FMAPS],
     }
 
     deep_features = tuple(sorted(deps.keys()))