x86emul: extend vbroadcasts{s,d} to AVX2
author Jan Beulich <jbeulich@suse.com>
Mon, 5 Mar 2018 15:20:46 +0000 (16:20 +0100)
committer Jan Beulich <jbeulich@suse.com>
Mon, 5 Mar 2018 15:20:46 +0000 (16:20 +0100)
These gain register forms now.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
tools/tests/x86_emulator/simd.c
xen/arch/x86/x86_emulate/x86_emulate.c

index dd45095b9a888486dda324308d7debde2da8380b..81a9396b5d30ac31b747ee05c855469e90127cb1 100644 (file)
@@ -70,7 +70,12 @@ static inline bool _to_bool(byte_vec_t bv)
 
 #if FLOAT_SIZE == 4 && defined(__SSE__)
 # if VEC_SIZE == 32 && defined(__AVX__)
-#  define broadcast(x) ({ float t_ = (x); __builtin_ia32_vbroadcastss256(&t_); })
+#  if defined(__AVX2__)
+#   define broadcast(x) \
+    __builtin_ia32_vbroadcastss_ps256((float __attribute__((vector_size(16)))){ x })
+#  else
+#   define broadcast(x) ({ float t_ = (x); __builtin_ia32_vbroadcastss256(&t_); })
+#  endif
 #  define max(x, y) __builtin_ia32_maxps256(x, y)
 #  define min(x, y) __builtin_ia32_minps256(x, y)
 #  define recip(x) __builtin_ia32_rcpps256(x)
@@ -85,7 +90,9 @@ static inline bool _to_bool(byte_vec_t bv)
     __builtin_ia32_vperm2f128_ps256(t_, t_, 0b00000001); \
 })
 # elif VEC_SIZE == 16
-#  ifdef __AVX__
+#  if defined(__AVX2__)
+#   define broadcast(x) __builtin_ia32_vbroadcastss_ps((vec_t){ x })
+#  elif defined(__AVX__)
 #   define broadcast(x) ({ float t_ = (x); __builtin_ia32_vbroadcastss(&t_); })
 #  endif
 #  define interleave_hi(x, y) __builtin_ia32_unpckhps(x, y)
@@ -106,7 +113,12 @@ static inline bool _to_bool(byte_vec_t bv)
 # endif
 #elif FLOAT_SIZE == 8 && defined(__SSE2__)
 # if VEC_SIZE == 32 && defined(__AVX__)
-#  define broadcast(x) ({ double t_ = (x); __builtin_ia32_vbroadcastsd256(&t_); })
+#  if defined(__AVX2__)
+#   define broadcast(x) \
+    __builtin_ia32_vbroadcastsd_pd256((double __attribute__((vector_size(16)))){ x })
+#  else
+#   define broadcast(x) ({ double t_ = (x); __builtin_ia32_vbroadcastsd256(&t_); })
+#  endif
 #  define max(x, y) __builtin_ia32_maxpd256(x, y)
 #  define min(x, y) __builtin_ia32_minpd256(x, y)
 #  define recip(x) ({ \
index 02c79914dd59efc43616ea32f6bc820c4545bba1..ad347cc4e83d0f6c592afc9532c060907e837f5d 100644 (file)
@@ -7026,12 +7026,16 @@ x86_emulate(
         fic.insn_bytes = PFX_BYTES + 3;
         break;
 
-    case X86EMUL_OPC_VEX_66(0x0f38, 0x19): /* vbroadcastsd m64,ymm */
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x19): /* vbroadcastsd xmm/m64,ymm */
     case X86EMUL_OPC_VEX_66(0x0f38, 0x1a): /* vbroadcastf128 m128,ymm */
         generate_exception_if(!vex.l, EXC_UD);
         /* fall through */
-    case X86EMUL_OPC_VEX_66(0x0f38, 0x18): /* vbroadcastss m32,{x,y}mm */
-        generate_exception_if(ea.type != OP_MEM, EXC_UD);
+    case X86EMUL_OPC_VEX_66(0x0f38, 0x18): /* vbroadcastss xmm/m32,{x,y}mm */
+        if ( ea.type != OP_MEM )
+        {
+            generate_exception_if(b & 2, EXC_UD);
+            host_and_vcpu_must_have(avx2);
+        }
         /* fall through */
     case X86EMUL_OPC_VEX_66(0x0f38, 0x0c): /* vpermilps {x,y}mm/mem,{x,y}mm,{x,y}mm */
     case X86EMUL_OPC_VEX_66(0x0f38, 0x0d): /* vpermilpd {x,y}mm/mem,{x,y}mm,{x,y}mm */