#if FLOAT_SIZE == 4 && defined(__SSE__)
# if VEC_SIZE == 32 && defined(__AVX__)
-# define broadcast(x) ({ float t_ = (x); __builtin_ia32_vbroadcastss256(&t_); })
+# if defined(__AVX2__)
+# define broadcast(x) \
+ __builtin_ia32_vbroadcastss_ps256((float __attribute__((vector_size(16)))){ x })
+# else
+# define broadcast(x) ({ float t_ = (x); __builtin_ia32_vbroadcastss256(&t_); })
+# endif
# define max(x, y) __builtin_ia32_maxps256(x, y)
# define min(x, y) __builtin_ia32_minps256(x, y)
# define recip(x) __builtin_ia32_rcpps256(x)
__builtin_ia32_vperm2f128_ps256(t_, t_, 0b00000001); \
})
# elif VEC_SIZE == 16
-# ifdef __AVX__
+# if defined(__AVX2__)
+# define broadcast(x) __builtin_ia32_vbroadcastss_ps((vec_t){ x })
+# elif defined(__AVX__)
# define broadcast(x) ({ float t_ = (x); __builtin_ia32_vbroadcastss(&t_); })
# endif
# define interleave_hi(x, y) __builtin_ia32_unpckhps(x, y)
# endif
#elif FLOAT_SIZE == 8 && defined(__SSE2__)
# if VEC_SIZE == 32 && defined(__AVX__)
-# define broadcast(x) ({ double t_ = (x); __builtin_ia32_vbroadcastsd256(&t_); })
+# if defined(__AVX2__)
+# define broadcast(x) \
+ __builtin_ia32_vbroadcastsd_pd256((double __attribute__((vector_size(16)))){ x })
+# else
+# define broadcast(x) ({ double t_ = (x); __builtin_ia32_vbroadcastsd256(&t_); })
+# endif
# define max(x, y) __builtin_ia32_maxpd256(x, y)
# define min(x, y) __builtin_ia32_minpd256(x, y)
# define recip(x) ({ \
fic.insn_bytes = PFX_BYTES + 3;
break;
- case X86EMUL_OPC_VEX_66(0x0f38, 0x19): /* vbroadcastsd m64,ymm */
+ case X86EMUL_OPC_VEX_66(0x0f38, 0x19): /* vbroadcastsd xmm/m64,ymm */
case X86EMUL_OPC_VEX_66(0x0f38, 0x1a): /* vbroadcastf128 m128,ymm */
generate_exception_if(!vex.l, EXC_UD);
/* fall through */
- case X86EMUL_OPC_VEX_66(0x0f38, 0x18): /* vbroadcastss m32,{x,y}mm */
- generate_exception_if(ea.type != OP_MEM, EXC_UD);
+ case X86EMUL_OPC_VEX_66(0x0f38, 0x18): /* vbroadcastss xmm/m32,{x,y}mm */
+ if ( ea.type != OP_MEM )
+ {
+ generate_exception_if(b & 2, EXC_UD);
+ host_and_vcpu_must_have(avx2);
+ }
/* fall through */
case X86EMUL_OPC_VEX_66(0x0f38, 0x0c): /* vpermilps {x,y}mm/mem,{x,y}mm,{x,y}mm */
case X86EMUL_OPC_VEX_66(0x0f38, 0x0d): /* vpermilpd {x,y}mm/mem,{x,y}mm,{x,y}mm */