INSN(cvtdq2pd, f3, 0f, e6, vl_2, d, vl),
INSN(cvtdq2ps, , 0f, 5b, vl, d, vl),
INSN(cvtpd2dq, f2, 0f, e6, vl, q, vl),
INSN(cvtpd2ps, 66, 0f, 5a, vl, q, vl),
+ INSN(cvtpd2udq, , 0f, 79, vl, q, vl),
INSN(cvtph2ps, 66, 0f38, 13, vl_2, d_nb, vl),
INSN(cvtps2dq, 66, 0f, 5b, vl, d, vl),
INSN(cvtps2pd, , 0f, 5a, vl_2, d, vl),
INSN(cvtps2ph, 66, 0f3a, 1d, vl_2, d_nb, vl),
+ INSN(cvtps2udq, , 0f, 79, vl, d, vl),
INSN(cvtsd2si, f2, 0f, 2d, el, q, el),
INSN(cvtsd2ss, f2, 0f, 5a, el, q, el),
+ INSN(cvtsd2usi, f2, 0f, 79, el, q, el),
INSN(cvtsi2sd, f2, 0f, 2a, el, dq64, el),
INSN(cvtsi2ss, f3, 0f, 2a, el, dq64, el),
INSN(cvtss2sd, f3, 0f, 5a, el, d, el),
INSN(cvtss2si, f3, 0f, 2d, el, d, el),
+ INSN(cvtss2usi, f3, 0f, 79, el, d, el),
INSN(cvttpd2dq, 66, 0f, e6, vl, q, vl),
+ INSN(cvttpd2udq, , 0f, 78, vl, q, vl),
INSN(cvttps2dq, f3, 0f, 5b, vl, d, vl),
+ INSN(cvttps2udq, , 0f, 78, vl, d, vl),
INSN(cvttsd2si, f2, 0f, 2c, el, q, el),
+ INSN(cvttsd2usi, f2, 0f, 78, el, q, el),
INSN(cvttss2si, f3, 0f, 2c, el, d, el),
+ INSN(cvttss2usi, f3, 0f, 78, el, d, el),
INSN(cvtudq2pd, f3, 0f, 7a, vl_2, d, vl),
INSN(cvtudq2ps, f2, 0f, 7a, vl, d, vl),
INSN(cvtusi2sd, f2, 0f, 7b, el, dq64, el),
INSN_PFP(andn, 0f, 55),
INSN(broadcasti32x2, 66, 0f38, 59, el_2, d, vl),
INSN(cvtpd2qq, 66, 0f, 7b, vl, q, vl),
+ INSN(cvtpd2uqq, 66, 0f, 79, vl, q, vl),
INSN(cvtps2qq, 66, 0f, 7b, vl_2, d, vl),
+ INSN(cvtps2uqq, 66, 0f, 79, vl_2, d, vl),
INSN(cvtqq2pd, f3, 0f, e6, vl, q, vl),
INSN(cvtqq2ps, , 0f, 5b, vl, q, vl),
INSN(cvttpd2qq, 66, 0f, 7a, vl, q, vl),
+ INSN(cvttpd2uqq, 66, 0f, 78, vl, q, vl),
INSN(cvttps2qq, 66, 0f, 7a, vl_2, d, vl),
+ INSN(cvttps2uqq, 66, 0f, 78, vl_2, d, vl),
INSN(cvtuqq2pd, f3, 0f, 7a, vl, q, vl),
INSN(cvtuqq2ps, f2, 0f, 7a, vl, q, vl),
INSN_PFP(or, 0f, 56),
# ifdef __x86_64__
# define to_wint(x) ({ long l_ = (x)[0]; touch(l_); ((vec_t){ l_ }); })
# endif
+# ifdef __AVX512F__
+/*
+ * Sadly even gcc 9.x, at the time of writing, does not emit vcvtusi2s{s,d}
+ * for (at least) uint -> FP conversions, so we need to use builtins
+ * or inline assembly here. The full-vector parameter types of the builtins
+ * aren't very helpful for our purposes, so use inline assembly.
+ */
+# if FLOAT_SIZE == 4
+# define to_u_int(type, x) ({ \
+ unsigned type u_; \
+ float __attribute__((vector_size(16))) t_; \
+ asm ( "vcvtss2usi %1, %0" : "=r" (u_) : "m" ((x)[0]) ); \
+ asm ( "vcvtusi2ss%z1 %1, %0, %0" : "=v" (t_) : "m" (u_) ); \
+ (vec_t){ t_[0] }; \
+})
+# elif FLOAT_SIZE == 8
+# define to_u_int(type, x) ({ \
+ unsigned type u_; \
+ double __attribute__((vector_size(16))) t_; \
+ asm ( "vcvtsd2usi %1, %0" : "=r" (u_) : "m" ((x)[0]) ); \
+ asm ( "vcvtusi2sd%z1 %1, %0, %0" : "=v" (t_) : "m" (u_) ); \
+ (vec_t){ t_[0] }; \
+})
+# endif
+# define to_uint(x) to_u_int(int, x)
+# ifdef __x86_64__
+# define to_uwint(x) to_u_int(long, x)
+# endif
+# endif
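+/*
+ * For illustration only, a plain-C sketch of the round trip to_u_int()
+ * expresses (which, per the comment above, gcc wouldn't translate into
+ * the vcvt*usi* insns):
+ *
+ *     unsigned type u_ = (x)[0];
+ *     touch(u_);
+ *     (vec_t){ u_ };
+ *
+ * Note though that the C conversion to unsigned truncates, whereas the
+ * vcvts{s,d}2usi forms used above round according to MXCSR.
+ */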
#elif VEC_SIZE == 8 && FLOAT_SIZE == 4 && defined(__3dNOW__)
# define to_int(x) __builtin_ia32_pi2fd(__builtin_ia32_pf2id(x))
#elif defined(FLOAT_SIZE) && VEC_SIZE > FLOAT_SIZE && defined(__AVX512F__) && \
(VEC_SIZE == 64 || defined(__AVX512VL__))
# if FLOAT_SIZE == 4
# define to_int(x) BR(cvtdq2ps, _mask, BR(cvtps2dq, _mask, x, (vsi_t)undef(), ~0), undef(), ~0)
+# define to_uint(x) BR(cvtudq2ps, _mask, BR(cvtps2udq, _mask, x, (vsi_t)undef(), ~0), undef(), ~0)
# ifdef __AVX512DQ__
-# define to_wint(x) ({ \
+# define to_w_int(x, s) ({ \
vsf_half_t t_ = low_half(x); \
vdi_t lo_, hi_; \
touch(t_); \
- lo_ = BR(cvtps2qq, _mask, t_, (vdi_t)undef(), ~0); \
+ lo_ = BR(cvtps2 ## s ## qq, _mask, t_, (vdi_t)undef(), ~0); \
t_ = high_half(x); \
touch(t_); \
- hi_ = BR(cvtps2qq, _mask, t_, (vdi_t)undef(), ~0); \
+ hi_ = BR(cvtps2 ## s ## qq, _mask, t_, (vdi_t)undef(), ~0); \
touch(lo_); touch(hi_); \
insert_half(insert_half(undef(), \
- BR(cvtqq2ps, _mask, lo_, (vsf_half_t){}, ~0), 0), \
- BR(cvtqq2ps, _mask, hi_, (vsf_half_t){}, ~0), 1); \
+ BR(cvt ## s ## qq2ps, _mask, lo_, (vsf_half_t){}, ~0), 0), \
+ BR(cvt ## s ## qq2ps, _mask, hi_, (vsf_half_t){}, ~0), 1); \
})
+# define to_wint(x) to_w_int(x, )
+# define to_uwint(x) to_w_int(x, u)
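+/*
+ * Expansion sketch: the "s" argument gets token-pasted into the builtin
+ * names, so to_w_int(x, ) yields the signed cvtps2qq / cvtqq2ps builtin
+ * invocations while to_w_int(x, u) yields the unsigned cvtps2uqq /
+ * cvtuqq2ps ones.
+ */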
# endif
# elif FLOAT_SIZE == 8
# define to_int(x) B(cvtdq2pd, _mask, BR(cvtpd2dq, _mask, x, (vsi_half_t){}, ~0), undef(), ~0)
+# define to_uint(x) B(cvtudq2pd, _mask, BR(cvtpd2udq, _mask, x, (vsi_half_t){}, ~0), undef(), ~0)
# ifdef __AVX512DQ__
# define to_wint(x) BR(cvtqq2pd, _mask, BR(cvtpd2qq, _mask, x, (vdi_t)undef(), ~0), undef(), ~0)
+# define to_uwint(x) BR(cvtuqq2pd, _mask, BR(cvtpd2uqq, _mask, x, (vdi_t)undef(), ~0), undef(), ~0)
# endif
# endif
#elif VEC_SIZE == 16 && defined(__SSE2__)
if ( !eq(x, src) ) return __LINE__;
# endif
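+
+/*
+ * Same round-trip checks as for to_int() / to_wint() above, but through
+ * the unsigned conversions; the touch() invocations keep the compiler
+ * from optimizing the round trips away.
+ */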
+# ifdef to_uint
+ touch(src);
+ x = to_uint(src);
+ touch(src);
+ if ( !eq(x, src) ) return __LINE__;
+# endif
+
+# ifdef to_uwint
+ touch(src);
+ x = to_uwint(src);
+ touch(src);
+ if ( !eq(x, src) ) return __LINE__;
+# endif
+
# ifdef sqrt
x = src * src;
touch(x);
[0x71 ... 0x73] = { DstImplicit|SrcImmByte|ModRM, simd_none, d8s_vl },
[0x74 ... 0x76] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
[0x77] = { DstImplicit|SrcNone },
- [0x78] = { ImplicitOps|ModRM },
- [0x79] = { DstReg|SrcMem|ModRM, simd_packed_int },
+ [0x78 ... 0x79] = { DstImplicit|SrcMem|ModRM|Mov, simd_other, d8s_vl },
[0x7a] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp, d8s_vl },
[0x7b] = { DstImplicit|SrcMem|ModRM|Mov, simd_other, d8s_dq64 },
[0x7c ... 0x7d] = { DstImplicit|SrcMem|ModRM, simd_other },
break;
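+ /*
+  * The 0x78/0x79 table entries now describe the EVEX-encoded insns, so
+  * restore the attributes of the legacy forms (vmread as well as the
+  * immediate forms of extrq/insertq) here; the EVEX-encoded forms keep
+  * the table attributes.
+  */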
case 0x78:
+ state->desc = ImplicitOps;
+ state->simd_size = simd_none;
switch ( vex.pfx )
{
case vex_66: /* extrq $imm8, $imm8, xmm */
case 0x10 ... 0x18:
case 0x28 ... 0x2f:
case 0x50 ... 0x77:
- case 0x79 ... 0x7d:
+ case 0x7a ... 0x7d:
case 0x7f:
case 0xc2 ... 0xc3:
case 0xc5 ... 0xc6:
op_bytes = mode_64bit() ? 8 : 4;
break;
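+ /*
+  * Like 0x78 above, restore the prior attributes for the legacy forms of
+  * 0x79 (vmwrite and the register forms of extrq/insertq).
+  */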
+ case 0x79:
+ state->desc = DstReg | SrcMem;
+ state->simd_size = simd_packed_int;
+ ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
+ break;
+
case 0x7e:
ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
if ( vex.pfx == vex_f3 ) /* movq xmm/m64,xmm */
modrm_mod = 3;
break;
+ case 0x78:
+ case 0x79:
+ if ( !evex.pfx )
+ break;
+ /* vcvt{,t}ps2uqq need special casing */
+ if ( evex.pfx == vex_66 )
+ {
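+ /*
+  * Example: EVEX.66.0F.W0 79 (vcvtps2uqq) with L'L = 2 reads just 32
+  * bytes (ymm/m256) for a zmm destination, so disp8 gets scaled by 32
+  * instead of the full 64-byte vector length.
+  */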
+ if ( !evex.w && !evex.brs )
+ --disp8scale;
+ break;
+ }
+ /* vcvt{,t}s{s,d}2usi need special casing: fall through */
case 0x2c: /* vcvtts{s,d}2si need special casing */
case 0x2d: /* vcvts{s,d}2si need special casing */
if ( evex_encoded() )
CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2c): /* vcvtts{s,d}2si xmm/mem,reg */
CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2d): /* vcvts{s,d}2si xmm/mem,reg */
+ CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x78): /* vcvtts{s,d}2usi xmm/mem,reg */
+ CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x79): /* vcvts{s,d}2usi xmm/mem,reg */
generate_exception_if((evex.reg != 0xf || !evex.RX || evex.opmsk ||
(ea.type != OP_REG && evex.brs)),
EXC_UD);
if ( evex.w )
host_and_vcpu_must_have(avx512dq);
else
+ {
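+ /*
+  * Note the case labels inside the braces: the no-prefix EVEX 0x78/0x79
+  * forms enter here directly, sharing the AVX512F check and the rest of
+  * this handler.
+  */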
+ case X86EMUL_OPC_EVEX(0x0f, 0x78): /* vcvttp{s,d}2udq [xyz]mm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX(0x0f, 0x79): /* vcvtp{s,d}2udq [xyz]mm/mem,[xyz]mm{k} */
host_and_vcpu_must_have(avx512f);
+ }
if ( ea.type != OP_REG || !evex.brs )
avx512_vlen_check(false);
d |= TwoOp;
host_and_vcpu_must_have(avx512f);
else if ( evex.w )
{
+ case X86EMUL_OPC_EVEX_66(0x0f, 0x78): /* vcvttps2uqq {x,y}mm/mem,[xyz]mm{k} */
+ /* vcvttpd2uqq [xyz]mm/mem,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0x79): /* vcvtps2uqq {x,y}mm/mem,[xyz]mm{k} */
+ /* vcvtpd2uqq [xyz]mm/mem,[xyz]mm{k} */
case X86EMUL_OPC_EVEX_66(0x0f, 0x7a): /* vcvttps2qq {x,y}mm/mem,[xyz]mm{k} */
/* vcvttpd2qq [xyz]mm/mem,[xyz]mm{k} */
case X86EMUL_OPC_EVEX_66(0x0f, 0x7b): /* vcvtps2qq {x,y}mm/mem,[xyz]mm{k} */