From 5c993a483f37c7df498bacdddae9c0d4cdb214bc Mon Sep 17 00:00:00 2001
From: florian <florian@freepascal.org>
Date: Sat, 4 Jan 2020 21:54:53 +0000
Subject: [PATCH] * cleaning up tcgsize: it makes no sense to declare every
 combination of size and type; the different vector types must be handled
 either in the high level cg or by using the shuffle parameter

git-svn-id: trunk@43860 -
(cherry picked from commit b7c6e01b03ac1d8fcedf2cfe6d150b83f83cef4a)

Gbp-Pq: Name 1-3ef2ab1019a395a9392578b653958c14e19f81e0.patch
---
 fpcsrc/compiler/cgbase.pas       |  44 ++----
 fpcsrc/compiler/cgobj.pas        |   9 +-
 fpcsrc/compiler/defutil.pas      |  16 +--
 fpcsrc/compiler/hlcgobj.pas      |   3 +-
 fpcsrc/compiler/m68k/cpubase.pas |   5 +-
 fpcsrc/compiler/x86/cgx86.pas    | 223 +++----------------------------
 fpcsrc/compiler/x86/cpubase.pas  |   6 +-
 fpcsrc/compiler/x86/rax86.pas    |   6 +-
 8 files changed, 53 insertions(+), 259 deletions(-)

diff --git a/fpcsrc/compiler/cgbase.pas b/fpcsrc/compiler/cgbase.pas
index b9464bc5..82f5eee5 100644
--- a/fpcsrc/compiler/cgbase.pas
+++ b/fpcsrc/compiler/cgbase.pas
@@ -168,14 +168,9 @@ interface
                   OS_S8,  OS_S16,  OS_S32,  OS_S64,  OS_S128,
                  { single, double, extended, comp, float128 }
                   OS_F32, OS_F64,  OS_F80,  OS_C64,  OS_F128,
-                 { multi-media sizes: split in byte, word, dword, ... }
-                 { entities, then the signed counterparts             }
-                  OS_M8,  OS_M16,  OS_M32,  OS_M64,  OS_M128,  OS_M256,  OS_M512,
-                  OS_MS8, OS_MS16, OS_MS32, OS_MS64, OS_MS128, OS_MS256, OS_MS512,
-                 { multi-media sizes: single-precision floating-point }
-                  OS_MF32, OS_MF128, OS_MF256, OS_MF512,
-                 { multi-media sizes: double-precision floating-point }
-                  OS_MD64, OS_MD128, OS_MD256, OS_MD512);
+                 { multi-media sizes, describes only the register size but not how it is split,
+                   this information must be passed separately }
+                  OS_M8,  OS_M16,  OS_M32,  OS_M64,  OS_M128,  OS_M256,  OS_M512);
 
       { Register types }
       TRegisterType = (
@@ -319,12 +314,7 @@ interface
          { floating point values }
          4,  8, 10,  8, 16,
          { multimedia values }
-         1,  2,  4,  8, 16, 32, 64,
-         1,  2,  4,  8, 16, 32, 64,
-         { single-precision multimedia values }
-         4, 16, 32, 64,
-         { double-precision multimedia values }
-         8, 16, 32, 64);
+         1,  2,  4,  8, 16, 32, 64);
 
        tfloat2tcgsize: array[tfloattype] of tcgsize =
          (OS_F32,OS_F64,OS_F80,OS_F80,OS_C64,OS_C64,OS_F128);
@@ -364,10 +354,7 @@ interface
          OS_8,    OS_16,   OS_32,   OS_64,   OS_128,
 
          OS_F32,  OS_F64,  OS_F80,  OS_C64,  OS_F128,
-         OS_M8,   OS_M16,  OS_M32,  OS_M64,  OS_M128, OS_M256, OS_M512,
-         OS_M8,   OS_M16,  OS_M32,  OS_M64,  OS_M128, OS_M256, OS_M512,
-         OS_MF32, OS_MF128,OS_MF256,OS_MF512,
-         OS_MD64, OS_MD128,OS_MD256,OS_MD512);
+         OS_M8,   OS_M16,  OS_M32,  OS_M64,  OS_M128, OS_M256, OS_M512);
 
 
        tcgsize2signed : array[tcgsize] of tcgsize = (OS_NO,
@@ -375,10 +362,7 @@ interface
          OS_S8,   OS_S16,  OS_S32,  OS_S64,  OS_S128,
 
          OS_F32,  OS_F64,  OS_F80,  OS_C64,  OS_F128,
-         OS_MS8,  OS_MS16, OS_MS32, OS_MS64, OS_MS128,OS_MS256,OS_MS512,
-         OS_MS8,  OS_MS16, OS_MS32, OS_MS64, OS_MS128,OS_MS256,OS_MS512,
-         OS_MF32, OS_MF128,OS_MF256,OS_MF512,
-         OS_MD64, OS_MD128,OS_MD256,OS_MD512);
+         OS_M8,   OS_M16,  OS_M32,  OS_M64,  OS_M128, OS_M256,OS_M512);
 
 
        tcgloc2str : array[TCGLoc] of string[12] = (
@@ -730,13 +714,13 @@ implementation
       begin
         case a of
           4:
-            result := OS_MF32;
+            result := OS_M32;
           16:
-            result := OS_MF128;
+            result := OS_M128;
           32:
-            result := OS_MF256;
+            result := OS_M256;
           64:
-            result := OS_MF512;
+            result := OS_M512;
           else
             result := int_cgsize(a);
         end;
@@ -746,13 +730,13 @@ implementation
       begin
         case a of
           8:
-            result := OS_MD64;
+            result := OS_M64;
           16:
-            result := OS_MD128;
+            result := OS_M128;
           32:
-            result := OS_MD256;
+            result := OS_M256;
           64:
-            result := OS_MD512;
+            result := OS_M512;
           else
             result := int_cgsize(a);
         end;
diff --git a/fpcsrc/compiler/cgobj.pas b/fpcsrc/compiler/cgobj.pas
index d8279408..7148e09f 100644
--- a/fpcsrc/compiler/cgobj.pas
+++ b/fpcsrc/compiler/cgobj.pas
@@ -1145,8 +1145,7 @@ implementation
                      OS_F64,
                      OS_F128:
                        a_loadmm_ref_reg(list,location^.size,location^.size,tmpref,location^.register,mms_movescalar);
-                     OS_M8..OS_M128,
-                     OS_MS8..OS_MS128:
+                     OS_M8..OS_M512:
                        a_loadmm_ref_reg(list,location^.size,location^.size,tmpref,location^.register,nil);
                      else
                        internalerror(2010053101);
@@ -1351,8 +1350,7 @@ implementation
                 OS_F64,
                 OS_F128:
                   a_loadmm_reg_ref(list,paraloc.size,paraloc.size,paraloc.register,ref,mms_movescalar);
-                OS_M8..OS_M128,
-                OS_MS8..OS_MS128:
+                OS_M8..OS_M512:
                   a_loadmm_reg_ref(list,paraloc.size,paraloc.size,paraloc.register,ref,nil);
                 else
                   internalerror(2010053102);
@@ -1408,8 +1406,7 @@ implementation
                        OS_F64,
                        OS_F128:
                         a_loadmm_reg_reg(list,paraloc.size,regsize,paraloc.register,reg,mms_movescalar);
-                       OS_M8..OS_M128,
-                       OS_MS8..OS_MS128:
+                       OS_M8..OS_M512:
                          a_loadmm_reg_reg(list,paraloc.size,paraloc.size,paraloc.register,reg,nil);
                        else
                          internalerror(2010053102);
diff --git a/fpcsrc/compiler/defutil.pas b/fpcsrc/compiler/defutil.pas
index c4a70682..6a8af5c2 100644
--- a/fpcsrc/compiler/defutil.pas
+++ b/fpcsrc/compiler/defutil.pas
@@ -1551,19 +1551,19 @@ implementation
                     case TFloatDef(tarraydef(def).elementdef).floattype of
                       s32real:
                         case def.size of
-                          4:  result:=OS_MF32;
-                          16: result:=OS_MF128;
-                          32: result:=OS_MF256;
-                          64: result:=OS_MF512;
+                          4:  result:=OS_M32;
+                          16: result:=OS_M128;
+                          32: result:=OS_M256;
+                          64: result:=OS_M512;
                           else
                             internalerror(2017121400);
                         end;
                       s64real:
                         case def.size of
-                          8:  result:=OS_MD64;
-                          16: result:=OS_MD128;
-                          32: result:=OS_MD256;
-                          64: result:=OS_MD512;
+                          8:  result:=OS_M64;
+                          16: result:=OS_M128;
+                          32: result:=OS_M256;
+                          64: result:=OS_M512;
                           else
                             internalerror(2017121401);
                         end;
diff --git a/fpcsrc/compiler/hlcgobj.pas b/fpcsrc/compiler/hlcgobj.pas
index e4ca5bcb..9e42d205 100644
--- a/fpcsrc/compiler/hlcgobj.pas
+++ b/fpcsrc/compiler/hlcgobj.pas
@@ -1063,8 +1063,7 @@ implementation
                    OS_F64,
                    OS_F128:
                      a_loadmm_ref_reg(list,location^.def,location^.def,tmpref,location^.register,mms_movescalar);
-                   OS_M8..OS_M128,
-                   OS_MS8..OS_MS128:
+                   OS_M8..OS_M128:
                      a_loadmm_ref_reg(list,location^.def,location^.def,tmpref,location^.register,nil);
                    else
                      internalerror(2010053101);
diff --git a/fpcsrc/compiler/m68k/cpubase.pas b/fpcsrc/compiler/m68k/cpubase.pas
index 1e33be99..a3af00c2 100644
--- a/fpcsrc/compiler/m68k/cpubase.pas
+++ b/fpcsrc/compiler/m68k/cpubase.pas
@@ -343,10 +343,7 @@ unit cpubase;
       tcgsize2opsize: Array[tcgsize] of topsize =
         (S_NO,S_B,S_W,S_L,S_L,S_NO,S_B,S_W,S_L,S_L,S_NO,
          S_FS,S_FD,S_FX,S_NO,S_NO,
-         S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,
-         S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,
-         S_NO,S_NO,S_NO,S_NO,S_NO,
-         S_NO,S_NO,S_NO,S_NO,S_NO);
+         S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO);
 
     function  is_calljmp(o:tasmop):boolean;
 
diff --git a/fpcsrc/compiler/x86/cgx86.pas b/fpcsrc/compiler/x86/cgx86.pas
index d4759ec5..5f9f917f 100644
--- a/fpcsrc/compiler/x86/cgx86.pas
+++ b/fpcsrc/compiler/x86/cgx86.pas
@@ -158,26 +158,17 @@ unit cgx86;
       TCGSize2OpSize: Array[tcgsize] of topsize =
         (S_NO,S_B,S_W,S_L,S_Q,S_XMM,S_B,S_W,S_L,S_Q,S_XMM,
          S_FS,S_FL,S_FX,S_IQ,S_FXX,
-         S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,S_ZMM,
-         S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM,S_ZMM,
-         S_NO,S_XMM,S_YMM,S_ZMM,
-         S_NO,S_XMM,S_YMM,S_ZMM);
+         S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,S_ZMM);
 {$elseif defined(i386)}
       TCGSize2OpSize: Array[tcgsize] of topsize =
         (S_NO,S_B,S_W,S_L,S_L,S_T,S_B,S_W,S_L,S_L,S_L,
          S_FS,S_FL,S_FX,S_IQ,S_FXX,
-         S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,S_ZMM,
-         S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM,S_ZMM,
-         S_NO,S_XMM,S_YMM,S_ZMM,
-         S_NO,S_XMM,S_YMM,S_ZMM);
+         S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,S_ZMM);
 {$elseif defined(i8086)}
       TCGSize2OpSize: Array[tcgsize] of topsize =
         (S_NO,S_B,S_W,S_W,S_W,S_T,S_B,S_W,S_W,S_W,S_W,
          S_FS,S_FL,S_FX,S_IQ,S_FXX,
-         S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,S_ZMM,
-         S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM,S_ZMM,
-         S_NO,S_XMM,S_YMM,S_ZMM,
-         S_NO,S_XMM,S_YMM,S_ZMM);
+         S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,S_ZMM);
 {$endif}
 
 {$ifndef NOTARGETWIN}
@@ -294,17 +285,11 @@ unit cgx86;
           OS_M64:
             result:=rg[R_MMREGISTER].getregister(list,R_SUBQ);
           OS_M128,
-          OS_F128,
-          OS_MF128,
-          OS_MD128:
+          OS_F128:
             result:=rg[R_MMREGISTER].getregister(list,R_SUBMMX); { R_SUBMMWHOLE seems a bit dangerous and ambiguous, so changed to R_SUBMMX. [Kit] }
-          OS_M256,
-          OS_MF256,
-          OS_MD256:
+          OS_M256:
             result:=rg[R_MMREGISTER].getregister(list,R_SUBMMY);
-          OS_M512,
-          OS_MF512,
-          OS_MD512:
+          OS_M512:
             result:=rg[R_MMREGISTER].getregister(list,R_SUBMMZ);
           else
             internalerror(200506041);
@@ -1373,14 +1358,12 @@ unit cgx86;
             if fromsize=tosize then
               { needs correct size in case of spilling }
               case fromsize of
-                OS_F32,
-                OS_MF128:
+                OS_F32:
                   if UseAVX then
                     instr:=taicpu.op_reg_reg(A_VMOVAPS,S_NO,reg1,reg2)
                   else
                     instr:=taicpu.op_reg_reg(A_MOVAPS,S_NO,reg1,reg2);
-                OS_F64,
-                OS_MD128:
+                OS_F64:
                   if UseAVX then
                     instr:=taicpu.op_reg_reg(A_VMOVAPD,S_NO,reg1,reg2)
                   else
@@ -1390,27 +1373,13 @@ unit cgx86;
                     instr:=taicpu.op_reg_reg(A_VMOVQ,S_NO,reg1,reg2)
                   else
                     instr:=taicpu.op_reg_reg(A_MOVQ,S_NO,reg1,reg2);
-                OS_M128, OS_MS128:
+                OS_M128:
                   if UseAVX then
                     instr:=taicpu.op_reg_reg(A_VMOVDQA,S_NO,reg1,reg2)
                   else
                     instr:=taicpu.op_reg_reg(A_MOVDQA,S_NO,reg1,reg2);
-                OS_MF256,
-                OS_MF512:
-                  if UseAVX then
-                    instr:=taicpu.op_reg_reg(A_VMOVAPS,S_NO,reg1,reg2)
-                  else
-                    { SSE doesn't support 512-bit vectors }
-                    InternalError(2018012931);
-                OS_MD256,
-                OS_MD512:
-                  if UseAVX then
-                    instr:=taicpu.op_reg_reg(A_VMOVAPD,S_NO,reg1,reg2)
-                  else
-                    { SSE doesn't support 512-bit vectors }
-                    InternalError(2018012932);
-                OS_M256, OS_MS256,
-                OS_M512, OS_MS512:
+                OS_M256,
+                OS_M512:
                   if UseAVX then
                     instr:=taicpu.op_reg_reg(A_VMOVDQA,S_NO,reg1,reg2)
                   else
@@ -1495,39 +1464,7 @@ unit cgx86;
                    op := A_VMOVQ
                  else
                    op := A_MOVQ;
-               OS_MF128:
-                 { Use XMM transfer of packed singles }
-                 if UseAVX then
-                   begin
-                     if GetRefAlignment(tmpref) = 16 then
-                       op := A_VMOVAPS
-                     else
-                       op := A_VMOVUPS
-                   end
-                 else
-                   begin
-                     if GetRefAlignment(tmpref) = 16 then
-                       op := A_MOVAPS
-                     else
-                       op := A_MOVUPS
-                   end;
-               OS_MD128:
-                 { Use XMM transfer of packed doubles }
-                 if UseAVX then
-                   begin
-                     if GetRefAlignment(tmpref) = 16 then
-                       op := A_VMOVAPD
-                     else
-                       op := A_VMOVUPD
-                   end
-                 else
-                   begin
-                     if GetRefAlignment(tmpref) = 16 then
-                       op := A_MOVAPD
-                     else
-                       op := A_MOVUPD
-                   end;
-               OS_M128, OS_MS128:
+               OS_M128:
                  { Use XMM integer transfer }
                  if UseAVX then
                    begin
@@ -1541,33 +1478,9 @@ unit cgx86;
                      if GetRefAlignment(tmpref) = 16 then
                        op := A_MOVDQA
                      else
-                       op := A_MOVDQU
+                       op := A_MOVDQU;
                    end;
-               OS_MF256:
-                 { Use YMM transfer of packed singles }
-                 if UseAVX then
-                   begin
-                     if GetRefAlignment(tmpref) = 32 then
-                       op := A_VMOVAPS
-                     else
-                       op := A_VMOVUPS
-                   end
-                 else
-                   { SSE doesn't support 256-bit vectors }
-                   InternalError(2018012934);
-               OS_MD256:
-                 { Use YMM transfer of packed doubles }
-                 if UseAVX then
-                   begin
-                     if GetRefAlignment(tmpref) = 32 then
-                       op := A_VMOVAPD
-                     else
-                       op := A_VMOVUPD
-                   end
-                 else
-                   { SSE doesn't support 256-bit vectors }
-                   InternalError(2018012935);
-               OS_M256, OS_MS256:
+               OS_M256:
                  { Use YMM integer transfer }
                  if UseAVX then
                    begin
@@ -1578,32 +1491,8 @@ unit cgx86;
                    end
                  else
                    { SSE doesn't support 256-bit vectors }
-                   InternalError(2018012936);
-               OS_MF512:
-                 { Use ZMM transfer of packed singles }
-                 if UseAVX then
-                   begin
-                     if GetRefAlignment(tmpref) = 64 then
-                       op := A_VMOVAPS
-                     else
-                       op := A_VMOVUPS
-                   end
-                 else
-                   { SSE doesn't support 512-bit vectors }
-                   InternalError(2018012937);
-               OS_MD512:
-                 { Use ZMM transfer of packed doubles }
-                 if UseAVX then
-                   begin
-                     if GetRefAlignment(tmpref) = 64 then
-                       op := A_VMOVAPD
-                     else
-                       op := A_VMOVUPD
-                   end
-                 else
-                   { SSE doesn't support 512-bit vectors }
-                   InternalError(2018012938);
-               OS_M512, OS_MS512:
+                   Internalerror(2020010401);
+               OS_M512:
                  { Use ZMM integer transfer }
                  if UseAVX then
                    begin
@@ -1669,37 +1558,7 @@ unit cgx86;
                    op := A_VMOVQ
                  else
                    op := A_MOVQ;
-               OS_MF128:
-                 { Use XMM transfer of packed singles }
-                 if UseAVX then
-                 begin
-                   if GetRefAlignment(tmpref) = 16 then
-                     op := A_VMOVAPS
-                   else
-                     op := A_VMOVUPS
-                 end else
-                 begin
-                   if GetRefAlignment(tmpref) = 16 then
-                     op := A_MOVAPS
-                   else
-                     op := A_MOVUPS
-                 end;
-               OS_MD128:
-                 { Use XMM transfer of packed doubles }
-                 if UseAVX then
-                 begin
-                   if GetRefAlignment(tmpref) = 16 then
-                     op := A_VMOVAPD
-                   else
-                     op := A_VMOVUPD
-                 end else
-                 begin
-                   if GetRefAlignment(tmpref) = 16 then
-                     op := A_MOVAPD
-                   else
-                     op := A_MOVUPD
-                 end;
-               OS_M128, OS_MS128:
+               OS_M128:
                  { Use XMM integer transfer }
                  if UseAVX then
                  begin
@@ -1714,29 +1573,7 @@ unit cgx86;
                    else
                      op := A_MOVDQU
                  end;
-               OS_MF256:
-                 { Use XMM transfer of packed singles }
-                 if UseAVX then
-                 begin
-                   if GetRefAlignment(tmpref) = 32 then
-                     op := A_VMOVAPS
-                   else
-                     op := A_VMOVUPS
-                 end else
-                   { SSE doesn't support 256-bit vectors }
-                   InternalError(2018012940);
-               OS_MD256:
-                 { Use XMM transfer of packed doubles }
-                 if UseAVX then
-                 begin
-                   if GetRefAlignment(tmpref) = 32 then
-                     op := A_VMOVAPD
-                   else
-                     op := A_VMOVUPD
-                 end else
-                   { SSE doesn't support 256-bit vectors }
-                   InternalError(2018012941);
-               OS_M256, OS_MS256:
+               OS_M256:
                  { Use XMM integer transfer }
                  if UseAVX then
                  begin
@@ -1747,29 +1584,7 @@ unit cgx86;
                  end else
                    { SSE doesn't support 256-bit vectors }
                    InternalError(2018012942);
-               OS_MF512:
-                 { Use XMM transfer of packed singles }
-                 if UseAVX then
-                 begin
-                   if GetRefAlignment(tmpref) = 64 then
-                     op := A_VMOVAPS
-                   else
-                     op := A_VMOVUPS
-                 end else
-                   { SSE doesn't support 512-bit vectors }
-                   InternalError(2018012943);
-               OS_MD512:
-                 { Use XMM transfer of packed doubles }
-                 if UseAVX then
-                 begin
-                   if GetRefAlignment(tmpref) = 64 then
-                     op := A_VMOVAPD
-                   else
-                     op := A_VMOVUPD
-                 end else
-                   { SSE doesn't support 512-bit vectors }
-                   InternalError(2018012944);
-               OS_M512, OS_MS512:
+               OS_M512:
                  { Use XMM integer transfer }
                  if UseAVX then
                  begin
diff --git a/fpcsrc/compiler/x86/cpubase.pas b/fpcsrc/compiler/x86/cpubase.pas
index b176657d..a38eab9d 100644
--- a/fpcsrc/compiler/x86/cpubase.pas
+++ b/fpcsrc/compiler/x86/cpubase.pas
@@ -447,11 +447,11 @@ implementation
               else
                 internalerror(2009071902);
             end;
-          OS_M128,OS_MS128,OS_MF128,OS_MD128:
+          OS_M128:
             cgsize2subreg:=R_SUBMMX;
-          OS_M256,OS_MS256,OS_MF256,OS_MD256:
+          OS_M256:
             cgsize2subreg:=R_SUBMMY;
-          OS_M512,OS_MS512,OS_MF512,OS_MD512:
+          OS_M512:
             cgsize2subreg:=R_SUBMMZ;
           OS_NO:
             { error message should have been thrown already before, so avoid only
diff --git a/fpcsrc/compiler/x86/rax86.pas b/fpcsrc/compiler/x86/rax86.pas
index b40ac83f..705d06a1 100644
--- a/fpcsrc/compiler/x86/rax86.pas
+++ b/fpcsrc/compiler/x86/rax86.pas
@@ -1288,10 +1288,12 @@ begin
                      asize:=OT_BITS64;
                    OS_F80 :
                      asize:=OT_BITS80;
-                   OS_128,OS_M128,OS_MS128:
+                   OS_128,OS_M128:
                      asize := OT_BITS128;
-                   OS_M256,OS_MS256:
+                   OS_M256:
                      asize := OT_BITS256;
+                   OS_M512:
+                     asize := OT_BITS512;
                  end;
                if asize<>0 then
                  ai.oper[i-1]^.ot:=(ai.oper[i-1]^.ot and not OT_SIZE_MASK) or asize;
-- 
2.30.2