From 5c993a483f37c7df498bacdddae9c0d4cdb214bc Mon Sep 17 00:00:00 2001 From: florian Date: Sat, 4 Jan 2020 21:54:53 +0000 Subject: [PATCH] * cleaning up tcgsize: it makes no sense to declare every combination and type; the different vector types must be handled either in the high-level cg or by using the shuffle parameter git-svn-id: trunk@43860 - (cherry picked from commit b7c6e01b03ac1d8fcedf2cfe6d150b83f83cef4a) Gbp-Pq: Name 1-3ef2ab1019a395a9392578b653958c14e19f81e0.patch --- fpcsrc/compiler/cgbase.pas | 44 ++---- fpcsrc/compiler/cgobj.pas | 9 +- fpcsrc/compiler/defutil.pas | 16 +-- fpcsrc/compiler/hlcgobj.pas | 3 +- fpcsrc/compiler/m68k/cpubase.pas | 5 +- fpcsrc/compiler/x86/cgx86.pas | 223 +++---------------------------- fpcsrc/compiler/x86/cpubase.pas | 6 +- fpcsrc/compiler/x86/rax86.pas | 6 +- 8 files changed, 53 insertions(+), 259 deletions(-) diff --git a/fpcsrc/compiler/cgbase.pas b/fpcsrc/compiler/cgbase.pas index b9464bc5..82f5eee5 100644 --- a/fpcsrc/compiler/cgbase.pas +++ b/fpcsrc/compiler/cgbase.pas @@ -168,14 +168,9 @@ interface OS_S8, OS_S16, OS_S32, OS_S64, OS_S128, { single, double, extended, comp, float128 } OS_F32, OS_F64, OS_F80, OS_C64, OS_F128, - { multi-media sizes: split in byte, word, dword, ... 
} - { entities, then the signed counterparts } - OS_M8, OS_M16, OS_M32, OS_M64, OS_M128, OS_M256, OS_M512, - OS_MS8, OS_MS16, OS_MS32, OS_MS64, OS_MS128, OS_MS256, OS_MS512, - { multi-media sizes: single-precision floating-point } - OS_MF32, OS_MF128, OS_MF256, OS_MF512, - { multi-media sizes: double-precision floating-point } - OS_MD64, OS_MD128, OS_MD256, OS_MD512); + { multi-media sizes, describes only the register size but not how it is split, + this information must be passed separately } + OS_M8, OS_M16, OS_M32, OS_M64, OS_M128, OS_M256, OS_M512); { Register types } TRegisterType = ( @@ -319,12 +314,7 @@ interface { floating point values } 4, 8, 10, 8, 16, { multimedia values } - 1, 2, 4, 8, 16, 32, 64, - 1, 2, 4, 8, 16, 32, 64, - { single-precision multimedia values } - 4, 16, 32, 64, - { double-precision multimedia values } - 8, 16, 32, 64); + 1, 2, 4, 8, 16, 32, 64); tfloat2tcgsize: array[tfloattype] of tcgsize = (OS_F32,OS_F64,OS_F80,OS_F80,OS_C64,OS_C64,OS_F128); @@ -364,10 +354,7 @@ interface OS_8, OS_16, OS_32, OS_64, OS_128, OS_F32, OS_F64, OS_F80, OS_C64, OS_F128, - OS_M8, OS_M16, OS_M32, OS_M64, OS_M128, OS_M256, OS_M512, - OS_M8, OS_M16, OS_M32, OS_M64, OS_M128, OS_M256, OS_M512, - OS_MF32, OS_MF128,OS_MF256,OS_MF512, - OS_MD64, OS_MD128,OS_MD256,OS_MD512); + OS_M8, OS_M16, OS_M32, OS_M64, OS_M128, OS_M256, OS_M512); tcgsize2signed : array[tcgsize] of tcgsize = (OS_NO, @@ -375,10 +362,7 @@ interface OS_S8, OS_S16, OS_S32, OS_S64, OS_S128, OS_F32, OS_F64, OS_F80, OS_C64, OS_F128, - OS_MS8, OS_MS16, OS_MS32, OS_MS64, OS_MS128,OS_MS256,OS_MS512, - OS_MS8, OS_MS16, OS_MS32, OS_MS64, OS_MS128,OS_MS256,OS_MS512, - OS_MF32, OS_MF128,OS_MF256,OS_MF512, - OS_MD64, OS_MD128,OS_MD256,OS_MD512); + OS_M8, OS_M16, OS_M32, OS_M64, OS_M128, OS_M256,OS_M512); tcgloc2str : array[TCGLoc] of string[12] = ( @@ -730,13 +714,13 @@ implementation begin case a of 4: - result := OS_MF32; + result := OS_M32; 16: - result := OS_MF128; + result := OS_M128; 32: - result := 
OS_MF256; + result := OS_M256; 64: - result := OS_MF512; + result := OS_M512; else result := int_cgsize(a); end; @@ -746,13 +730,13 @@ implementation begin case a of 8: - result := OS_MD64; + result := OS_M64; 16: - result := OS_MD128; + result := OS_M128; 32: - result := OS_MD256; + result := OS_M256; 64: - result := OS_MD512; + result := OS_M512; else result := int_cgsize(a); end; diff --git a/fpcsrc/compiler/cgobj.pas b/fpcsrc/compiler/cgobj.pas index d8279408..7148e09f 100644 --- a/fpcsrc/compiler/cgobj.pas +++ b/fpcsrc/compiler/cgobj.pas @@ -1145,8 +1145,7 @@ implementation OS_F64, OS_F128: a_loadmm_ref_reg(list,location^.size,location^.size,tmpref,location^.register,mms_movescalar); - OS_M8..OS_M128, - OS_MS8..OS_MS128: + OS_M8..OS_M512: a_loadmm_ref_reg(list,location^.size,location^.size,tmpref,location^.register,nil); else internalerror(2010053101); @@ -1351,8 +1350,7 @@ implementation OS_F64, OS_F128: a_loadmm_reg_ref(list,paraloc.size,paraloc.size,paraloc.register,ref,mms_movescalar); - OS_M8..OS_M128, - OS_MS8..OS_MS128: + OS_M8..OS_M512: a_loadmm_reg_ref(list,paraloc.size,paraloc.size,paraloc.register,ref,nil); else internalerror(2010053102); @@ -1408,8 +1406,7 @@ implementation OS_F64, OS_F128: a_loadmm_reg_reg(list,paraloc.size,regsize,paraloc.register,reg,mms_movescalar); - OS_M8..OS_M128, - OS_MS8..OS_MS128: + OS_M8..OS_M512: a_loadmm_reg_reg(list,paraloc.size,paraloc.size,paraloc.register,reg,nil); else internalerror(2010053102); diff --git a/fpcsrc/compiler/defutil.pas b/fpcsrc/compiler/defutil.pas index c4a70682..6a8af5c2 100644 --- a/fpcsrc/compiler/defutil.pas +++ b/fpcsrc/compiler/defutil.pas @@ -1551,19 +1551,19 @@ implementation case TFloatDef(tarraydef(def).elementdef).floattype of s32real: case def.size of - 4: result:=OS_MF32; - 16: result:=OS_MF128; - 32: result:=OS_MF256; - 64: result:=OS_MF512; + 4: result:=OS_M32; + 16: result:=OS_M128; + 32: result:=OS_M256; + 64: result:=OS_M512; else internalerror(2017121400); end; s64real: case 
def.size of - 8: result:=OS_MD64; - 16: result:=OS_MD128; - 32: result:=OS_MD256; - 64: result:=OS_MD512; + 8: result:=OS_M64; + 16: result:=OS_M128; + 32: result:=OS_M256; + 64: result:=OS_M512; else internalerror(2017121401); end; diff --git a/fpcsrc/compiler/hlcgobj.pas b/fpcsrc/compiler/hlcgobj.pas index e4ca5bcb..9e42d205 100644 --- a/fpcsrc/compiler/hlcgobj.pas +++ b/fpcsrc/compiler/hlcgobj.pas @@ -1063,8 +1063,7 @@ implementation OS_F64, OS_F128: a_loadmm_ref_reg(list,location^.def,location^.def,tmpref,location^.register,mms_movescalar); - OS_M8..OS_M128, - OS_MS8..OS_MS128: + OS_M8..OS_M128: a_loadmm_ref_reg(list,location^.def,location^.def,tmpref,location^.register,nil); else internalerror(2010053101); diff --git a/fpcsrc/compiler/m68k/cpubase.pas b/fpcsrc/compiler/m68k/cpubase.pas index 1e33be99..a3af00c2 100644 --- a/fpcsrc/compiler/m68k/cpubase.pas +++ b/fpcsrc/compiler/m68k/cpubase.pas @@ -343,10 +343,7 @@ unit cpubase; tcgsize2opsize: Array[tcgsize] of topsize = (S_NO,S_B,S_W,S_L,S_L,S_NO,S_B,S_W,S_L,S_L,S_NO, S_FS,S_FD,S_FX,S_NO,S_NO, - S_NO,S_NO,S_NO,S_NO,S_NO,S_NO, - S_NO,S_NO,S_NO,S_NO,S_NO,S_NO, - S_NO,S_NO,S_NO,S_NO,S_NO, - S_NO,S_NO,S_NO,S_NO,S_NO); + S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO); function is_calljmp(o:tasmop):boolean; diff --git a/fpcsrc/compiler/x86/cgx86.pas b/fpcsrc/compiler/x86/cgx86.pas index d4759ec5..5f9f917f 100644 --- a/fpcsrc/compiler/x86/cgx86.pas +++ b/fpcsrc/compiler/x86/cgx86.pas @@ -158,26 +158,17 @@ unit cgx86; TCGSize2OpSize: Array[tcgsize] of topsize = (S_NO,S_B,S_W,S_L,S_Q,S_XMM,S_B,S_W,S_L,S_Q,S_XMM, S_FS,S_FL,S_FX,S_IQ,S_FXX, - S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,S_ZMM, - S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM,S_ZMM, - S_NO,S_XMM,S_YMM,S_ZMM, - S_NO,S_XMM,S_YMM,S_ZMM); + S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,S_ZMM); {$elseif defined(i386)} TCGSize2OpSize: Array[tcgsize] of topsize = (S_NO,S_B,S_W,S_L,S_L,S_T,S_B,S_W,S_L,S_L,S_L, S_FS,S_FL,S_FX,S_IQ,S_FXX, - S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,S_ZMM, - 
S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM,S_ZMM, - S_NO,S_XMM,S_YMM,S_ZMM, - S_NO,S_XMM,S_YMM,S_ZMM); + S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,S_ZMM); {$elseif defined(i8086)} TCGSize2OpSize: Array[tcgsize] of topsize = (S_NO,S_B,S_W,S_W,S_W,S_T,S_B,S_W,S_W,S_W,S_W, S_FS,S_FL,S_FX,S_IQ,S_FXX, - S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,S_ZMM, - S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM,S_ZMM, - S_NO,S_XMM,S_YMM,S_ZMM, - S_NO,S_XMM,S_YMM,S_ZMM); + S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,S_ZMM); {$endif} {$ifndef NOTARGETWIN} @@ -294,17 +285,11 @@ unit cgx86; OS_M64: result:=rg[R_MMREGISTER].getregister(list,R_SUBQ); OS_M128, - OS_F128, - OS_MF128, - OS_MD128: + OS_F128: result:=rg[R_MMREGISTER].getregister(list,R_SUBMMX); { R_SUBMMWHOLE seems a bit dangerous and ambiguous, so changed to R_SUBMMX. [Kit] } - OS_M256, - OS_MF256, - OS_MD256: + OS_M256: result:=rg[R_MMREGISTER].getregister(list,R_SUBMMY); - OS_M512, - OS_MF512, - OS_MD512: + OS_M512: result:=rg[R_MMREGISTER].getregister(list,R_SUBMMZ); else internalerror(200506041); @@ -1373,14 +1358,12 @@ unit cgx86; if fromsize=tosize then { needs correct size in case of spilling } case fromsize of - OS_F32, - OS_MF128: + OS_F32: if UseAVX then instr:=taicpu.op_reg_reg(A_VMOVAPS,S_NO,reg1,reg2) else instr:=taicpu.op_reg_reg(A_MOVAPS,S_NO,reg1,reg2); - OS_F64, - OS_MD128: + OS_F64: if UseAVX then instr:=taicpu.op_reg_reg(A_VMOVAPD,S_NO,reg1,reg2) else @@ -1390,27 +1373,13 @@ unit cgx86; instr:=taicpu.op_reg_reg(A_VMOVQ,S_NO,reg1,reg2) else instr:=taicpu.op_reg_reg(A_MOVQ,S_NO,reg1,reg2); - OS_M128, OS_MS128: + OS_M128: if UseAVX then instr:=taicpu.op_reg_reg(A_VMOVDQA,S_NO,reg1,reg2) else instr:=taicpu.op_reg_reg(A_MOVDQA,S_NO,reg1,reg2); - OS_MF256, - OS_MF512: - if UseAVX then - instr:=taicpu.op_reg_reg(A_VMOVAPS,S_NO,reg1,reg2) - else - { SSE doesn't support 512-bit vectors } - InternalError(2018012931); - OS_MD256, - OS_MD512: - if UseAVX then - instr:=taicpu.op_reg_reg(A_VMOVAPD,S_NO,reg1,reg2) - else - { SSE doesn't support 512-bit vectors } - 
InternalError(2018012932); - OS_M256, OS_MS256, - OS_M512, OS_MS512: + OS_M256, + OS_M512: if UseAVX then instr:=taicpu.op_reg_reg(A_VMOVDQA,S_NO,reg1,reg2) else @@ -1495,39 +1464,7 @@ unit cgx86; op := A_VMOVQ else op := A_MOVQ; - OS_MF128: - { Use XMM transfer of packed singles } - if UseAVX then - begin - if GetRefAlignment(tmpref) = 16 then - op := A_VMOVAPS - else - op := A_VMOVUPS - end - else - begin - if GetRefAlignment(tmpref) = 16 then - op := A_MOVAPS - else - op := A_MOVUPS - end; - OS_MD128: - { Use XMM transfer of packed doubles } - if UseAVX then - begin - if GetRefAlignment(tmpref) = 16 then - op := A_VMOVAPD - else - op := A_VMOVUPD - end - else - begin - if GetRefAlignment(tmpref) = 16 then - op := A_MOVAPD - else - op := A_MOVUPD - end; - OS_M128, OS_MS128: + OS_M128: { Use XMM integer transfer } if UseAVX then begin @@ -1541,33 +1478,9 @@ unit cgx86; if GetRefAlignment(tmpref) = 16 then op := A_MOVDQA else - op := A_MOVDQU + op := A_MOVDQU; end; - OS_MF256: - { Use YMM transfer of packed singles } - if UseAVX then - begin - if GetRefAlignment(tmpref) = 32 then - op := A_VMOVAPS - else - op := A_VMOVUPS - end - else - { SSE doesn't support 256-bit vectors } - InternalError(2018012934); - OS_MD256: - { Use YMM transfer of packed doubles } - if UseAVX then - begin - if GetRefAlignment(tmpref) = 32 then - op := A_VMOVAPD - else - op := A_VMOVUPD - end - else - { SSE doesn't support 256-bit vectors } - InternalError(2018012935); - OS_M256, OS_MS256: + OS_M256: { Use YMM integer transfer } if UseAVX then begin @@ -1578,32 +1491,8 @@ unit cgx86; end else { SSE doesn't support 256-bit vectors } - InternalError(2018012936); - OS_MF512: - { Use ZMM transfer of packed singles } - if UseAVX then - begin - if GetRefAlignment(tmpref) = 64 then - op := A_VMOVAPS - else - op := A_VMOVUPS - end - else - { SSE doesn't support 512-bit vectors } - InternalError(2018012937); - OS_MD512: - { Use ZMM transfer of packed doubles } - if UseAVX then - begin - if 
GetRefAlignment(tmpref) = 64 then - op := A_VMOVAPD - else - op := A_VMOVUPD - end - else - { SSE doesn't support 512-bit vectors } - InternalError(2018012938); - OS_M512, OS_MS512: + Internalerror(2020010401); + OS_M512: { Use ZMM integer transfer } if UseAVX then begin @@ -1669,37 +1558,7 @@ unit cgx86; op := A_VMOVQ else op := A_MOVQ; - OS_MF128: - { Use XMM transfer of packed singles } - if UseAVX then - begin - if GetRefAlignment(tmpref) = 16 then - op := A_VMOVAPS - else - op := A_VMOVUPS - end else - begin - if GetRefAlignment(tmpref) = 16 then - op := A_MOVAPS - else - op := A_MOVUPS - end; - OS_MD128: - { Use XMM transfer of packed doubles } - if UseAVX then - begin - if GetRefAlignment(tmpref) = 16 then - op := A_VMOVAPD - else - op := A_VMOVUPD - end else - begin - if GetRefAlignment(tmpref) = 16 then - op := A_MOVAPD - else - op := A_MOVUPD - end; - OS_M128, OS_MS128: + OS_M128: { Use XMM integer transfer } if UseAVX then begin @@ -1714,29 +1573,7 @@ unit cgx86; else op := A_MOVDQU end; - OS_MF256: - { Use XMM transfer of packed singles } - if UseAVX then - begin - if GetRefAlignment(tmpref) = 32 then - op := A_VMOVAPS - else - op := A_VMOVUPS - end else - { SSE doesn't support 256-bit vectors } - InternalError(2018012940); - OS_MD256: - { Use XMM transfer of packed doubles } - if UseAVX then - begin - if GetRefAlignment(tmpref) = 32 then - op := A_VMOVAPD - else - op := A_VMOVUPD - end else - { SSE doesn't support 256-bit vectors } - InternalError(2018012941); - OS_M256, OS_MS256: + OS_M256: { Use XMM integer transfer } if UseAVX then begin @@ -1747,29 +1584,7 @@ unit cgx86; end else { SSE doesn't support 256-bit vectors } InternalError(2018012942); - OS_MF512: - { Use XMM transfer of packed singles } - if UseAVX then - begin - if GetRefAlignment(tmpref) = 64 then - op := A_VMOVAPS - else - op := A_VMOVUPS - end else - { SSE doesn't support 512-bit vectors } - InternalError(2018012943); - OS_MD512: - { Use XMM transfer of packed doubles } - if UseAVX 
then - begin - if GetRefAlignment(tmpref) = 64 then - op := A_VMOVAPD - else - op := A_VMOVUPD - end else - { SSE doesn't support 512-bit vectors } - InternalError(2018012944); - OS_M512, OS_MS512: + OS_M512: { Use XMM integer transfer } if UseAVX then begin diff --git a/fpcsrc/compiler/x86/cpubase.pas b/fpcsrc/compiler/x86/cpubase.pas index b176657d..a38eab9d 100644 --- a/fpcsrc/compiler/x86/cpubase.pas +++ b/fpcsrc/compiler/x86/cpubase.pas @@ -447,11 +447,11 @@ implementation else internalerror(2009071902); end; - OS_M128,OS_MS128,OS_MF128,OS_MD128: + OS_M128: cgsize2subreg:=R_SUBMMX; - OS_M256,OS_MS256,OS_MF256,OS_MD256: + OS_M256: cgsize2subreg:=R_SUBMMY; - OS_M512,OS_MS512,OS_MF512,OS_MD512: + OS_M512: cgsize2subreg:=R_SUBMMZ; OS_NO: { error message should have been thrown already before, so avoid only diff --git a/fpcsrc/compiler/x86/rax86.pas b/fpcsrc/compiler/x86/rax86.pas index b40ac83f..705d06a1 100644 --- a/fpcsrc/compiler/x86/rax86.pas +++ b/fpcsrc/compiler/x86/rax86.pas @@ -1288,10 +1288,12 @@ begin asize:=OT_BITS64; OS_F80 : asize:=OT_BITS80; - OS_128,OS_M128,OS_MS128: + OS_128,OS_M128: asize := OT_BITS128; - OS_M256,OS_MS256: + OS_M256: asize := OT_BITS256; + OS_M512: + asize := OT_BITS512; end; if asize<>0 then ai.oper[i-1]^.ot:=(ai.oper[i-1]^.ot and not OT_SIZE_MASK) or asize; -- 2.30.2