From: Debian Science Team Date: Sun, 25 Jun 2023 19:56:08 +0000 (+0100) Subject: Fix incorrect results of AVX512 DGEMM kernel when built on pre-AVX2 machine X-Git-Tag: archive/raspbian/0.3.13+ds-3+rpi1+deb11u1^2~1 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=0dee94d5bf64eb881d61b639d78415652ea864ad;p=openblas.git Fix incorrect results of AVX512 DGEMM kernel when built on pre-AVX2 machine Origin: upstream, https://github.com/xianyi/OpenBLAS/pull/3579 Bug: https://github.com/xianyi/OpenBLAS/issues/2986 https://github.com/xianyi/OpenBLAS/issues/3454 https://github.com/xianyi/OpenBLAS/issues/3557 Bug-Debian: https://bugs.debian.org/1025480 Applied-Upstream: 0.3.21 Reviewed-by: Sébastien Villemot Last-Update: 2023-06-26 When OpenBLAS is built with dynamic arch selection on x86-64 hardware that does not support AVX2 (e.g. Intel Ivy Bridge or earlier), the AVX512 (SkylakeX) kernel for DGEMM produces incorrect results (when run on AVX512-capable hardware, of course). The problem was that the check for determining whether the compiler is able to understand AVX512 assembly/intrinsics was doubly incorrect: it tested the build machine's capabilities (instead of the compiler's capabilities), and it checked for AVX2 instead of AVX512. As a consequence, on pre-AVX2 hardware, the build system concluded that the compiler was not able to understand AVX512 primitives, and created a broken AVX512 (SkylakeX) DGEMM kernel (essentially a Haswell kernel, but with some wrong assumptions, hence leading to incorrect numerical results).
Last-Update: 2023-06-26 Gbp-Pq: Name avx512-dgemm.patch --- diff --git a/Makefile.prebuild b/Makefile.prebuild index d6395da..05f102d 100644 --- a/Makefile.prebuild +++ b/Makefile.prebuild @@ -67,7 +67,8 @@ endif getarch : getarch.c cpuid.S dummy $(CPUIDEMU) - $(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) -o $(@F) getarch.c cpuid.S $(CPUIDEMU) + avx512=$$(perl c_check - - gcc | grep NO_AVX512); \ + $(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) $${avx512:+-D$${avx512}} -o $(@F) getarch.c cpuid.S $(CPUIDEMU) getarch_2nd : getarch_2nd.c config.h dummy ifndef TARGET_CORE diff --git a/c_check b/c_check index 970d475..1d2464d 100644 --- a/c_check +++ b/c_check @@ -240,7 +240,7 @@ if (($architecture eq "x86") || ($architecture eq "x86_64")) { # $tmpf = new File::Temp( UNLINK => 1 ); ($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 ); $code = '"vbroadcastss -4 * 4(%rsi), %zmm2"'; - print $tmpf "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n"; + print $fh "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n"; $args = " -march=skylake-avx512 -c -o $tmpf.o $tmpf"; if ($compiler eq "PGI") { $args = " -tp skylake -c -o $tmpf.o $tmpf"; @@ -264,7 +264,7 @@ if ($data =~ /HAVE_C11/) { $c11_atomics = 0; } else { ($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 ); - print $tmpf "#include <stdatomic.h>\nint main(void){}\n"; + print $fh "#include <stdatomic.h>\nint main(void){}\n"; $args = " -c -o $tmpf.o $tmpf"; my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null"); system(@cmd) == 0; diff --git a/getarch.c b/getarch.c index f48944f..7d59d0e 100644 --- a/getarch.c +++ b/getarch.c @@ -94,14 +94,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include <unistd.h> #endif -#if defined(__x86_64__) || defined(_M_X64) -#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX2__)) || (defined(__clang__) && __clang_major__ >= 6)) -#else -#ifndef NO_AVX512 -#define NO_AVX512 -#endif -#endif -#endif /* #define FORCE_P2 */ /* #define FORCE_KATMAI */ /* #define FORCE_COPPERMINE */