From b708c503730455bc7df6a77ea395478661e8ae6e Mon Sep 17 00:00:00 2001
From: GNU Libc Maintainers
Date: Tue, 30 Apr 2024 23:57:11 +0200
Subject: [PATCH] git-updates

GIT update of https://sourceware.org/git/glibc.git/release/2.31/master from
glibc-2.31

Gbp-Pq: Name git-updates.diff
---
 INSTALL | 11 +-
 NEWS | 100 ++
 Rules | 19 +-
 configure | 2 +-
 configure.ac | 2 +-
 debug/backtrace.c | 5 +
 elf/Makefile | 4 +-
 elf/dl-tunables.c | 56 +-
 elf/ifuncmain6pie.c | 14 +-
 elf/ifuncmod6.c | 8 +-
 elf/tst-env-setuid-tunables.c | 118 +-
 elf/tst-env-setuid.c | 197 +---
 iconv/Makefile | 17 +-
 iconv/Versions | 3 +
 iconv/gconv_charset.c | 228 ++++
 iconv/gconv_charset.h | 34 +-
 iconv/gconv_int.h | 40 +-
 iconv/gconv_open.c | 64 +-
 iconv/gconv_simple.c | 16 +-
 iconv/iconv_open.c | 46 +-
 iconv/iconv_prog.c | 63 +-
 iconv/tst-iconv-opt.c | 347 ++++++
 iconv/tst-iconv8.c | 50 +
 iconv/tst-iconv_prog.sh | 284 +++++
 iconvdata/Makefile | 8 +-
 iconvdata/bug-iconv13.c | 53 +
 iconvdata/bug-iconv14.c | 127 ++
 iconvdata/bug-iconv15.c | 60 +
 iconvdata/euc-kr.c | 6 +-
 iconvdata/ibm1364.c | 14 +-
 iconvdata/iso-2022-jp-3.c | 77 +-
 iconvdata/ksc5601.h | 6 +-
 include/libc-symbols.h | 26 +-
 include/sys/prctl.h | 1 +
 include/sys/un.h | 12 +
 intl/dcigettext.c | 16 +-
 intl/tst-codeset.c | 34 +-
 localedata/locales/oc_FR | 6 +-
 malloc/Makefile | 20 +
 malloc/tst-mallocfork2.c | 39 +-
 math/Makefile | 122 ++
 misc/Makefile | 2 +-
 misc/tst-syscalls.c | 167 +++
 nptl/Makefile | 2 +-
 nptl/descr.h | 8 +-
 nptl/tst-setgroups.c | 79 ++
 nscd/netgroupcache.c | 4 +-
 nscd/selinux.c | 15 +
 nss/makedb.c | 9 +
 nss/nss_compat/compat-grp.c | 15 +-
 nss/nss_compat/compat-initgroups.c | 13 +-
 nss/nss_compat/compat-pwd.c | 15 +-
 nss/nss_compat/compat-spwd.c | 14 +-
 posix/glob.c | 25 +-
 posix/wordexp-test.c | 1 +
 posix/wordexp.c | 2 +-
 socket/Makefile | 6 +-
 socket/sockaddr_un_set.c | 41 +
 socket/tst-sockaddr_un_set.c | 62 +
 stdio-common/Makefile | 13 +-
 stdio-common/bug22.c | 2 +-
 stdio-common/tst-grouping2.c | 39 +
 stdio-common/tst-printf-bz25691.c | 108 ++
 stdio-common/tst-vfprintf-width-prec-alloc.c | 41 +
 stdio-common/vfprintf-internal.c | 447 +++----
 stdlib/Makefile | 3 +-
 stdlib/tst-secure-getenv.c | 199 +---
 stdlib/tst-system.c | 124 +-
 string/test-memchr.c | 39 +-
 string/test-strncat.c | 61 +
 string/test-strncmp.c | 13 +
 string/test-strnlen.c | 87 +-
 sunrpc/Makefile | 5 +-
 sunrpc/clnt_gen.c | 10 +-
 sunrpc/svc_unix.c | 11 +-
 sunrpc/tst-bug22542.c | 44 +
 sunrpc/tst-bug28768.c | 42 +
 support/Makefile | 2 +
 support/capture_subprocess.h | 6 +
 support/shell-container.c | 40 +-
 support/subprocess.h | 5 +
 support/support.h | 1 +
 support/support_capture_subprocess.c | 128 +-
 support/support_subprocess.c | 21 +-
 support/temp_file.c | 173 ++-
 support/temp_file.h | 16 +
 support/xchdir.c | 28 +
 support/xclone.c | 49 +
 support/xsched.h | 34 +
 support/xunistd.h | 1 +
 sysdeps/aarch64/dl-machine.h | 12 +-
 sysdeps/aarch64/memcpy.S | 197 ++--
 sysdeps/aarch64/multiarch/Makefile | 2 +-
 sysdeps/aarch64/multiarch/ifunc-impl-list.c | 2 +
 sysdeps/aarch64/multiarch/memcpy.c | 8 +-
 sysdeps/aarch64/multiarch/memcpy_advsimd.S | 248 ++++
 sysdeps/aarch64/multiarch/memmove.c | 6 +-
 sysdeps/aarch64/strcpy.S | 5 +
 sysdeps/aarch64/strnlen.S | 5 +
 sysdeps/aarch64/sysdep.h | 2 +-
 sysdeps/arm/armv7/multiarch/memcpy_impl.S | 22 +-
 sysdeps/arm/be/nofpu/Implies | 1 +
 sysdeps/arm/le/nofpu/Implies | 1 +
 sysdeps/arm/memcpy.S | 24 +-
 sysdeps/arm/memmove.S | 24 +-
 sysdeps/generic/unwind-arch.h | 30 +
 sysdeps/hppa/dl-fptr.c | 26 +-
 sysdeps/hppa/dl-machine.h | 36 +-
 sysdeps/hppa/dl-runtime.c | 58 +
 sysdeps/hppa/dl-trampoline.S | 74 +-
 sysdeps/i386/dl-machine.h | 16 +-
 sysdeps/i386/sysdep.h | 5 +-
 sysdeps/ieee754/ldbl-96/Makefile | 5 +-
 sysdeps/ieee754/ldbl-96/e_rem_pio2l.c | 12 +
 sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c | 41 +
 sysdeps/posix/getcwd.c | 8 +
 sysdeps/posix/system.c | 18 +-
 sysdeps/powerpc/powerpc32/sysdep.h | 4 +-
 sysdeps/powerpc/powerpc64/backtrace.c | 13 +-
 sysdeps/s390/configure | 8 +-
 sysdeps/s390/configure.ac | 8 +-
 sysdeps/s390/memmove.c | 2 +-
 sysdeps/s390/multiarch/ifunc-impl-list.c | 3 +-
 sysdeps/sh/be/sh4/fpu/Implies | 1 +
 sysdeps/sh/le/sh4/fpu/Implies | 1 +
 sysdeps/unix/make-syscalls.sh | 24 +
 sysdeps/unix/syscall-template.S | 49 +-
 sysdeps/unix/syscalls.list | 22 +-
 sysdeps/unix/sysv/linux/Makefile | 6 +-
 .../unix/sysv/linux/aarch64/arch-syscall.h | 1 +
 .../unix/sysv/linux/aarch64/cpu-features.h | 8 +-
 sysdeps/unix/sysv/linux/aarch64/localplt.data | 3 +
 sysdeps/unix/sysv/linux/getpt.c | 67 +-
 sysdeps/unix/sysv/linux/grantpt.c | 73 +-
 sysdeps/unix/sysv/linux/hppa/atomic-machine.h | 28 +
 sysdeps/unix/sysv/linux/microblaze/sysdep.h | 91 +-
 .../sysv/linux/mips/mips32/mips-syscall5.S | 6 +-
 .../sysv/linux/mips/mips32/mips-syscall6.S | 6 +-
 .../sysv/linux/mips/mips32/mips-syscall7.S | 7 +-
 .../linux/mips/mips32/mips16/mips16-syscall.h | 64 +-
 .../mips/mips32/mips16/mips16-syscall0.c | 4 +-
 .../mips/mips32/mips16/mips16-syscall1.c | 6 +-
 .../mips/mips32/mips16/mips16-syscall2.c | 6 +-
 .../mips/mips32/mips16/mips16-syscall3.c | 6 +-
 .../mips/mips32/mips16/mips16-syscall4.c | 6 +-
 sysdeps/unix/sysv/linux/mips/mips32/sysdep.h | 145 +--
 .../unix/sysv/linux/mips/mips64/n32/sysdep.h | 121 +-
 .../unix/sysv/linux/mips/mips64/n64/sysdep.h | 119 +-
 sysdeps/unix/sysv/linux/mips/mips64/syscall.S | 2 +-
 sysdeps/unix/sysv/linux/mips/sysdep.h | 4 +-
 sysdeps/unix/sysv/linux/mips/unwind-arch.h | 67 ++
 sysdeps/unix/sysv/linux/msgctl.c | 10 +-
 .../sysv/linux/powerpc/powerpc32/sysdep.h | 24 +-
 .../sysv/linux/powerpc/powerpc64/sysdep.h | 24 +-
 sysdeps/unix/sysv/linux/prctl.c | 42 +
 sysdeps/unix/sysv/linux/process_vm_readv.c | 32 +
 sysdeps/unix/sysv/linux/process_vm_writev.c | 32 +
 sysdeps/unix/sysv/linux/ptsname.c | 95 +-
 sysdeps/unix/sysv/linux/riscv/sysdep.h | 84 +-
 sysdeps/unix/sysv/linux/semctl.c | 10 +-
 sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies | 1 +
 sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies | 1 +
 sysdeps/unix/sysv/linux/shmctl.c | 10 +-
 sysdeps/unix/sysv/linux/sparc/Makefile | 8 +-
 .../unix/sysv/linux/sparc/sparc32/sigaction.c | 26 +-
 .../sysv/linux/sparc/sparc32/sigreturn_stub.S | 34 +
 .../unix/sysv/linux/sparc/sparc64/sigaction.c | 14 +-
 .../sysv/linux/sparc/sparc64/sigreturn_stub.S | 29 +
 sysdeps/unix/sysv/linux/syscall-names.list | 4 +-
 sysdeps/unix/sysv/linux/syscalls.list | 41 +-
 .../unix/sysv/linux/tst-getcwd-smallbuff.c | 259 ++++
 sysdeps/unix/sysv/linux/x86_64/sysdep.h | 86 +-
 sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h | 35 +
 sysdeps/x86/Makefile | 36 +
 sysdeps/x86/cacheinfo.c | 36 +-
 sysdeps/x86/cpu-features.c | 60 +-
 sysdeps/x86/cpu-features.h | 5 +
 sysdeps/x86/cpu-tunables.c | 3 +
 sysdeps/x86/dl-cet.c | 6 +-
 sysdeps/x86/tst-get-cpu-features.c | 1 +
 sysdeps/x86/tst-memchr-rtm.c | 54 +
 sysdeps/x86/tst-memcmp-rtm.c | 52 +
 sysdeps/x86/tst-memmove-rtm.c | 53 +
 sysdeps/x86/tst-memrchr-rtm.c | 54 +
 sysdeps/x86/tst-memset-rtm.c | 45 +
 sysdeps/x86/tst-setjmp-cet.c | 1 +
 sysdeps/x86/tst-strchr-rtm.c | 54 +
 sysdeps/x86/tst-strcpy-rtm.c | 53 +
 sysdeps/x86/tst-string-rtm.h | 72 ++
 sysdeps/x86/tst-strlen-rtm.c | 53 +
 sysdeps/x86/tst-strncmp-rtm.c | 81 ++
 sysdeps/x86/tst-strrchr-rtm.c | 53 +
 .../tst-wcsncmp-rtm.c} | 11 +-
 sysdeps/x86_64/Makefile | 7 +
 sysdeps/x86_64/configure | 33 -
 sysdeps/x86_64/configure.ac | 25 -
 sysdeps/x86_64/dl-machine.h | 16 +-
 sysdeps/x86_64/memchr.S | 77 +-
 sysdeps/x86_64/multiarch/Makefile | 60 +-
 sysdeps/x86_64/multiarch/ifunc-avx2.h | 18 +-
 sysdeps/x86_64/multiarch/ifunc-impl-list.c | 448 ++++++-
 sysdeps/x86_64/multiarch/ifunc-memcmp.h | 17 +-
 sysdeps/x86_64/multiarch/ifunc-memmove.h | 45 +-
 sysdeps/x86_64/multiarch/ifunc-memset.h | 49 +-
 sysdeps/x86_64/multiarch/ifunc-strcpy.h | 17 +-
 sysdeps/x86_64/multiarch/ifunc-wcslen.h | 52 +
 sysdeps/x86_64/multiarch/ifunc-wmemset.h | 22 +-
 sysdeps/x86_64/multiarch/memchr-avx2-rtm.S | 12 +
 sysdeps/x86_64/multiarch/memchr-avx2.S | 494 ++++----
 sysdeps/x86_64/multiarch/memchr-evex.S | 478 ++++++++
 .../x86_64/multiarch/memcmp-avx2-movbe-rtm.S | 12 +
 sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S | 28 +-
 sysdeps/x86_64/multiarch/memcmp-evex-movbe.S | 440 +++++++
 .../memmove-avx-unaligned-erms-rtm.S | 17 +
 .../multiarch/memmove-avx512-unaligned-erms.S | 18 +-
 .../multiarch/memmove-evex-unaligned-erms.S | 26 +
 .../multiarch/memmove-vec-unaligned-erms.S | 84 +-
 sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S | 12 +
 sysdeps/x86_64/multiarch/memrchr-avx2.S | 53 +-
 sysdeps/x86_64/multiarch/memrchr-evex.S | 337 ++++++
 .../memset-avx2-unaligned-erms-rtm.S | 10 +
 .../multiarch/memset-avx2-unaligned-erms.S | 12 +-
 .../multiarch/memset-avx512-unaligned-erms.S | 16 +-
 .../multiarch/memset-evex-unaligned-erms.S | 24 +
 .../multiarch/memset-vec-unaligned-erms.S | 61 +-
 sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S | 4 +
 sysdeps/x86_64/multiarch/rawmemchr-evex.S | 4 +
 sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S | 3 +
 sysdeps/x86_64/multiarch/stpcpy-evex.S | 3 +
 sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S | 4 +
 sysdeps/x86_64/multiarch/stpncpy-evex.S | 4 +
 sysdeps/x86_64/multiarch/strcat-avx2-rtm.S | 12 +
 sysdeps/x86_64/multiarch/strcat-avx2.S | 6 +-
 sysdeps/x86_64/multiarch/strcat-evex.S | 283 +++++
 sysdeps/x86_64/multiarch/strchr-avx2-rtm.S | 12 +
 sysdeps/x86_64/multiarch/strchr-avx2.S | 22 +-
 sysdeps/x86_64/multiarch/strchr-evex.S | 335 ++++++
 sysdeps/x86_64/multiarch/strchr.c | 18 +-
 sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S | 3 +
 sysdeps/x86_64/multiarch/strchrnul-evex.S | 3 +
 sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S | 12 +
 sysdeps/x86_64/multiarch/strcmp-avx2.S | 80 +-
 sysdeps/x86_64/multiarch/strcmp-evex.S | 1043 +++++++++++++++++
 sysdeps/x86_64/multiarch/strcmp.c | 19 +-
 sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S | 12 +
 sysdeps/x86_64/multiarch/strcpy-avx2.S | 85 +-
 sysdeps/x86_64/multiarch/strcpy-evex.S | 1003 ++++++++++++++++
 sysdeps/x86_64/multiarch/strlen-avx2-rtm.S | 12 +
 sysdeps/x86_64/multiarch/strlen-avx2.S | 610 ++++++----
 sysdeps/x86_64/multiarch/strlen-evex.S | 489 ++++++++
 sysdeps/x86_64/multiarch/strlen-sse2.S | 2 +-
 sysdeps/x86_64/multiarch/strlen-vec.S | 270 +++++
 sysdeps/x86_64/multiarch/strncat-avx2-rtm.S | 3 +
 sysdeps/x86_64/multiarch/strncat-evex.S | 3 +
 sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S | 4 +
 sysdeps/x86_64/multiarch/strncmp-avx2.S | 1 +
 sysdeps/x86_64/multiarch/strncmp-evex.S | 3 +
 sysdeps/x86_64/multiarch/strncmp.c | 19 +-
 sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S | 3 +
 sysdeps/x86_64/multiarch/strncpy-evex.S | 3 +
 sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S | 4 +
 sysdeps/x86_64/multiarch/strnlen-evex.S | 4 +
 sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S | 12 +
 sysdeps/x86_64/multiarch/strrchr-avx2.S | 19 +-
 sysdeps/x86_64/multiarch/strrchr-evex.S | 265 +++++
 sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S | 3 +
 sysdeps/x86_64/multiarch/wcschr-evex.S | 3 +
 sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S | 4 +
 sysdeps/x86_64/multiarch/wcscmp-evex.S | 4 +
 sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S | 4 +
 sysdeps/x86_64/multiarch/wcslen-evex.S | 4 +
 sysdeps/x86_64/multiarch/wcslen-sse4_1.S | 4 +
 sysdeps/x86_64/multiarch/wcslen.c | 2 +-
 sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S | 5 +
 sysdeps/x86_64/multiarch/wcsncmp-avx2.S | 2 +-
 sysdeps/x86_64/multiarch/wcsncmp-evex.S | 5 +
 sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S | 5 +
 sysdeps/x86_64/multiarch/wcsnlen-evex.S | 5 +
 sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S | 2 +-
 sysdeps/x86_64/multiarch/wcsnlen.c | 22 +-
 sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S | 3 +
 sysdeps/x86_64/multiarch/wcsrchr-evex.S | 3 +
 sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S | 4 +
 sysdeps/x86_64/multiarch/wmemchr-evex.S | 4 +
 .../x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S | 4 +
 sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S | 4 +
 sysdeps/x86_64/strlen.S | 243 +---
 sysdeps/x86_64/sysdep.h | 22 +
 sysdeps/x86_64/tst-rsi-strlen.c | 81 ++
 sysdeps/x86_64/tst-rsi-wcslen.c | 20 +
 290 files changed, 13443 insertions(+), 2915 deletions(-)
 create mode 100644 iconv/gconv_charset.c
 create mode 100644 iconv/tst-iconv-opt.c
 create mode 100644 iconv/tst-iconv8.c
 create mode 100644 iconv/tst-iconv_prog.sh
 create mode 100644 iconvdata/bug-iconv13.c
 create mode 100644 iconvdata/bug-iconv14.c
 create mode 100644 iconvdata/bug-iconv15.c
 create mode 100644 misc/tst-syscalls.c
 create mode 100644 nptl/tst-setgroups.c
 create mode 100644 socket/sockaddr_un_set.c
 create mode 100644 socket/tst-sockaddr_un_set.c
 create mode 100644 stdio-common/tst-grouping2.c
 create mode 100644 stdio-common/tst-printf-bz25691.c
 create mode 100644 stdio-common/tst-vfprintf-width-prec-alloc.c
 create mode 100644 sunrpc/tst-bug22542.c
 create mode 100644 sunrpc/tst-bug28768.c
 create mode 100644 support/xchdir.c
 create mode 100644 support/xclone.c
 create mode 100644 support/xsched.h
 create mode 100644 sysdeps/aarch64/multiarch/memcpy_advsimd.S
 create mode 100644 sysdeps/arm/be/nofpu/Implies
 create mode 100644 sysdeps/arm/le/nofpu/Implies
 create mode 100644 sysdeps/generic/unwind-arch.h
 create mode 100644 sysdeps/hppa/dl-runtime.c
 create mode 100644 sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c
 create mode 100644 sysdeps/sh/be/sh4/fpu/Implies
 create mode 100644 sysdeps/sh/le/sh4/fpu/Implies
 create mode 100644 sysdeps/unix/sysv/linux/mips/unwind-arch.h
 create mode 100644 sysdeps/unix/sysv/linux/prctl.c
 create mode 100644 sysdeps/unix/sysv/linux/process_vm_readv.c
 create mode 100644 sysdeps/unix/sysv/linux/process_vm_writev.c
 create mode 100644 sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies
 create mode 100644 sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies
 create mode 100644 sysdeps/unix/sysv/linux/sparc/sparc32/sigreturn_stub.S
 create mode 100644 sysdeps/unix/sysv/linux/sparc/sparc64/sigreturn_stub.S
 create mode 100644 sysdeps/unix/sysv/linux/tst-getcwd-smallbuff.c
 create mode 100644 sysdeps/x86/tst-memchr-rtm.c
 create mode 100644 sysdeps/x86/tst-memcmp-rtm.c
 create mode 100644 sysdeps/x86/tst-memmove-rtm.c
 create mode 100644 sysdeps/x86/tst-memrchr-rtm.c
 create mode 100644 sysdeps/x86/tst-memset-rtm.c
 create mode 100644 sysdeps/x86/tst-setjmp-cet.c
 create mode 100644 sysdeps/x86/tst-strchr-rtm.c
 create mode 100644 sysdeps/x86/tst-strcpy-rtm.c
 create mode 100644 sysdeps/x86/tst-string-rtm.h
 create mode 100644 sysdeps/x86/tst-strlen-rtm.c
 create mode 100644 sysdeps/x86/tst-strncmp-rtm.c
 create mode 100644 sysdeps/x86/tst-strrchr-rtm.c
 rename sysdeps/{unix/sysv/linux/nios2/kernel-features.h => x86/tst-wcsncmp-rtm.c} (74%)
 mode change 100644 => 100755 sysdeps/x86_64/configure
 create mode 100644 sysdeps/x86_64/multiarch/ifunc-wcslen.h
 create mode 100644 sysdeps/x86_64/multiarch/memchr-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/memchr-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
 create mode 100644 sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S
 create mode 100644 sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/memrchr-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
 create mode 100644 sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/rawmemchr-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/stpcpy-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/stpncpy-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/strcat-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strcat-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/strchr-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strchr-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strchrnul-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strcmp-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strcpy-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/strlen-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strlen-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/strlen-vec.S
 create mode 100644 sysdeps/x86_64/multiarch/strncat-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strncat-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strncmp-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strncpy-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strnlen-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strrchr-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/wcschr-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/wcscmp-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/wcslen-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/wcslen-sse4_1.S
 create mode 100644 sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/wcsncmp-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/wcsnlen-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/wcsrchr-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/wmemchr-evex.S
 create mode 100644 sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S
 create mode 100644 sysdeps/x86_64/tst-rsi-strlen.c
 create mode 100644 sysdeps/x86_64/tst-rsi-wcslen.c

diff --git a/INSTALL b/INSTALL
index 242cb06f9..b487e1863 100644
--- a/INSTALL
+++ b/INSTALL
@@ -184,14 +184,9 @@ if 'CFLAGS' is specified it must enable optimization.  For example:
 '--enable-pt_chown'
      The file 'pt_chown' is a helper binary for 'grantpt' (*note
      Pseudo-Terminals: Allocation.) that is installed setuid root to fix
-     up pseudo-terminal ownership.  It is not built by default because
-     systems using the Linux kernel are commonly built with the 'devpts'
-     filesystem enabled and mounted at '/dev/pts', which manages
-     pseudo-terminal ownership automatically.  By using
-     '--enable-pt_chown', you may build 'pt_chown' and install it setuid
-     and owned by 'root'.  The use of 'pt_chown' introduces additional
-     security risks to the system and you should enable it only if you
-     understand and accept those risks.
+     up pseudo-terminal ownership on GNU/Hurd.  It is not required on
+     GNU/Linux, and the GNU C Library will not use the installed
+     'pt_chown' program when configured with '--enable-pt_chown'.

 '--disable-werror'
      By default, the GNU C Library is built with '-Werror'.  If you wish
diff --git a/NEWS b/NEWS
index 292fbc595..8a20d3c4e 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,94 @@ See the end for copying conditions.
 Please send GNU C library bug reports via <https://sourceware.org/bugzilla/>
 using `glibc' in the "product" field.

+Version 2.31.1
+
+The following bugs are resolved with this release:
+
+  [14231] stdio-common tests memory requirements
+  [19519] iconv(1) with -c option hangs on illegal multi-byte sequences
+    (CVE-2016-10228)
+  [20019] NULL pointer dereference in libc.so.6 IFUNC due to uninitialized GOT
+  [20543] Please move from .gnu.linkonce to comdat
+  [22542] CVE-2022-23219: Buffer overflow in sunrpc clnt_create for "unix"
+  [23296] Data race in setting function descriptor during lazy binding
+  [24973] iconv encounters segmentation fault when converting 0x00 0xfe in
+    EUC-KR to UTF-8 (CVE-2019-25013)
+  [25487] sinl() stack corruption from crafted input (CVE-2020-10029)
+  [25523] MIPS/Linux inline syscall template is miscompiled
+  [25623] test-sysvmsg, test-sysvsem, test-sysvshm fail with 2.31 on 32 bit
+    and old kernel
+  [25635] arm: Wrong sysdep order selection for soft-fp
+  [25639] localedata: Some names of days and months wrongly spelt in Occitan
+  [25691] stdio: Remove memory leak from multibyte convertion
+  [25715] system() returns wrong errors when posix_spawn fails
+  [25810] x32: Incorrect syscall entries with pointer, off_t and size_t
+  [25896] Incorrect prctl
+  [25902] Bad LOADARGS_N
+  [25933] Off by one error in __strncmp_avx2
+  [25966] Incorrect access of __x86_shared_non_temporal_threshold for x32
+  [25976] nss_compat: internal_end*ent may clobber errno, hiding ERANGE
+  [26211] printf integer overflow calculating allocation size
+  [26224] iconv hangs when converting some invalid inputs from several IBM
+    character sets (CVE-2020-27618)
+  [26248] Incorrect argument types for INLINE_SETXID_SYSCALL
+  [26332] Incorrect cache line size load causes memory corruption in memset
+  [26383] bind_textdomain_codeset doesn't accept //TRANSLIT anymore
+  [26923] Assertion failure in iconv when converting invalid UCS4
+    (CVE-2020-29562)
+  [26932] libc: sh: Multiple floating point functions defined as stubs only
+  [27130] "rep movsb" performance issue
+  [27177] GLIBC_TUNABLES=glibc.cpu.x86_ibt=on:glibc.cpu.x86_shstk=on doesn't
+    work
+  [27457] vzeroupper use in AVX2 multiarch string functions cause HTM aborts
+  [27974] Overflow bug in some implementation of wcsnlen, wmemchr, and wcsncat
+  [28524] Conversion from ISO-2022-JP-3 with iconv may emit spurious NULs
+  [28755] overflow bug in wcsncmp_avx2 and wcsncmp_evex
+  [28768] CVE-2022-23218: Buffer overflow in sunrpc svcunix_create
+  [28769] CVE-2021-3999: Off-by-one buffer overflow/underflow in getcwd()
+  [28896] strncmp-avx2-rtm and wcsncmp-avx2-rtm fallback on non-rtm
+    variants when avoiding overflow
+  [29530] segfault in printf handling thousands separator
+
+Security related changes:
+
+  CVE-2016-10228: An infinite loop has been fixed in the iconv program when
+  invoked with the -c option and when processing invalid multi-byte input
+  sequences.  Reported by Jan Engelhardt.
+
+  CVE-2020-10029: Trigonometric functions on x86 targets suffered from stack
+  corruption when they were passed a pseudo-zero argument.  Reported by Guido
+  Vranken / ForAllSecure Mayhem.
+
+  CVE-2020-1751: A defect in the PowerPC backtrace function could cause an
+  out-of-bounds write when executed in a signal frame context.
+
+  CVE-2020-1752: A use-after-free vulnerability in the glob function when
+  expanding ~user has been fixed.
+
+  CVE-2020-6096: A signed comparison vulnerability in the ARMv7 memcpy and
+  memmove functions has been fixed.  Discovered by Jason Royes and Samual
+  Dytrych of the Cisco Security Assessment and Penetration Team (See
+  TALOS-2020-1019).
+
+  CVE-2020-27618: An infinite loop has been fixed in the iconv program when
+  invoked with input containing redundant shift sequences in the IBM1364,
+  IBM1371, IBM1388, IBM1390, or IBM1399 character sets.
+
+  CVE-2020-29562: An assertion failure has been fixed in the iconv function
+  when invoked with UCS4 input containing an invalid character.
+
+  CVE-2021-3999: Passing a buffer of size exactly 1 byte to the getcwd
+  function may result in an off-by-one buffer underflow and overflow
+  when the current working directory is longer than PATH_MAX and also
+  corresponds to the / directory through an unprivileged mount
+  namespace.  Reported by Qualys.
+
+  CVE-2022-23219: Passing an overlong file name to the clnt_create
+  legacy function could result in a stack-based buffer overflow when
+  using the "unix" protocol.  Reported by Martin Sebor.
+
+  CVE-2022-23218: Passing an overlong file name to the svcunix_create
+  legacy function could result in a stack-based buffer overflow.
+
 Version 2.31

 Major new features:
@@ -141,6 +229,18 @@ Changes to build and runtime requirements:
   source tree.  ChangeLog files are located in the ChangeLog.old directory
   as ChangeLog.N where the highest N has the latest entries.

+* On Linux, the system administrator needs to configure /dev/pts with
+  the intended access modes for pseudo-terminals.  glibc no longer
+  attempts to adjust permissions of terminal devices.  The previous glibc
+  defaults ("tty" group, user read/write and group write) already
+  corresponded to what most systems used, so that grantpt did not
+  perform any adjustments.
+
+* On Linux, the posix_openpt and getpt functions no longer attempt to
+  use legacy (BSD) pseudo-terminals and assume that if /dev/ptmx exists
+  (and pseudo-terminals are supported), a devpts file system is mounted
+  on /dev/pts.  Current systems already meet these requirements.
+
 Security related changes:

   CVE-2019-19126: ld.so failed to ignore the LD_PREFER_MAP_32BIT_EXEC
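The two pseudo-terminal NEWS entries above change documented behavior, not the
API.  For illustration only (this program is not part of the patch), the usual
allocation sequence is unchanged on a devpts system; grantpt simply no longer
has any adjustment work to do:

  #define _XOPEN_SOURCE 600
  #include <fcntl.h>
  #include <stdio.h>
  #include <stdlib.h>

  int
  main (void)
  {
    /* Open the master via /dev/ptmx; on Linux this assumes devpts.  */
    int mfd = posix_openpt (O_RDWR | O_NOCTTY);
    if (mfd < 0)
      {
        perror ("posix_openpt");
        return 1;
      }

    /* With /dev/pts configured as described above, grantpt performs no
       ownership or mode changes; it is kept for portability.  */
    if (grantpt (mfd) != 0 || unlockpt (mfd) != 0)
      {
        perror ("grantpt/unlockpt");
        return 1;
      }

    printf ("slave: %s\n", ptsname (mfd));
    return 0;
  }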
diff --git a/Rules b/Rules
index 8b771f609..beab969fd 100644
--- a/Rules
+++ b/Rules
@@ -155,6 +155,7 @@ xtests: tests $(xtests-special)
 else
 tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \
        $(tests-container:%=$(objpfx)%.out) \
+       $(tests-mcheck:%=$(objpfx)%-mcheck.out) \
        $(tests-special) $(tests-printers-out)
 xtests: tests $(xtests:%=$(objpfx)%.out) $(xtests-special)
 endif
@@ -165,7 +166,7 @@ ifeq ($(run-built-tests),no)
 tests-expected =
 else
 tests-expected = $(tests) $(tests-internal) $(tests-printers) \
-                 $(tests-container)
+                 $(tests-container) $(tests-mcheck:%=%-mcheck)
 endif
 tests:
 	$(..)scripts/merge-test-results.sh -s $(objpfx) $(subdir) \
@@ -191,6 +192,7 @@ else
 binaries-pie-tests =
 binaries-pie-notests =
 endif
+binaries-mcheck-tests = $(tests-mcheck:%=%-mcheck)
 else
 binaries-all-notests =
 binaries-all-tests = $(tests) $(tests-internal) $(xtests) $(test-srcs)
@@ -200,6 +202,7 @@ binaries-static-tests =
 binaries-static =
 binaries-pie-tests =
 binaries-pie-notests =
+binaries-mcheck-tests =
 endif

 binaries-pie = $(binaries-pie-tests) $(binaries-pie-notests)
@@ -223,6 +226,14 @@ $(addprefix $(objpfx),$(binaries-shared-tests)): %: %.o \
 	$(+link-tests)
 endif

+ifneq "$(strip $(binaries-mcheck-tests))" ""
+$(addprefix $(objpfx),$(binaries-mcheck-tests)): %-mcheck: %.o \
+  $(link-extra-libs-tests) \
+  $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
+  $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
+	$(+link-tests)
+endif
+
 ifneq "$(strip $(binaries-pie-tests))" ""
 $(addprefix $(objpfx),$(binaries-pie-tests)): %: %.o \
   $(link-extra-libs-tests) \
@@ -253,6 +264,12 @@ $(addprefix $(objpfx),$(binaries-static-tests)): %: %.o \
 	$(+link-static-tests)
 endif

+# All mcheck tests will be run with MALLOC_CHECK_=3
+define mcheck-ENVS
+$(1)-mcheck-ENV = MALLOC_CHECK_=3
+endef
+$(foreach t,$(tests-mcheck),$(eval $(call mcheck-ENVS,$(t))))
+
 ifneq "$(strip $(tests) $(tests-internal) $(xtests) $(test-srcs))" ""
 # These are the implicit rules for making test outputs
 # from the test programs and whatever input files are present.
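For context on the tests-mcheck machinery above: MALLOC_CHECK_=3 enables
glibc's malloc consistency checking and aborts on detected heap corruption.
A hypothetical stand-alone demonstration, not part of the patch:

  #include <stdlib.h>
  #include <string.h>

  int
  main (void)
  {
    char *p = malloc (8);
    if (p == NULL)
      return 1;

    memset (p, 'x', 9);  /* Writes one byte past the allocation.  */

    /* Run as: MALLOC_CHECK_=3 ./demo
       The checking allocator detects the clobbered guard byte here,
       prints a diagnostic, and aborts.  */
    free (p);
    return 0;
  }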
diff --git a/configure b/configure
index b959d2d98..3b98ec312 100755
--- a/configure
+++ b/configure
@@ -4035,7 +4035,7 @@ if ${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS \
             -o conftest conftest.S 1>&5 2>&5; then
   # Do a link to see if the backend supports IFUNC relocs.
   $READELF -r conftest 1>&5
-  LC_ALL=C $READELF -r conftest | grep 'no relocations' >/dev/null || {
+  LC_ALL=C $READELF -Wr conftest | grep -q 'IRELATIVE\|R_SPARC_JMP_IREL' && {
     libc_cv_ld_gnu_indirect_function=yes
   }
 fi
diff --git a/configure.ac b/configure.ac
index 49b900c1e..e20034f30 100644
--- a/configure.ac
+++ b/configure.ac
@@ -649,7 +649,7 @@ if ${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS \
             -o conftest conftest.S 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD; then
   # Do a link to see if the backend supports IFUNC relocs.
   $READELF -r conftest 1>&AS_MESSAGE_LOG_FD
-  LC_ALL=C $READELF -r conftest | grep 'no relocations' >/dev/null || {
+  LC_ALL=C $READELF -Wr conftest | grep -q 'IRELATIVE\|R_SPARC_JMP_IREL' && {
    libc_cv_ld_gnu_indirect_function=yes
  }
 fi
diff --git a/debug/backtrace.c b/debug/backtrace.c
index cc4b9a5c9..69cf4c23c 100644
--- a/debug/backtrace.c
+++ b/debug/backtrace.c
@@ -23,6 +23,7 @@
 #include <execinfo.h>
 #include <stdlib.h>
 #include <unwind.h>
+#include <unwind-arch.h>

 struct trace_arg
 {
@@ -78,6 +79,10 @@ backtrace_helper (struct _Unwind_Context *ctx, void *a)
   if (arg->cnt != -1)
     {
       arg->array[arg->cnt] = (void *) unwind_getip (ctx);
+      if (arg->cnt > 0)
+        arg->array[arg->cnt]
+          = unwind_arch_adjustment (arg->array[arg->cnt - 1],
+                                    arg->array[arg->cnt]);

       /* Check whether we make any progress.  */
       _Unwind_Word cfa = unwind_getcfa (ctx);
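The new sysdeps/generic/unwind-arch.h is not shown in this excerpt.  Judging
from the call site above, the generic hook presumably returns the unwound
address unchanged, with architectures such as MIPS overriding it (e.g. to
handle ISA-mode bits in return addresses).  A sketch under that assumption,
not the verbatim file:

  /* Plausible shape of the generic fallback.  PREV is the previously
     recorded return address, available to ports that need it.  */
  #ifndef _UNWIND_ARCH_H
  #define _UNWIND_ARCH_H

  static inline void *
  unwind_arch_adjustment (void *prev, void *addr)
  {
    return addr;   /* Generic case: no adjustment needed.  */
  }

  #endif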
diff --git a/elf/Makefile b/elf/Makefile
index 632a4d8b0..f9646f9c8 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -1348,6 +1348,8 @@ CFLAGS-ifuncmain7pie.c += $(pie-ccflag)
 CFLAGS-ifuncmain9pie.c += $(pie-ccflag)
 CFLAGS-tst-ifunc-textrel.c += $(pic-ccflag)

+LDFLAGS-ifuncmain6pie = -Wl,-z,lazy
+
 $(objpfx)ifuncmain1pie: $(objpfx)ifuncmod1.so
 $(objpfx)ifuncmain1staticpie: $(objpfx)ifuncdep1pic.o
 $(objpfx)ifuncmain1vispie: $(objpfx)ifuncmod1.so
@@ -1581,8 +1583,6 @@ $(objpfx)tst-nodelete-dlclose.out: $(objpfx)tst-nodelete-dlclose-dso.so \
 tst-env-setuid-ENV = MALLOC_CHECK_=2 MALLOC_MMAP_THRESHOLD_=4096 \
                      LD_HWCAP_MASK=0x1
-tst-env-setuid-tunables-ENV = \
-  GLIBC_TUNABLES=glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096

 $(objpfx)tst-debug1: $(libdl)
 $(objpfx)tst-debug1.out: $(objpfx)tst-debug1mod1.so
diff --git a/elf/dl-tunables.c b/elf/dl-tunables.c
index 44d06665b..2296ad387 100644
--- a/elf/dl-tunables.c
+++ b/elf/dl-tunables.c
@@ -177,6 +177,7 @@ parse_tunables (char *tunestr, char *valstring)
     return;

   char *p = tunestr;
+  size_t off = 0;

   while (true)
     {
@@ -190,7 +191,11 @@ parse_tunables (char *tunestr, char *valstring)
       /* If we reach the end of the string before getting a valid
          name-value pair, bail out.  */
       if (p[len] == '\0')
-        return;
+        {
+          if (__libc_enable_secure)
+            tunestr[off] = '\0';
+          return;
+        }

       /* We did not find a valid name-value pair before encountering the
          colon. */
@@ -216,35 +221,28 @@ parse_tunables (char *tunestr, char *valstring)

          if (tunable_is_name (cur->name, name))
            {
-             /* If we are in a secure context (AT_SECURE) then ignore the tunable
-                unless it is explicitly marked as secure.  Tunable values take
-                precendence over their envvar aliases.  */
+             /* If we are in a secure context (AT_SECURE) then ignore the
+                tunable unless it is explicitly marked as secure.  Tunable
+                values take precedence over their envvar aliases.  We write
+                the tunables that are not SXID_ERASE back to TUNESTR, thus
+                dropping all SXID_ERASE tunables and any invalid or
+                unrecognized tunables.  */
              if (__libc_enable_secure)
                {
-                 if (cur->security_level == TUNABLE_SECLEVEL_SXID_ERASE)
+                 if (cur->security_level != TUNABLE_SECLEVEL_SXID_ERASE)
                    {
-                     if (p[len] == '\0')
-                       {
-                         /* Last tunable in the valstring.  Null-terminate and
-                            return.  */
-                         *name = '\0';
-                         return;
-                       }
-                     else
-                       {
-                         /* Remove the current tunable from the string.  We do
-                            this by overwriting the string starting from NAME
-                            (which is where the current tunable begins) with
-                            the remainder of the string.  We then have P point
-                            to NAME so that we continue in the correct
-                            position in the valstring.  */
-                         char *q = &p[len + 1];
-                         p = name;
-                         while (*q != '\0')
-                           *name++ = *q++;
-                         name[0] = '\0';
-                         len = 0;
-                       }
+                     if (off > 0)
+                       tunestr[off++] = ':';
+
+                     const char *n = cur->name;
+
+                     while (*n != '\0')
+                       tunestr[off++] = *n++;
+
+                     tunestr[off++] = '=';
+
+                     for (size_t j = 0; j < len; j++)
+                       tunestr[off++] = value[j];
                    }

                  if (cur->security_level != TUNABLE_SECLEVEL_NONE)
@@ -257,9 +255,7 @@ parse_tunables (char *tunestr, char *valstring)
            }
        }

-      if (p[len] == '\0')
-        return;
-      else
+      if (p[len] != '\0')
        p += len + 1;
     }
 }
"glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096", + "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096", + "glibc.malloc.mmap_threshold=4096", + "glibc.malloc.mmap_threshold=4096", + "", + "", + "", + "", + "", + "", +}; static int -test_child_tunables (void) +test_child (int off) { const char *val = getenv ("GLIBC_TUNABLES"); #if HAVE_TUNABLES - if (val != NULL && strcmp (val, CHILD_VALSTRING_VALUE) == 0) + if (val != NULL && strcmp (val, resultstrings[off]) == 0) return 0; if (val != NULL) - printf ("Unexpected GLIBC_TUNABLES VALUE %s\n", val); + printf ("[%d] Unexpected GLIBC_TUNABLES VALUE %s\n", off, val); return 1; #else if (val != NULL) { - printf ("GLIBC_TUNABLES not cleared\n"); + printf ("[%d] GLIBC_TUNABLES not cleared\n", off); return 1; } return 0; @@ -61,15 +102,48 @@ test_child_tunables (void) } static int -test_parent_tunables (void) +do_test (int argc, char **argv) { - const char *val = getenv ("GLIBC_TUNABLES"); + /* Setgid child process. */ + if (argc == 2) + { + if (getgid () == getegid ()) + /* This can happen if the file system is mounted nosuid. */ + FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n", + (intmax_t) getgid ()); - if (val != NULL && strcmp (val, PARENT_VALSTRING_VALUE) == 0) - return 0; + int ret = test_child (atoi (argv[1])); - if (val != NULL) - printf ("Unexpected GLIBC_TUNABLES VALUE %s\n", val); + if (ret != 0) + exit (1); - return 1; + exit (EXIT_SUCCESS); + } + else + { + int ret = 0; + + /* Spawn tests. */ + for (int i = 0; i < array_length (teststrings); i++) + { + char buf[INT_BUFSIZE_BOUND (int)]; + + printf ("Spawned test for %s (%d)\n", teststrings[i], i); + snprintf (buf, sizeof (buf), "%d\n", i); + if (setenv ("GLIBC_TUNABLES", teststrings[i], 1) != 0) + exit (1); + + int status = support_capture_subprogram_self_sgid (buf); + + /* Bail out early if unsupported. */ + if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) + return EXIT_UNSUPPORTED; + + ret |= status; + } + return ret; + } } + +#define TEST_FUNCTION_ARGV do_test +#include diff --git a/elf/tst-env-setuid.c b/elf/tst-env-setuid.c index 41dc79e83..2dbccdb69 100644 --- a/elf/tst-env-setuid.c +++ b/elf/tst-env-setuid.c @@ -29,173 +29,12 @@ #include #include +#include #include #include +#include static char SETGID_CHILD[] = "setgid-child"; -#define CHILD_STATUS 42 - -/* Return a GID which is not our current GID, but is present in the - supplementary group list. */ -static gid_t -choose_gid (void) -{ - const int count = 64; - gid_t groups[count]; - int ret = getgroups (count, groups); - if (ret < 0) - { - printf ("getgroups: %m\n"); - exit (1); - } - gid_t current = getgid (); - for (int i = 0; i < ret; ++i) - { - if (groups[i] != current) - return groups[i]; - } - return 0; -} - -/* Spawn and execute a program and verify that it returns the CHILD_STATUS. */ -static pid_t -do_execve (char **args) -{ - pid_t kid = vfork (); - - if (kid < 0) - { - printf ("vfork: %m\n"); - return -1; - } - - if (kid == 0) - { - /* Child process. 
diff --git a/elf/tst-env-setuid.c b/elf/tst-env-setuid.c
index 41dc79e83..2dbccdb69 100644
--- a/elf/tst-env-setuid.c
+++ b/elf/tst-env-setuid.c
@@ -29,173 +29,12 @@
 #include <sys/wait.h>
 #include <unistd.h>
+#include <support/check.h>
 #include <support/support.h>
 #include <support/test-driver.h>
+#include <support/capture_subprocess.h>

 static char SETGID_CHILD[] = "setgid-child";
-#define CHILD_STATUS 42
-
-/* Return a GID which is not our current GID, but is present in the
-   supplementary group list.  */
-static gid_t
-choose_gid (void)
-{
-  const int count = 64;
-  gid_t groups[count];
-  int ret = getgroups (count, groups);
-  if (ret < 0)
-    {
-      printf ("getgroups: %m\n");
-      exit (1);
-    }
-  gid_t current = getgid ();
-  for (int i = 0; i < ret; ++i)
-    {
-      if (groups[i] != current)
-        return groups[i];
-    }
-  return 0;
-}
-
-/* Spawn and execute a program and verify that it returns the CHILD_STATUS.  */
-static pid_t
-do_execve (char **args)
-{
-  pid_t kid = vfork ();
-
-  if (kid < 0)
-    {
-      printf ("vfork: %m\n");
-      return -1;
-    }
-
-  if (kid == 0)
-    {
-      /* Child process.  */
-      execve (args[0], args, environ);
-      _exit (-errno);
-    }
-
-  if (kid < 0)
-    return 1;
-
-  int status;
-
-  if (waitpid (kid, &status, 0) < 0)
-    {
-      printf ("waitpid: %m\n");
-      return 1;
-    }
-
-  if (WEXITSTATUS (status) == EXIT_UNSUPPORTED)
-    return EXIT_UNSUPPORTED;
-
-  if (!WIFEXITED (status) || WEXITSTATUS (status) != CHILD_STATUS)
-    {
-      printf ("Unexpected exit status %d from child process\n",
-              WEXITSTATUS (status));
-      return 1;
-    }
-  return 0;
-}
-
-/* Copies the executable into a restricted directory, so that we can
-   safely make it SGID with the TARGET group ID.  Then runs the
-   executable.  */
-static int
-run_executable_sgid (gid_t target)
-{
-  char *dirname = xasprintf ("%s/tst-tunables-setuid.%jd",
-                             test_dir, (intmax_t) getpid ());
-  char *execname = xasprintf ("%s/bin", dirname);
-  int infd = -1;
-  int outfd = -1;
-  int ret = 0;
-  if (mkdir (dirname, 0700) < 0)
-    {
-      printf ("mkdir: %m\n");
-      goto err;
-    }
-  infd = open ("/proc/self/exe", O_RDONLY);
-  if (infd < 0)
-    {
-      printf ("open (/proc/self/exe): %m\n");
-      goto err;
-    }
-  outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700);
-  if (outfd < 0)
-    {
-      printf ("open (%s): %m\n", execname);
-      goto err;
-    }
-  char buf[4096];
-  for (;;)
-    {
-      ssize_t rdcount = read (infd, buf, sizeof (buf));
-      if (rdcount < 0)
-        {
-          printf ("read: %m\n");
-          goto err;
-        }
-      if (rdcount == 0)
-        break;
-      char *p = buf;
-      char *end = buf + rdcount;
-      while (p != end)
-        {
-          ssize_t wrcount = write (outfd, buf, end - p);
-          if (wrcount == 0)
-            errno = ENOSPC;
-          if (wrcount <= 0)
-            {
-              printf ("write: %m\n");
-              goto err;
-            }
-          p += wrcount;
-        }
-    }
-  if (fchown (outfd, getuid (), target) < 0)
-    {
-      printf ("fchown (%s): %m\n", execname);
-      goto err;
-    }
-  if (fchmod (outfd, 02750) < 0)
-    {
-      printf ("fchmod (%s): %m\n", execname);
-      goto err;
-    }
-  if (close (outfd) < 0)
-    {
-      printf ("close (outfd): %m\n");
-      goto err;
-    }
-  if (close (infd) < 0)
-    {
-      printf ("close (infd): %m\n");
-      goto err;
-    }
-
-  char *args[] = {execname, SETGID_CHILD, NULL};
-
-  ret = do_execve (args);
-
-err:
-  if (outfd >= 0)
-    close (outfd);
-  if (infd >= 0)
-    close (infd);
-  if (execname)
-    {
-      unlink (execname);
-      free (execname);
-    }
-  if (dirname)
-    {
-      rmdir (dirname);
-      free (dirname);
-    }
-  return ret;
-}

 #ifndef test_child
 static int
@@ -256,40 +95,32 @@ do_test (int argc, char **argv)
   if (argc == 2 && strcmp (argv[1], SETGID_CHILD) == 0)
     {
       if (getgid () == getegid ())
-        {
-          /* This can happen if the file system is mounted nosuid.  */
-          fprintf (stderr, "SGID failed: GID and EGID match (%jd)\n",
-                   (intmax_t) getgid ());
-          exit (EXIT_UNSUPPORTED);
-        }
+        /* This can happen if the file system is mounted nosuid.  */
+        FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n",
+                          (intmax_t) getgid ());

       int ret = test_child ();

       if (ret != 0)
         exit (1);

-      exit (CHILD_STATUS);
+      exit (EXIT_SUCCESS);
     }
   else
     {
       if (test_parent () != 0)
         exit (1);

-      /* Try running a setgid program.  */
-      gid_t target = choose_gid ();
-      if (target == 0)
-        {
-          fprintf (stderr,
-                   "Could not find a suitable GID for user %jd, skipping test\n",
-                   (intmax_t) getuid ());
-          exit (0);
-        }
+      int status = support_capture_subprogram_self_sgid (SETGID_CHILD);

-      return run_executable_sgid (target);
-    }
+      if (WEXITSTATUS (status) == EXIT_UNSUPPORTED)
+        return EXIT_UNSUPPORTED;
+
+      if (!WIFEXITED (status))
+        FAIL_EXIT1 ("Unexpected exit status %d from child process\n", status);

-  /* Something went wrong and our argv was corrupted.  */
-  _exit (1);
+      return 0;
+    }
 }

 #define TEST_FUNCTION_ARGV do_test
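Both tests above now delegate the copy-to-restricted-dir/chmod-SGID/execve
dance to the test harness.  A condensed usage sketch of the new helper,
following the call pattern visible above (hypothetical test, not part of the
patch):

  #include <stdlib.h>
  #include <sys/wait.h>
  #include <support/capture_subprocess.h>
  #include <support/check.h>
  #include <support/test-driver.h>

  static int
  do_test (void)
  {
    /* Re-executes this very test binary SGID with one child argument
       and returns the waitpid status, as used by the tests above.  */
    int status = support_capture_subprogram_self_sgid ("setgid-child");

    if (WEXITSTATUS (status) == EXIT_UNSUPPORTED)
      return EXIT_UNSUPPORTED;   /* E.g. the filesystem is nosuid.  */

    TEST_VERIFY (WIFEXITED (status) && WEXITSTATUS (status) == 0);
    return 0;
  }

  #include <support/test-driver.c>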
diff --git a/iconv/Makefile b/iconv/Makefile
index b8fe8c47d..f9b51e23e 100644
--- a/iconv/Makefile
+++ b/iconv/Makefile
@@ -26,7 +26,7 @@ headers = iconv.h gconv.h
 routines = iconv_open iconv iconv_close \
            gconv_open gconv gconv_close gconv_db gconv_conf \
            gconv_builtin gconv_simple gconv_trans gconv_cache
-routines += gconv_dl
+routines += gconv_dl gconv_charset

 vpath %.c ../locale/programs ../intl
@@ -44,7 +44,7 @@ CFLAGS-linereader.c += -DNO_TRANSLITERATION
 CFLAGS-simple-hash.c += -I../locale

 tests = tst-iconv1 tst-iconv2 tst-iconv3 tst-iconv4 tst-iconv5 tst-iconv6 \
-        tst-iconv7 tst-iconv-mt
+        tst-iconv7 tst-iconv8 tst-iconv-mt tst-iconv-opt

 others = iconv_prog iconvconfig
 install-others-programs = $(inst_bindir)/iconv
@@ -61,6 +61,7 @@ include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left))

 ifeq ($(run-built-tests),yes)
 xtests-special += $(objpfx)test-iconvconfig.out
+tests-special += $(objpfx)tst-iconv_prog.out
 endif

 # Make a copy of the file because gconv module names are constructed
@@ -81,6 +82,13 @@ endif

 include ../Rules

+ifeq ($(run-built-tests),yes)
+LOCALES := en_US.UTF-8
+include ../gen-locales.mk
+
+$(objpfx)tst-iconv-opt.out: $(gen-locales)
+endif
+
 $(inst_bindir)/iconv: $(objpfx)iconv_prog $(+force)
 	$(do-install-program)

@@ -95,3 +103,8 @@ $(objpfx)test-iconvconfig.out: /dev/null $(objpfx)iconvconfig
 	  cmp $$tmp $(inst_gconvdir)/gconv-modules.cache; \
 	  rm -f $$tmp) > $@; \
 	$(evaluate-test)
+
+$(objpfx)tst-iconv_prog.out: tst-iconv_prog.sh $(objpfx)iconv_prog
+	$(BASH) $< $(common-objdir) '$(test-wrapper-env)' \
+		 '$(run-program-env)' > $@; \
+	$(evaluate-test)
diff --git a/iconv/Versions b/iconv/Versions
index 60ab10a27..d51af52fa 100644
--- a/iconv/Versions
+++ b/iconv/Versions
@@ -7,6 +7,9 @@ libc {
     # functions shared with iconv program
     __gconv_get_alias_db; __gconv_get_cache; __gconv_get_modules_db;

+    # functions used elsewhere in glibc
+    __gconv_open; __gconv_create_spec; __gconv_destroy_spec;
+
     # function used by the gconv modules
     __gconv_transliterate;
   }
diff --git a/iconv/gconv_charset.c b/iconv/gconv_charset.c
new file mode 100644
index 000000000..4ba0aa99f
--- /dev/null
+++ b/iconv/gconv_charset.c
@@ -0,0 +1,228 @@
+/* Charset name normalization.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+
+#include <ctype.h>
+#include <locale.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gconv_int.h"
+#include "gconv_charset.h"
+
+
+/* This function returns a pointer to the last suffix in a conversion code
+   string.  Valid suffixes matched by this function are of the form: '/' or ','
+   followed by arbitrary text that doesn't contain '/' or ','.  It does not
+   edit the string in any way.  The caller is expected to parse the suffix and
+   remove it (by e.g. truncating the string) before the next call.  */
+static char *
+find_suffix (char *s)
+{
+  /* The conversion code is in the form of a triplet, separated by '/' chars.
+     The third component of the triplet contains suffixes.  If we don't have two
+     slashes, we don't have a suffix.  */
+
+  int slash_count = 0;
+  char *suffix_term = NULL;
+
+  for (int i = 0; s[i] != '\0'; i++)
+    switch (s[i])
+      {
+      case '/':
+        slash_count++;
+        /* Fallthrough */
+      case ',':
+        suffix_term = &s[i];
+      }
+
+  if (slash_count >= 2)
+    return suffix_term;
+
+  return NULL;
+}
+
+
+struct gconv_parsed_code
+{
+  char *code;
+  bool translit;
+  bool ignore;
+};
+
+
+/* This function parses an iconv_open encoding PC.CODE, strips any suffixes
+   (such as TRANSLIT or IGNORE) from it and sets corresponding flags in it.  */
+static void
+gconv_parse_code (struct gconv_parsed_code *pc)
+{
+  pc->translit = false;
+  pc->ignore = false;
+
+  while (1)
+    {
+      /* First drop any trailing whitespaces and separators.  */
+      size_t len = strlen (pc->code);
+      while ((len > 0)
+             && (isspace (pc->code[len - 1])
+                 || pc->code[len - 1] == ','
+                 || pc->code[len - 1] == '/'))
+        len--;
+
+      pc->code[len] = '\0';
+
+      if (len == 0)
+        return;
+
+      char * suffix = find_suffix (pc->code);
+      if (suffix == NULL)
+        {
+          /* At this point, we have processed and removed all suffixes from the
+             code and what remains of the code is suffix free.  */
+          return;
+        }
+      else
+        {
+          /* A suffix is processed from the end of the code array going
+             backwards, one suffix at a time.  The suffix is an index into the
+             code character array and points to: one past the end of the code
+             and any unprocessed suffixes, and to the beginning of the suffix
+             currently being processed during this iteration.  We must process
+             this suffix and then drop it from the code by terminating the
+             preceding text with NULL.
+
+             We want to allow and recognize suffixes such as:
+
+             "/TRANSLIT"          i.e. single suffix
+             "//TRANSLIT"         i.e. single suffix and multiple separators
+             "//TRANSLIT/IGNORE"  i.e. suffixes separated by "/"
+             "/TRANSLIT//IGNORE"  i.e. suffixes separated by "//"
+             "//IGNORE,TRANSLIT"  i.e. suffixes separated by ","
+             "//IGNORE,"          i.e. trailing ","
+             "//TRANSLIT/"        i.e. trailing "/"
+             "//TRANSLIT//"       i.e. trailing "//"
+             "/"                  i.e. empty suffix.
+
+             Unknown suffixes are silently discarded and ignored.  */
+
+          if ((__strcasecmp_l (suffix,
+                               GCONV_TRIPLE_SEPARATOR
+                               GCONV_TRANSLIT_SUFFIX,
+                               _nl_C_locobj_ptr) == 0)
+              || (__strcasecmp_l (suffix,
+                                  GCONV_SUFFIX_SEPARATOR
+                                  GCONV_TRANSLIT_SUFFIX,
+                                  _nl_C_locobj_ptr) == 0))
+            pc->translit = true;
+
+          if ((__strcasecmp_l (suffix,
+                               GCONV_TRIPLE_SEPARATOR
+                               GCONV_IGNORE_ERRORS_SUFFIX,
+                               _nl_C_locobj_ptr) == 0)
+              || (__strcasecmp_l (suffix,
+                                  GCONV_SUFFIX_SEPARATOR
+                                  GCONV_IGNORE_ERRORS_SUFFIX,
+                                  _nl_C_locobj_ptr) == 0))
+            pc->ignore = true;
+
+          /* We just processed this suffix.  We can now drop it from the
+             code string by truncating it at the suffix's position.  */
+          suffix[0] = '\0';
+        }
+    }
+}
+
+
+/* This function accepts the charset names of the source and destination of the
+   conversion and populates *conv_spec with an equivalent conversion
+   specification that may later be used by __gconv_open.  The charset names
+   might contain options in the form of suffixes that alter the conversion,
+   e.g. "ISO-10646/UTF-8/TRANSLIT".  It processes the charset names, ignoring
+   and truncating any suffix options in fromcode, and processing and truncating
+   any suffix options in tocode.  Supported suffix options ("TRANSLIT" or
+   "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec
+   to be set to true.  Unrecognized suffix options are silently discarded.  If
+   the function succeeds, it returns conv_spec back to the caller.  It returns
+   NULL upon failure.  conv_spec must be allocated and freed by the caller.  */
+struct gconv_spec *
+__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
+                     const char *tocode)
+{
+  struct gconv_parsed_code pfc, ptc;
+  struct gconv_spec *ret = NULL;
+
+  pfc.code = __strdup (fromcode);
+  ptc.code = __strdup (tocode);
+
+  if ((pfc.code == NULL)
+      || (ptc.code == NULL))
+    goto out;
+
+  gconv_parse_code (&pfc);
+  gconv_parse_code (&ptc);
+
+  /* We ignore suffixes in the fromcode because that is how the current
+     implementation has always handled them.  Only suffixes in the tocode are
+     processed and handled.  The reality is that invalid input in the input
+     character set should only be ignored if the fromcode specifies IGNORE.
+     The current implementation ignores invalid intput in the input character
+     set if the tocode contains IGNORE.  We preserve this behavior for
+     backwards compatibility.  In the future we may split the handling of
+     IGNORE to allow a finer grained specification of ignorning invalid input
+     and/or ignoring invalid output.  */
+  conv_spec->translit = ptc.translit;
+  conv_spec->ignore = ptc.ignore;
+
+  /* 3 extra bytes because 1 extra for '\0', and 2 extra so strip might
+     be able to add one or two trailing '/' characters if necessary.  */
+  conv_spec->fromcode = malloc (strlen (fromcode) + 3);
+  if (conv_spec->fromcode == NULL)
+    goto out;
+
+  conv_spec->tocode = malloc (strlen (tocode) + 3);
+  if (conv_spec->tocode == NULL)
+    {
+      free (conv_spec->fromcode);
+      conv_spec->fromcode = NULL;
+      goto out;
+    }
+
+  /* Strip unrecognized characters and ensure that the code has two '/'
+     characters as per conversion code triplet specification.  */
+  strip (conv_spec->fromcode, pfc.code);
+  strip (conv_spec->tocode, ptc.code);
+  ret = conv_spec;
+
+out:
+  free (pfc.code);
+  free (ptc.code);
+
+  return ret;
+}
+libc_hidden_def (__gconv_create_spec)
+
+
+void
+__gconv_destroy_spec (struct gconv_spec *conv_spec)
+{
+  free (conv_spec->fromcode);
+  free (conv_spec->tocode);
+  return;
+}
+libc_hidden_def (__gconv_destroy_spec)
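The suffix grammar accepted by gconv_parse_code above can be exercised through
the public entry point; each spelling below is taken from the comment in the
new file.  Illustrative only, not part of the patch:

  #include <iconv.h>
  #include <stdio.h>

  int
  main (void)
  {
    const char *codes[] =
      {
        "UTF-8//TRANSLIT",          /* single suffix */
        "UTF-8//TRANSLIT/IGNORE",   /* suffixes separated by "/" */
        "UTF-8//IGNORE,TRANSLIT",   /* suffixes separated by "," */
        "UTF-8//TRANSLIT//",        /* trailing separators are dropped */
      };

    for (int i = 0; i < 4; ++i)
      {
        iconv_t cd = iconv_open (codes[i], "ISO-8859-1");
        if (cd == (iconv_t) -1)
          perror (codes[i]);
        else
          iconv_close (cd);
      }
    return 0;
  }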
diff --git a/iconv/gconv_charset.h b/iconv/gconv_charset.h
index 348acc089..e9c122cf7 100644
--- a/iconv/gconv_charset.h
+++ b/iconv/gconv_charset.h
@@ -19,9 +19,41 @@

 #include <ctype.h>
 #include <locale.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gconv_int.h"

-static void
+/* An iconv encoding is in the form of a triplet, with parts separated by
+   a '/' character.  The first part is the standard name, the second part is
+   the character set, and the third part is the error handler.  If the first
+   part is sufficient to identify both the standard and the character set
+   then the second part can be empty e.g. UTF-8//.  If the first part is not
+   sufficient to identify both the standard and the character set then the
+   second part is required e.g. ISO-10646/UTF8/.  If neither the first or
+   second parts are provided e.g. //, then the current locale is used.
+   The actual values used in the first and second parts are not entirely
+   relevant to the implementation.  The values themselves are used in a hash
+   table to lookup modules and so the naming convention of the first two parts
+   is somewhat arbitrary and only helps locate the entries in the cache.
+   The third part is the error handler and is comprised of a ',' or '/'
+   separated list of suffixes.  Currently, we support "TRANSLIT" for
+   transliteration and "IGNORE" for ignoring conversion errors due to
+   unrecognized input characters.  */
+#define GCONV_TRIPLE_SEPARATOR "/"
+#define GCONV_SUFFIX_SEPARATOR ","
+#define GCONV_TRANSLIT_SUFFIX "TRANSLIT"
+#define GCONV_IGNORE_ERRORS_SUFFIX "IGNORE"
+
+
+/* This function copies in-order, characters from the source 's' that are
+   either alpha-numeric or one in one of these: "_-.,:/" - into the destination
+   'wp' while dropping all other characters.  In the process, it converts all
+   alphabetical characters to upper case.  It then appends up to two '/'
+   characters so that the total number of '/'es in the destination is 2.  */
+static inline void
+__attribute__ ((unused, always_inline))
 strip (char *wp, const char *s)
 {
   int slash_count = 0;
diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
index fbaf12cee..f721ce30f 100644
--- a/iconv/gconv_int.h
+++ b/iconv/gconv_int.h
@@ -75,6 +75,15 @@ struct gconv_module
 };


+/* The specification of the conversion that needs to be performed.  */
+struct gconv_spec
+{
+  char *fromcode;
+  char *tocode;
+  bool translit;
+  bool ignore;
+};
+
 /* Flags for `gconv_open'.  */
 enum
 {
@@ -136,10 +145,33 @@ __libc_lock_define (extern, __gconv_lock attribute_hidden)
   })


-/* Return in *HANDLE decriptor for transformation from FROMSET to TOSET.  */
-extern int __gconv_open (const char *toset, const char *fromset,
-                         __gconv_t *handle, int flags)
-     attribute_hidden;
+/* Return in *HANDLE, a decriptor for the transformation.  The function expects
+   the specification of the transformation in the structure pointed to by
+   CONV_SPEC.  It only reads *CONV_SPEC and does not take ownership of it.  */
+extern int __gconv_open (struct gconv_spec *conv_spec,
+                         __gconv_t *handle, int flags);
+libc_hidden_proto (__gconv_open)
+
+/* This function accepts the charset names of the source and destination of the
+   conversion and populates *conv_spec with an equivalent conversion
+   specification that may later be used by __gconv_open.  The charset names
+   might contain options in the form of suffixes that alter the conversion,
+   e.g. "ISO-10646/UTF-8/TRANSLIT".  It processes the charset names, ignoring
+   and truncating any suffix options in fromcode, and processing and truncating
+   any suffix options in tocode.  Supported suffix options ("TRANSLIT" or
+   "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec
+   to be set to true.  Unrecognized suffix options are silently discarded.  If
+   the function succeeds, it returns conv_spec back to the caller.  It returns
+   NULL upon failure.  */
+extern struct gconv_spec *
+__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
+                     const char *tocode);
+libc_hidden_proto (__gconv_create_spec)
+
+/* This function frees all heap memory allocated by __gconv_create_spec.  */
+extern void
+__gconv_destroy_spec (struct gconv_spec *conv_spec);
+libc_hidden_proto (__gconv_destroy_spec)

 /* Free resources associated with transformation descriptor CD.  */
 extern int __gconv_close (__gconv_t cd)
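Taken together, the new internal entry points are used in a
create/adjust/open/destroy sequence; the iconv_prog.c hunk further down
follows exactly this pattern.  A glibc-internal sketch (it will not build
outside the glibc tree):

  #include "gconv_int.h"   /* glibc-internal header.  */

  static int
  open_ignoring_errors (const char *from, const char *to, __gconv_t *cd)
  {
    struct gconv_spec conv_spec;

    if (__gconv_create_spec (&conv_spec, from, to) == NULL)
      return -1;

    /* Same effect as appending an "//IGNORE" suffix to TO.  */
    conv_spec.ignore = true;

    int res = __gconv_open (&conv_spec, cd, 0);

    /* __gconv_open only reads *CONV_SPEC, so it can be destroyed
       immediately regardless of the result.  */
    __gconv_destroy_spec (&conv_spec);

    return res == __GCONV_OK ? 0 : -1;
  }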
diff --git a/iconv/gconv_open.c b/iconv/gconv_open.c
index b39626d25..287862095 100644
--- a/iconv/gconv_open.c
+++ b/iconv/gconv_open.c
@@ -31,7 +31,7 @@

 int
-__gconv_open (const char *toset, const char *fromset, __gconv_t *handle,
+__gconv_open (struct gconv_spec *conv_spec, __gconv_t *handle,
               int flags)
 {
   struct __gconv_step *steps;
@@ -40,77 +40,38 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle,
   size_t cnt = 0;
   int res;
   int conv_flags = 0;
-  const char *errhand;
-  const char *ignore;
   bool translit = false;
+  char *tocode, *fromcode;

   /* Find out whether any error handling method is specified.  */
-  errhand = strchr (toset, '/');
-  if (errhand != NULL)
-    errhand = strchr (errhand + 1, '/');
-  if (__glibc_likely (errhand != NULL))
-    {
-      if (*++errhand == '\0')
-        errhand = NULL;
-      else
-        {
-          /* Make copy without the error handling description.  */
-          char *newtoset = (char *) alloca (errhand - toset + 1);
-          char *tok;
-          char *ptr = NULL /* Work around a bogus warning */;
-
-          newtoset[errhand - toset] = '\0';
-          toset = memcpy (newtoset, toset, errhand - toset);
+  translit = conv_spec->translit;

-          /* Find the appropriate transliteration handlers.  */
-          tok = strdupa (errhand);
+  if (conv_spec->ignore)
+    conv_flags |= __GCONV_IGNORE_ERRORS;

-          tok = __strtok_r (tok, ",", &ptr);
-          while (tok != NULL)
-            {
-              if (__strcasecmp_l (tok, "TRANSLIT", _nl_C_locobj_ptr) == 0)
-                translit = true;
-              else if (__strcasecmp_l (tok, "IGNORE", _nl_C_locobj_ptr) == 0)
-                /* Set the flag to ignore all errors.  */
-                conv_flags |= __GCONV_IGNORE_ERRORS;
-
-              tok = __strtok_r (NULL, ",", &ptr);
-            }
-        }
-    }
-
-  /* For the source character set we ignore the error handler specification.
-     XXX Is this really always the best?  */
-  ignore = strchr (fromset, '/');
-  if (ignore != NULL && (ignore = strchr (ignore + 1, '/')) != NULL
-      && *++ignore != '\0')
-    {
-      char *newfromset = (char *) alloca (ignore - fromset + 1);
-
-      newfromset[ignore - fromset] = '\0';
-      fromset = memcpy (newfromset, fromset, ignore - fromset);
-    }
+  tocode = conv_spec->tocode;
+  fromcode = conv_spec->fromcode;

   /* If the string is empty define this to mean the charset of the
      currently selected locale.  */
-  if (strcmp (toset, "//") == 0)
+  if (strcmp (tocode, "//") == 0)
     {
       const char *codeset = _NL_CURRENT (LC_CTYPE, CODESET);
       size_t len = strlen (codeset);
       char *dest;
-      toset = dest = (char *) alloca (len + 3);
+      tocode = dest = (char *) alloca (len + 3);
       memcpy (__mempcpy (dest, codeset, len), "//", 3);
     }
-  if (strcmp (fromset, "//") == 0)
+  if (strcmp (fromcode, "//") == 0)
     {
       const char *codeset = _NL_CURRENT (LC_CTYPE, CODESET);
       size_t len = strlen (codeset);
       char *dest;
-      fromset = dest = (char *) alloca (len + 3);
+      fromcode = dest = (char *) alloca (len + 3);
       memcpy (__mempcpy (dest, codeset, len), "//", 3);
     }

-  res = __gconv_find_transform (toset, fromset, &steps, &nsteps, flags);
+  res = __gconv_find_transform (tocode, fromcode, &steps, &nsteps, flags);
   if (res == __GCONV_OK)
     {
       /* Allocate room for handle.  */
@@ -209,3 +170,4 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle,
   *handle = result;
   return res;
 }
+libc_hidden_def (__gconv_open)
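The "//" comparison above is what makes an empty encoding name mean "the
current locale's charset": strip pads an empty string to exactly "//".  A
small demonstration of the resulting public behavior, not part of the patch:

  #include <iconv.h>
  #include <langinfo.h>
  #include <locale.h>
  #include <stdio.h>

  int
  main (void)
  {
    setlocale (LC_ALL, "");

    /* "" normalizes to "//", so this converts from the locale charset.  */
    iconv_t cd = iconv_open ("UTF-8", "");

    printf ("locale charset %s: %s\n", nl_langinfo (CODESET),
            cd == (iconv_t) -1 ? "iconv_open failed" : "opened");

    if (cd != (iconv_t) -1)
      iconv_close (cd);
    return 0;
  }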
*/ @@ -209,3 +170,4 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle, *handle = result; return res; } +libc_hidden_def (__gconv_open) diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c index d4797fba1..963b29f24 100644 --- a/iconv/gconv_simple.c +++ b/iconv/gconv_simple.c @@ -239,11 +239,9 @@ ucs4_internal_loop (struct __gconv_step *step, int flags = step_data->__flags; const unsigned char *inptr = *inptrp; unsigned char *outptr = *outptrp; - size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; int result; - size_t cnt; - for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + for (; inptr + 4 <= inend && outptr + 4 <= outend; inptr += 4) { uint32_t inval; @@ -307,11 +305,9 @@ ucs4_internal_loop_unaligned (struct __gconv_step *step, int flags = step_data->__flags; const unsigned char *inptr = *inptrp; unsigned char *outptr = *outptrp; - size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; int result; - size_t cnt; - for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + for (; inptr + 4 <= inend && outptr + 4 <= outend; inptr += 4) { if (__glibc_unlikely (inptr[0] > 0x80)) { @@ -613,11 +609,9 @@ ucs4le_internal_loop (struct __gconv_step *step, int flags = step_data->__flags; const unsigned char *inptr = *inptrp; unsigned char *outptr = *outptrp; - size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; int result; - size_t cnt; - for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + for (; inptr + 4 <= inend && outptr + 4 <= outend; inptr += 4) { uint32_t inval; @@ -684,11 +678,9 @@ ucs4le_internal_loop_unaligned (struct __gconv_step *step, int flags = step_data->__flags; const unsigned char *inptr = *inptrp; unsigned char *outptr = *outptrp; - size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; int result; - size_t cnt; - for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + for (; inptr + 4 <= inend && outptr + 4 <= outend; inptr += 4) { if (__glibc_unlikely (inptr[3] > 0x80)) { diff --git a/iconv/iconv_open.c b/iconv/iconv_open.c index 687067070..5b30055c0 100644 --- a/iconv/iconv_open.c +++ b/iconv/iconv_open.c @@ -31,49 +31,15 @@ iconv_t iconv_open (const char *tocode, const char *fromcode) { - /* Normalize the name. We remove all characters beside alpha-numeric, - '_', '-', '/', '.', and ':'. */ - size_t tocode_len = strlen (tocode) + 3; - char *tocode_conv; - bool tocode_usealloca = __libc_use_alloca (tocode_len); - if (tocode_usealloca) - tocode_conv = (char *) alloca (tocode_len); - else - { - tocode_conv = (char *) malloc (tocode_len); - if (tocode_conv == NULL) - return (iconv_t) -1; - } - strip (tocode_conv, tocode); - tocode = (tocode_conv[2] == '\0' && tocode[0] != '\0' - ? upstr (tocode_conv, tocode) : tocode_conv); + __gconv_t cd; + struct gconv_spec conv_spec; - size_t fromcode_len = strlen (fromcode) + 3; - char *fromcode_conv; - bool fromcode_usealloca = __libc_use_alloca (fromcode_len); - if (fromcode_usealloca) - fromcode_conv = (char *) alloca (fromcode_len); - else - { - fromcode_conv = (char *) malloc (fromcode_len); - if (fromcode_conv == NULL) - { - if (! tocode_usealloca) - free (tocode_conv); - return (iconv_t) -1; - } - } - strip (fromcode_conv, fromcode); - fromcode = (fromcode_conv[2] == '\0' && fromcode[0] != '\0' - ? upstr (fromcode_conv, fromcode) : fromcode_conv); + if (__gconv_create_spec (&conv_spec, fromcode, tocode) == NULL) + return (iconv_t) -1; - __gconv_t cd; - int res = __gconv_open (tocode, fromcode, &cd, 0); + int res = __gconv_open (&conv_spec, &cd, 0); - if (! 
fromcode_usealloca) - free (fromcode_conv); - if (! tocode_usealloca) - free (tocode_conv); + __gconv_destroy_spec (&conv_spec); if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) { diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c index 9709e4a70..d59979759 100644 --- a/iconv/iconv_prog.c +++ b/iconv/iconv_prog.c @@ -39,6 +39,7 @@ #include #include "iconv_prog.h" #include "iconvconfig.h" +#include "gconv_charset.h" /* Get libc version number. */ #include "../version.h" @@ -118,8 +119,7 @@ main (int argc, char *argv[]) { int status = EXIT_SUCCESS; int remaining; - iconv_t cd; - const char *orig_to_code; + __gconv_t cd; struct charmap_t *from_charmap = NULL; struct charmap_t *to_charmap = NULL; @@ -139,39 +139,6 @@ main (int argc, char *argv[]) exit (EXIT_SUCCESS); } - /* If we have to ignore errors make sure we use the appropriate name for - the to-character-set. */ - orig_to_code = to_code; - if (omit_invalid) - { - const char *errhand = strchrnul (to_code, '/'); - int nslash = 2; - char *newp; - char *cp; - - if (*errhand == '/') - { - --nslash; - errhand = strchrnul (errhand + 1, '/'); - - if (*errhand == '/') - { - --nslash; - errhand = strchr (errhand, '\0'); - } - } - - newp = (char *) alloca (errhand - to_code + nslash + 7 + 1); - cp = mempcpy (newp, to_code, errhand - to_code); - while (nslash-- > 0) - *cp++ = '/'; - if (cp[-1] != '/') - *cp++ = ','; - memcpy (cp, "IGNORE", sizeof ("IGNORE")); - - to_code = newp; - } - /* POSIX 1003.2b introduces a silly thing: the arguments to -t anf -f can be file names of charmaps. In this case iconv will have to read those charmaps and use them to do the conversion. But there are @@ -184,10 +151,10 @@ main (int argc, char *argv[]) file. */ from_charmap = charmap_read (from_code, /*0, 1*/1, 0, 0, 0); - if (strchr (orig_to_code, '/') != NULL) + if (strchr (to_code, '/') != NULL) /* The to-name might be a charmap file name. Try reading the file. */ - to_charmap = charmap_read (orig_to_code, /*0, 1,*/1, 0, 0, 0); + to_charmap = charmap_read (to_code, /*0, 1,*/1, 0, 0, 0); /* At this point we have to handle two cases. The first one is @@ -201,9 +168,25 @@ main (int argc, char *argv[]) argc, remaining, argv, output_file); else { + struct gconv_spec conv_spec; + int res; + + if (__gconv_create_spec (&conv_spec, from_code, to_code) == NULL) + { + error (EXIT_FAILURE, errno, + _("failed to start conversion processing")); + exit (1); + } + + if (omit_invalid) + conv_spec.ignore = true; + /* Let's see whether we have these coded character sets. */ - cd = iconv_open (to_code, from_code); - if (cd == (iconv_t) -1) + res = __gconv_open (&conv_spec, &cd, 0); + + __gconv_destroy_spec (&conv_spec); + + if (res != __GCONV_OK) { if (errno == EINVAL) { @@ -221,7 +204,7 @@ main (int argc, char *argv[]) const char *from_pretty = (from_code[0] ? from_code : nl_langinfo (CODESET)); const char *to_pretty = - (orig_to_code[0] ? orig_to_code : nl_langinfo (CODESET)); + (to_code[0] ? to_code : nl_langinfo (CODESET)); if (from_wrong) { diff --git a/iconv/tst-iconv-opt.c b/iconv/tst-iconv-opt.c new file mode 100644 index 000000000..669d812a6 --- /dev/null +++ b/iconv/tst-iconv-opt.c @@ -0,0 +1,347 @@ +/* Test iconv's TRANSLIT and IGNORE option handling + + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + + +#include +#include +#include +#include +#include +#include + + +/* Run one iconv test. Arguments: + to: destination character set and options + from: source character set + input: input string to be converted + exp_in: expected number of bytes consumed + exp_ret: expected return value (error or number of irreversible conversions) + exp_out: expected output string + exp_err: expected value of `errno' after iconv returns. */ +static void +test_iconv (const char *to, const char *from, char *input, size_t exp_in, + size_t exp_ret, const char *exp_out, int exp_err) +{ + iconv_t cd; + char outbuf[500]; + size_t inlen, outlen; + char *inptr, *outptr; + size_t n; + + cd = iconv_open (to, from); + TEST_VERIFY (cd != (iconv_t) -1); + + inlen = strlen (input); + outlen = sizeof (outbuf); + inptr = input; + outptr = outbuf; + + errno = 0; + n = iconv (cd, &inptr, &inlen, &outptr, &outlen); + + TEST_COMPARE (n, exp_ret); + TEST_VERIFY (inptr == input + exp_in); + TEST_COMPARE (errno, exp_err); + TEST_COMPARE_BLOB (outbuf, outptr - outbuf, exp_out, strlen (exp_out)); + TEST_VERIFY (iconv_close (cd) == 0); +} + + +/* We test option parsing by converting UTF-8 inputs to ASCII under various + option combinations. The UTF-8 inputs fall into three categories: + - ASCII-only, + - non-ASCII, + - non-ASCII with invalid UTF-8 characters. */ + +/* 1. */ +char ascii[] = "Just some ASCII text"; + +/* 2. Valid UTF-8 input and some corresponding expected outputs with various + options. The two non-ASCII characters below are accented alphabets: + an `a' then an `o'. */ +char utf8[] = "UTF-8 text with \u00E1 couple \u00F3f non-ASCII characters"; +char u2a[] = "UTF-8 text with "; +char u2a_translit[] = "UTF-8 text with a couple of non-ASCII characters"; +char u2a_ignore[] = "UTF-8 text with couple f non-ASCII characters"; + +/* 3. Invalid UTF-8 input and some corresponding expected outputs. \xff is + invalid UTF-8. It's followed by some valid but non-ASCII UTF-8. */ +char iutf8[] = "Invalid UTF-8 \xff\u27E6text\u27E7"; +char iu2a[] = "Invalid UTF-8 "; +char iu2a_ignore[] = "Invalid UTF-8 text"; +char iu2a_both[] = "Invalid UTF-8 [|text|]"; + +/* 4. Another invalid UTF-8 input and corresponding expected outputs. This time + the valid non-ASCII UTF-8 characters appear before the invalid \xff. */ +char jutf8[] = "Invalid \u27E6UTF-8\u27E7 \xfftext"; +char ju2a[] = "Invalid "; +char ju2a_translit[] = "Invalid [|UTF-8|] "; +char ju2a_ignore[] = "Invalid UTF-8 text"; +char ju2a_both[] = "Invalid [|UTF-8|] text"; + +/* We also test option handling for character set names that have the form + "A/B". In this test, we test conversions "ISO-10646/UTF-8", and either + ISO-8859-1 or ASCII. */ + +/* 5. Accented 'A' and 'a' characters in ISO-8859-1 and UTF-8, and an + equivalent ASCII transliteration. */ +char iso8859_1_a[] = {0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, /* Accented A's. 
*/ + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, /* Accented a's. */ + 0x00}; +char utf8_a[] = "\u00C0\u00C1\u00C2\u00C3\u00C4\u00C5" + "\u00E0\u00E1\u00E2\u00E3\u00E4\u00E5"; +char ascii_a[] = "AAAAAAaaaaaa"; + +/* 6. An invalid ASCII string where [0] is invalid and [1] is '~'. */ +char iascii [] = {0x80, '~', '\0'}; +char empty[] = ""; +char ia2u_ignore[] = "~"; + +static int +do_test (void) +{ + xsetlocale (LC_ALL, "en_US.UTF-8"); + + + /* 0. iconv_open should gracefully fail for invalid character sets. */ + + TEST_VERIFY (iconv_open ("INVALID", "UTF-8") == (iconv_t) -1); + TEST_VERIFY (iconv_open ("UTF-8", "INVALID") == (iconv_t) -1); + TEST_VERIFY (iconv_open ("INVALID", "INVALID") == (iconv_t) -1); + + + /* 1. ASCII-only UTF-8 input should convert to ASCII with no changes: */ + + test_iconv ("ASCII", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); + test_iconv ("ASCII//", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); + test_iconv ("ASCII//TRANSLIT", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); + test_iconv ("ASCII//TRANSLIT//", "UTF-8", ascii, strlen (ascii), 0, ascii, + 0); + test_iconv ("ASCII//IGNORE", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); + test_iconv ("ASCII//IGNORE//", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); + + + /* 2. Valid UTF-8 input with non-ASCII characters: */ + + /* EILSEQ when converted to ASCII. */ + test_iconv ("ASCII", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, EILSEQ); + + /* Converted without error with TRANSLIT enabled. */ + test_iconv ("ASCII//TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, u2a_translit, + 0); + + /* EILSEQ with IGNORE enabled. Non-ASCII chars dropped from output. */ + test_iconv ("ASCII//IGNORE", "UTF-8", utf8, strlen (utf8), (size_t) -1, + u2a_ignore, EILSEQ); + + /* With TRANSLIT and IGNORE enabled, transliterated without error. We test + four combinations. */ + + test_iconv ("ASCII//TRANSLIT,IGNORE", "UTF-8", utf8, strlen (utf8), 2, + u2a_translit, 0); + test_iconv ("ASCII//TRANSLIT//IGNORE", "UTF-8", utf8, strlen (utf8), 2, + u2a_translit, 0); + test_iconv ("ASCII//IGNORE,TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, + u2a_translit, 0); + /* Due to bug 19519, iconv was ignoring TRANSLIT for the following input. */ + test_iconv ("ASCII//IGNORE//TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, + u2a_translit, 0); + + /* Misspellings of TRANSLIT and IGNORE are ignored, but conversion still + works while respecting any other correctly spelled options. */ + + test_iconv ("ASCII//T", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, + EILSEQ); + test_iconv ("ASCII//TRANSLITERATE", "UTF-8", utf8, strlen (u2a), (size_t) -1, + u2a, EILSEQ); + test_iconv ("ASCII//I", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, + EILSEQ); + test_iconv ("ASCII//IGNORED", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, + EILSEQ); + test_iconv ("ASCII//TRANSLITERATE//IGNORED", "UTF-8", utf8, strlen (u2a), + (size_t) -1, u2a, EILSEQ); + test_iconv ("ASCII//IGNORED,TRANSLITERATE", "UTF-8", utf8, strlen (u2a), + (size_t) -1, u2a, EILSEQ); + test_iconv ("ASCII//T//I", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, + EILSEQ); + + test_iconv ("ASCII//TRANSLIT//I", "UTF-8", utf8, strlen (utf8), 2, + u2a_translit, 0); + /* Due to bug 19519, iconv was ignoring TRANSLIT for the following input. 
*/ + test_iconv ("ASCII//I//TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, + u2a_translit, 0); + test_iconv ("ASCII//IGNORED,TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, + u2a_translit, 0); + test_iconv ("ASCII//TRANSLIT,IGNORED", "UTF-8", utf8, strlen (utf8), 2, + u2a_translit, 0); + + test_iconv ("ASCII//IGNORE,T", "UTF-8", utf8, strlen (utf8), (size_t) -1, + u2a_ignore, EILSEQ); + test_iconv ("ASCII//T,IGNORE", "UTF-8", utf8, strlen (utf8), (size_t) -1, + u2a_ignore, EILSEQ); + /* Due to bug 19519, iconv was ignoring IGNORE for the following input. */ + test_iconv ("ASCII//TRANSLITERATE//IGNORE", "UTF-8", utf8, strlen (utf8), + (size_t) -1, u2a_ignore, EILSEQ); + test_iconv ("ASCII//IGNORE//TRANSLITERATE", "UTF-8", utf8, strlen (utf8), + (size_t) -1, u2a_ignore, EILSEQ); + + + /* 3. Invalid UTF-8 followed by some valid non-ASCII UTF-8 characters: */ + + /* EILSEQ; output is truncated at the first invalid UTF-8 character. */ + test_iconv ("ASCII", "UTF-8", iutf8, strlen (iu2a), (size_t) -1, iu2a, + EILSEQ); + + /* With TRANSLIT enabled: EILSEQ; output still truncated at the first invalid + UTF-8 character. */ + test_iconv ("ASCII//TRANSLIT", "UTF-8", iutf8, strlen (iu2a), (size_t) -1, + iu2a, EILSEQ); + + /* With IGNORE enabled: EILSEQ; output omits invalid UTF-8 characters and + valid UTF-8 non-ASCII characters. */ + test_iconv ("ASCII//IGNORE", "UTF-8", iutf8, strlen (iutf8), (size_t) -1, + iu2a_ignore, EILSEQ); + + /* With TRANSLIT and IGNORE enabled, output omits only invalid UTF-8 + characters and transliterates valid non-ASCII UTF-8 characters. We test + four combinations. */ + + test_iconv ("ASCII//TRANSLIT,IGNORE", "UTF-8", iutf8, strlen (iutf8), 2, + iu2a_both, 0); + /* Due to bug 19519, iconv was ignoring IGNORE for the following input. */ + test_iconv ("ASCII//TRANSLIT//IGNORE", "UTF-8", iutf8, strlen (iutf8), 2, + iu2a_both, 0); + test_iconv ("ASCII//IGNORE,TRANSLIT", "UTF-8", iutf8, strlen (iutf8), 2, + iu2a_both, 0); + /* Due to bug 19519, iconv was ignoring TRANSLIT for the following input. */ + test_iconv ("ASCII//IGNORE//TRANSLIT", "UTF-8", iutf8, strlen (iutf8), 2, + iu2a_both, 0); + + + /* 4. Invalid UTF-8 with valid non-ASCII UTF-8 chars appearing first: */ + + /* EILSEQ; output is truncated at the first non-ASCII character. */ + test_iconv ("ASCII", "UTF-8", jutf8, strlen (ju2a), (size_t) -1, ju2a, + EILSEQ); + + /* With TRANSLIT enabled: EILSEQ; output now truncated at the first invalid + UTF-8 character. */ + test_iconv ("ASCII//TRANSLIT", "UTF-8", jutf8, strlen (jutf8) - 5, + (size_t) -1, ju2a_translit, EILSEQ); + test_iconv ("ASCII//translit", "UTF-8", jutf8, strlen (jutf8) - 5, + (size_t) -1, ju2a_translit, EILSEQ); + + /* With IGNORE enabled: EILSEQ; output omits invalid UTF-8 characters and + valid UTF-8 non-ASCII characters. */ + test_iconv ("ASCII//IGNORE", "UTF-8", jutf8, strlen (jutf8), (size_t) -1, + ju2a_ignore, EILSEQ); + test_iconv ("ASCII//ignore", "UTF-8", jutf8, strlen (jutf8), (size_t) -1, + ju2a_ignore, EILSEQ); + + /* With TRANSLIT and IGNORE enabled, output omits only invalid UTF-8 + characters and transliterates valid non-ASCII UTF-8 characters. We test + several combinations. */ + + test_iconv ("ASCII//TRANSLIT,IGNORE", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + /* Due to bug 19519, iconv was ignoring IGNORE for the following input. 
*/ + test_iconv ("ASCII//TRANSLIT//IGNORE", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + test_iconv ("ASCII//IGNORE,TRANSLIT", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + /* Due to bug 19519, iconv was ignoring TRANSLIT for the following input. */ + test_iconv ("ASCII//IGNORE//TRANSLIT", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + test_iconv ("ASCII//translit,ignore", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + /* Trailing whitespace and separators should be ignored. */ + test_iconv ("ASCII//IGNORE,TRANSLIT ", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + test_iconv ("ASCII//IGNORE,TRANSLIT/", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + test_iconv ("ASCII//IGNORE,TRANSLIT//", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + test_iconv ("ASCII//IGNORE,TRANSLIT,", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + test_iconv ("ASCII//IGNORE,TRANSLIT,,", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + test_iconv ("ASCII//IGNORE,TRANSLIT /,", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + + /* TRANSLIT or IGNORE suffixes in fromcode should be ignored. */ + test_iconv ("ASCII", "UTF-8//TRANSLIT", jutf8, strlen (ju2a), (size_t) -1, + ju2a, EILSEQ); + test_iconv ("ASCII", "UTF-8//IGNORE", jutf8, strlen (ju2a), (size_t) -1, + ju2a, EILSEQ); + test_iconv ("ASCII", "UTF-8//TRANSLIT,IGNORE", jutf8, strlen (ju2a), + (size_t) -1, ju2a, EILSEQ); + + + /* 5. Charset names of the form "A/B/": */ + + /* ISO-8859-1 is converted to UTF-8 without needing transliteration. */ + test_iconv ("ISO-10646/UTF-8", "ISO-8859-1", iso8859_1_a, + strlen (iso8859_1_a), 0, utf8_a, 0); + test_iconv ("ISO-10646/UTF-8/", "ISO-8859-1", iso8859_1_a, + strlen (iso8859_1_a), 0, utf8_a, 0); + test_iconv ("ISO-10646/UTF-8/IGNORE", "ISO-8859-1", iso8859_1_a, + strlen (iso8859_1_a), 0, utf8_a, 0); + test_iconv ("ISO-10646/UTF-8//IGNORE", "ISO-8859-1", iso8859_1_a, + strlen (iso8859_1_a), 0, utf8_a, 0); + test_iconv ("ISO-10646/UTF-8/TRANSLIT", "ISO-8859-1", iso8859_1_a, + strlen (iso8859_1_a), 0, utf8_a, 0); + test_iconv ("ISO-10646/UTF-8//TRANSLIT", "ISO-8859-1", iso8859_1_a, + strlen (iso8859_1_a), 0, utf8_a, 0); + test_iconv ("ISO-10646/UTF-8//TRANSLIT/IGNORE", "ISO-8859-1", iso8859_1_a, + strlen (iso8859_1_a), 0, utf8_a, 0); + test_iconv ("ISO-10646/UTF-8//TRANSLIT//IGNORE", "ISO-8859-1", iso8859_1_a, + strlen (iso8859_1_a), 0, utf8_a, 0); + test_iconv ("ISO-10646/UTF-8/TRANSLIT,IGNORE", "ISO-8859-1", iso8859_1_a, + strlen (iso8859_1_a), 0, utf8_a, 0); + + /* UTF-8 with accented A's is converted to ASCII with transliteration. */ + test_iconv ("ASCII", "ISO-10646/UTF-8", utf8_a, + 0, (size_t) -1, empty, EILSEQ); + test_iconv ("ASCII//IGNORE", "ISO-10646/UTF-8", utf8_a, + strlen (utf8_a), (size_t) -1, empty, EILSEQ); + test_iconv ("ASCII//TRANSLIT", "ISO-10646/UTF-8", utf8_a, + strlen (utf8_a), 12, ascii_a, 0); + + /* Invalid ASCII is converted to UTF-8 only with IGNORE. 
*/ + test_iconv ("ISO-10646/UTF-8", "ASCII", iascii, strlen (empty), (size_t) -1, + empty, EILSEQ); + test_iconv ("ISO-10646/UTF-8/TRANSLIT", "ASCII", iascii, strlen (empty), + (size_t) -1, empty, EILSEQ); + test_iconv ("ISO-10646/UTF-8/IGNORE", "ASCII", iascii, strlen (iascii), + (size_t) -1, ia2u_ignore, EILSEQ); + test_iconv ("ISO-10646/UTF-8/TRANSLIT,IGNORE", "ASCII", iascii, + strlen (iascii), (size_t) -1, ia2u_ignore, EILSEQ); + /* Due to bug 19519, iconv was ignoring IGNORE for the following three + inputs: */ + test_iconv ("ISO-10646/UTF-8/TRANSLIT/IGNORE", "ASCII", iascii, + strlen (iascii), (size_t) -1, ia2u_ignore, EILSEQ); + test_iconv ("ISO-10646/UTF-8//TRANSLIT,IGNORE", "ASCII", iascii, + strlen (iascii), (size_t) -1, ia2u_ignore, EILSEQ); + test_iconv ("ISO-10646/UTF-8//TRANSLIT//IGNORE", "ASCII", iascii, + strlen (iascii), (size_t) -1, ia2u_ignore, EILSEQ); + + return 0; +} + +#include diff --git a/iconv/tst-iconv8.c b/iconv/tst-iconv8.c new file mode 100644 index 000000000..0b92b19f6 --- /dev/null +++ b/iconv/tst-iconv8.c @@ -0,0 +1,50 @@ +/* Test iconv behavior on UCS4 conversions with //IGNORE. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Derived from BZ #26923 */ +#include +#include +#include +#include + +static int +do_test (void) +{ + iconv_t cd = iconv_open ("UTF-8//IGNORE", "ISO-10646/UCS4/"); + TEST_VERIFY_EXIT (cd != (iconv_t) -1); + + /* + * Convert sequence beginning with an irreversible character into buffer that + * is too small. + */ + char input[12] = "\xe1\x80\xa1" "AAAAAAAAA"; + char *inptr = input; + size_t insize = sizeof (input); + char output[6]; + char *outptr = output; + size_t outsize = sizeof (output); + + TEST_VERIFY (iconv (cd, &inptr, &insize, &outptr, &outsize) == -1); + TEST_VERIFY (errno == E2BIG); + + TEST_VERIFY_EXIT (iconv_close (cd) != -1); + + return 0; +} + +#include diff --git a/iconv/tst-iconv_prog.sh b/iconv/tst-iconv_prog.sh new file mode 100644 index 000000000..d8db7b335 --- /dev/null +++ b/iconv/tst-iconv_prog.sh @@ -0,0 +1,284 @@ +#!/bin/bash +# Test for some known iconv(1) hangs from bug 19519, and miscellaneous +# iconv(1) program error conditions. +# Copyright (C) 2020 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+ +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# . + +codir=$1 +test_wrapper_env="$2" +run_program_env="$3" + +# We have to have some directories in the library path. +LIBPATH=$codir:$codir/iconvdata + +# How the start the iconv(1) program. $from is not defined/expanded yet. +ICONV=' +$codir/elf/ld.so --library-path $LIBPATH --inhibit-rpath ${from}.so +$codir/iconv/iconv_prog +' +ICONV="$test_wrapper_env $run_program_env $ICONV" + +# List of known hangs; +# Gathered by running an exhaustive 2 byte input search against glibc-2.28 +hangarray=( +"\x00\x23;-c;ANSI_X3.110;UTF-8//TRANSLIT//IGNORE" +"\x00\xa1;-c;ARMSCII-8;UTF-8//TRANSLIT//IGNORE" +"\x00\xa1;-c;ASMO_449;UTF-8//TRANSLIT//IGNORE" +"\x00\x81;-c;BIG5;UTF-8//TRANSLIT//IGNORE" +"\x00\xff;-c;BIG5HKSCS;UTF-8//TRANSLIT//IGNORE" +"\x00\xff;-c;BRF;UTF-8//TRANSLIT//IGNORE" +"\x00\xff;-c;BS_4730;UTF-8//TRANSLIT//IGNORE" +"\x00\x81;-c;CP1250;UTF-8//TRANSLIT//IGNORE" +"\x00\x98;-c;CP1251;UTF-8//TRANSLIT//IGNORE" +"\x00\x81;-c;CP1252;UTF-8//TRANSLIT//IGNORE" +"\x00\x81;-c;CP1253;UTF-8//TRANSLIT//IGNORE" +"\x00\x81;-c;CP1254;UTF-8//TRANSLIT//IGNORE" +"\x00\x81;-c;CP1255;UTF-8//TRANSLIT//IGNORE" +"\x00\x81;-c;CP1257;UTF-8//TRANSLIT//IGNORE" +"\x00\x81;-c;CP1258;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;CP932;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;CSA_Z243.4-1985-1;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;CSA_Z243.4-1985-2;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;DEC-MCS;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;DIN_66003;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;DS_2089;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-AT-DE;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-AT-DE-A;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-CA-FR;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-DK-NO;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-DK-NO-A;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-ES;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-ES-A;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-ES-S;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-FI-SE;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-FI-SE-A;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-FR;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-IS-FRISS;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-IT;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-PT;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-UK;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-US;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ES;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ES2;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;EUC-CN;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;EUC-JISX0213;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;EUC-JP;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;EUC-JP-MS;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;EUC-KR;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;EUC-TW;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;GB18030;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;GB_1988-80;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;GBK;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;GOST_19768-74;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;GREEK7;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;GREEK7-OLD;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;GREEK-CCITT;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;HP-GREEK8;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;HP-ROMAN8;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;HP-ROMAN9;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;HP-THAI8;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;HP-TURKISH8;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM038;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;IBM1004;UTF-8//TRANSLIT//IGNORE" +"\x00\xff;-c;IBM1008;UTF-8//TRANSLIT//IGNORE" 
+"\xff\xff;-c;IBM1046;UTF-8//TRANSLIT//IGNORE" +"\x00\x51;-c;IBM1132;UTF-8//TRANSLIT//IGNORE" +"\x00\xa0;-c;IBM1133;UTF-8//TRANSLIT//IGNORE" +"\x00\xce;-c;IBM1137;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;IBM1161;UTF-8//TRANSLIT//IGNORE" +"\x00\xdb;-c;IBM1162;UTF-8//TRANSLIT//IGNORE" +"\x00\x70;-c;IBM12712;UTF-8//TRANSLIT//IGNORE" +"\x00\x0f;-c;IBM1364;UTF-8" +"\x0e\x0e;-c;IBM1364;UTF-8" +"\x00\x0f;-c;IBM1371;UTF-8" +"\x0e\x0e;-c;IBM1371;UTF-8" +"\x00\x0f;-c;IBM1388;UTF-8" +"\x0e\x0e;-c;IBM1388;UTF-8" +"\x00\x0f;-c;IBM1390;UTF-8" +"\x0e\x0e;-c;IBM1390;UTF-8" +"\x00\x0f;-c;IBM1399;UTF-8" +"\x0e\x0e;-c;IBM1399;UTF-8" +"\x00\x53;-c;IBM16804;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM274;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM275;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM281;UTF-8//TRANSLIT//IGNORE" +"\x00\x57;-c;IBM290;UTF-8//TRANSLIT//IGNORE" +"\x00\x45;-c;IBM420;UTF-8//TRANSLIT//IGNORE" +"\x00\x68;-c;IBM423;UTF-8//TRANSLIT//IGNORE" +"\x00\x70;-c;IBM424;UTF-8//TRANSLIT//IGNORE" +"\x00\x53;-c;IBM4517;UTF-8//TRANSLIT//IGNORE" +"\x00\x53;-c;IBM4899;UTF-8//TRANSLIT//IGNORE" +"\x00\xa5;-c;IBM4909;UTF-8//TRANSLIT//IGNORE" +"\x00\xdc;-c;IBM4971;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM803;UTF-8//TRANSLIT//IGNORE" +"\x00\x91;-c;IBM851;UTF-8//TRANSLIT//IGNORE" +"\x00\x9b;-c;IBM856;UTF-8//TRANSLIT//IGNORE" +"\x00\xd5;-c;IBM857;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;IBM864;UTF-8//TRANSLIT//IGNORE" +"\x00\x94;-c;IBM868;UTF-8//TRANSLIT//IGNORE" +"\x00\x94;-c;IBM869;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;IBM874;UTF-8//TRANSLIT//IGNORE" +"\x00\x6a;-c;IBM875;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM880;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;IBM891;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;IBM903;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;IBM904;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM905;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;IBM9066;UTF-8//TRANSLIT//IGNORE" +"\x00\x48;-c;IBM918;UTF-8//TRANSLIT//IGNORE" +"\x00\x57;-c;IBM930;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;IBM932;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM933;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM935;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM937;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM939;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;IBM943;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;INIS;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;INIS-8;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;INIS-CYRILLIC;UTF-8//TRANSLIT//IGNORE" +"\x00\xec;-c;ISIRI-3342;UTF-8//TRANSLIT//IGNORE" +"\x00\xec;-c;ISO_10367-BOX;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO-2022-CN;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO-2022-CN-EXT;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO-2022-JP;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO-2022-JP-2;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO-2022-JP-3;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO-2022-KR;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO_2033;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO_5427;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO_5427-EXT;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO_5428;UTF-8//TRANSLIT//IGNORE" +"\x00\xa4;-c;ISO_6937;UTF-8//TRANSLIT//IGNORE" +"\x00\xa0;-c;ISO_6937-2;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO-8859-11;UTF-8//TRANSLIT//IGNORE" +"\x00\xa5;-c;ISO-8859-3;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO-8859-6;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO-8859-7;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO-8859-8;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;ISO-IR-197;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;ISO-IR-209;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;IT;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;JIS_C6220-1969-RO;UTF-8//TRANSLIT//IGNORE" 
+"\x00\x80;-c;JIS_C6229-1984-B;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;JOHAB;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;JUS_I.B1.002;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;KOI-8;UTF-8//TRANSLIT//IGNORE" +"\x00\x88;-c;KOI8-T;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;KSC5636;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;LATIN-GREEK;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;LATIN-GREEK-1;UTF-8//TRANSLIT//IGNORE" +"\x00\xf6;-c;MAC-IS;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;MSZ_7795.3;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;NATS-DANO;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;NATS-SEFI;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;NC_NC00-10;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;NF_Z_62-010;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;NF_Z_62-010_1973;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;NS_4551-1;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;NS_4551-2;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;PT;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;PT2;UTF-8//TRANSLIT//IGNORE" +"\x00\x98;-c;RK1048;UTF-8//TRANSLIT//IGNORE" +"\x00\x98;-c;SEN_850200_B;UTF-8//TRANSLIT//IGNORE" +"\x00\x98;-c;SEN_850200_C;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;Shift_JISX0213;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;SJIS;UTF-8//TRANSLIT//IGNORE" +"\x00\x23;-c;T.61-8BIT;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;TIS-620;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;TSCII;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;UHC;UTF-8//TRANSLIT//IGNORE" +"\x00\xd8;-c;UNICODE;UTF-8//TRANSLIT//IGNORE" +"\x00\xdc;-c;UTF-16;UTF-8//TRANSLIT//IGNORE" +"\xdc\x00;-c;UTF-16BE;UTF-8//TRANSLIT//IGNORE" +"\x00\xdc;-c;UTF-16LE;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;UTF-7;UTF-8//TRANSLIT//IGNORE" +"\x00\x81;-c;WIN-SAMI-2;UTF-8//TRANSLIT//IGNORE" +) + +# List of option combinations that *should* lead to an error +errorarray=( +# Converting from/to invalid character sets should cause error +"\x00\x00;;INVALID;INVALID" +"\x00\x00;;INVALID;UTF-8" +"\x00\x00;;UTF-8;INVALID" +) + +# Requires $twobyte input, $c flag, $from, and $to to be set; sets $ret +execute_test () +{ + eval PROG=\"$ICONV\" + echo -en "$twobyte" \ + | timeout -k 4 3 $PROG $c -f $from -t "$to" &>/dev/null + ret=$? 
+} + +check_hangtest_result () +{ + if [ "$ret" -eq "124" ] || [ "$ret" -eq "137" ]; then # timeout/hang + result="HANG" + else + if [ "$ret" -eq "139" ]; then # segfault + result="SEGFAULT" + else + if [ "$ret" -gt "127" ]; then # unexpected error + result="UNEXPECTED" + else + result="OK" + fi + fi + fi + + echo -n "$result: from: \"$from\", to: \"$to\"," + echo " input \"$twobyte\", flags \"$c\"" + + if [ "$result" != "OK" ]; then + exit 1 + fi +} + +for hangcommand in "${hangarray[@]}"; do + twobyte="$(echo "$hangcommand" | cut -d";" -f 1)" + c="$(echo "$hangcommand" | cut -d";" -f 2)" + from="$(echo "$hangcommand" | cut -d";" -f 3)" + to="$(echo "$hangcommand" | cut -d";" -f 4)" + execute_test + check_hangtest_result +done + +check_errtest_result () +{ + if [ "$ret" -eq "1" ]; then # we errored out as expected + result="PASS" + else + result="FAIL" + fi + echo -n "$result: from: \"$from\", to: \"$to\"," + echo " input \"$twobyte\", flags \"$c\", return code $ret" + + if [ "$result" != "PASS" ]; then + exit 1 + fi +} + +for errorcommand in "${errorarray[@]}"; do + twobyte="$(echo "$errorcommand" | cut -d";" -f 1)" + c="$(echo "$errorcommand" | cut -d";" -f 2)" + from="$(echo "$errorcommand" | cut -d";" -f 3)" + to="$(echo "$errorcommand" | cut -d";" -f 4)" + execute_test + check_errtest_result +done diff --git a/iconvdata/Makefile b/iconvdata/Makefile index c83962f35..8fbb67a52 100644 --- a/iconvdata/Makefile +++ b/iconvdata/Makefile @@ -1,4 +1,5 @@ # Copyright (C) 1997-2020 Free Software Foundation, Inc. +# Copyright (C) The GNU Toolchain Authors. # This file is part of the GNU C Library. # The GNU C Library is free software; you can redistribute it and/or @@ -73,7 +74,8 @@ modules.so := $(addsuffix .so, $(modules)) ifeq (yes,$(build-shared)) tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \ tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \ - bug-iconv10 bug-iconv11 bug-iconv12 + bug-iconv10 bug-iconv11 bug-iconv12 bug-iconv13 bug-iconv14 \ + bug-iconv15 ifeq ($(have-thread-library),yes) tests += bug-iconv3 endif @@ -316,6 +318,10 @@ $(objpfx)bug-iconv10.out: $(objpfx)gconv-modules \ $(addprefix $(objpfx),$(modules.so)) $(objpfx)bug-iconv12.out: $(objpfx)gconv-modules \ $(addprefix $(objpfx),$(modules.so)) +$(objpfx)bug-iconv14.out: $(objpfx)gconv-modules \ + $(addprefix $(objpfx),$(modules.so)) +$(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \ + $(addprefix $(objpfx),$(modules.so)) $(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \ $(addprefix $(objpfx),$(modules.so)) \ diff --git a/iconvdata/bug-iconv13.c b/iconvdata/bug-iconv13.c new file mode 100644 index 000000000..87aaff398 --- /dev/null +++ b/iconvdata/bug-iconv13.c @@ -0,0 +1,53 @@ +/* bug 24973: Test EUC-KR module + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#include +#include +#include + +static int +do_test (void) +{ + iconv_t cd = iconv_open ("UTF-8//IGNORE", "EUC-KR"); + TEST_VERIFY_EXIT (cd != (iconv_t) -1); + + /* 0xfe (->0x7e : row 94) and 0xc9 (->0x49 : row 41) are user-defined + areas, which are not allowed and should be skipped over due to + //IGNORE. The trailing 0xfe also is an incomplete sequence, which + should be checked first. */ + char input[4] = { '\xc9', '\xa1', '\0', '\xfe' }; + char *inptr = input; + size_t insize = sizeof (input); + char output[4]; + char *outptr = output; + size_t outsize = sizeof (output); + + /* This used to crash due to buffer overrun. */ + TEST_VERIFY (iconv (cd, &inptr, &insize, &outptr, &outsize) == (size_t) -1); + TEST_VERIFY (errno == EINVAL); + /* The conversion should produce one character, the converted null + character. */ + TEST_VERIFY (sizeof (output) - outsize == 1); + + TEST_VERIFY_EXIT (iconv_close (cd) != -1); + + return 0; +} + +#include diff --git a/iconvdata/bug-iconv14.c b/iconvdata/bug-iconv14.c new file mode 100644 index 000000000..902f140fa --- /dev/null +++ b/iconvdata/bug-iconv14.c @@ -0,0 +1,127 @@ +/* Assertion in ISO-2022-JP-3 due to two-character sequence (bug 27256). + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +/* Use an escape sequence to return to the initial state. */ +static void +with_escape_sequence (void) +{ + iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3"); + TEST_VERIFY_EXIT (c != (iconv_t) -1); + + char in[] = "\e$(O+D\e(B"; + char *inbuf = in; + size_t inleft = strlen (in); + char out[3]; /* Space for one output character. */ + char *outbuf; + size_t outleft; + + outbuf = out; + outleft = sizeof (out); + TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1); + TEST_COMPARE (errno, E2BIG); + TEST_COMPARE (inleft, 3); + TEST_COMPARE (inbuf - in, strlen (in) - 3); + TEST_COMPARE (outleft, sizeof (out) - 2); + TEST_COMPARE (outbuf - out, 2); + TEST_COMPARE (out[0] & 0xff, 0xc3); + TEST_COMPARE (out[1] & 0xff, 0xa6); + + /* Return to the initial shift state, producing the pending + character. */ + outbuf = out; + outleft = sizeof (out); + TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), 0); + TEST_COMPARE (inleft, 0); + TEST_COMPARE (inbuf - in, strlen (in)); + TEST_COMPARE (outleft, sizeof (out) - 2); + TEST_COMPARE (outbuf - out, 2); + TEST_COMPARE (out[0] & 0xff, 0xcc); + TEST_COMPARE (out[1] & 0xff, 0x80); + + /* Nothing should be flushed the second time. 
*/ + outbuf = out; + outleft = sizeof (out); + TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0); + TEST_COMPARE (outleft, sizeof (out)); + TEST_COMPARE (outbuf - out, 0); + TEST_COMPARE (out[0] & 0xff, 0xcc); + TEST_COMPARE (out[1] & 0xff, 0x80); + + TEST_COMPARE (iconv_close (c), 0); +} + +/* Use an explicit flush to return to the initial state. */ +static void +with_flush (void) +{ + iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3"); + TEST_VERIFY_EXIT (c != (iconv_t) -1); + + char in[] = "\e$(O+D"; + char *inbuf = in; + size_t inleft = strlen (in); + char out[3]; /* Space for one output character. */ + char *outbuf; + size_t outleft; + + outbuf = out; + outleft = sizeof (out); + TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1); + TEST_COMPARE (errno, E2BIG); + TEST_COMPARE (inleft, 0); + TEST_COMPARE (inbuf - in, strlen (in)); + TEST_COMPARE (outleft, sizeof (out) - 2); + TEST_COMPARE (outbuf - out, 2); + TEST_COMPARE (out[0] & 0xff, 0xc3); + TEST_COMPARE (out[1] & 0xff, 0xa6); + + /* Flush the pending character. */ + outbuf = out; + outleft = sizeof (out); + TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0); + TEST_COMPARE (outleft, sizeof (out) - 2); + TEST_COMPARE (outbuf - out, 2); + TEST_COMPARE (out[0] & 0xff, 0xcc); + TEST_COMPARE (out[1] & 0xff, 0x80); + + /* Nothing should be flushed the second time. */ + outbuf = out; + outleft = sizeof (out); + TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0); + TEST_COMPARE (outleft, sizeof (out)); + TEST_COMPARE (outbuf - out, 0); + TEST_COMPARE (out[0] & 0xff, 0xcc); + TEST_COMPARE (out[1] & 0xff, 0x80); + + TEST_COMPARE (iconv_close (c), 0); +} + +static int +do_test (void) +{ + with_escape_sequence (); + with_flush (); + return 0; +} + +#include diff --git a/iconvdata/bug-iconv15.c b/iconvdata/bug-iconv15.c new file mode 100644 index 000000000..cc04bd031 --- /dev/null +++ b/iconvdata/bug-iconv15.c @@ -0,0 +1,60 @@ +/* Bug 28524: Conversion from ISO-2022-JP-3 with iconv + may emit spurious NUL character on state reset. + Copyright (C) The GNU Toolchain Authors. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include + +static int +do_test (void) +{ + char in[] = "\x1b(I"; + char *inbuf = in; + size_t inleft = sizeof (in) - 1; + char out[1]; + char *outbuf = out; + size_t outleft = sizeof (out); + iconv_t cd; + + cd = iconv_open ("UTF8", "ISO-2022-JP-3"); + TEST_VERIFY_EXIT (cd != (iconv_t) -1); + + /* First call to iconv should alter internal state. + Now, JISX0201_Kana_set is selected and + state value != ASCII_set. */ + TEST_VERIFY (iconv (cd, &inbuf, &inleft, &outbuf, &outleft) != (size_t) -1); + + /* No bytes should have been added to + the output buffer at this point. */ + TEST_VERIFY (outbuf == out); + TEST_VERIFY (outleft == sizeof (out)); + + /* Second call shall emit spurious NUL character in unpatched glibc. 
*/ + TEST_VERIFY (iconv (cd, NULL, NULL, &outbuf, &outleft) != (size_t) -1); + + /* No characters are expected to be produced. */ + TEST_VERIFY (outbuf == out); + TEST_VERIFY (outleft == sizeof (out)); + + TEST_VERIFY_EXIT (iconv_close (cd) != -1); + + return 0; +} + +#include diff --git a/iconvdata/euc-kr.c b/iconvdata/euc-kr.c index b0d56cf3e..1045bae92 100644 --- a/iconvdata/euc-kr.c +++ b/iconvdata/euc-kr.c @@ -80,11 +80,7 @@ euckr_from_ucs4 (uint32_t ch, unsigned char *cp) \ if (ch <= 0x9f) \ ++inptr; \ - /* 0xfe(->0x7e : row 94) and 0xc9(->0x59 : row 41) are \ - user-defined areas. */ \ - else if (__builtin_expect (ch == 0xa0, 0) \ - || __builtin_expect (ch > 0xfe, 0) \ - || __builtin_expect (ch == 0xc9, 0)) \ + else if (__glibc_unlikely (ch == 0xa0)) \ { \ /* This is illegal. */ \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ diff --git a/iconvdata/ibm1364.c b/iconvdata/ibm1364.c index 49e7267ab..521f0825b 100644 --- a/iconvdata/ibm1364.c +++ b/iconvdata/ibm1364.c @@ -158,24 +158,14 @@ enum \ if (__builtin_expect (ch, 0) == SO) \ { \ - /* Shift OUT, change to DBCS converter. */ \ - if (curcs == db) \ - { \ - result = __GCONV_ILLEGAL_INPUT; \ - break; \ - } \ + /* Shift OUT, change to DBCS converter (redundant escape okay). */ \ curcs = db; \ ++inptr; \ continue; \ } \ if (__builtin_expect (ch, 0) == SI) \ { \ - /* Shift IN, change to SBCS converter. */ \ - if (curcs == sb) \ - { \ - result = __GCONV_ILLEGAL_INPUT; \ - break; \ - } \ + /* Shift IN, change to SBCS converter (redundant escape okay). */ \ curcs = sb; \ ++inptr; \ continue; \ diff --git a/iconvdata/iso-2022-jp-3.c b/iconvdata/iso-2022-jp-3.c index 8c3b7e627..c7b470db6 100644 --- a/iconvdata/iso-2022-jp-3.c +++ b/iconvdata/iso-2022-jp-3.c @@ -1,5 +1,6 @@ /* Conversion module for ISO-2022-JP-3. Copyright (C) 1998-2020 Free Software Foundation, Inc. + Copyright (C) The GNU Toolchain Authors. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1998, and Bruno Haible , 2002. @@ -67,10 +68,15 @@ enum CURRENT_SEL_MASK = 7 << 3 }; -/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the state - also contains the last two bytes to be output, shifted by 6 bits, and a - one-bit indicator whether they must be preceded by the shift sequence, - in bit 22. */ +/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the + state also contains the last two bytes to be output, shifted by 6 + bits, and a one-bit indicator whether they must be preceded by the + shift sequence, in bit 22. During ISO-2022-JP-3 to UCS-4 + conversion, COUNT may also contain a non-zero pending wide + character, shifted by six bits. This happens for certain inputs in + JISX0213_1_2004_set and JISX0213_2_set if the second wide character + in a combining sequence cannot be written because the buffer is + full. */ /* Since this is a stateful encoding we have to provide code which resets the output state to the initial state. This has to be done during the @@ -80,10 +86,27 @@ enum { \ if (FROM_DIRECTION) \ { \ - /* It's easy, we don't have to emit anything, we just reset the \ - state for the input. */ \ - data->__statep->__count &= 7; \ - data->__statep->__count |= ASCII_set; \ + uint32_t ch = data->__statep->__count >> 6; \ + \ + if (__glibc_unlikely (ch != 0)) \ + { \ + if (__glibc_likely (outbuf + 4 <= outend)) \ + { \ + /* Write out the last character. 
*/ \ + put32u (outbuf, ch); \ + outbuf += 4; \ + data->__statep->__count &= 7; \ + data->__statep->__count |= ASCII_set; \ + } \ + else \ + /* We don't have enough room in the output buffer. */ \ + status = __GCONV_FULL_OUTPUT; \ + } \ + else \ + { \ + data->__statep->__count &= 7; \ + data->__statep->__count |= ASCII_set; \ + } \ } \ else \ { \ @@ -151,7 +174,21 @@ enum #define LOOPFCT FROM_LOOP #define BODY \ { \ - uint32_t ch = *inptr; \ + uint32_t ch; \ + \ + /* Output any pending character. */ \ + ch = set >> 6; \ + if (__glibc_unlikely (ch != 0)) \ + { \ + put32 (outptr, ch); \ + outptr += 4; \ + /* Remove the pending character, but preserve state bits. */ \ + set &= (1 << 6) - 1; \ + continue; \ + } \ + \ + /* Otherwise read the next input byte. */ \ + ch = *inptr; \ \ /* Recognize escape sequences. */ \ if (__glibc_unlikely (ch == ESC)) \ @@ -297,21 +334,25 @@ enum uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \ uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \ \ + inptr += 2; \ + \ + put32 (outptr, u1); \ + outptr += 4; \ + \ /* See whether we have room for two characters. */ \ - if (outptr + 8 <= outend) \ + if (outptr + 4 <= outend) \ { \ - inptr += 2; \ - put32 (outptr, u1); \ - outptr += 4; \ put32 (outptr, u2); \ outptr += 4; \ continue; \ } \ - else \ - { \ - result = __GCONV_FULL_OUTPUT; \ - break; \ - } \ + \ + /* Otherwise store only the first character now, and \ + put the second one into the queue. */ \ + set |= u2 << 6; \ + /* Tell the caller why we terminate the loop. */ \ + result = __GCONV_FULL_OUTPUT; \ + break; \ } \ \ inptr += 2; \ diff --git a/iconvdata/ksc5601.h b/iconvdata/ksc5601.h index d3eb3a4ff..f5cdc7279 100644 --- a/iconvdata/ksc5601.h +++ b/iconvdata/ksc5601.h @@ -50,15 +50,15 @@ ksc5601_to_ucs4 (const unsigned char **s, size_t avail, unsigned char offset) unsigned char ch2; int idx; + if (avail < 2) + return 0; + /* row 94(0x7e) and row 41(0x49) are user-defined area in KS C 5601 */ if (ch < offset || (ch - offset) <= 0x20 || (ch - offset) >= 0x7e || (ch - offset) == 0x49) return __UNKNOWN_10646_CHAR; - if (avail < 2) - return 0; - ch2 = (*s)[1]; if (ch2 < offset || (ch2 - offset) <= 0x20 || (ch2 - offset) >= 0x7f) return __UNKNOWN_10646_CHAR; diff --git a/include/libc-symbols.h b/include/libc-symbols.h index 685e20fdc..68fc79805 100644 --- a/include/libc-symbols.h +++ b/include/libc-symbols.h @@ -59,6 +59,19 @@ # define IN_MODULE (-1) #endif +/* Use symbol_version_reference to specify the version a symbol + reference should link to. Use symbol_version or + default_symbol_version for the definition of a versioned symbol. + The difference is that the latter is a no-op in non-shared + builds. */ +#ifdef __ASSEMBLER__ +# define symbol_version_reference(real, name, version) \ + .symver real, name##@##version +#else /* !__ASSEMBLER__ */ +# define symbol_version_reference(real, name, version) \ + __asm__ (".symver " #real "," #name "@" #version) +#endif + #ifndef _ISOMAC /* This is defined for the compilation of all C library code. features.h @@ -396,19 +409,6 @@ for linking") past the last element in SET. */ #define symbol_set_end_p(set, ptr) ((ptr) >= (void *const *) &__stop_##set) -/* Use symbol_version_reference to specify the version a symbol - reference should link to. Use symbol_version or - default_symbol_version for the definition of a versioned symbol. - The difference is that the latter is a no-op in non-shared - builds. 
*/ -#ifdef __ASSEMBLER__ -# define symbol_version_reference(real, name, version) \ - .symver real, name##@##version -#else /* !__ASSEMBLER__ */ -# define symbol_version_reference(real, name, version) \ - __asm__ (".symver " #real "," #name "@" #version) -#endif - #ifdef SHARED # define symbol_version(real, name, version) \ symbol_version_reference(real, name, version) diff --git a/include/sys/prctl.h b/include/sys/prctl.h index 0920ed642..d33f3a290 100644 --- a/include/sys/prctl.h +++ b/include/sys/prctl.h @@ -4,6 +4,7 @@ # ifndef _ISOMAC extern int __prctl (int __option, ...); +libc_hidden_proto (__prctl) # endif /* !_ISOMAC */ #endif diff --git a/include/sys/un.h b/include/sys/un.h index bdbee9998..152afd9fc 100644 --- a/include/sys/un.h +++ b/include/sys/un.h @@ -1 +1,13 @@ #include + +#ifndef _ISOMAC + +/* Set ADDR->sun_family to AF_UNIX and ADDR->sun_path to PATHNAME. + Return 0 on success or -1 on failure (due to overlong PATHNAME). + The caller should always use sizeof (struct sockaddr_un) as the + socket address length, disregaring the length of PATHNAME. + Only concrete (non-abstract) pathnames are supported. */ +int __sockaddr_un_set (struct sockaddr_un *addr, const char *pathname) + attribute_hidden; + +#endif /* _ISOMAC */ diff --git a/intl/dcigettext.c b/intl/dcigettext.c index 465c8df34..bd332e71d 100644 --- a/intl/dcigettext.c +++ b/intl/dcigettext.c @@ -1119,11 +1119,19 @@ _nl_find_msg (struct loaded_l10nfile *domain_file, outcharset = encoding; # ifdef _LIBC + + struct gconv_spec conv_spec; + + __gconv_create_spec (&conv_spec, charset, outcharset); + /* We always want to use transliteration. */ - outcharset = norm_add_slashes (outcharset, "TRANSLIT"); - charset = norm_add_slashes (charset, ""); - int r = __gconv_open (outcharset, charset, &convd->conv, - GCONV_AVOID_NOCONV); + conv_spec.translit = true; + + int r = __gconv_open (&conv_spec, &convd->conv, + GCONV_AVOID_NOCONV); + + __gconv_destroy_spec (&conv_spec); + if (__builtin_expect (r != __GCONV_OK, 0)) { /* If the output encoding is the same there is diff --git a/intl/tst-codeset.c b/intl/tst-codeset.c index fd70432ec..e9f6e5e09 100644 --- a/intl/tst-codeset.c +++ b/intl/tst-codeset.c @@ -22,13 +22,11 @@ #include #include #include +#include static int do_test (void) { - char *s; - int result = 0; - unsetenv ("LANGUAGE"); unsetenv ("OUTPUT_CHARSET"); setlocale (LC_ALL, "de_DE.ISO-8859-1"); @@ -36,25 +34,21 @@ do_test (void) bindtextdomain ("codeset", OBJPFX "domaindir"); /* Here we expect output in ISO-8859-1. */ - s = gettext ("cheese"); - if (strcmp (s, "K\344se")) - { - printf ("call 1 returned: %s\n", s); - result = 1; - } + TEST_COMPARE_STRING (gettext ("cheese"), "K\344se"); + /* Here we expect output in UTF-8. */ bind_textdomain_codeset ("codeset", "UTF-8"); + TEST_COMPARE_STRING (gettext ("cheese"), "K\303\244se"); - /* Here we expect output in UTF-8. */ - s = gettext ("cheese"); - if (strcmp (s, "K\303\244se")) - { - printf ("call 2 returned: %s\n", s); - result = 1; - } - - return result; + /* `a with umlaut' is transliterated to `ae'. */ + bind_textdomain_codeset ("codeset", "ASCII//TRANSLIT"); + TEST_COMPARE_STRING (gettext ("cheese"), "Kaese"); + + /* Transliteration also works by default even if not set. 
*/ + bind_textdomain_codeset ("codeset", "ASCII"); + TEST_COMPARE_STRING (gettext ("cheese"), "Kaese"); + + return 0; } -#define TEST_FUNCTION do_test () -#include "../test-skeleton.c" +#include diff --git a/localedata/locales/oc_FR b/localedata/locales/oc_FR index d0d89a149..ba9377ed4 100644 --- a/localedata/locales/oc_FR +++ b/localedata/locales/oc_FR @@ -92,7 +92,7 @@ day "dimenge";/ "diluns";/ "dimars";/ "dimcres";/ - "dijus";/ + "dijus";/ "divendres";/ "dissabte" abmon "gen.";/ @@ -110,7 +110,7 @@ abmon "gen.";/ alt_mon "genir";/ "febrir";/ "mar";/ - "abrial";/ + "abril";/ "mai";/ "junh";/ "julhet";/ @@ -122,7 +122,7 @@ alt_mon "genir";/ mon "de genir";/ "de febrir";/ "de mar";/ - "dabrial";/ + "dabril";/ "de mai";/ "de junh";/ "de julhet";/ diff --git a/malloc/Makefile b/malloc/Makefile index 984045b5b..8edeb3d62 100644 --- a/malloc/Makefile +++ b/malloc/Makefile @@ -61,6 +61,16 @@ endif tests += $(tests-static) test-srcs = tst-mtrace +# These tests either are run with MALLOC_CHECK_=3 by default or do not work +# with MALLOC_CHECK_=3 because they expect a specific failure. +tests-exclude-mcheck = tst-mcheck tst-malloc-usable \ + tst-interpose-nothread tst-interpose-static-nothread \ + tst-interpose-static-thread tst-malloc-too-large \ + tst-mxfast tst-safe-linking + +# Run all tests with MALLOC_CHECK_=3 +tests-mcheck = $(filter-out $(tests-exclude-mcheck),$(tests)) + routines = malloc morecore mcheck mtrace obstack reallocarray \ scratch_buffer_grow scratch_buffer_grow_preserve \ scratch_buffer_set_array_size \ @@ -99,6 +109,11 @@ $(objpfx)tst-malloc-thread-exit: $(shared-thread-library) $(objpfx)tst-malloc-thread-fail: $(shared-thread-library) $(objpfx)tst-malloc-fork-deadlock: $(shared-thread-library) $(objpfx)tst-malloc-stats-cancellation: $(shared-thread-library) +$(objpfx)tst-malloc-backtrace-mcheck: $(shared-thread-library) +$(objpfx)tst-malloc-thread-exit-mcheck: $(shared-thread-library) +$(objpfx)tst-malloc-thread-fail-mcheck: $(shared-thread-library) +$(objpfx)tst-malloc-fork-deadlock-mcheck: $(shared-thread-library) +$(objpfx)tst-malloc-stats-cancellation-mcheck: $(shared-thread-library) # Export the __malloc_initialize_hook variable to libc.so. LDFLAGS-tst-mallocstate = -rdynamic @@ -238,6 +253,8 @@ $(tests:%=$(objpfx)%.o): CPPFLAGS += -DTEST_NO_MALLOPT $(objpfx)tst-interpose-nothread: $(objpfx)tst-interpose-aux-nothread.o $(objpfx)tst-interpose-thread: \ $(objpfx)tst-interpose-aux-thread.o $(shared-thread-library) +$(objpfx)tst-interpose-thread-mcheck: \ + $(objpfx)tst-interpose-aux-thread.o $(shared-thread-library) $(objpfx)tst-interpose-static-nothread: $(objpfx)tst-interpose-aux-nothread.o $(objpfx)tst-interpose-static-thread: \ $(objpfx)tst-interpose-aux-thread.o $(static-thread-library) @@ -255,3 +272,6 @@ $(objpfx)tst-dynarray-fail-mem.out: $(objpfx)tst-dynarray-fail.out $(objpfx)tst-malloc-tcache-leak: $(shared-thread-library) $(objpfx)tst-malloc_info: $(shared-thread-library) $(objpfx)tst-mallocfork2: $(shared-thread-library) +$(objpfx)tst-malloc-tcache-leak-mcheck: $(shared-thread-library) +$(objpfx)tst-malloc_info-mcheck: $(shared-thread-library) +$(objpfx)tst-mallocfork2-mcheck: $(shared-thread-library) diff --git a/malloc/tst-mallocfork2.c b/malloc/tst-mallocfork2.c index 0602a9489..fc1fd64b2 100644 --- a/malloc/tst-mallocfork2.c +++ b/malloc/tst-mallocfork2.c @@ -62,6 +62,9 @@ static volatile sig_atomic_t sigusr1_received; progress. Checked by liveness_signal_handler. 
   */
 static volatile sig_atomic_t progress_indicator = 1;
 
+/* Set to 1 if an error occurs in the signal handler.  */
+static volatile sig_atomic_t error_indicator = 0;
+
 static void
 sigusr1_handler (int signo)
 {
@@ -72,7 +75,8 @@ sigusr1_handler (int signo)
   if (pid == -1)
     {
       write_message ("error: fork\n");
-      abort ();
+      error_indicator = 1;
+      return;
     }
   if (pid == 0)
     _exit (0);
@@ -81,12 +85,14 @@
   if (ret < 0)
     {
       write_message ("error: waitpid\n");
-      abort ();
+      error_indicator = 1;
+      return;
     }
   if (status != 0)
     {
       write_message ("error: unexpected exit status from subprocess\n");
-      abort ();
+      error_indicator = 1;
+      return;
     }
 }
@@ -122,9 +128,25 @@ signal_sender (int signo, bool sleep)
     }
 }
 
+/* Child processes.  */
+static pid_t sigusr1_sender_pids[5] = { 0 };
+static pid_t sigusr2_sender_pid = 0;
+
+static void
+kill_children (void)
+{
+  for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)
+    if (sigusr1_sender_pids[i] > 0)
+      kill (sigusr1_sender_pids[i], SIGKILL);
+  if (sigusr2_sender_pid > 0)
+    kill (sigusr2_sender_pid, SIGKILL);
+}
+
 static int
 do_test (void)
 {
+  atexit (kill_children);
+
+  /* shared->barrier is initialized along with sigusr1_sender_pids
+     below.  */
   shared = support_shared_allocate (sizeof (*shared));
@@ -148,14 +170,13 @@ do_test (void)
       return 1;
     }
 
-  pid_t sigusr2_sender_pid = xfork ();
+  sigusr2_sender_pid = xfork ();
   if (sigusr2_sender_pid == 0)
     signal_sender (SIGUSR2, true);
 
   /* Send SIGUSR1 signals from several processes.  Hopefully, one
      signal will hit one of the ciritical functions.  Use a barrier to
      avoid sending signals while not running fork/free/malloc.  */
-  pid_t sigusr1_sender_pids[5];
   {
     pthread_barrierattr_t attr;
     xpthread_barrierattr_init (&attr);
@@ -166,7 +187,7 @@
   }
   for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)
     {
-      sigusr1_sender_pids[i] = fork ();
+      sigusr1_sender_pids[i] = xfork ();
       if (sigusr1_sender_pids[i] == 0)
         signal_sender (SIGUSR1, false);
     }
@@ -211,7 +232,7 @@
           ++malloc_signals;
         xpthread_barrier_wait (&shared->barrier);
 
-        if (objects[slot] == NULL)
+        if (objects[slot] == NULL || error_indicator != 0)
           {
             printf ("error: malloc: %m\n");
             for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)
@@ -225,10 +246,6 @@
   for (int slot = 0; slot < malloc_objects; ++slot)
     free (objects[slot]);
 
-  for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i)
-    kill (sigusr1_sender_pids[i], SIGKILL);
-  kill (sigusr2_sender_pid, SIGKILL);
-
   printf ("info: signals received during fork: %u\n", fork_signals);
   printf ("info: signals received during free: %u\n", free_signals);
   printf ("info: signals received during malloc: %u\n", malloc_signals);
diff --git a/math/Makefile b/math/Makefile
index 5985b6744..3496af404 100644
--- a/math/Makefile
+++ b/math/Makefile
@@ -644,6 +644,128 @@ ifneq ($(long-double-fcts),yes)
 # We won't compile the `long double' code at all.  Tell the `double' code
 # to define aliases for the `FUNCl' names.
 math-CPPFLAGS += -DNO_LONG_DOUBLE
+# GCC 10 diagnoses aliases with types conflicting with built-in
+# functions.
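+#
+# (A minimal illustration, not part of the change itself: with
+# NO_LONG_DOUBLE the `double' sources also define the `l' names as
+# aliases, along the lines of
+#
+#   double __acos (double x) { ... }
+#   weak_alias (__acos, acosl)
+#
+# which gives acosl the type `double (double)', while the GCC built-in
+# acosl has type `long double (long double)'.  The per-file
+# -fno-builtin-<function> flags below disable the corresponding
+# built-in and with it the diagnostic.)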
+CFLAGS-w_acos.c += -fno-builtin-acosl +CFLAGS-w_acosh.c += -fno-builtin-acoshl +CFLAGS-w_asin.c += -fno-builtin-asinl +CFLAGS-s_asinh.c += -fno-builtin-asinhl +CFLAGS-s_atan.c += -fno-builtin-atanl +CFLAGS-w_atan2.c += -fno-builtin-atan2l +CFLAGS-w_atanh.c += -fno-builtin-atanhl +CFLAGS-s_cabs.c += -fno-builtin-cabsl +CFLAGS-s_cacos.c += -fno-builtin-cacosl +CFLAGS-s_cacosh.c += -fno-builtin-cacoshl +CFLAGS-s_canonicalize.c += -fno-builtin-canonicalizel +CFLAGS-s_carg.c += -fno-builtin-cargl +CFLAGS-s_casin.c += -fno-builtin-casinl +CFLAGS-s_casinh.c += -fno-builtin-casinhl +CFLAGS-s_catan.c += -fno-builtin-catanl +CFLAGS-s_catanh.c += -fno-builtin-catanhl +CFLAGS-s_cbrt.c += -fno-builtin-cbrtl +CFLAGS-s_ccos.c += -fno-builtin-ccosl +CFLAGS-s_ccosh.c += -fno-builtin-ccoshl +CFLAGS-s_ceil.c += -fno-builtin-ceill +CFLAGS-s_cexp.c += -fno-builtin-cexpl +CFLAGS-s_cimag.c += -fno-builtin-cimagl +CFLAGS-s_clog.c += -fno-builtin-clogl +CFLAGS-s_clog10.c += -fno-builtin-clog10l +CFLAGS-s_conj.c += -fno-builtin-conjl +CFLAGS-s_copysign.c += -fno-builtin-copysignl +CFLAGS-s_cos.c += -fno-builtin-cosl +CFLAGS-w_cosh.c += -fno-builtin-coshl +CFLAGS-s_cpow.c += -fno-builtin-cpowl +CFLAGS-s_cproj.c += -fno-builtin-cprojl +CFLAGS-s_creal.c += -fno-builtin-creall +CFLAGS-s_csin.c += -fno-builtin-csinl +CFLAGS-s_csinh.c += -fno-builtin-csinhl +CFLAGS-s_csqrt.c += -fno-builtin-csqrtl +CFLAGS-s_ctan.c += -fno-builtin-ctanl +CFLAGS-s_ctanh.c += -fno-builtin-ctanhl +CFLAGS-s_dadd.c += -fno-builtin-daddl +CFLAGS-s_ddiv.c += -fno-builtin-ddivl +CFLAGS-s_dmul.c += -fno-builtin-dmull +CFLAGS-s_dsub.c += -fno-builtin-dsubl +CFLAGS-s_erf.c += -fno-builtin-erfl +CFLAGS-s_erfc.c += -fno-builtin-erfcl +CFLAGS-e_exp.c += -fno-builtin-expl +CFLAGS-w_exp10.c += -fno-builtin-exp10l +CFLAGS-e_exp2.c += -fno-builtin-exp2l +CFLAGS-s_expm1.c += -fno-builtin-expm1l +CFLAGS-s_fabs.c += -fno-builtin-fabsl +CFLAGS-s_fadd.c += -fno-builtin-faddl +CFLAGS-s_fdim.c += -fno-builtin-fdiml +CFLAGS-s_fdiv.c += -fno-builtin-fdivl +CFLAGS-s_finite.c += -fno-builtin-finitel +CFLAGS-s_floor.c += -fno-builtin-floorl +CFLAGS-s_fma.c += -fno-builtin-fmal +CFLAGS-s_fmax.c += -fno-builtin-fmaxl +CFLAGS-s_fmaxmag.c += -fno-builtin-fmaxmagl +CFLAGS-s_fmin.c += -fno-builtin-fminl +CFLAGS-s_fminmag.c += -fno-builtin-fminmagl +CFLAGS-w_fmod.c += -fno-builtin-fmodl +CFLAGS-s_fmul.c += -fno-builtin-fmull +CFLAGS-s_frexp.c += -fno-builtin-frexpl +CFLAGS-s_fromfp.c += -fno-builtin-fromfpl +CFLAGS-s_fromfpx.c += -fno-builtin-fromfpxl +CFLAGS-s_fsub.c += -fno-builtin-fsubl +CFLAGS-s_gamma.c += -fno-builtin-gammal +CFLAGS-s_getpayload.c += -fno-builtin-getpayloadl +CFLAGS-w_hypot.c += -fno-builtin-hypotl +CFLAGS-w_ilogb.c += -fno-builtin-ilogbl +CFLAGS-s_isinf.c += -fno-builtin-isinfl +CFLAGS-s_isnan.c += -fno-builtin-isnanl +CFLAGS-w_j0.c += -fno-builtin-j0l +CFLAGS-w_j1.c += -fno-builtin-j1l +CFLAGS-w_jn.c += -fno-builtin-jnl +CFLAGS-s_ldexp.c += -fno-builtin-ldexpl +CFLAGS-w_lgamma.c += -fno-builtin-lgammal +CFLAGS-w_lgamma_r.c += -fno-builtin-lgammal_r +CFLAGS-w_llogb.c += -fno-builtin-llogbl +CFLAGS-s_llrint.c += -fno-builtin-llrintl +CFLAGS-s_llround.c += -fno-builtin-llroundl +CFLAGS-e_log.c += -fno-builtin-logl +CFLAGS-w_log10.c += -fno-builtin-log10l +CFLAGS-w_log1p.c += -fno-builtin-log1pl +CFLAGS-e_log2.c += -fno-builtin-log2l +CFLAGS-s_logb.c += -fno-builtin-logbl +CFLAGS-s_lrint.c += -fno-builtin-lrintl +CFLAGS-s_lround.c += -fno-builtin-lroundl +CFLAGS-s_modf.c += -fno-builtin-modfl +CFLAGS-s_nan.c += -fno-builtin-nanl +CFLAGS-s_nearbyint.c += 
-fno-builtin-nearbyintl +CFLAGS-s_nextafter.c += -fno-builtin-nextafterl +CFLAGS-s_nextdown.c += -fno-builtin-nextdownl +CFLAGS-s_nexttoward.c += -fno-builtin-nexttoward -fno-builtin-nexttowardl +CFLAGS-s_nexttowardf.c += -fno-builtin-nexttowardf +CFLAGS-s_nextup.c += -fno-builtin-nextupl +CFLAGS-e_pow.c += -fno-builtin-powl +CFLAGS-w_remainder.c += -fno-builtin-remainderl -fno-builtin-dreml +CFLAGS-s_remquo.c += -fno-builtin-remquol +CFLAGS-s_rint.c += -fno-builtin-rintl +CFLAGS-s_round.c += -fno-builtin-roundl +CFLAGS-s_roundeven.c += -fno-builtin-roundevenl +CFLAGS-w_scalb.c += -fno-builtin-scalbl +CFLAGS-w_scalbln.c += -fno-builtin-scalblnl +CFLAGS-s_scalbn.c += -fno-builtin-scalbnl +CFLAGS-s_setpayload.c += -fno-builtin-setpayloadl +CFLAGS-s_setpayloadsig.c += -fno-builtin-setpayloadsigl +CFLAGS-s_significand.c += -fno-builtin-significandl +CFLAGS-s_sin.c += -fno-builtin-sinl +CFLAGS-s_sincos.c += -fno-builtin-sincosl +CFLAGS-w_sinh.c += -fno-builtin-sinhl +CFLAGS-w_sqrt.c += -fno-builtin-sqrtl +CFLAGS-s_tan.c += -fno-builtin-tanl +CFLAGS-s_tanh.c += -fno-builtin-tanhl +CFLAGS-w_tgamma.c += -fno-builtin-tgammal +CFLAGS-s_totalorder.c += -fno-builtin-totalorderl +CFLAGS-s_totalordermag.c += -fno-builtin-totalordermagl +CFLAGS-s_trunc.c += -fno-builtin-truncl +CFLAGS-s_ufromfp.c += -fno-builtin-ufromfpl +CFLAGS-s_ufromfpx.c += -fno-builtin-ufromfpxl +CFLAGS-s_y0.c += -fno-builtin-y0l +CFLAGS-s_y1.c += -fno-builtin-y1l +CFLAGS-s_yn.c += -fno-builtin-ynl endif # These files quiet sNaNs in a way that is optimized away without diff --git a/misc/Makefile b/misc/Makefile index e0465980c..e167e199e 100644 --- a/misc/Makefile +++ b/misc/Makefile @@ -87,7 +87,7 @@ tests := tst-dirname tst-tsearch tst-fdset tst-mntent tst-hsearch \ tst-preadvwritev tst-preadvwritev64 tst-makedev tst-empty \ tst-preadvwritev2 tst-preadvwritev64v2 tst-warn-wide \ tst-ldbl-warn tst-ldbl-error tst-dbl-efgcvt tst-ldbl-efgcvt \ - tst-mntent-autofs + tst-mntent-autofs tst-syscalls # Tests which need libdl. ifeq (yes,$(build-shared)) diff --git a/misc/tst-syscalls.c b/misc/tst-syscalls.c new file mode 100644 index 000000000..cfcd38232 --- /dev/null +++ b/misc/tst-syscalls.c @@ -0,0 +1,167 @@ +/* Test for syscall interfaces. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* This test verifies that the x32 system call handling zero-extends + unsigned 32-bit arguments to the 64-bit argument registers for + system calls (bug 25810). The bug is specific to x32, but the test + should pass on all architectures. */ + +#include +#include +#include +#include +#include +#include + +/* On x32, this can be passed in a single 64-bit integer register. 
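   (On x32, size_t and pointers are both 4 bytes, so length and ptr
   share one 64-bit register.  If the compiler then passes b.length to
   a system call by copying that register, its upper half still holds
   bits of b.ptr; the 64-bit kernel reads the full register, so the
   syscall stub must zero-extend 32-bit arguments (bug 25810).
   Conceptually the stub has to perform the equivalent of

     unsigned long long arg2 = (unsigned int) b.length;   zero-extend

   before entering the kernel.)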
*/ +struct Array +{ + size_t length; + void *ptr; +}; + +static int error_count; + +__attribute__ ((noclone, noinline)) +struct Array +allocate (size_t bytes) +{ + if (!bytes) + return __extension__ (struct Array) {0, 0}; + + void *p = mmap (0x0, bytes, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (p == MAP_FAILED) + return __extension__ (struct Array) {0, 0}; + + return __extension__ (struct Array) {bytes, p}; +} + +__attribute__ ((noclone, noinline)) +void +deallocate (struct Array b) +{ + /* On x32, the 64-bit integer register containing `b' may be copied + to another 64-bit integer register to pass the second argument to + munmap. */ + if (b.length && munmap (b.ptr, b.length)) + { + printf ("munmap error: %m\n"); + error_count++; + } +} + +__attribute__ ((noclone, noinline)) +void * +do_mmap (void *addr, size_t length) +{ + return mmap (addr, length, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); +} + +__attribute__ ((noclone, noinline)) +void * +reallocate (struct Array b) +{ + /* On x32, the 64-bit integer register containing `b' may be copied + to another 64-bit integer register to pass the second argument to + do_mmap. */ + if (b.length) + return do_mmap (b.ptr, b.length); + return NULL; +} + +__attribute__ ((noclone, noinline)) +void +protect (struct Array b) +{ + if (b.length) + { + /* On x32, the 64-bit integer register containing `b' may be copied + to another 64-bit integer register to pass the second argument + to mprotect. */ + if (mprotect (b.ptr, b.length, + PROT_READ | PROT_WRITE | PROT_EXEC)) + { + printf ("mprotect error: %m\n"); + error_count++; + } + } +} + +__attribute__ ((noclone, noinline)) +ssize_t +do_read (int fd, void *ptr, struct Array b) +{ + /* On x32, the 64-bit integer register containing `b' may be copied + to another 64-bit integer register to pass the second argument to + read. */ + if (b.length) + return read (fd, ptr, b.length); + return 0; +} + +__attribute__ ((noclone, noinline)) +ssize_t +do_write (int fd, void *ptr, struct Array b) +{ + /* On x32, the 64-bit integer register containing `b' may be copied + to another 64-bit integer register to pass the second argument to + write. */ + if (b.length) + return write (fd, ptr, b.length); + return 0; +} + +static int +do_test (void) +{ + struct Array array; + + array = allocate (1); + protect (array); + deallocate (array); + void *p = reallocate (array); + if (p == MAP_FAILED) + { + printf ("mmap error: %m\n"); + error_count++; + } + array.ptr = p; + protect (array); + deallocate (array); + + int fd = xopen ("/dev/null", O_RDWR, 0); + char buf[2]; + array.ptr = buf; + if (do_read (fd, array.ptr, array) == -1) + { + printf ("read error: %m\n"); + error_count++; + } + if (do_write (fd, array.ptr, array) == -1) + { + printf ("write error: %m\n"); + error_count++; + } + xclose (fd); + + return error_count ? EXIT_FAILURE : EXIT_SUCCESS; +} + +#include diff --git a/nptl/Makefile b/nptl/Makefile index 584e0ffd9..2df4c9098 100644 --- a/nptl/Makefile +++ b/nptl/Makefile @@ -332,7 +332,7 @@ tests-internal := tst-rwlock19 tst-rwlock20 \ tst-mutexpi8 tst-mutexpi8-static tst-cancel25 xtests = tst-setuid1 tst-setuid1-static tst-setuid2 \ - tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 + tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 tst-setgroups # This test can run into task limits because of a linux kernel bug # and then cause the make process to fail too, see bug 24537. 
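The nptl/descr.h hunk that follows widens the xid_command argument
slots from long int to unsigned long int.  The set*id and setgroups
arguments are unsigned, and storing a 32-bit value in a signed 64-bit
slot would sign-extend it.  A two-line sketch of the difference,
assuming 64-bit long:

  unsigned int count = 0xffffffffu;
  long int as_signed = (int) count;        /* -1, i.e. 0xffffffffffffffff */
  unsigned long int as_unsigned = count;   /* 0x00000000ffffffff */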
diff --git a/nptl/descr.h b/nptl/descr.h index 9dcf480bd..5f1f35e9a 100644 --- a/nptl/descr.h +++ b/nptl/descr.h @@ -94,7 +94,13 @@ struct pthread_unwind_buf struct xid_command { int syscall_no; - long int id[3]; + /* Enforce zero-extension for the pointer argument in + + int setgroups (size_t size, const gid_t *list); + + The kernel XID arguments are unsigned and do not require sign + extension. */ + unsigned long int id[3]; volatile int cntr; volatile int error; /* -1: no call yet, 0: success seen, >0: error seen. */ }; diff --git a/nptl/tst-setgroups.c b/nptl/tst-setgroups.c new file mode 100644 index 000000000..ae3c1b1ba --- /dev/null +++ b/nptl/tst-setgroups.c @@ -0,0 +1,79 @@ +/* Test setgroups as root and in the presence of threads (Bug 26248) + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* The purpose of this test is to test the setgroups API as root and in + the presence of threads. Once we create a thread the setgroups + implementation must ensure that all threads are set to the same + group and this operation should not fail. Lastly we test setgroups + with a zero sized group and a bad address and verify we get EPERM. */ + +static void * +start_routine (void *args) +{ + return NULL; +} + +static int +do_test (void) +{ + int size; + /* NB: Stack address can be at 0xfffXXXXX on 32-bit OSes. */ + gid_t list[NGROUPS_MAX]; + int status = EXIT_SUCCESS; + + pthread_t thread = xpthread_create (NULL, start_routine, NULL); + + size = getgroups (sizeof (list) / sizeof (list[0]), list); + if (size < 0) + { + status = EXIT_FAILURE; + error (0, errno, "getgroups failed"); + } + if (setgroups (size, list) < 0) + { + if (errno == EPERM) + status = EXIT_UNSUPPORTED; + else + { + status = EXIT_FAILURE; + error (0, errno, "setgroups failed"); + } + } + + if (status == EXIT_SUCCESS && setgroups (0, list) < 0) + { + status = EXIT_FAILURE; + error (0, errno, "setgroups failed"); + } + + xpthread_join (thread); + + return status; +} + +#include diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c index 88c69d1e9..381aa721e 100644 --- a/nscd/netgroupcache.c +++ b/nscd/netgroupcache.c @@ -248,7 +248,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, : NULL); ndomain = (ndomain ? 
newbuf + ndomaindiff : NULL); - buffer = newbuf; + *tofreep = buffer = newbuf; } nhost = memcpy (buffer + bufused, @@ -319,7 +319,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, else if (status == NSS_STATUS_TRYAGAIN && e == ERANGE) { buflen *= 2; - buffer = xrealloc (buffer, buflen); + *tofreep = buffer = xrealloc (buffer, buflen); } else if (status == NSS_STATUS_RETURN || status == NSS_STATUS_NOTFOUND diff --git a/nscd/selinux.c b/nscd/selinux.c index a4ea8008e..1ebf92482 100644 --- a/nscd/selinux.c +++ b/nscd/selinux.c @@ -33,6 +33,7 @@ #ifdef HAVE_LIBAUDIT # include #endif +#include #include "dbg_log.h" #include "selinux.h" @@ -320,6 +321,12 @@ avc_free_lock (void *lock) } +/* avc_init (along with several other symbols) was marked as deprecated by the + SELinux API starting from version 3.1. We use it here, but should + eventually switch to the newer API. */ +DIAG_PUSH_NEEDS_COMMENT +DIAG_IGNORE_NEEDS_COMMENT (10, "-Wdeprecated-declarations"); + /* Initialize the user space access vector cache (AVC) for NSCD along with log/thread/lock callbacks. */ void @@ -335,7 +342,14 @@ nscd_avc_init (void) audit_init (); #endif } +DIAG_POP_NEEDS_COMMENT + +/* security_context_t and sidput (along with several other symbols) were marked + as deprecated by the SELinux API starting from version 3.1. We use them + here, but should eventually switch to the newer API. */ +DIAG_PUSH_NEEDS_COMMENT +DIAG_IGNORE_NEEDS_COMMENT (10, "-Wdeprecated-declarations"); /* Check the permission from the caller (via getpeercon) to nscd. Returns 0 if access is allowed, 1 if denied, and -1 on error. @@ -422,6 +436,7 @@ out: return rc; } +DIAG_POP_NEEDS_COMMENT /* Wrapper to get AVC statistics. */ diff --git a/nss/makedb.c b/nss/makedb.c index 8e389a168..8e1e8ec9a 100644 --- a/nss/makedb.c +++ b/nss/makedb.c @@ -38,6 +38,7 @@ #include #include #include "nss_db/nss_db.h" +#include /* Get libc version number. */ #include "../version.h" @@ -841,6 +842,13 @@ print_database (int fd) #ifdef HAVE_SELINUX + +/* security_context_t and matchpathcon (along with several other symbols) were + marked as deprecated by the SELinux API starting from version 3.1. We use + them here, but should eventually switch to the newer API. */ +DIAG_PUSH_NEEDS_COMMENT +DIAG_IGNORE_NEEDS_COMMENT (10, "-Wdeprecated-declarations"); + static void set_file_creation_context (const char *outname, mode_t mode) { @@ -870,6 +878,7 @@ set_file_creation_context (const char *outname, mode_t mode) freecon (ctx); } } +DIAG_POP_NEEDS_COMMENT static void reset_file_creation_context (void) diff --git a/nss/nss_compat/compat-grp.c b/nss/nss_compat/compat-grp.c index a8de1e03b..f8b19a5cf 100644 --- a/nss/nss_compat/compat-grp.c +++ b/nss/nss_compat/compat-grp.c @@ -142,7 +142,7 @@ _nss_compat_setgrent (int stayopen) } -static enum nss_status +static enum nss_status __attribute_warn_unused_result__ internal_endgrent (ent_t *ent) { if (ent->stream != NULL) @@ -163,6 +163,15 @@ internal_endgrent (ent_t *ent) return NSS_STATUS_SUCCESS; } +/* Like internal_endgrent, but preserve errno in all cases. 
*/ +static void +internal_endgrent_noerror (ent_t *ent) +{ + int saved_errno = errno; + enum nss_status unused __attribute__ ((unused)) = internal_endgrent (ent); + __set_errno (saved_errno); +} + enum nss_status _nss_compat_endgrent (void) { @@ -483,7 +492,7 @@ _nss_compat_getgrnam_r (const char *name, struct group *grp, if (result == NSS_STATUS_SUCCESS) result = internal_getgrnam_r (name, grp, &ent, buffer, buflen, errnop); - internal_endgrent (&ent); + internal_endgrent_noerror (&ent); return result; } @@ -612,7 +621,7 @@ _nss_compat_getgrgid_r (gid_t gid, struct group *grp, if (result == NSS_STATUS_SUCCESS) result = internal_getgrgid_r (gid, grp, &ent, buffer, buflen, errnop); - internal_endgrent (&ent); + internal_endgrent_noerror (&ent); return result; } diff --git a/nss/nss_compat/compat-initgroups.c b/nss/nss_compat/compat-initgroups.c index 939b25b33..7591442ff 100644 --- a/nss/nss_compat/compat-initgroups.c +++ b/nss/nss_compat/compat-initgroups.c @@ -133,7 +133,7 @@ internal_setgrent (ent_t *ent) } -static enum nss_status +static enum nss_status __attribute_warn_unused_result__ internal_endgrent (ent_t *ent) { if (ent->stream != NULL) @@ -157,6 +157,15 @@ internal_endgrent (ent_t *ent) return NSS_STATUS_SUCCESS; } +/* Like internal_endgrent, but preserve errno in all cases. */ +static void +internal_endgrent_noerror (ent_t *ent) +{ + int saved_errno = errno; + enum nss_status unused __attribute__ ((unused)) = internal_endgrent (ent); + __set_errno (saved_errno); +} + /* Add new group record. */ static void add_group (long int *start, long int *size, gid_t **groupsp, long int limit, @@ -501,7 +510,7 @@ _nss_compat_initgroups_dyn (const char *user, gid_t group, long int *start, done: scratch_buffer_free (&tmpbuf); - internal_endgrent (&intern); + internal_endgrent_noerror (&intern); return status; } diff --git a/nss/nss_compat/compat-pwd.c b/nss/nss_compat/compat-pwd.c index ec3f35c59..bd5e707da 100644 --- a/nss/nss_compat/compat-pwd.c +++ b/nss/nss_compat/compat-pwd.c @@ -259,7 +259,7 @@ _nss_compat_setpwent (int stayopen) } -static enum nss_status +static enum nss_status __attribute_warn_unused_result__ internal_endpwent (ent_t *ent) { if (ent->stream != NULL) @@ -287,6 +287,15 @@ internal_endpwent (ent_t *ent) return NSS_STATUS_SUCCESS; } +/* Like internal_endpwent, but preserve errno in all cases. 
*/ +static void +internal_endpwent_noerror (ent_t *ent) +{ + int saved_errno = errno; + enum nss_status unused __attribute__ ((unused)) = internal_endpwent (ent); + __set_errno (saved_errno); +} + enum nss_status _nss_compat_endpwent (void) { @@ -822,7 +831,7 @@ _nss_compat_getpwnam_r (const char *name, struct passwd *pwd, if (result == NSS_STATUS_SUCCESS) result = internal_getpwnam_r (name, pwd, &ent, buffer, buflen, errnop); - internal_endpwent (&ent); + internal_endpwent_noerror (&ent); return result; } @@ -1061,7 +1070,7 @@ _nss_compat_getpwuid_r (uid_t uid, struct passwd *pwd, if (result == NSS_STATUS_SUCCESS) result = internal_getpwuid_r (uid, pwd, &ent, buffer, buflen, errnop); - internal_endpwent (&ent); + internal_endpwent_noerror (&ent); return result; } diff --git a/nss/nss_compat/compat-spwd.c b/nss/nss_compat/compat-spwd.c index f6b7a1ef1..d0e3c51b4 100644 --- a/nss/nss_compat/compat-spwd.c +++ b/nss/nss_compat/compat-spwd.c @@ -215,7 +215,7 @@ _nss_compat_setspent (int stayopen) } -static enum nss_status +static enum nss_status __attribute_warn_unused_result__ internal_endspent (ent_t *ent) { if (ent->stream != NULL) @@ -244,6 +244,15 @@ internal_endspent (ent_t *ent) return NSS_STATUS_SUCCESS; } +/* Like internal_endspent, but preserve errno in all cases. */ +static void +internal_endspent_noerror (ent_t *ent) +{ + int saved_errno = errno; + enum nss_status unused __attribute__ ((unused)) = internal_endspent (ent); + __set_errno (saved_errno); +} + enum nss_status _nss_compat_endspent (void) { @@ -261,7 +270,6 @@ _nss_compat_endspent (void) return result; } - static enum nss_status getspent_next_nss_netgr (const char *name, struct spwd *result, ent_t *ent, char *group, char *buffer, size_t buflen, @@ -786,7 +794,7 @@ _nss_compat_getspnam_r (const char *name, struct spwd *pwd, if (result == NSS_STATUS_SUCCESS) result = internal_getspnam_r (name, pwd, &ent, buffer, buflen, errnop); - internal_endspent (&ent); + internal_endspent_noerror (&ent); return result; } diff --git a/posix/glob.c b/posix/glob.c index cba9cd181..4580cefb9 100644 --- a/posix/glob.c +++ b/posix/glob.c @@ -827,31 +827,32 @@ __glob (const char *pattern, int flags, int (*errfunc) (const char *, int), { size_t home_len = strlen (p->pw_dir); size_t rest_len = end_name == NULL ? 
0 : strlen (end_name); - char *d; + char *d, *newp; + bool use_alloca = glob_use_alloca (alloca_used, + home_len + rest_len + 1); - if (__glibc_unlikely (malloc_dirname)) - free (dirname); - malloc_dirname = 0; - - if (glob_use_alloca (alloca_used, home_len + rest_len + 1)) - dirname = alloca_account (home_len + rest_len + 1, - alloca_used); + if (use_alloca) + newp = alloca_account (home_len + rest_len + 1, alloca_used); else { - dirname = malloc (home_len + rest_len + 1); - if (dirname == NULL) + newp = malloc (home_len + rest_len + 1); + if (newp == NULL) { scratch_buffer_free (&pwtmpbuf); retval = GLOB_NOSPACE; goto out; } - malloc_dirname = 1; } - d = mempcpy (dirname, p->pw_dir, home_len); + d = mempcpy (newp, p->pw_dir, home_len); if (end_name != NULL) d = mempcpy (d, end_name, rest_len); *d = '\0'; + if (__glibc_unlikely (malloc_dirname)) + free (dirname); + dirname = newp; + malloc_dirname = !use_alloca; + dirlen = home_len + rest_len; dirname_modified = 1; } diff --git a/posix/wordexp-test.c b/posix/wordexp-test.c index ed1b22308..cb3f989cb 100644 --- a/posix/wordexp-test.c +++ b/posix/wordexp-test.c @@ -183,6 +183,7 @@ struct test_case_struct { 0, NULL, "$var", 0, 0, { NULL, }, IFS }, { 0, NULL, "\"\\n\"", 0, 1, { "\\n", }, IFS }, { 0, NULL, "", 0, 0, { NULL, }, IFS }, + { 0, NULL, "${1234567890123456789012}", 0, 0, { NULL, }, IFS }, /* Flags not already covered (testit() has special handling for these) */ { 0, NULL, "one two", WRDE_DOOFFS, 2, { "one", "two", }, IFS }, diff --git a/posix/wordexp.c b/posix/wordexp.c index e082d9489..56289503a 100644 --- a/posix/wordexp.c +++ b/posix/wordexp.c @@ -1399,7 +1399,7 @@ envsubst: /* Is it a numeric parameter? */ else if (isdigit (env[0])) { - int n = atoi (env); + unsigned long n = strtoul (env, NULL, 10); if (n >= __libc_argc) /* Substitute NULL. */ diff --git a/socket/Makefile b/socket/Makefile index 125c042ca..19fc6ab41 100644 --- a/socket/Makefile +++ b/socket/Makefile @@ -29,10 +29,14 @@ headers := sys/socket.h sys/un.h bits/sockaddr.h bits/socket.h \ routines := accept bind connect getpeername getsockname getsockopt \ listen recv recvfrom recvmsg send sendmsg sendto \ setsockopt shutdown socket socketpair isfdtype opensock \ - sockatmark accept4 recvmmsg sendmmsg + sockatmark accept4 recvmmsg sendmmsg sockaddr_un_set tests := tst-accept4 +tests-internal := \ + tst-sockaddr_un_set \ + # tests-internal + aux := sa_len include ../Rules diff --git a/socket/sockaddr_un_set.c b/socket/sockaddr_un_set.c new file mode 100644 index 000000000..0bd40dc34 --- /dev/null +++ b/socket/sockaddr_un_set.c @@ -0,0 +1,41 @@ +/* Set the sun_path member of struct sockaddr_un. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#include +#include +#include + +int +__sockaddr_un_set (struct sockaddr_un *addr, const char *pathname) +{ + size_t name_length = strlen (pathname); + + /* The kernel supports names of exactly sizeof (addr->sun_path) + bytes, without a null terminator, but userspace does not; see the + SUN_LEN macro. */ + if (name_length >= sizeof (addr->sun_path)) + { + __set_errno (EINVAL); /* Error code used by the kernel. */ + return -1; + } + + addr->sun_family = AF_UNIX; + memcpy (addr->sun_path, pathname, name_length + 1); + return 0; +} diff --git a/socket/tst-sockaddr_un_set.c b/socket/tst-sockaddr_un_set.c new file mode 100644 index 000000000..29c2a81af --- /dev/null +++ b/socket/tst-sockaddr_un_set.c @@ -0,0 +1,62 @@ +/* Test the __sockaddr_un_set function. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Re-compile the function because the version in libc is not + exported. */ +#include "sockaddr_un_set.c" + +#include + +static int +do_test (void) +{ + struct sockaddr_un sun; + + memset (&sun, 0xcc, sizeof (sun)); + __sockaddr_un_set (&sun, ""); + TEST_COMPARE (sun.sun_family, AF_UNIX); + TEST_COMPARE (__sockaddr_un_set (&sun, ""), 0); + + memset (&sun, 0xcc, sizeof (sun)); + TEST_COMPARE (__sockaddr_un_set (&sun, "/example"), 0); + TEST_COMPARE_STRING (sun.sun_path, "/example"); + + { + char pathname[108]; /* Length of sun_path (ABI constant). 
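   With 108 bytes holding 107 'x' characters plus the terminator,
   __sockaddr_un_set succeeds, since name_length (107) is below
   sizeof (addr->sun_path).  The 109-byte case further down produces
   108 characters and is rejected with EINVAL: userspace reserves room
   for the terminator (compare the SUN_LEN convention), even though the
   kernel itself accepts unterminated names of exactly 108 bytes.  The
   ABI assumption can be pinned down with:

     struct sockaddr_un sun;
     _Static_assert (sizeof sun.sun_path == 108, "Linux sun_path size");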
*/ + memset (pathname, 'x', sizeof (pathname)); + pathname[sizeof (pathname) - 1] = '\0'; + memset (&sun, 0xcc, sizeof (sun)); + TEST_COMPARE (__sockaddr_un_set (&sun, pathname), 0); + TEST_COMPARE (sun.sun_family, AF_UNIX); + TEST_COMPARE_STRING (sun.sun_path, pathname); + } + + { + char pathname[109]; + memset (pathname, 'x', sizeof (pathname)); + pathname[sizeof (pathname) - 1] = '\0'; + memset (&sun, 0xcc, sizeof (sun)); + errno = 0; + TEST_COMPARE (__sockaddr_un_set (&sun, pathname), -1); + TEST_COMPARE (errno, EINVAL); + } + + return 0; +} + +#include diff --git a/stdio-common/Makefile b/stdio-common/Makefile index 95af0c12d..5e92d6b9a 100644 --- a/stdio-common/Makefile +++ b/stdio-common/Makefile @@ -66,6 +66,10 @@ tests := tstscanf test_rdwr test-popen tstgetln test-fseek \ tst-scanf-round \ tst-renameat2 tst-bz11319 tst-bz11319-fortify2 \ scanf14a scanf16a \ + tst-printf-bz25691 \ + tst-vfprintf-width-prec-alloc \ + tst-grouping2 \ + # tests test-srcs = tst-unbputc tst-printf tst-printfsz-islongdouble @@ -75,10 +79,12 @@ tests-special += $(objpfx)tst-unbputc.out $(objpfx)tst-printf.out \ $(objpfx)tst-printf-bz18872-mem.out \ $(objpfx)tst-setvbuf1-cmp.out \ $(objpfx)tst-vfprintf-width-prec-mem.out \ - $(objpfx)tst-printfsz-islongdouble.out + $(objpfx)tst-printfsz-islongdouble.out \ + $(objpfx)tst-printf-bz25691-mem.out generated += tst-printf-bz18872.c tst-printf-bz18872.mtrace \ tst-printf-bz18872-mem.out \ - tst-vfprintf-width-prec.mtrace tst-vfprintf-width-prec-mem.out + tst-vfprintf-width-prec.mtrace tst-vfprintf-width-prec-mem.out \ + tst-printf-bz25691.mtrace tst-printf-bz25691-mem.out endif include ../Rules @@ -91,6 +97,7 @@ $(objpfx)bug14.out: $(gen-locales) $(objpfx)scanf13.out: $(gen-locales) $(objpfx)test-vfprintf.out: $(gen-locales) $(objpfx)tst-grouping.out: $(gen-locales) +$(objpfx)tst-grouping2.out: $(gen-locales) $(objpfx)tst-sprintf.out: $(gen-locales) $(objpfx)tst-sscanf.out: $(gen-locales) $(objpfx)tst-swprintf.out: $(gen-locales) @@ -100,6 +107,8 @@ endif tst-printf-bz18872-ENV = MALLOC_TRACE=$(objpfx)tst-printf-bz18872.mtrace tst-vfprintf-width-prec-ENV = \ MALLOC_TRACE=$(objpfx)tst-vfprintf-width-prec.mtrace +tst-printf-bz25691-ENV = \ + MALLOC_TRACE=$(objpfx)tst-printf-bz25691.mtrace $(objpfx)tst-unbputc.out: tst-unbputc.sh $(objpfx)tst-unbputc $(SHELL) $< $(common-objpfx) '$(test-program-prefix)' > $@; \ diff --git a/stdio-common/bug22.c b/stdio-common/bug22.c index b3d48eb8e..029b54994 100644 --- a/stdio-common/bug22.c +++ b/stdio-common/bug22.c @@ -57,7 +57,7 @@ do_test (void) ret = fprintf (fp, "%." SN3 "d", 1); printf ("ret = %d\n", ret); - if (ret != -1 || errno != EOVERFLOW) + if (ret != N3) return 1; /* GCC 9 warns about output of more than INT_MAX characters; this is diff --git a/stdio-common/tst-grouping2.c b/stdio-common/tst-grouping2.c new file mode 100644 index 000000000..3024c942a --- /dev/null +++ b/stdio-common/tst-grouping2.c @@ -0,0 +1,39 @@ +/* Test printf with grouping and large width (bug 29530) + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +static int +do_test (void) +{ + const int field_width = 1000; + char buf[field_width + 1]; + + xsetlocale (LC_NUMERIC, "de_DE.UTF-8"); + + /* This used to crash in group_number. */ + TEST_COMPARE (sprintf (buf, "%'*d", field_width, 1000), field_width); + TEST_COMPARE_STRING (buf + field_width - 6, " 1.000"); + + return 0; +} + +#include diff --git a/stdio-common/tst-printf-bz25691.c b/stdio-common/tst-printf-bz25691.c new file mode 100644 index 000000000..37b30a3a8 --- /dev/null +++ b/stdio-common/tst-printf-bz25691.c @@ -0,0 +1,108 @@ +/* Test for memory leak with large width (BZ#25691). + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static int +do_test (void) +{ + mtrace (); + + /* For 's' conversion specifier with 'l' modifier the array must be + converted to multibyte characters up to the precision specific + value. */ + { + /* The input size value is to force a heap allocation on temporary + buffer (in the old implementation). */ + const size_t winputsize = 64 * 1024 + 1; + wchar_t *winput = xmalloc (winputsize * sizeof (wchar_t)); + wmemset (winput, L'a', winputsize - 1); + winput[winputsize - 1] = L'\0'; + + char result[9]; + const char expected[] = "aaaaaaaa"; + int ret; + + ret = snprintf (result, sizeof (result), "%.65537ls", winput); + TEST_COMPARE (ret, winputsize - 1); + TEST_COMPARE_BLOB (result, sizeof (result), expected, sizeof (expected)); + + ret = snprintf (result, sizeof (result), "%ls", winput); + TEST_COMPARE (ret, winputsize - 1); + TEST_COMPARE_BLOB (result, sizeof (result), expected, sizeof (expected)); + + free (winput); + } + + /* For 's' converstion specifier the array is interpreted as a multibyte + character sequence and converted to wide characters up to the precision + specific value. */ + { + /* The input size value is to force a heap allocation on temporary + buffer (in the old implementation). 
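   In both directions the precision bounds the converted output: for
   the narrow snprintf's %ls above it limits the bytes produced from
   the wide argument, and for the wide swprintf's %s here it limits the
   wide characters produced from the multibyte argument, so within that
   bound the source need not be null-terminated.  For example, assuming
   a single-byte locale:

     char out[9];
     snprintf (out, sizeof out, "%.3ls", L"abcdef");   yields "abc"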
*/ + const size_t mbssize = 32 * 1024; + char *mbs = xmalloc (mbssize); + memset (mbs, 'a', mbssize - 1); + mbs[mbssize - 1] = '\0'; + + const size_t expectedsize = 32 * 1024; + wchar_t *expected = xmalloc (expectedsize * sizeof (wchar_t)); + wmemset (expected, L'a', expectedsize - 1); + expected[expectedsize-1] = L'\0'; + + const size_t resultsize = mbssize * sizeof (wchar_t); + wchar_t *result = xmalloc (resultsize); + int ret; + + ret = swprintf (result, resultsize, L"%.65537s", mbs); + TEST_COMPARE (ret, mbssize - 1); + TEST_COMPARE_BLOB (result, (ret + 1) * sizeof (wchar_t), + expected, expectedsize * sizeof (wchar_t)); + + ret = swprintf (result, resultsize, L"%1$.65537s", mbs); + TEST_COMPARE (ret, mbssize - 1); + TEST_COMPARE_BLOB (result, (ret + 1) * sizeof (wchar_t), + expected, expectedsize * sizeof (wchar_t)); + + /* Same test, but with an invalid multibyte sequence. */ + mbs[mbssize - 2] = 0xff; + + ret = swprintf (result, resultsize, L"%.65537s", mbs); + TEST_COMPARE (ret, -1); + + ret = swprintf (result, resultsize, L"%1$.65537s", mbs); + TEST_COMPARE (ret, -1); + + free (mbs); + free (result); + free (expected); + } + + return 0; +} + +#include diff --git a/stdio-common/tst-vfprintf-width-prec-alloc.c b/stdio-common/tst-vfprintf-width-prec-alloc.c new file mode 100644 index 000000000..0a74b53a3 --- /dev/null +++ b/stdio-common/tst-vfprintf-width-prec-alloc.c @@ -0,0 +1,41 @@ +/* Test large width or precision does not involve large allocation. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include + +char test_string[] = "test"; + +static int +do_test (void) +{ + struct rlimit limit; + TEST_VERIFY_EXIT (getrlimit (RLIMIT_AS, &limit) == 0); + limit.rlim_cur = 200 * 1024 * 1024; + TEST_VERIFY_EXIT (setrlimit (RLIMIT_AS, &limit) == 0); + FILE *fp = fopen ("/dev/null", "w"); + TEST_VERIFY_EXIT (fp != NULL); + TEST_COMPARE (fprintf (fp, "%1000000000d", 1), 1000000000); + TEST_COMPARE (fprintf (fp, "%.1000000000s", test_string), 4); + TEST_COMPARE (fprintf (fp, "%1000000000d %1000000000d", 1, 2), 2000000001); + TEST_COMPARE (fprintf (fp, "%2$.*1$s", 0x7fffffff, test_string), 4); + return 0; +} + +#include diff --git a/stdio-common/vfprintf-internal.c b/stdio-common/vfprintf-internal.c index 3be92d4b6..b1c8f5c43 100644 --- a/stdio-common/vfprintf-internal.c +++ b/stdio-common/vfprintf-internal.c @@ -31,6 +31,7 @@ #include #include #include +#include /* This code is shared between the standard stdio implementation found in GNU C library and the libio implementation originally found in @@ -45,10 +46,6 @@ #include #endif -/* In some cases we need extra space for all the output which is not - counted in the width of the string. We assume 32 characters is - enough. 
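   (Both this comment and EXTSIZ itself are deleted: after this patch
   the work buffer is never sized from user-supplied width or
   precision.  Padding is emitted in bounded chunks by the new
   pad_func, and string arguments are converted piecewise through a
   fixed buffer by outstring_converted_wide_string, so a format like
   "%1000000000d" needs no width-sized allocation;
   tst-vfprintf-width-prec-alloc.c above checks exactly that under a
   tight RLIMIT_AS.  The chunking idea, as a standalone sketch with a
   hypothetical emit_padding:

     #include <stdio.h>
     #include <string.h>

     static int
     emit_padding (FILE *fp, char pad, long int width)
     {
       char chunk[32];
       memset (chunk, pad, sizeof chunk);
       while (width > 0)
         {
           size_t n = width < (long int) sizeof chunk
                      ? (size_t) width : sizeof chunk;
           if (fwrite (chunk, 1, n, fp) != n)
             return -1;
           width -= n;
         }
       return 0;
     }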
*/ -#define EXTSIZ 32 #define ARGCHECK(S, Format) \ do \ { \ @@ -119,22 +116,38 @@ while (0) #endif -#define done_add(val) \ - do { \ - unsigned int _val = val; \ - assert ((unsigned int) done < (unsigned int) INT_MAX); \ - if (__glibc_unlikely (INT_MAX - done < _val)) \ - { \ - done = -1; \ - __set_errno (EOVERFLOW); \ - goto all_done; \ - } \ - done += _val; \ - } while (0) +/* Add LENGTH to DONE. Return the new value of DONE, or -1 on + overflow (and set errno accordingly). */ +static inline int +done_add_func (size_t length, int done) +{ + if (done < 0) + return done; + int ret; + if (INT_ADD_WRAPV (done, length, &ret)) + { + __set_errno (EOVERFLOW); + return -1; + } + return ret; +} + +#define done_add(val) \ + do \ + { \ + /* Ensure that VAL has a type similar to int. */ \ + _Static_assert (sizeof (val) == sizeof (int), "value int size"); \ + _Static_assert ((__typeof__ (val)) -1 < 0, "value signed"); \ + done = done_add_func ((val), done); \ + if (done < 0) \ + goto all_done; \ + } \ + while (0) #ifndef COMPILE_WPRINTF # define vfprintf __vfprintf_internal # define CHAR_T char +# define OTHER_CHAR_T wchar_t # define UCHAR_T unsigned char # define INT_T int typedef const char *THOUSANDS_SEP_T; @@ -143,25 +156,14 @@ typedef const char *THOUSANDS_SEP_T; # define STR_LEN(Str) strlen (Str) # define PUT(F, S, N) _IO_sputn ((F), (S), (N)) -# define PAD(Padchar) \ - do { \ - if (width > 0) \ - { \ - ssize_t written = _IO_padn (s, (Padchar), width); \ - if (__glibc_unlikely (written != width)) \ - { \ - done = -1; \ - goto all_done; \ - } \ - done_add (written); \ - } \ - } while (0) # define PUTC(C, F) _IO_putc_unlocked (C, F) # define ORIENT if (_IO_vtable_offset (s) == 0 && _IO_fwide (s, -1) != -1)\ return -1 +# define CONVERT_FROM_OTHER_STRING __wcsrtombs #else # define vfprintf __vfwprintf_internal # define CHAR_T wchar_t +# define OTHER_CHAR_T char /* This is a hack!!! There should be a type uwchar_t. */ # define UCHAR_T unsigned int /* uwchar_t */ # define INT_T wint_t @@ -173,21 +175,9 @@ typedef wchar_t THOUSANDS_SEP_T; # include <_itowa.h> # define PUT(F, S, N) _IO_sputn ((F), (S), (N)) -# define PAD(Padchar) \ - do { \ - if (width > 0) \ - { \ - ssize_t written = _IO_wpadn (s, (Padchar), width); \ - if (__glibc_unlikely (written != width)) \ - { \ - done = -1; \ - goto all_done; \ - } \ - done_add (written); \ - } \ - } while (0) # define PUTC(C, F) _IO_putwc_unlocked (C, F) # define ORIENT if (_IO_fwide (s, 1) != 1) return -1 +# define CONVERT_FROM_OTHER_STRING __mbsrtowcs # undef _itoa # define _itoa(Val, Buf, Base, Case) _itowa (Val, Buf, Base, Case) @@ -196,6 +186,33 @@ typedef wchar_t THOUSANDS_SEP_T; # define EOF WEOF #endif +static inline int +pad_func (FILE *s, CHAR_T padchar, int width, int done) +{ + if (width > 0) + { + ssize_t written; +#ifndef COMPILE_WPRINTF + written = _IO_padn (s, padchar, width); +#else + written = _IO_wpadn (s, padchar, width); +#endif + if (__glibc_unlikely (written != width)) + return -1; + return done_add_func (width, done); + } + return done; +} + +#define PAD(Padchar) \ + do \ + { \ + done = pad_func (s, (Padchar), width, done); \ + if (done < 0) \ + goto all_done; \ + } \ + while (0) + #include "_i18n_number.h" /* Include the shared code for parsing the format string. 
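   A note on the counting helper introduced above: printf returns the
   number of bytes written as an int, so done_add_func folds each
   length into the running total through INT_ADD_WRAPV (glibc's wrapper
   around the compiler's checked-add builtin) and converts wraparound
   into EOVERFLOW rather than returning a corrupted count.  A
   standalone equivalent, assuming GCC/Clang __builtin_add_overflow:

     #include <errno.h>

     static int
     counted_add (size_t length, int done)
     {
       int total;
       if (done < 0)		/* Already failed; stay failed.  */
         return done;
       if (__builtin_add_overflow (done, length, &total))
         {
           errno = EOVERFLOW;
           return -1;
         }
       return total;
     }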
*/ @@ -215,24 +232,115 @@ typedef wchar_t THOUSANDS_SEP_T; } \ while (0) -#define outstring(String, Len) \ - do \ - { \ - assert ((size_t) done <= (size_t) INT_MAX); \ - if ((size_t) PUT (s, (String), (Len)) != (size_t) (Len)) \ - { \ - done = -1; \ - goto all_done; \ - } \ - if (__glibc_unlikely (INT_MAX - done < (Len))) \ - { \ - done = -1; \ - __set_errno (EOVERFLOW); \ - goto all_done; \ - } \ - done += (Len); \ - } \ - while (0) +static inline int +outstring_func (FILE *s, const UCHAR_T *string, size_t length, int done) +{ + assert ((size_t) done <= (size_t) INT_MAX); + if ((size_t) PUT (s, string, length) != (size_t) (length)) + return -1; + return done_add_func (length, done); +} + +#define outstring(String, Len) \ + do \ + { \ + const void *string_ = (String); \ + done = outstring_func (s, string_, (Len), done); \ + if (done < 0) \ + goto all_done; \ + } \ + while (0) + +/* Write the string SRC to S. If PREC is non-negative, write at most + PREC bytes. If LEFT is true, perform left justification. */ +static int +outstring_converted_wide_string (FILE *s, const OTHER_CHAR_T *src, int prec, + int width, bool left, int done) +{ + /* Use a small buffer to combine processing of multiple characters. + CONVERT_FROM_OTHER_STRING expects the buffer size in (wide) + characters, and buf_length counts that. */ + enum { buf_length = 256 / sizeof (CHAR_T) }; + CHAR_T buf[buf_length]; + _Static_assert (sizeof (buf) > MB_LEN_MAX, + "buffer is large enough for a single multi-byte character"); + + /* Add the initial padding if needed. */ + if (width > 0 && !left) + { + /* Make a first pass to find the output width, so that we can + add the required padding. */ + mbstate_t mbstate = { 0 }; + const OTHER_CHAR_T *src_copy = src; + size_t total_written; + if (prec < 0) + total_written = CONVERT_FROM_OTHER_STRING + (NULL, &src_copy, 0, &mbstate); + else + { + /* The source might not be null-terminated. Enforce the + limit manually, based on the output length. */ + total_written = 0; + size_t limit = prec; + while (limit > 0 && src_copy != NULL) + { + size_t write_limit = buf_length; + if (write_limit > limit) + write_limit = limit; + size_t written = CONVERT_FROM_OTHER_STRING + (buf, &src_copy, write_limit, &mbstate); + if (written == (size_t) -1) + return -1; + if (written == 0) + break; + total_written += written; + limit -= written; + } + } + + /* Output initial padding. */ + if (total_written < width) + { + done = pad_func (s, L_(' '), width - total_written, done); + if (done < 0) + return done; + } + } + + /* Convert the input string, piece by piece. */ + size_t total_written = 0; + { + mbstate_t mbstate = { 0 }; + /* If prec is negative, remaining is not decremented, otherwise, + it serves as the write limit. */ + size_t remaining = -1; + if (prec >= 0) + remaining = prec; + while (remaining > 0 && src != NULL) + { + size_t write_limit = buf_length; + if (remaining < write_limit) + write_limit = remaining; + size_t written = CONVERT_FROM_OTHER_STRING + (buf, &src, write_limit, &mbstate); + if (written == (size_t) -1) + return -1; + if (written == 0) + break; + done = outstring_func (s, (const UCHAR_T *) buf, written, done); + if (done < 0) + return done; + total_written += written; + if (prec >= 0) + remaining -= written; + } + } + + /* Add final padding. */ + if (width > 0 && left && total_written < width) + return pad_func (s, L_(' '), width - total_written, done); + return done; +} /* For handling long_double and longlong we use the same flag. 
If `long' and `long long' are effectively the same type define it to @@ -1022,7 +1130,6 @@ static const uint8_t jump_table[] = LABEL (form_string): \ { \ size_t len; \ - int string_malloced; \ \ /* The string argument could in fact be `char *' or `wchar_t *'. \ But this should not make a difference here. */ \ @@ -1034,7 +1141,6 @@ static const uint8_t jump_table[] = /* Entry point for printing other strings. */ \ LABEL (print_string): \ \ - string_malloced = 0; \ if (string == NULL) \ { \ /* Write "(null)" if there's space. */ \ @@ -1051,41 +1157,12 @@ static const uint8_t jump_table[] = } \ else if (!is_long && spec != L_('S')) \ { \ - /* This is complicated. We have to transform the multibyte \ - string into a wide character string. */ \ - const char *mbs = (const char *) string; \ - mbstate_t mbstate; \ - \ - len = prec != -1 ? __strnlen (mbs, (size_t) prec) : strlen (mbs); \ - \ - /* Allocate dynamically an array which definitely is long \ - enough for the wide character version. Each byte in the \ - multi-byte string can produce at most one wide character. */ \ - if (__glibc_unlikely (len > SIZE_MAX / sizeof (wchar_t))) \ - { \ - __set_errno (EOVERFLOW); \ - done = -1; \ - goto all_done; \ - } \ - else if (__libc_use_alloca (len * sizeof (wchar_t))) \ - string = (CHAR_T *) alloca (len * sizeof (wchar_t)); \ - else if ((string = (CHAR_T *) malloc (len * sizeof (wchar_t))) \ - == NULL) \ - { \ - done = -1; \ - goto all_done; \ - } \ - else \ - string_malloced = 1; \ - \ - memset (&mbstate, '\0', sizeof (mbstate_t)); \ - len = __mbsrtowcs (string, &mbs, len, &mbstate); \ - if (len == (size_t) -1) \ - { \ - /* Illegal multibyte character. */ \ - done = -1; \ - goto all_done; \ - } \ + done = outstring_converted_wide_string \ + (s, (const char *) string, prec, width, left, done); \ + if (done < 0) \ + goto all_done; \ + /* The padding has already been written. */ \ + break; \ } \ else \ { \ @@ -1108,8 +1185,6 @@ static const uint8_t jump_table[] = outstring (string, len); \ if (left) \ PAD (L' '); \ - if (__glibc_unlikely (string_malloced)) \ - free (string); \ } \ break; #else @@ -1158,7 +1233,6 @@ static const uint8_t jump_table[] = LABEL (form_string): \ { \ size_t len; \ - int string_malloced; \ \ /* The string argument could in fact be `char *' or `wchar_t *'. \ But this should not make a difference here. */ \ @@ -1170,7 +1244,6 @@ static const uint8_t jump_table[] = /* Entry point for printing other strings. */ \ LABEL (print_string): \ \ - string_malloced = 0; \ if (string == NULL) \ { \ /* Write "(null)" if there's space. */ \ @@ -1196,51 +1269,12 @@ static const uint8_t jump_table[] = } \ else \ { \ - const wchar_t *s2 = (const wchar_t *) string; \ - mbstate_t mbstate; \ - \ - memset (&mbstate, '\0', sizeof (mbstate_t)); \ - \ - if (prec >= 0) \ - { \ - /* The string `s2' might not be NUL terminated. 
*/ \ - if (__libc_use_alloca (prec)) \ - string = (char *) alloca (prec); \ - else if ((string = (char *) malloc (prec)) == NULL) \ - { \ - done = -1; \ - goto all_done; \ - } \ - else \ - string_malloced = 1; \ - len = __wcsrtombs (string, &s2, prec, &mbstate); \ - } \ - else \ - { \ - len = __wcsrtombs (NULL, &s2, 0, &mbstate); \ - if (len != (size_t) -1) \ - { \ - assert (__mbsinit (&mbstate)); \ - s2 = (const wchar_t *) string; \ - if (__libc_use_alloca (len + 1)) \ - string = (char *) alloca (len + 1); \ - else if ((string = (char *) malloc (len + 1)) == NULL) \ - { \ - done = -1; \ - goto all_done; \ - } \ - else \ - string_malloced = 1; \ - (void) __wcsrtombs (string, &s2, len + 1, &mbstate); \ - } \ - } \ - \ - if (len == (size_t) -1) \ - { \ - /* Illegal wide-character string. */ \ - done = -1; \ - goto all_done; \ - } \ + done = outstring_converted_wide_string \ + (s, (const wchar_t *) string, prec, width, left, done); \ + if (done < 0) \ + goto all_done; \ + /* The padding has already been written. */ \ + break; \ } \ \ if ((width -= len) < 0) \ @@ -1254,8 +1288,6 @@ static const uint8_t jump_table[] = outstring (string, len); \ if (left) \ PAD (' '); \ - if (__glibc_unlikely (string_malloced)) \ - free (string); \ } \ break; #endif @@ -1307,7 +1339,6 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags) /* Buffer intermediate results. */ CHAR_T work_buffer[WORK_BUFFER_SIZE]; - CHAR_T *workstart = NULL; CHAR_T *workend; /* We have to save the original argument pointer. */ @@ -1416,7 +1447,6 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags) UCHAR_T pad = L_(' ');/* Padding character. */ CHAR_T spec; - workstart = NULL; workend = work_buffer + WORK_BUFFER_SIZE; /* Get current character in format string. */ @@ -1508,31 +1538,6 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags) pad = L_(' '); left = 1; } - - if (__glibc_unlikely (width >= INT_MAX / sizeof (CHAR_T) - EXTSIZ)) - { - __set_errno (EOVERFLOW); - done = -1; - goto all_done; - } - - if (width >= WORK_BUFFER_SIZE - EXTSIZ) - { - /* We have to use a special buffer. */ - size_t needed = ((size_t) width + EXTSIZ) * sizeof (CHAR_T); - if (__libc_use_alloca (needed)) - workend = (CHAR_T *) alloca (needed) + width + EXTSIZ; - else - { - workstart = (CHAR_T *) malloc (needed); - if (workstart == NULL) - { - done = -1; - goto all_done; - } - workend = workstart + width + EXTSIZ; - } - } } JUMP (*f, step1_jumps); @@ -1540,31 +1545,13 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags) LABEL (width): width = read_int (&f); - if (__glibc_unlikely (width == -1 - || width >= INT_MAX / sizeof (CHAR_T) - EXTSIZ)) + if (__glibc_unlikely (width == -1)) { __set_errno (EOVERFLOW); done = -1; goto all_done; } - if (width >= WORK_BUFFER_SIZE - EXTSIZ) - { - /* We have to use a special buffer. */ - size_t needed = ((size_t) width + EXTSIZ) * sizeof (CHAR_T); - if (__libc_use_alloca (needed)) - workend = (CHAR_T *) alloca (needed) + width + EXTSIZ; - else - { - workstart = (CHAR_T *) malloc (needed); - if (workstart == NULL) - { - done = -1; - goto all_done; - } - workend = workstart + width + EXTSIZ; - } - } if (*f == L_('$')) /* Oh, oh. The argument comes from a positional parameter. 
*/ goto do_positional; @@ -1613,34 +1600,6 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags) } else prec = 0; - if (prec > width && prec > WORK_BUFFER_SIZE - EXTSIZ) - { - /* Deallocate any previously allocated buffer because it is - too small. */ - if (__glibc_unlikely (workstart != NULL)) - free (workstart); - workstart = NULL; - if (__glibc_unlikely (prec >= INT_MAX / sizeof (CHAR_T) - EXTSIZ)) - { - __set_errno (EOVERFLOW); - done = -1; - goto all_done; - } - size_t needed = ((size_t) prec + EXTSIZ) * sizeof (CHAR_T); - - if (__libc_use_alloca (needed)) - workend = (CHAR_T *) alloca (needed) + prec + EXTSIZ; - else - { - workstart = (CHAR_T *) malloc (needed); - if (workstart == NULL) - { - done = -1; - goto all_done; - } - workend = workstart + prec + EXTSIZ; - } - } JUMP (*f, step2_jumps); /* Process 'h' modifier. There might another 'h' following. */ @@ -1704,10 +1663,6 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags) /* The format is correctly handled. */ ++nspecs_done; - if (__glibc_unlikely (workstart != NULL)) - free (workstart); - workstart = NULL; - /* Look for next format specifier. */ #ifdef COMPILE_WPRINTF f = __find_specwc ((end_of_spec = ++f)); @@ -1725,18 +1680,11 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags) /* Hand off processing for positional parameters. */ do_positional: - if (__glibc_unlikely (workstart != NULL)) - { - free (workstart); - workstart = NULL; - } done = printf_positional (s, format, readonly_format, ap, &ap_save, done, nspecs_done, lead_str_end, work_buffer, save_errno, grouping, thousands_sep, mode_flags); all_done: - if (__glibc_unlikely (workstart != NULL)) - free (workstart); /* Unlock the stream. */ _IO_funlockfile (s); _IO_cleanup_region_end (0); @@ -1780,8 +1728,6 @@ printf_positional (FILE *s, const CHAR_T *format, int readonly_format, /* Just a counter. */ size_t cnt; - CHAR_T *workstart = NULL; - if (grouping == (const char *) -1) { #ifdef COMPILE_WPRINTF @@ -1974,7 +1920,6 @@ printf_positional (FILE *s, const CHAR_T *format, int readonly_format, char pad = specs[nspecs_done].info.pad; CHAR_T spec = specs[nspecs_done].info.spec; - workstart = NULL; CHAR_T *workend = work_buffer + WORK_BUFFER_SIZE; /* Fill in last information. */ @@ -2008,27 +1953,6 @@ printf_positional (FILE *s, const CHAR_T *format, int readonly_format, prec = specs[nspecs_done].info.prec; } - /* Maybe the buffer is too small. */ - if (MAX (prec, width) + EXTSIZ > WORK_BUFFER_SIZE) - { - if (__libc_use_alloca ((MAX (prec, width) + EXTSIZ) - * sizeof (CHAR_T))) - workend = ((CHAR_T *) alloca ((MAX (prec, width) + EXTSIZ) - * sizeof (CHAR_T)) - + (MAX (prec, width) + EXTSIZ)); - else - { - workstart = (CHAR_T *) malloc ((MAX (prec, width) + EXTSIZ) - * sizeof (CHAR_T)); - if (workstart == NULL) - { - done = -1; - goto all_done; - } - workend = workstart + (MAX (prec, width) + EXTSIZ); - } - } - /* Process format specifiers. */ while (1) { @@ -2102,18 +2026,12 @@ printf_positional (FILE *s, const CHAR_T *format, int readonly_format, break; } - if (__glibc_unlikely (workstart != NULL)) - free (workstart); - workstart = NULL; - /* Write the following constant string. 
*/ outstring (specs[nspecs_done].end_of_fmt, specs[nspecs_done].next_fmt - specs[nspecs_done].end_of_fmt); } all_done: - if (__glibc_unlikely (workstart != NULL)) - free (workstart); scratch_buffer_free (&argsbuf); scratch_buffer_free (&specsbuf); return done; @@ -2236,7 +2154,8 @@ group_number (CHAR_T *front_ptr, CHAR_T *w, CHAR_T *rear_ptr, copy_rest: /* No further grouping to be done. Copy the rest of the number. */ - memmove (w, s, (front_ptr -s) * sizeof (CHAR_T)); + w -= s - front_ptr; + memmove (w, front_ptr, (s - front_ptr) * sizeof (CHAR_T)); break; } else if (*grouping != '\0') diff --git a/stdlib/Makefile b/stdlib/Makefile index 45214b59e..4615f6dfe 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -70,7 +70,7 @@ tests := tst-strtol tst-strtod testmb testrand testsort testdiv \ test-canon test-canon2 tst-strtoll tst-environ \ tst-xpg-basename tst-random tst-random2 tst-bsearch \ tst-limits tst-rand48 bug-strtod tst-setcontext \ - tst-setcontext2 test-a64l tst-qsort tst-system testmb2 \ + tst-setcontext2 test-a64l tst-qsort testmb2 \ bug-strtod2 tst-atof1 tst-atof2 tst-strtod2 \ tst-rand48-2 tst-makecontext tst-strtod5 \ tst-qsort2 tst-makecontext2 tst-strtod6 tst-unsetenv1 \ @@ -92,6 +92,7 @@ tests := tst-strtol tst-strtod testmb testrand testsort testdiv \ tests-internal := tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \ tst-tls-atexit tst-tls-atexit-nodelete tests-static := tst-secure-getenv +tests-container := tst-system ifeq ($(build-hardcoded-path-in-tests),yes) tests += tst-empty-env diff --git a/stdlib/tst-secure-getenv.c b/stdlib/tst-secure-getenv.c index 3cfe9a05c..d4b1139c5 100644 --- a/stdlib/tst-secure-getenv.c +++ b/stdlib/tst-secure-getenv.c @@ -30,167 +30,12 @@ #include #include +#include #include +#include #include static char MAGIC_ARGUMENT[] = "run-actual-test"; -#define MAGIC_STATUS 19 - -/* Return a GID which is not our current GID, but is present in the - supplementary group list. */ -static gid_t -choose_gid (void) -{ - int count = getgroups (0, NULL); - if (count < 0) - { - printf ("getgroups: %m\n"); - exit (1); - } - gid_t *groups; - groups = xcalloc (count, sizeof (*groups)); - int ret = getgroups (count, groups); - if (ret < 0) - { - printf ("getgroups: %m\n"); - exit (1); - } - gid_t current = getgid (); - gid_t not_current = 0; - for (int i = 0; i < ret; ++i) - { - if (groups[i] != current) - { - not_current = groups[i]; - break; - } - } - free (groups); - return not_current; -} - - -/* Copies the executable into a restricted directory, so that we can - safely make it SGID with the TARGET group ID. Then runs the - executable. 
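   (This hand-rolled copy/chown/chmod/fork machinery is deleted in this
   hunk; the logic now lives in the support library, shared with the
   other set-ID tests, behind a single call that is visible further
   down:

     int status = support_capture_subprogram_self_sgid (MAGIC_ARGUMENT);
     if (WEXITSTATUS (status) == EXIT_UNSUPPORTED)
       return EXIT_UNSUPPORTED;

   support_capture_subprogram_self_sgid copies the running test binary
   into a temporary directory, makes it SGID to one of the caller's
   supplementary groups, and re-executes it with the given argument.)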
*/ -static int -run_executable_sgid (gid_t target) -{ - char *dirname = xasprintf ("%s/secure-getenv.%jd", - test_dir, (intmax_t) getpid ()); - char *execname = xasprintf ("%s/bin", dirname); - int infd = -1; - int outfd = -1; - int ret = -1; - if (mkdir (dirname, 0700) < 0) - { - printf ("mkdir: %m\n"); - goto err; - } - infd = open ("/proc/self/exe", O_RDONLY); - if (infd < 0) - { - printf ("open (/proc/self/exe): %m\n"); - goto err; - } - outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700); - if (outfd < 0) - { - printf ("open (%s): %m\n", execname); - goto err; - } - char buf[4096]; - for (;;) - { - ssize_t rdcount = read (infd, buf, sizeof (buf)); - if (rdcount < 0) - { - printf ("read: %m\n"); - goto err; - } - if (rdcount == 0) - break; - char *p = buf; - char *end = buf + rdcount; - while (p != end) - { - ssize_t wrcount = write (outfd, buf, end - p); - if (wrcount == 0) - errno = ENOSPC; - if (wrcount <= 0) - { - printf ("write: %m\n"); - goto err; - } - p += wrcount; - } - } - if (fchown (outfd, getuid (), target) < 0) - { - printf ("fchown (%s): %m\n", execname); - goto err; - } - if (fchmod (outfd, 02750) < 0) - { - printf ("fchmod (%s): %m\n", execname); - goto err; - } - if (close (outfd) < 0) - { - printf ("close (outfd): %m\n"); - goto err; - } - if (close (infd) < 0) - { - printf ("close (infd): %m\n"); - goto err; - } - - int kid = fork (); - if (kid < 0) - { - printf ("fork: %m\n"); - goto err; - } - if (kid == 0) - { - /* Child process. */ - char *args[] = { execname, MAGIC_ARGUMENT, NULL }; - execve (execname, args, environ); - printf ("execve (%s): %m\n", execname); - _exit (1); - } - int status; - if (waitpid (kid, &status, 0) < 0) - { - printf ("waitpid: %m\n"); - goto err; - } - if (!WIFEXITED (status) || WEXITSTATUS (status) != MAGIC_STATUS) - { - printf ("Unexpected exit status %d from child process\n", - status); - goto err; - } - ret = 0; - -err: - if (outfd >= 0) - close (outfd); - if (infd >= 0) - close (infd); - if (execname) - { - unlink (execname); - free (execname); - } - if (dirname) - { - rmdir (dirname); - free (dirname); - } - return ret; -} static int do_test (void) @@ -212,15 +57,15 @@ do_test (void) exit (1); } - gid_t target = choose_gid (); - if (target == 0) - { - fprintf (stderr, - "Could not find a suitable GID for user %jd, skipping test\n", - (intmax_t) getuid ()); - exit (0); - } - return run_executable_sgid (target); + int status = support_capture_subprogram_self_sgid (MAGIC_ARGUMENT); + + if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) + return EXIT_UNSUPPORTED; + + if (!WIFEXITED (status)) + FAIL_EXIT1 ("Unexpected exit status %d from child process\n", status); + + return 0; } static void @@ -229,23 +74,15 @@ alternative_main (int argc, char **argv) if (argc == 2 && strcmp (argv[1], MAGIC_ARGUMENT) == 0) { if (getgid () == getegid ()) - { - /* This can happen if the file system is mounted nosuid. */ - fprintf (stderr, "SGID failed: GID and EGID match (%jd)\n", - (intmax_t) getgid ()); - exit (MAGIC_STATUS); - } + /* This can happen if the file system is mounted nosuid. 
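   When getgid () == getegid () the SGID bit did not take effect, so
   secure-execution mode cannot be exercised and the child now reports
   EXIT_UNSUPPORTED.  The property under test, in sketch form: after a
   set-ID exec (AT_SECURE), getenv still sees the variable while
   secure_getenv filters it:

     assert (getenv ("PATH") != NULL);          present in environ
     assert (secure_getenv ("PATH") == NULL);   filtered when AT_SECURE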
*/
+ FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n",
+ (intmax_t) getgid ());
if (getenv ("PATH") == NULL)
- {
- printf ("PATH variable not present\n");
- exit (3);
- }
+ FAIL_EXIT (3, "PATH variable not present\n");
if (secure_getenv ("PATH") != NULL)
- {
- printf ("PATH variable not filtered out\n");
- exit (4);
- }
- exit (MAGIC_STATUS);
+ FAIL_EXIT (4, "PATH variable not filtered out\n");
+
+ exit (EXIT_SUCCESS);
}
}
diff --git a/stdlib/tst-system.c b/stdlib/tst-system.c
index b6c5aea08..eddea33f4 100644
--- a/stdlib/tst-system.c
+++ b/stdlib/tst-system.c
@@ -17,14 +17,130 @@
. */
#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static char *tmpdir;
+static long int namemax;
+
+static void
+do_prepare (int argc, char *argv[])
+{
+ tmpdir = support_create_temp_directory ("tst-system-");
+ /* Include the last '\0'. */
+ namemax = pathconf (tmpdir, _PC_NAME_MAX) + 1;
+ TEST_VERIFY_EXIT (namemax != 0);
+}
+#define PREPARE do_prepare
+
+struct args
+{
+ const char *command;
+ int exit_status;
+ int term_sig;
+ const char *path;
+};
+
+static void
+call_system (void *closure)
+{
+ struct args *args = (struct args *) closure;
+ int ret;
+
+ if (args->path != NULL)
+ TEST_COMPARE (setenv ("PATH", args->path, 1), 0);
+ ret = system (args->command);
+ if (args->term_sig == 0)
+ {
+ /* Expect regular termination. */
+ TEST_VERIFY (WIFEXITED (ret) != 0);
+ TEST_COMPARE (WEXITSTATUS (ret), args->exit_status);
+ }
+ else
+ {
+ /* Expect termination by signal. */
+ TEST_VERIFY (WIFSIGNALED (ret) != 0);
+ TEST_COMPARE (WTERMSIG (ret), args->term_sig);
+ }
+}
static int
do_test (void)
{
- return system (":");
-}
+ TEST_VERIFY (system (NULL) != 0);
+ {
+ char cmd[namemax];
+ memset (cmd, 'a', sizeof(cmd));
+ cmd[sizeof(cmd) - 1] = '\0';
+
+ struct support_capture_subprocess result;
+ result = support_capture_subprocess (call_system,
+ &(struct args) {
+ cmd, 127, 0, tmpdir
+ });
+ support_capture_subprocess_check (&result, "system", 0, sc_allow_stderr);
+
+ char *returnerr = xasprintf ("%s: execing %s failed: "
+ "No such file or directory",
+ basename(_PATH_BSHELL), cmd);
+ TEST_COMPARE_STRING (result.err.buffer, returnerr);
+ free (returnerr);
+ }
+
+ {
+ char cmd[namemax + 1];
+ memset (cmd, 'a', sizeof(cmd));
+ cmd[sizeof(cmd) - 1] = '\0';
+
+ struct support_capture_subprocess result;
+ result = support_capture_subprocess (call_system,
+ &(struct args) {
+ cmd, 127, 0, tmpdir
+ });
+ support_capture_subprocess_check (&result, "system", 0, sc_allow_stderr);
+
+ char *returnerr = xasprintf ("%s: execing %s failed: "
+ "File name too long",
+ basename(_PATH_BSHELL), cmd);
+ TEST_COMPARE_STRING (result.err.buffer, returnerr);
+ free (returnerr);
+ }
+
+ {
+ struct support_capture_subprocess result;
+ result = support_capture_subprocess (call_system,
+ &(struct args) {
+ "kill $$", 0, SIGTERM
+ });
+ support_capture_subprocess_check (&result, "system", 0, sc_allow_none);
+ }
+
+ {
+ struct support_capture_subprocess result;
+ result = support_capture_subprocess (call_system,
+ &(struct args) { "echo ...", 0 });
+ support_capture_subprocess_check (&result, "system", 0, sc_allow_stdout);
+ TEST_COMPARE_STRING (result.out.buffer, "...\n");
+ }
+
+ {
+ struct support_capture_subprocess result;
+ result = support_capture_subprocess (call_system,
+ &(struct args) { "exit 1", 1 });
+ support_capture_subprocess_check (&result, "system", 0, sc_allow_none);
+ }
+
+ TEST_COMPARE (system (""), 0);
+
+ return 0;
+}
-#define TEST_FUNCTION 
do_test () -#include "../test-skeleton.c" +#include diff --git a/string/test-memchr.c b/string/test-memchr.c index 5dd0aa547..de70e794d 100644 --- a/string/test-memchr.c +++ b/string/test-memchr.c @@ -65,8 +65,8 @@ do_one_test (impl_t *impl, const CHAR *s, int c, size_t n, CHAR *exp_res) CHAR *res = CALL (impl, s, c, n); if (res != exp_res) { - error (0, 0, "Wrong result in function %s %p %p", impl->name, - res, exp_res); + error (0, 0, "Wrong result in function %s (%p, %d, %zu) -> %p != %p", + impl->name, s, c, n, res, exp_res); ret = 1; return; } @@ -91,7 +91,7 @@ do_test (size_t align, size_t pos, size_t len, size_t n, int seek_char) } buf[align + len] = 0; - if (pos < len) + if (pos < MIN(n, len)) { buf[align + pos] = seek_char; buf[align + len] = -seek_char; @@ -107,6 +107,38 @@ do_test (size_t align, size_t pos, size_t len, size_t n, int seek_char) do_one_test (impl, (CHAR *) (buf + align), seek_char, n, result); } +static void +do_overflow_tests (void) +{ + size_t i, j, len; + const size_t one = 1; + uintptr_t buf_addr = (uintptr_t) buf1; + + for (i = 0; i < 750; ++i) + { + do_test (0, i, 751, SIZE_MAX - i, BIG_CHAR); + do_test (0, i, 751, i - buf_addr, BIG_CHAR); + do_test (0, i, 751, -buf_addr - i, BIG_CHAR); + do_test (0, i, 751, SIZE_MAX - buf_addr - i, BIG_CHAR); + do_test (0, i, 751, SIZE_MAX - buf_addr + i, BIG_CHAR); + + len = 0; + for (j = 8 * sizeof(size_t) - 1; j ; --j) + { + len |= one << j; + do_test (0, i, 751, len - i, BIG_CHAR); + do_test (0, i, 751, len + i, BIG_CHAR); + do_test (0, i, 751, len - buf_addr - i, BIG_CHAR); + do_test (0, i, 751, len - buf_addr + i, BIG_CHAR); + + do_test (0, i, 751, ~len - i, BIG_CHAR); + do_test (0, i, 751, ~len + i, BIG_CHAR); + do_test (0, i, 751, ~len - buf_addr - i, BIG_CHAR); + do_test (0, i, 751, ~len - buf_addr + i, BIG_CHAR); + } + } +} + static void do_random_tests (void) { @@ -221,6 +253,7 @@ test_main (void) do_test (page_size / 2 - i, i, i, 1, 0x9B); do_random_tests (); + do_overflow_tests (); return ret; } diff --git a/string/test-strncat.c b/string/test-strncat.c index abbacb95c..0c7f68d08 100644 --- a/string/test-strncat.c +++ b/string/test-strncat.c @@ -134,6 +134,66 @@ do_test (size_t align1, size_t align2, size_t len1, size_t len2, } } +static void +do_overflow_tests (void) +{ + size_t i, j, len; + const size_t one = 1; + CHAR *s1, *s2; + uintptr_t s1_addr; + s1 = (CHAR *) buf1; + s2 = (CHAR *) buf2; + s1_addr = (uintptr_t)s1; + for (j = 0; j < 200; ++j) + s2[j] = 32 + 23 * j % (BIG_CHAR - 32); + s2[200] = 0; + for (i = 0; i < 750; ++i) { + for (j = 0; j < i; ++j) + s1[j] = 32 + 23 * j % (BIG_CHAR - 32); + s1[i] = '\0'; + + FOR_EACH_IMPL (impl, 0) + { + s2[200] = '\0'; + do_one_test (impl, s2, s1, SIZE_MAX - i); + s2[200] = '\0'; + do_one_test (impl, s2, s1, i - s1_addr); + s2[200] = '\0'; + do_one_test (impl, s2, s1, -s1_addr - i); + s2[200] = '\0'; + do_one_test (impl, s2, s1, SIZE_MAX - s1_addr - i); + s2[200] = '\0'; + do_one_test (impl, s2, s1, SIZE_MAX - s1_addr + i); + } + + len = 0; + for (j = 8 * sizeof(size_t) - 1; j ; --j) + { + len |= one << j; + FOR_EACH_IMPL (impl, 0) + { + s2[200] = '\0'; + do_one_test (impl, s2, s1, len - i); + s2[200] = '\0'; + do_one_test (impl, s2, s1, len + i); + s2[200] = '\0'; + do_one_test (impl, s2, s1, len - s1_addr - i); + s2[200] = '\0'; + do_one_test (impl, s2, s1, len - s1_addr + i); + + s2[200] = '\0'; + do_one_test (impl, s2, s1, ~len - i); + s2[200] = '\0'; + do_one_test (impl, s2, s1, ~len + i); + s2[200] = '\0'; + do_one_test (impl, s2, s1, ~len - s1_addr - i); + 
s2[200] = '\0';
+ do_one_test (impl, s2, s1, ~len - s1_addr + i);
+ }
+ }
+ }
+}
+
static void
do_random_tests (void)
{
@@ -316,6 +376,7 @@ test_main (void)
}
do_random_tests ();
+ do_overflow_tests ();
return ret;
}
diff --git a/string/test-strncmp.c b/string/test-strncmp.c
index d961ac449..02806f4eb 100644
--- a/string/test-strncmp.c
+++ b/string/test-strncmp.c
@@ -403,6 +403,18 @@ check2 (void)
free (s2);
}
+static void
+check3 (void)
+{
+ const CHAR *s1 = L ("abc");
+ CHAR *s2 = STRDUP (s1);
+
+ FOR_EACH_IMPL (impl, 0)
+ check_result (impl, s1, s2, SIZE_MAX, 0);
+
+ free (s2);
+}
+
int
test_main (void)
{
@@ -412,6 +424,7 @@ test_main (void)
check1 ();
check2 ();
+ check3 ();
printf ("%23s", "");
FOR_EACH_IMPL (impl, 0)
diff --git a/string/test-strnlen.c b/string/test-strnlen.c
index 80ac9e860..ca34352b0 100644
--- a/string/test-strnlen.c
+++ b/string/test-strnlen.c
@@ -27,6 +27,7 @@
#ifndef WIDE
# define STRNLEN strnlen
+# define MEMSET memset
# define CHAR char
# define BIG_CHAR CHAR_MAX
# define MIDDLE_CHAR 127
@@ -34,6 +35,7 @@
#else
# include <wchar.h>
# define STRNLEN wcsnlen
+# define MEMSET wmemset
# define CHAR wchar_t
# define BIG_CHAR WCHAR_MAX
# define MIDDLE_CHAR 1121
@@ -73,7 +75,7 @@ do_test (size_t align, size_t len, size_t maxlen, int max_char)
{
size_t i;
- align &= 63;
+ align &= (getpagesize () / sizeof (CHAR) - 1);
if ((align + len) * sizeof (CHAR) >= page_size)
return;
@@ -87,6 +89,56 @@ do_test (size_t align, size_t len, size_t maxlen, int max_char)
do_one_test (impl, (CHAR *) (buf + align), maxlen, MIN (len, maxlen));
}
+static void
+do_overflow_tests (void)
+{
+ size_t i, j, al_idx, repeats, len;
+ const size_t one = 1;
+ uintptr_t buf_addr = (uintptr_t) buf1;
+ const size_t alignments[] = { 0, 1, 7, 9, 31, 33, 63, 65, 95, 97, 127, 129 };
+
+ for (al_idx = 0; al_idx < sizeof (alignments) / sizeof (alignments[0]);
+ al_idx++)
+ {
+ for (repeats = 0; repeats < 2; ++repeats)
+ {
+ size_t align = repeats ? (getpagesize () - alignments[al_idx])
+ : alignments[al_idx];
+ align /= sizeof (CHAR);
+ for (i = 0; i < 750; ++i)
+ {
+ do_test (align, i, SIZE_MAX, BIG_CHAR);
+
+ do_test (align, i, SIZE_MAX - i, BIG_CHAR);
+ do_test (align, i, i - buf_addr, BIG_CHAR);
+ do_test (align, i, -buf_addr - i, BIG_CHAR);
+ do_test (align, i, SIZE_MAX - buf_addr - i, BIG_CHAR);
+ do_test (align, i, SIZE_MAX - buf_addr + i, BIG_CHAR);
+
+ len = 0;
+ for (j = 8 * sizeof (size_t) - 1; j; --j)
+ {
+ len |= one << j;
+ do_test (align, i, len, BIG_CHAR);
+ do_test (align, i, len - i, BIG_CHAR);
+ do_test (align, i, len + i, BIG_CHAR);
+ do_test (align, i, len - buf_addr - i, BIG_CHAR);
+ do_test (align, i, len - buf_addr + i, BIG_CHAR);
+
+ do_test (align, i, ~len - i, BIG_CHAR);
+ do_test (align, i, ~len + i, BIG_CHAR);
+ do_test (align, i, ~len - buf_addr - i, BIG_CHAR);
+ do_test (align, i, ~len - buf_addr + i, BIG_CHAR);
+
+ do_test (align, i, -buf_addr, BIG_CHAR);
+ do_test (align, i, j - buf_addr, BIG_CHAR);
+ do_test (align, i, -buf_addr - j, BIG_CHAR);
+ }
+ }
+ }
+ }
+}
+
static void
do_random_tests (void)
{
@@ -153,7 +205,7 @@ do_page_tests (void)
size_t last_offset = (page_size / sizeof (CHAR)) - 1;
CHAR *s = (CHAR *) buf2;
- memset (s, 65, (last_offset - 1));
+ MEMSET (s, 65, (last_offset - 1));
s[last_offset] = 0;
/* Place short strings ending at page boundary. */
@@ -196,6 +248,35 @@ do_page_tests (void)
}
}
+/* Tests meant to unveil failures in implementations that access bytes
+ beyond the maximum length. 
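The do_overflow_tests routines added to test-memchr.c, test-strncat.c, and test-strnlen.c all probe one failure mode: length arguments so large that buf + len wraps around the address space. A minimal sketch of the idea, assuming a flat address space; every call must still report the true length, because the terminator is found long before any of the huge limits is reached:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  char buf[751];
  memset (buf, 'A', sizeof buf - 1);
  buf[sizeof buf - 1] = '\0';

  /* An implementation that computes "end = buf + maxlen" before
     clamping wraps around and may scan (or fault) outside BUF.  */
  uintptr_t buf_addr = (uintptr_t) buf;
  size_t limits[] = { SIZE_MAX, SIZE_MAX - buf_addr, (size_t) -buf_addr };
  for (size_t i = 0; i < sizeof limits / sizeof limits[0]; i++)
    printf ("%zu\n", strnlen (buf, limits[i]));   /* 750 each time */
  return 0;
}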
*/ + +static void +do_page_2_tests (void) +{ + size_t i, exp_len, offset; + size_t last_offset = page_size / sizeof (CHAR); + + CHAR *s = (CHAR *) buf2; + MEMSET (s, 65, last_offset); + + /* Place short strings ending at page boundary without the null + byte. */ + offset = last_offset; + for (i = 0; i < 128; i++) + { + /* Decrease offset to stress several sizes and alignments. */ + offset--; + exp_len = last_offset - offset; + FOR_EACH_IMPL (impl, 0) + { + /* If an implementation goes beyond EXP_LEN, it will trigger + the segfault. */ + do_one_test (impl, (CHAR *) (s + offset), exp_len, exp_len); + } + } +} + int test_main (void) { @@ -242,6 +323,8 @@ test_main (void) do_random_tests (); do_page_tests (); + do_page_2_tests (); + do_overflow_tests (); return ret; } diff --git a/sunrpc/Makefile b/sunrpc/Makefile index d5840d077..162a5cef5 100644 --- a/sunrpc/Makefile +++ b/sunrpc/Makefile @@ -95,7 +95,8 @@ others += rpcgen endif tests = tst-xdrmem tst-xdrmem2 test-rpcent tst-udp-error tst-udp-timeout \ - tst-udp-nonblocking + tst-udp-nonblocking tst-bug22542 tst-bug28768 + xtests := tst-getmyaddr ifeq ($(have-thread-library),yes) @@ -246,3 +247,5 @@ $(objpfx)tst-udp-timeout: $(common-objpfx)linkobj/libc.so $(objpfx)tst-udp-nonblocking: $(common-objpfx)linkobj/libc.so $(objpfx)tst-udp-garbage: \ $(common-objpfx)linkobj/libc.so $(shared-thread-library) + +$(objpfx)tst-bug22542: $(common-objpfx)linkobj/libc.so diff --git a/sunrpc/clnt_gen.c b/sunrpc/clnt_gen.c index 13ced8994..b44357cd8 100644 --- a/sunrpc/clnt_gen.c +++ b/sunrpc/clnt_gen.c @@ -57,9 +57,13 @@ clnt_create (const char *hostname, u_long prog, u_long vers, if (strcmp (proto, "unix") == 0) { - memset ((char *)&sun, 0, sizeof (sun)); - sun.sun_family = AF_UNIX; - strcpy (sun.sun_path, hostname); + if (__sockaddr_un_set (&sun, hostname) < 0) + { + struct rpc_createerr *ce = &get_rpc_createerr (); + ce->cf_stat = RPC_SYSTEMERROR; + ce->cf_error.re_errno = errno; + return NULL; + } sock = RPC_ANYSOCK; client = clntunix_create (&sun, prog, vers, &sock, 0, 0); if (client == NULL) diff --git a/sunrpc/svc_unix.c b/sunrpc/svc_unix.c index e01afeabe..b065d6063 100644 --- a/sunrpc/svc_unix.c +++ b/sunrpc/svc_unix.c @@ -154,7 +154,10 @@ svcunix_create (int sock, u_int sendsize, u_int recvsize, char *path) SVCXPRT *xprt; struct unix_rendezvous *r; struct sockaddr_un addr; - socklen_t len = sizeof (struct sockaddr_in); + socklen_t len = sizeof (addr); + + if (__sockaddr_un_set (&addr, path) < 0) + return NULL; if (sock == RPC_ANYSOCK) { @@ -165,12 +168,6 @@ svcunix_create (int sock, u_int sendsize, u_int recvsize, char *path) } madesock = TRUE; } - memset (&addr, '\0', sizeof (addr)); - addr.sun_family = AF_UNIX; - len = strlen (path) + 1; - memcpy (addr.sun_path, path, len); - len += sizeof (addr.sun_family); - __bind (sock, (struct sockaddr *) &addr, len); if (__getsockname (sock, (struct sockaddr *) &addr, &len) != 0 diff --git a/sunrpc/tst-bug22542.c b/sunrpc/tst-bug22542.c new file mode 100644 index 000000000..d6cd79787 --- /dev/null +++ b/sunrpc/tst-bug22542.c @@ -0,0 +1,44 @@ +/* Test to verify that overlong hostname is rejected by clnt_create + and doesn't cause a buffer overflow (bug 22542). + + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
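The clnt_gen.c and svc_unix.c hunks above both route AF_UNIX path handling through the new __sockaddr_un_set helper instead of an unchecked strcpy/memcpy into sun_path. A hypothetical re-implementation of the check it performs; the tests below rely on exactly this EINVAL behavior:

#include <errno.h>
#include <string.h>
#include <sys/un.h>

static int
sockaddr_un_set_sketch (struct sockaddr_un *addr, const char *pathname)
{
  size_t len = strlen (pathname) + 1;   /* count the terminating '\0' */
  if (len > sizeof addr->sun_path)
    {
      errno = EINVAL;       /* overlong name: fail instead of overflowing */
      return -1;
    }
  memset (addr, 0, sizeof *addr);
  addr->sun_family = AF_UNIX;
  memcpy (addr->sun_path, pathname, len);
  return 0;
}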
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include + +static int +do_test (void) +{ + /* Create an arbitrary hostname that's longer than fits in sun_path. */ + char name [sizeof ((struct sockaddr_un*)0)->sun_path * 2]; + memset (name, 'x', sizeof name - 1); + name [sizeof name - 1] = '\0'; + + errno = 0; + CLIENT *clnt = clnt_create (name, 0, 0, "unix"); + + TEST_VERIFY (clnt == NULL); + TEST_COMPARE (errno, EINVAL); + return 0; +} + +#include diff --git a/sunrpc/tst-bug28768.c b/sunrpc/tst-bug28768.c new file mode 100644 index 000000000..35a4b7b0b --- /dev/null +++ b/sunrpc/tst-bug28768.c @@ -0,0 +1,42 @@ +/* Test to verify that long path is rejected by svcunix_create (bug 28768). + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include + +/* svcunix_create does not have a default version in linkobj/libc.so. */ +compat_symbol_reference (libc, svcunix_create, svcunix_create, GLIBC_2_1); + +static int +do_test (void) +{ + char pathname[109]; + memset (pathname, 'x', sizeof (pathname)); + pathname[sizeof (pathname) - 1] = '\0'; + + errno = 0; + TEST_VERIFY (svcunix_create (RPC_ANYSOCK, 4096, 4096, pathname) == NULL); + TEST_COMPARE (errno, EINVAL); + + return 0; +} + +#include diff --git a/support/Makefile b/support/Makefile index 3325feb79..05e8c292b 100644 --- a/support/Makefile +++ b/support/Makefile @@ -83,8 +83,10 @@ libsupport-routines = \ xasprintf \ xbind \ xcalloc \ + xchdir \ xchroot \ xclock_gettime \ + xclone \ xclose \ xconnect \ xcopy_file_range \ diff --git a/support/capture_subprocess.h b/support/capture_subprocess.h index 9808750f8..421f65767 100644 --- a/support/capture_subprocess.h +++ b/support/capture_subprocess.h @@ -41,6 +41,12 @@ struct support_capture_subprocess support_capture_subprocess struct support_capture_subprocess support_capture_subprogram (const char *file, char *const argv[]); +/* Copy the running program into a setgid binary and run it with CHILD_ID + argument. If execution is successful, return the exit status of the child + program, otherwise return a non-zero failure exit code. 
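A usage sketch for the new helper, mirroring how the reworked tst-secure-getenv drives it (the support/ test harness is assumed; the re-exec branch that handles MAGIC_ARGUMENT in the SGID child is omitted here):

#include <stdlib.h>
#include <sys/wait.h>
#include <support/capture_subprocess.h>
#include <support/check.h>
#include <support/test-driver.h>

static char MAGIC_ARGUMENT[] = "run-actual-test";

static int
do_test (void)
{
  int status = support_capture_subprogram_self_sgid (MAGIC_ARGUMENT);

  if (WEXITSTATUS (status) == EXIT_UNSUPPORTED)
    return EXIT_UNSUPPORTED;       /* no usable group, nosuid mount, ...  */
  if (!WIFEXITED (status))
    FAIL_EXIT1 ("child did not exit normally (status %d)", status);
  return 0;
}

#include <support/test-driver.c>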
*/ +int support_capture_subprogram_self_sgid + (char *child_id); + /* Deallocate the subprocess data captured by support_capture_subprocess. */ void support_capture_subprocess_free (struct support_capture_subprocess *); diff --git a/support/shell-container.c b/support/shell-container.c index 509e0d69b..201536d24 100644 --- a/support/shell-container.c +++ b/support/shell-container.c @@ -135,6 +135,37 @@ copy_func (char **argv) } +/* Emulate the 'exit' builtin. The exit value is optional. */ +static int +exit_func (char **argv) +{ + int exit_val = 0; + + if (argv[0] != 0) + exit_val = atoi (argv[0]) & 0xff; + exit (exit_val); + return 0; +} + +/* Emulate the "/bin/kill" command. Options are ignored. */ +static int +kill_func (char **argv) +{ + int signum = SIGTERM; + int i; + + for (i = 0; argv[i]; i++) + { + pid_t pid; + if (strcmp (argv[i], "$$") == 0) + pid = getpid (); + else + pid = atoi (argv[i]); + kill (pid, signum); + } + return 0; +} + /* This is a list of all the built-in commands we understand. */ static struct { const char *name; @@ -143,6 +174,8 @@ static struct { { "true", true_func }, { "echo", echo_func }, { "cp", copy_func }, + { "exit", exit_func }, + { "kill", kill_func }, { NULL, NULL } }; @@ -238,7 +271,7 @@ run_command_array (char **argv) fprintf (stderr, "sh: execing %s failed: %s", argv[0], strerror (errno)); - exit (1); + exit (127); } waitpid (pid, &status, 0); @@ -251,6 +284,11 @@ run_command_array (char **argv) if (rv) exit (rv); } + else if (WIFSIGNALED (status)) + { + int sig = WTERMSIG (status); + raise (sig); + } else exit (1); } diff --git a/support/subprocess.h b/support/subprocess.h index 8b442fd5c..34ffd02e8 100644 --- a/support/subprocess.h +++ b/support/subprocess.h @@ -38,6 +38,11 @@ struct support_subprocess support_subprocess struct support_subprocess support_subprogram (const char *file, char *const argv[]); +/* Invoke program FILE with ARGV arguments by using posix_spawn and wait for it + to complete. Return program exit status. */ +int support_subprogram_wait + (const char *file, char *const argv[]); + /* Wait for the subprocess indicated by PROC::PID. Return the status indicate by waitpid call. */ int support_process_wait (struct support_subprocess *proc); diff --git a/support/support.h b/support/support.h index 77d68c2ab..0536474c4 100644 --- a/support/support.h +++ b/support/support.h @@ -23,6 +23,7 @@ #ifndef SUPPORT_H #define SUPPORT_H +#include #include #include /* For mode_t. */ diff --git a/support/support_capture_subprocess.c b/support/support_capture_subprocess.c index eeed676e3..28a37df67 100644 --- a/support/support_capture_subprocess.c +++ b/support/support_capture_subprocess.c @@ -20,11 +20,14 @@ #include #include +#include #include #include #include #include #include +#include +#include static void transfer (const char *what, struct pollfd *pfd, struct xmemstream *stream) @@ -36,7 +39,7 @@ transfer (const char *what, struct pollfd *pfd, struct xmemstream *stream) if (ret < 0) { support_record_failure (); - printf ("error: reading from subprocess %s: %m", what); + printf ("error: reading from subprocess %s: %m\n", what); pfd->events = 0; pfd->revents = 0; } @@ -102,6 +105,129 @@ support_capture_subprogram (const char *file, char *const argv[]) return result; } +/* Copies the executable into a restricted directory, so that we can + safely make it SGID with the TARGET group ID. Then runs the + executable. 
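Complementing the new exit and kill builtins, run_command_array above now forwards a child's abnormal termination by re-raising the signal, so that system () in the parent observes WIFSIGNALED/WTERMSIG just as with a real /bin/sh; tst-system's "kill $$" case depends on this. The propagation logic in isolation, as a sketch:

#include <signal.h>
#include <stdlib.h>
#include <sys/wait.h>

static void
propagate_status (int status)
{
  if (WIFEXITED (status) && WEXITSTATUS (status) != 0)
    exit (WEXITSTATUS (status));   /* plain failure: forward the exit code */
  else if (WIFSIGNALED (status))
    raise (WTERMSIG (status));     /* die by the same signal */
}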
*/ +static int +copy_and_spawn_sgid (char *child_id, gid_t gid) +{ + char *dirname = xasprintf ("%s/tst-tunables-setuid.%jd", + test_dir, (intmax_t) getpid ()); + char *execname = xasprintf ("%s/bin", dirname); + int infd = -1; + int outfd = -1; + int ret = 1, status = 1; + + TEST_VERIFY (mkdir (dirname, 0700) == 0); + if (support_record_failure_is_failed ()) + goto err; + + infd = open ("/proc/self/exe", O_RDONLY); + if (infd < 0) + FAIL_UNSUPPORTED ("unsupported: Cannot read binary from procfs\n"); + + outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700); + TEST_VERIFY (outfd >= 0); + if (support_record_failure_is_failed ()) + goto err; + + char buf[4096]; + for (;;) + { + ssize_t rdcount = read (infd, buf, sizeof (buf)); + TEST_VERIFY (rdcount >= 0); + if (support_record_failure_is_failed ()) + goto err; + if (rdcount == 0) + break; + char *p = buf; + char *end = buf + rdcount; + while (p != end) + { + ssize_t wrcount = write (outfd, buf, end - p); + if (wrcount == 0) + errno = ENOSPC; + TEST_VERIFY (wrcount > 0); + if (support_record_failure_is_failed ()) + goto err; + p += wrcount; + } + } + TEST_VERIFY (fchown (outfd, getuid (), gid) == 0); + if (support_record_failure_is_failed ()) + goto err; + TEST_VERIFY (fchmod (outfd, 02750) == 0); + if (support_record_failure_is_failed ()) + goto err; + TEST_VERIFY (close (outfd) == 0); + if (support_record_failure_is_failed ()) + goto err; + TEST_VERIFY (close (infd) == 0); + if (support_record_failure_is_failed ()) + goto err; + + /* We have the binary, now spawn the subprocess. Avoid using + support_subprogram because we only want the program exit status, not the + contents. */ + ret = 0; + + char * const args[] = {execname, child_id, NULL}; + + status = support_subprogram_wait (args[0], args); + +err: + if (outfd >= 0) + close (outfd); + if (infd >= 0) + close (infd); + if (execname != NULL) + { + unlink (execname); + free (execname); + } + if (dirname != NULL) + { + rmdir (dirname); + free (dirname); + } + + if (ret != 0) + FAIL_EXIT1("Failed to make sgid executable for test\n"); + + return status; +} + +int +support_capture_subprogram_self_sgid (char *child_id) +{ + gid_t target = 0; + const int count = 64; + gid_t groups[count]; + + /* Get a GID which is not our current GID, but is present in the + supplementary group list. 
*/ + int ret = getgroups (count, groups); + if (ret < 0) + FAIL_UNSUPPORTED("Could not get group list for user %jd\n", + (intmax_t) getuid ()); + + gid_t current = getgid (); + for (int i = 0; i < ret; ++i) + { + if (groups[i] != current) + { + target = groups[i]; + break; + } + } + + if (target == 0) + FAIL_UNSUPPORTED("Could not find a suitable GID for user %jd\n", + (intmax_t) getuid ()); + + return copy_and_spawn_sgid (child_id, target); +} + void support_capture_subprocess_free (struct support_capture_subprocess *p) { diff --git a/support/support_subprocess.c b/support/support_subprocess.c index 36e3a77af..4a2582811 100644 --- a/support/support_subprocess.c +++ b/support/support_subprocess.c @@ -27,7 +27,7 @@ #include static struct support_subprocess -support_suprocess_init (void) +support_subprocess_init (void) { struct support_subprocess result; @@ -48,7 +48,7 @@ support_suprocess_init (void) struct support_subprocess support_subprocess (void (*callback) (void *), void *closure) { - struct support_subprocess result = support_suprocess_init (); + struct support_subprocess result = support_subprocess_init (); result.pid = xfork (); if (result.pid == 0) @@ -71,7 +71,7 @@ support_subprocess (void (*callback) (void *), void *closure) struct support_subprocess support_subprogram (const char *file, char *const argv[]) { - struct support_subprocess result = support_suprocess_init (); + struct support_subprocess result = support_subprocess_init (); posix_spawn_file_actions_t fa; /* posix_spawn_file_actions_init does not fail. */ @@ -84,7 +84,7 @@ support_subprogram (const char *file, char *const argv[]) xposix_spawn_file_actions_addclose (&fa, result.stdout_pipe[1]); xposix_spawn_file_actions_addclose (&fa, result.stderr_pipe[1]); - result.pid = xposix_spawn (file, &fa, NULL, argv, NULL); + result.pid = xposix_spawn (file, &fa, NULL, argv, environ); xclose (result.stdout_pipe[1]); xclose (result.stderr_pipe[1]); @@ -92,6 +92,19 @@ support_subprogram (const char *file, char *const argv[]) return result; } +int +support_subprogram_wait (const char *file, char *const argv[]) +{ + posix_spawn_file_actions_t fa; + + posix_spawn_file_actions_init (&fa); + struct support_subprocess res = support_subprocess_init (); + + res.pid = xposix_spawn (file, &fa, NULL, argv, environ); + + return support_process_wait (&res); +} + int support_process_wait (struct support_subprocess *proc) { diff --git a/support/temp_file.c b/support/temp_file.c index 277c5e0cf..e41128c2d 100644 --- a/support/temp_file.c +++ b/support/temp_file.c @@ -1,5 +1,6 @@ /* Temporary file handling for tests. - Copyright (C) 1998-2020 Free Software Foundation, Inc. + Copyright (C) 1998-2022 Free Software Foundation, Inc. + Copyright The GNU Tools Authors. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,15 +21,17 @@ some 32-bit platforms. */ #define _FILE_OFFSET_BITS 64 +#include #include #include #include +#include #include #include #include #include -#include +#include /* List of temporary files. */ static struct temp_name_list @@ -36,14 +39,20 @@ static struct temp_name_list struct temp_name_list *next; char *name; pid_t owner; + bool toolong; } *temp_name_list; /* Location of the temporary files. Set by the test skeleton via support_set_test_dir. The string is not be freed. */ static const char *test_dir = _PATH_TMP; -void -add_temp_file (const char *name) +/* Name of subdirectories in a too long temporary directory tree. 
*/ +static char toolong_subdir[NAME_MAX + 1]; +static bool toolong_initialized; +static size_t toolong_path_max; + +static void +add_temp_file_internal (const char *name, bool toolong) { struct temp_name_list *newp = (struct temp_name_list *) xcalloc (sizeof (*newp), 1); @@ -53,21 +62,26 @@ add_temp_file (const char *name) newp->name = newname; newp->next = temp_name_list; newp->owner = getpid (); + newp->toolong = toolong; temp_name_list = newp; } else free (newp); } +void +add_temp_file (const char *name) +{ + add_temp_file_internal (name, false); +} + int -create_temp_file (const char *base, char **filename) +create_temp_file_in_dir (const char *base, const char *dir, char **filename) { char *fname; int fd; - fname = (char *) xmalloc (strlen (test_dir) + 1 + strlen (base) - + sizeof ("XXXXXX")); - strcpy (stpcpy (stpcpy (stpcpy (fname, test_dir), "/"), base), "XXXXXX"); + fname = xasprintf ("%s/%sXXXXXX", dir, base); fd = mkstemp (fname); if (fd == -1) @@ -86,8 +100,14 @@ create_temp_file (const char *base, char **filename) return fd; } -char * -support_create_temp_directory (const char *base) +int +create_temp_file (const char *base, char **filename) +{ + return create_temp_file_in_dir (base, test_dir, filename); +} + +static char * +create_temp_directory_internal (const char *base, bool toolong) { char *path = xasprintf ("%s/%sXXXXXX", test_dir, base); if (mkdtemp (path) == NULL) @@ -95,16 +115,132 @@ support_create_temp_directory (const char *base) printf ("error: mkdtemp (\"%s\"): %m", path); exit (1); } - add_temp_file (path); + add_temp_file_internal (path, toolong); return path; } -/* Helper functions called by the test skeleton follow. */ +char * +support_create_temp_directory (const char *base) +{ + return create_temp_directory_internal (base, false); +} + +static void +ensure_toolong_initialized (void) +{ + if (!toolong_initialized) + FAIL_EXIT1 ("uninitialized toolong directory tree\n"); +} + +static void +initialize_toolong (const char *base) +{ + long name_max = pathconf (base, _PC_NAME_MAX); + name_max = (name_max < 0 ? 64 + : (name_max < sizeof (toolong_subdir) ? name_max + : sizeof (toolong_subdir) - 1)); + + long path_max = pathconf (base, _PC_PATH_MAX); + path_max = (path_max < 0 ? 1024 + : path_max <= PTRDIFF_MAX ? path_max : PTRDIFF_MAX); + + /* Sanity check to ensure that the test does not create temporary directories + in different filesystems because this API doesn't support it. */ + if (toolong_initialized) + { + if (name_max != strlen (toolong_subdir)) + FAIL_UNSUPPORTED ("name_max: Temporary directories in different" + " filesystems not supported yet\n"); + if (path_max != toolong_path_max) + FAIL_UNSUPPORTED ("path_max: Temporary directories in different" + " filesystems not supported yet\n"); + return; + } + + toolong_path_max = path_max; + + size_t len = name_max; + memset (toolong_subdir, 'X', len); + toolong_initialized = true; +} + +char * +support_create_and_chdir_toolong_temp_directory (const char *basename) +{ + char *base = create_temp_directory_internal (basename, true); + xchdir (base); + + initialize_toolong (base); + + size_t sz = strlen (toolong_subdir); + + /* Create directories and descend into them so that the final path is larger + than PATH_MAX. 
*/ + for (size_t i = 0; i <= toolong_path_max / sz; i++) + { + int ret = mkdir (toolong_subdir, S_IRWXU); + if (ret != 0 && errno == ENAMETOOLONG) + FAIL_UNSUPPORTED ("Filesystem does not support creating too long " + "directory trees\n"); + else if (ret != 0) + FAIL_EXIT1 ("Failed to create directory tree: %m\n"); + xchdir (toolong_subdir); + } + return base; +} void -support_set_test_dir (const char *path) +support_chdir_toolong_temp_directory (const char *base) { - test_dir = path; + ensure_toolong_initialized (); + + xchdir (base); + + size_t sz = strlen (toolong_subdir); + for (size_t i = 0; i <= toolong_path_max / sz; i++) + xchdir (toolong_subdir); +} + +/* Helper functions called by the test skeleton follow. */ + +static void +remove_toolong_subdirs (const char *base) +{ + ensure_toolong_initialized (); + + if (chdir (base) != 0) + { + printf ("warning: toolong cleanup base failed: chdir (\"%s\"): %m\n", + base); + return; + } + + /* Descend. */ + int levels = 0; + size_t sz = strlen (toolong_subdir); + for (levels = 0; levels <= toolong_path_max / sz; levels++) + if (chdir (toolong_subdir) != 0) + { + printf ("warning: toolong cleanup failed: chdir (\"%s\"): %m\n", + toolong_subdir); + break; + } + + /* Ascend and remove. */ + while (--levels >= 0) + { + if (chdir ("..") != 0) + { + printf ("warning: toolong cleanup failed: chdir (\"..\"): %m\n"); + return; + } + if (remove (toolong_subdir) != 0) + { + printf ("warning: could not remove subdirectory: %s: %m\n", + toolong_subdir); + return; + } + } } void @@ -119,6 +255,9 @@ support_delete_temp_files (void) around, to prevent PID reuse.) */ if (temp_name_list->owner == pid) { + if (temp_name_list->toolong) + remove_toolong_subdirs (temp_name_list->name); + if (remove (temp_name_list->name) != 0) printf ("warning: could not remove temporary file: %s: %m\n", temp_name_list->name); @@ -143,3 +282,9 @@ support_print_temp_files (FILE *f) fprintf (f, ")\n"); } } + +void +support_set_test_dir (const char *path) +{ + test_dir = path; +} diff --git a/support/temp_file.h b/support/temp_file.h index 8b6303a6e..2598f8213 100644 --- a/support/temp_file.h +++ b/support/temp_file.h @@ -32,11 +32,27 @@ void add_temp_file (const char *name); *FILENAME. */ int create_temp_file (const char *base, char **filename); +/* Create a temporary file in directory DIR. Return the opened file + descriptor on success, or -1 on failure. Write the file name to + *FILENAME if FILENAME is not NULL. In this case, the caller is + expected to free *FILENAME. */ +int create_temp_file_in_dir (const char *base, const char *dir, + char **filename); + /* Create a temporary directory and schedule it for deletion. BASE is used as a prefix for the unique directory name, which the function returns. The caller should free this string. */ char *support_create_temp_directory (const char *base); +/* Create a temporary directory tree that is longer than PATH_MAX and schedule + it for deletion. BASENAME is used as a prefix for the unique directory + name, which the function returns. The caller should free this string. */ +char *support_create_and_chdir_toolong_temp_directory (const char *basename); + +/* Change into the innermost directory of the directory tree BASE, which was + created using support_create_and_chdir_toolong_temp_directory. 
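A hypothetical test using the too-long-path helpers declared in the temp_file.h hunk here; the names match the new declarations, and error handling is elided:

#include <stdlib.h>
#include <support/temp_file.h>

static int
do_test (void)
{
  /* Create and enter a tree whose absolute path exceeds PATH_MAX; it is
     registered for deletion when the test driver exits.  */
  char *base = support_create_and_chdir_toolong_temp_directory ("tst-deep-");

  /* ... exercise code that must cope with a very deep working
     directory, e.g. getcwd into a small buffer ...  */

  /* Descend into the same innermost directory again later on.  */
  support_chdir_toolong_temp_directory (base);

  free (base);
  return 0;
}

#include <support/test-driver.c>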
*/ +void support_chdir_toolong_temp_directory (const char *base); + __END_DECLS #endif /* SUPPORT_TEMP_FILE_H */ diff --git a/support/xchdir.c b/support/xchdir.c new file mode 100644 index 000000000..beb4feff7 --- /dev/null +++ b/support/xchdir.c @@ -0,0 +1,28 @@ +/* chdir with error checking. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include + +void +xchdir (const char *path) +{ + if (chdir (path) != 0) + FAIL_EXIT1 ("chdir (\"%s\"): %m", path); +} diff --git a/support/xclone.c b/support/xclone.c new file mode 100644 index 000000000..243eee8b2 --- /dev/null +++ b/support/xclone.c @@ -0,0 +1,49 @@ +/* Auxiliary functions to issue the clone syscall. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifdef __linux__ +# include +# include /* For _STACK_GROWS_{UP,DOWN}. */ +# include + +pid_t +xclone (int (*fn) (void *arg), void *arg, void *stack, size_t stack_size, + int flags) +{ + pid_t r = -1; + +# ifdef __ia64__ + extern int __clone2 (int (*fn) (void *arg), void *stack, size_t stack_size, + int flags, void *arg, ...); + r = __clone2 (fn, stack, stack_size, flags, arg, /* ptid */ NULL, + /* tls */ NULL, /* ctid */ NULL); +# else +# if _STACK_GROWS_DOWN + r = clone (fn, stack + stack_size, flags, arg, /* ptid */ NULL, + /* tls */ NULL, /* ctid */ NULL); +# elif _STACK_GROWS_UP + r = clone (fn, stack, flags, arg, /* ptid */ NULL, /* tls */ NULL, NULL); +# endif +# endif + + if (r < 0) + FAIL_EXIT1 ("clone: %m"); + + return r; +} +#endif diff --git a/support/xsched.h b/support/xsched.h new file mode 100644 index 000000000..eefd73194 --- /dev/null +++ b/support/xsched.h @@ -0,0 +1,34 @@ +/* Wrapper for sched.h functions. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
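A hypothetical caller of the new xclone wrapper: SIGCHLD is passed in FLAGS so the child can be reaped with waitpid, and xmalloc/xwaitpid are existing support helpers (the stack alignment clone needs is assumed to be satisfied by xmalloc):

#include <signal.h>
#include <stdlib.h>
#include <support/support.h>
#include <support/xsched.h>
#include <support/xunistd.h>

static int
child_fn (void *arg)
{
  return 0;                     /* becomes the child's exit status */
}

static int
do_test (void)
{
  size_t stack_size = 1024 * 1024;
  char *stack = xmalloc (stack_size);

  /* xclone picks the correct stack end for the architecture's stack
     growth direction and fails the test on error.  */
  pid_t pid = xclone (child_fn, NULL, stack, stack_size, SIGCHLD);

  int status;
  xwaitpid (pid, &status, 0);
  free (stack);
  return 0;
}

#include <support/test-driver.c>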
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef SUPPORT_XSCHED_H +#define SUPPORT_XSCHED_H + +__BEGIN_DECLS + +#include +#include + +#ifdef __linux__ +pid_t xclone (int (*fn) (void *arg), void *arg, void *stack, + size_t stack_size, int flags); +#endif + +__END_DECLS + +#endif diff --git a/support/xunistd.h b/support/xunistd.h index 96f498f2e..43799d92c 100644 --- a/support/xunistd.h +++ b/support/xunistd.h @@ -44,6 +44,7 @@ long xsysconf (int name); long long xlseek (int fd, long long offset, int whence); void xftruncate (int fd, long long length); void xsymlink (const char *target, const char *linkpath); +void xchdir (const char *path); /* Equivalent of "mkdir -p". */ void xmkdirp (const char *, mode_t); diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h index db3335e5a..8ffa0d1c5 100644 --- a/sysdeps/aarch64/dl-machine.h +++ b/sysdeps/aarch64/dl-machine.h @@ -392,13 +392,6 @@ elf_machine_lazy_rel (struct link_map *map, /* Check for unexpected PLT reloc type. */ if (__builtin_expect (r_type == AARCH64_R(JUMP_SLOT), 1)) { - if (map->l_mach.plt == 0) - { - /* Prelinking. */ - *reloc_addr += l_addr; - return; - } - if (__glibc_unlikely (map->l_info[DT_AARCH64 (VARIANT_PCS)] != NULL)) { /* Check the symbol table for variant PCS symbols. */ @@ -422,7 +415,10 @@ elf_machine_lazy_rel (struct link_map *map, } } - *reloc_addr = map->l_mach.plt; + if (map->l_mach.plt == 0) + *reloc_addr += l_addr; + else + *reloc_addr = map->l_mach.plt; } else if (__builtin_expect (r_type == AARCH64_R(TLSDESC), 1)) { diff --git a/sysdeps/aarch64/memcpy.S b/sysdeps/aarch64/memcpy.S index d0d47e90b..e0b4c4502 100644 --- a/sysdeps/aarch64/memcpy.S +++ b/sysdeps/aarch64/memcpy.S @@ -33,11 +33,11 @@ #define A_l x6 #define A_lw w6 #define A_h x7 -#define A_hw w7 #define B_l x8 #define B_lw w8 #define B_h x9 #define C_l x10 +#define C_lw w10 #define C_h x11 #define D_l x12 #define D_h x13 @@ -51,16 +51,6 @@ #define H_h srcend #define tmp1 x14 -/* Copies are split into 3 main cases: small copies of up to 32 bytes, - medium copies of 33..128 bytes which are fully unrolled. Large copies - of more than 128 bytes align the destination and use an unrolled loop - processing 64 bytes per iteration. - In order to share code with memmove, small and medium copies read all - data before writing, allowing any kind of overlap. So small, medium - and large backwards memmoves are handled by falling through into memcpy. - Overlapping large forward memmoves use a loop that copies backwards. -*/ - #ifndef MEMMOVE # define MEMMOVE memmove #endif @@ -68,118 +58,115 @@ # define MEMCPY memcpy #endif -ENTRY_ALIGN (MEMMOVE, 6) +/* This implementation supports both memcpy and memmove and shares most code. + It uses unaligned accesses and branchless sequences to keep the code small, + simple and improve performance. - DELOUSE (0) - DELOUSE (1) - DELOUSE (2) + Copies are split into 3 main cases: small copies of up to 32 bytes, medium + copies of up to 128 bytes, and large copies. The overhead of the overlap + check in memmove is negligible since it is only required for large copies. 
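In C terms, the overlap test that the rewritten memmove performs below before falling into the shared forward-copy path is a single unsigned comparison; a sketch, not the assembly:

#include <stddef.h>
#include <stdint.h>

/* Forward copying is unsafe only if DST starts inside the live part of
   SRC.  Unsigned wraparound lets one compare cover both dst < src and
   dst >= src + count; this is "sub tmp1, dstin, src; cmp tmp1, count;
   b.hs L(copy_long)" in the assembly.  */
static int
must_copy_backward (const char *dst, const char *src, size_t count)
{
  return (uintptr_t) dst - (uintptr_t) src < count;
}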
- sub tmp1, dstin, src - cmp count, 128 - ccmp tmp1, count, 2, hi - b.lo L(move_long) - - /* Common case falls through into memcpy. */ -END (MEMMOVE) -libc_hidden_builtin_def (MEMMOVE) -ENTRY (MEMCPY) + Large copies use a software pipelined loop processing 64 bytes per + iteration. The destination pointer is 16-byte aligned to minimize + unaligned accesses. The loop tail is handled by always copying 64 bytes + from the end. +*/ +ENTRY_ALIGN (MEMCPY, 6) DELOUSE (0) DELOUSE (1) DELOUSE (2) - prfm PLDL1KEEP, [src] add srcend, src, count add dstend, dstin, count - cmp count, 32 - b.ls L(copy32) cmp count, 128 b.hi L(copy_long) + cmp count, 32 + b.hi L(copy32_128) - /* Medium copies: 33..128 bytes. */ + /* Small copies: 0..32 bytes. */ + cmp count, 16 + b.lo L(copy16) ldp A_l, A_h, [src] - ldp B_l, B_h, [src, 16] - ldp C_l, C_h, [srcend, -32] ldp D_l, D_h, [srcend, -16] - cmp count, 64 - b.hi L(copy128) stp A_l, A_h, [dstin] - stp B_l, B_h, [dstin, 16] - stp C_l, C_h, [dstend, -32] stp D_l, D_h, [dstend, -16] ret - .p2align 4 - /* Small copies: 0..32 bytes. */ -L(copy32): - /* 16-32 bytes. */ - cmp count, 16 - b.lo 1f - ldp A_l, A_h, [src] - ldp B_l, B_h, [srcend, -16] - stp A_l, A_h, [dstin] - stp B_l, B_h, [dstend, -16] - ret - .p2align 4 -1: - /* 8-15 bytes. */ - tbz count, 3, 1f + /* Copy 8-15 bytes. */ +L(copy16): + tbz count, 3, L(copy8) ldr A_l, [src] ldr A_h, [srcend, -8] str A_l, [dstin] str A_h, [dstend, -8] ret - .p2align 4 -1: - /* 4-7 bytes. */ - tbz count, 2, 1f + + .p2align 3 + /* Copy 4-7 bytes. */ +L(copy8): + tbz count, 2, L(copy4) ldr A_lw, [src] - ldr A_hw, [srcend, -4] + ldr B_lw, [srcend, -4] str A_lw, [dstin] - str A_hw, [dstend, -4] + str B_lw, [dstend, -4] ret - /* Copy 0..3 bytes. Use a branchless sequence that copies the same - byte 3 times if count==1, or the 2nd byte twice if count==2. */ -1: - cbz count, 2f + /* Copy 0..3 bytes using a branchless sequence. */ +L(copy4): + cbz count, L(copy0) lsr tmp1, count, 1 ldrb A_lw, [src] - ldrb A_hw, [srcend, -1] + ldrb C_lw, [srcend, -1] ldrb B_lw, [src, tmp1] strb A_lw, [dstin] strb B_lw, [dstin, tmp1] - strb A_hw, [dstend, -1] -2: ret + strb C_lw, [dstend, -1] +L(copy0): + ret + + .p2align 4 + /* Medium copies: 33..128 bytes. */ +L(copy32_128): + ldp A_l, A_h, [src] + ldp B_l, B_h, [src, 16] + ldp C_l, C_h, [srcend, -32] + ldp D_l, D_h, [srcend, -16] + cmp count, 64 + b.hi L(copy128) + stp A_l, A_h, [dstin] + stp B_l, B_h, [dstin, 16] + stp C_l, C_h, [dstend, -32] + stp D_l, D_h, [dstend, -16] + ret .p2align 4 - /* Copy 65..128 bytes. Copy 64 bytes from the start and - 64 bytes from the end. */ + /* Copy 65..128 bytes. */ L(copy128): ldp E_l, E_h, [src, 32] ldp F_l, F_h, [src, 48] + cmp count, 96 + b.ls L(copy96) ldp G_l, G_h, [srcend, -64] ldp H_l, H_h, [srcend, -48] + stp G_l, G_h, [dstend, -64] + stp H_l, H_h, [dstend, -48] +L(copy96): stp A_l, A_h, [dstin] stp B_l, B_h, [dstin, 16] stp E_l, E_h, [dstin, 32] stp F_l, F_h, [dstin, 48] - stp G_l, G_h, [dstend, -64] - stp H_l, H_h, [dstend, -48] stp C_l, C_h, [dstend, -32] stp D_l, D_h, [dstend, -16] ret - /* Align DST to 16 byte alignment so that we don't cross cache line - boundaries on both loads and stores. There are at least 128 bytes - to copy, so copy 16 bytes unaligned and then align. The loop - copies 64 bytes per iteration and prefetches one iteration ahead. */ - .p2align 4 + /* Copy more than 128 bytes. */ L(copy_long): + /* Copy 16 bytes and then align dst to 16-byte alignment. 
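The small and medium paths above follow one pattern, shown here in portable C: load the head and the tail of the buffer before storing anything, so a single branchless path covers a whole size bucket and, because all reads happen first, the same code is overlap-safe for memmove. Sketch for the 16..32 byte bucket:

#include <string.h>

static void
copy_16_to_32 (char *dst, const char *src, size_t n)  /* 16 <= n <= 32 */
{
  unsigned long long a, b, c, d;
  memcpy (&a, src, 8);             /* head */
  memcpy (&b, src + 8, 8);
  memcpy (&c, src + n - 16, 8);    /* tail; may overlap the head */
  memcpy (&d, src + n - 8, 8);
  memcpy (dst, &a, 8);
  memcpy (dst + 8, &b, 8);
  memcpy (dst + n - 16, &c, 8);
  memcpy (dst + n - 8, &d, 8);
}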
*/ + ldp D_l, D_h, [src] and tmp1, dstin, 15 bic dst, dstin, 15 - ldp D_l, D_h, [src] sub src, src, tmp1 add count, count, tmp1 /* Count is now 16 too large. */ ldp A_l, A_h, [src, 16] @@ -188,7 +175,8 @@ L(copy_long): ldp C_l, C_h, [src, 48] ldp D_l, D_h, [src, 64]! subs count, count, 128 + 16 /* Test and readjust count. */ - b.ls L(last64) + b.ls L(copy64_from_end) + L(loop64): stp A_l, A_h, [dst, 16] ldp A_l, A_h, [src, 16] @@ -201,10 +189,8 @@ L(loop64): subs count, count, 64 b.hi L(loop64) - /* Write the last full set of 64 bytes. The remainder is at most 64 - bytes, so it is safe to always copy 64 bytes from the end even if - there is just 1 byte left. */ -L(last64): + /* Write the last iteration and copy 64 bytes from the end. */ +L(copy64_from_end): ldp E_l, E_h, [srcend, -64] stp A_l, A_h, [dst, 16] ldp A_l, A_h, [srcend, -48] @@ -219,20 +205,42 @@ L(last64): stp C_l, C_h, [dstend, -16] ret - .p2align 4 -L(move_long): - cbz tmp1, 3f +END (MEMCPY) +libc_hidden_builtin_def (MEMCPY) + +ENTRY_ALIGN (MEMMOVE, 4) + DELOUSE (0) + DELOUSE (1) + DELOUSE (2) add srcend, src, count add dstend, dstin, count + cmp count, 128 + b.hi L(move_long) + cmp count, 32 + b.hi L(copy32_128) - /* Align dstend to 16 byte alignment so that we don't cross cache line - boundaries on both loads and stores. There are at least 128 bytes - to copy, so copy 16 bytes unaligned and then align. The loop - copies 64 bytes per iteration and prefetches one iteration ahead. */ + /* Small copies: 0..32 bytes. */ + cmp count, 16 + b.lo L(copy16) + ldp A_l, A_h, [src] + ldp D_l, D_h, [srcend, -16] + stp A_l, A_h, [dstin] + stp D_l, D_h, [dstend, -16] + ret - and tmp1, dstend, 15 + .p2align 4 +L(move_long): + /* Only use backward copy if there is an overlap. */ + sub tmp1, dstin, src + cbz tmp1, L(copy0) + cmp tmp1, count + b.hs L(copy_long) + + /* Large backwards copy for overlapping copies. + Copy 16 bytes and then align dst to 16-byte alignment. */ ldp D_l, D_h, [srcend, -16] + and tmp1, dstend, 15 sub srcend, srcend, tmp1 sub count, count, tmp1 ldp A_l, A_h, [srcend, -16] @@ -242,10 +250,9 @@ L(move_long): ldp D_l, D_h, [srcend, -64]! sub dstend, dstend, tmp1 subs count, count, 128 - b.ls 2f + b.ls L(copy64_from_start) - nop -1: +L(loop64_backwards): stp A_l, A_h, [dstend, -16] ldp A_l, A_h, [srcend, -16] stp B_l, B_h, [dstend, -32] @@ -255,12 +262,10 @@ L(move_long): stp D_l, D_h, [dstend, -64]! ldp D_l, D_h, [srcend, -64]! subs count, count, 64 - b.hi 1b + b.hi L(loop64_backwards) - /* Write the last full set of 64 bytes. The remainder is at most 64 - bytes, so it is safe to always copy 64 bytes from the start even if - there is just 1 byte left. */ -2: + /* Write the last iteration and copy 64 bytes from the start. 
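The L(copy64_from_end) path above and the L(copy64_from_start) path that follows implement the loop-tail strategy the header comment describes. Rendered as C for the forward direction (a sketch; assumes n > 128 and non-overlapping buffers):

#include <string.h>

static void
copy_large (char *dst, const char *src, size_t n)
{
  size_t i = 0;
  while (n - i > 64)               /* main loop: 64 bytes per iteration */
    {
      memcpy (dst + i, src + i, 64);
      i += 64;
    }
  /* Finish with the last 64 bytes taken from the end: at this point
     i >= n - 64, so this covers the remainder, re-copying at most 63
     bytes instead of running a byte-granular tail.  */
  memcpy (dst + n - 64, src + n - 64, 64);
}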
*/ +L(copy64_from_start): ldp G_l, G_h, [src, 48] stp A_l, A_h, [dstend, -16] ldp A_l, A_h, [src, 32] @@ -273,7 +278,7 @@ L(move_long): stp A_l, A_h, [dstin, 32] stp B_l, B_h, [dstin, 16] stp C_l, C_h, [dstin] -3: ret + ret -END (MEMCPY) -libc_hidden_builtin_def (MEMCPY) +END (MEMMOVE) +libc_hidden_builtin_def (MEMMOVE) diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile index 8378107c7..3c5292d1a 100644 --- a/sysdeps/aarch64/multiarch/Makefile +++ b/sysdeps/aarch64/multiarch/Makefile @@ -1,5 +1,5 @@ ifeq ($(subdir),string) -sysdep_routines += memcpy_generic memcpy_thunderx memcpy_thunderx2 \ +sysdep_routines += memcpy_generic memcpy_advsimd memcpy_thunderx memcpy_thunderx2 \ memcpy_falkor memmove_falkor \ memset_generic memset_falkor memset_emag memset_kunpeng \ memchr_generic memchr_nosimd \ diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c index b7da62c3b..4b004ac47 100644 --- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c +++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c @@ -42,11 +42,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx2) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_falkor) + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_simd) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic)) IFUNC_IMPL (i, name, memmove, IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_thunderx) IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_thunderx2) IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_falkor) + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_simd) IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic)) IFUNC_IMPL (i, name, memset, /* Enable this on non-falkor processors too so that other cores diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c index 2fafefd5d..799d60c98 100644 --- a/sysdeps/aarch64/multiarch/memcpy.c +++ b/sysdeps/aarch64/multiarch/memcpy.c @@ -29,6 +29,7 @@ extern __typeof (__redirect_memcpy) __libc_memcpy; extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden; +extern __typeof (__redirect_memcpy) __memcpy_simd attribute_hidden; extern __typeof (__redirect_memcpy) __memcpy_thunderx attribute_hidden; extern __typeof (__redirect_memcpy) __memcpy_thunderx2 attribute_hidden; extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden; @@ -36,11 +37,14 @@ extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden; libc_ifunc (__libc_memcpy, (IS_THUNDERX (midr) ? __memcpy_thunderx - : (IS_FALKOR (midr) || IS_PHECDA (midr) || IS_ARES (midr) || IS_KUNPENG920 (midr) + : (IS_FALKOR (midr) || IS_PHECDA (midr) || IS_KUNPENG920 (midr) ? __memcpy_falkor : (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr) ? __memcpy_thunderx2 - : __memcpy_generic)))); + : (IS_NEOVERSE_N1 (midr) || IS_NEOVERSE_N2 (midr) + || IS_NEOVERSE_V1 (midr) + ? __memcpy_simd + : __memcpy_generic))))); # undef memcpy strong_alias (__libc_memcpy, memcpy); diff --git a/sysdeps/aarch64/multiarch/memcpy_advsimd.S b/sysdeps/aarch64/multiarch/memcpy_advsimd.S new file mode 100644 index 000000000..48bb6d7ca --- /dev/null +++ b/sysdeps/aarch64/multiarch/memcpy_advsimd.S @@ -0,0 +1,248 @@ +/* Generic optimized memcpy using SIMD. + Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . */ + +#include + +/* Assumptions: + * + * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. + * + */ + +#define dstin x0 +#define src x1 +#define count x2 +#define dst x3 +#define srcend x4 +#define dstend x5 +#define A_l x6 +#define A_lw w6 +#define A_h x7 +#define B_l x8 +#define B_lw w8 +#define B_h x9 +#define C_lw w10 +#define tmp1 x14 + +#define A_q q0 +#define B_q q1 +#define C_q q2 +#define D_q q3 +#define E_q q4 +#define F_q q5 +#define G_q q6 +#define H_q q7 + + +/* This implementation supports both memcpy and memmove and shares most code. + It uses unaligned accesses and branchless sequences to keep the code small, + simple and improve performance. + + Copies are split into 3 main cases: small copies of up to 32 bytes, medium + copies of up to 128 bytes, and large copies. The overhead of the overlap + check in memmove is negligible since it is only required for large copies. + + Large copies use a software pipelined loop processing 64 bytes per + iteration. The destination pointer is 16-byte aligned to minimize + unaligned accesses. The loop tail is handled by always copying 64 bytes + from the end. */ + +ENTRY (__memcpy_simd) + DELOUSE (0) + DELOUSE (1) + DELOUSE (2) + + add srcend, src, count + add dstend, dstin, count + cmp count, 128 + b.hi L(copy_long) + cmp count, 32 + b.hi L(copy32_128) + + /* Small copies: 0..32 bytes. */ + cmp count, 16 + b.lo L(copy16) + ldr A_q, [src] + ldr B_q, [srcend, -16] + str A_q, [dstin] + str B_q, [dstend, -16] + ret + + /* Copy 8-15 bytes. */ +L(copy16): + tbz count, 3, L(copy8) + ldr A_l, [src] + ldr A_h, [srcend, -8] + str A_l, [dstin] + str A_h, [dstend, -8] + ret + + /* Copy 4-7 bytes. */ +L(copy8): + tbz count, 2, L(copy4) + ldr A_lw, [src] + ldr B_lw, [srcend, -4] + str A_lw, [dstin] + str B_lw, [dstend, -4] + ret + + /* Copy 0..3 bytes using a branchless sequence. */ +L(copy4): + cbz count, L(copy0) + lsr tmp1, count, 1 + ldrb A_lw, [src] + ldrb C_lw, [srcend, -1] + ldrb B_lw, [src, tmp1] + strb A_lw, [dstin] + strb B_lw, [dstin, tmp1] + strb C_lw, [dstend, -1] +L(copy0): + ret + + .p2align 4 + /* Medium copies: 33..128 bytes. */ +L(copy32_128): + ldp A_q, B_q, [src] + ldp C_q, D_q, [srcend, -32] + cmp count, 64 + b.hi L(copy128) + stp A_q, B_q, [dstin] + stp C_q, D_q, [dstend, -32] + ret + + .p2align 4 + /* Copy 65..128 bytes. */ +L(copy128): + ldp E_q, F_q, [src, 32] + cmp count, 96 + b.ls L(copy96) + ldp G_q, H_q, [srcend, -64] + stp G_q, H_q, [dstend, -64] +L(copy96): + stp A_q, B_q, [dstin] + stp E_q, F_q, [dstin, 32] + stp C_q, D_q, [dstend, -32] + ret + + /* Align loop64 below to 16 bytes. */ + nop + + /* Copy more than 128 bytes. */ +L(copy_long): + /* Copy 16 bytes and then align src to 16-byte alignment. */ + ldr D_q, [src] + and tmp1, src, 15 + bic src, src, 15 + sub dst, dstin, tmp1 + add count, count, tmp1 /* Count is now 16 too large. 
*/ + ldp A_q, B_q, [src, 16] + str D_q, [dstin] + ldp C_q, D_q, [src, 48] + subs count, count, 128 + 16 /* Test and readjust count. */ + b.ls L(copy64_from_end) +L(loop64): + stp A_q, B_q, [dst, 16] + ldp A_q, B_q, [src, 80] + stp C_q, D_q, [dst, 48] + ldp C_q, D_q, [src, 112] + add src, src, 64 + add dst, dst, 64 + subs count, count, 64 + b.hi L(loop64) + + /* Write the last iteration and copy 64 bytes from the end. */ +L(copy64_from_end): + ldp E_q, F_q, [srcend, -64] + stp A_q, B_q, [dst, 16] + ldp A_q, B_q, [srcend, -32] + stp C_q, D_q, [dst, 48] + stp E_q, F_q, [dstend, -64] + stp A_q, B_q, [dstend, -32] + ret + +END (__memcpy_simd) +libc_hidden_builtin_def (__memcpy_simd) + + +ENTRY (__memmove_simd) + DELOUSE (0) + DELOUSE (1) + DELOUSE (2) + + add srcend, src, count + add dstend, dstin, count + cmp count, 128 + b.hi L(move_long) + cmp count, 32 + b.hi L(copy32_128) + + /* Small moves: 0..32 bytes. */ + cmp count, 16 + b.lo L(copy16) + ldr A_q, [src] + ldr B_q, [srcend, -16] + str A_q, [dstin] + str B_q, [dstend, -16] + ret + +L(move_long): + /* Only use backward copy if there is an overlap. */ + sub tmp1, dstin, src + cbz tmp1, L(move0) + cmp tmp1, count + b.hs L(copy_long) + + /* Large backwards copy for overlapping copies. + Copy 16 bytes and then align srcend to 16-byte alignment. */ +L(copy_long_backwards): + ldr D_q, [srcend, -16] + and tmp1, srcend, 15 + bic srcend, srcend, 15 + sub count, count, tmp1 + ldp A_q, B_q, [srcend, -32] + str D_q, [dstend, -16] + ldp C_q, D_q, [srcend, -64] + sub dstend, dstend, tmp1 + subs count, count, 128 + b.ls L(copy64_from_start) + +L(loop64_backwards): + str B_q, [dstend, -16] + str A_q, [dstend, -32] + ldp A_q, B_q, [srcend, -96] + str D_q, [dstend, -48] + str C_q, [dstend, -64]! + ldp C_q, D_q, [srcend, -128] + sub srcend, srcend, 64 + subs count, count, 64 + b.hi L(loop64_backwards) + + /* Write the last iteration and copy 64 bytes from the start. */ +L(copy64_from_start): + ldp E_q, F_q, [src, 32] + stp A_q, B_q, [dstend, -32] + ldp A_q, B_q, [src] + stp C_q, D_q, [dstend, -64] + stp E_q, F_q, [dstin, 32] + stp A_q, B_q, [dstin] +L(move0): + ret + +END (__memmove_simd) +libc_hidden_builtin_def (__memmove_simd) diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c index ed5a47f6f..46a4cb3a5 100644 --- a/sysdeps/aarch64/multiarch/memmove.c +++ b/sysdeps/aarch64/multiarch/memmove.c @@ -29,6 +29,7 @@ extern __typeof (__redirect_memmove) __libc_memmove; extern __typeof (__redirect_memmove) __memmove_generic attribute_hidden; +extern __typeof (__redirect_memmove) __memmove_simd attribute_hidden; extern __typeof (__redirect_memmove) __memmove_thunderx attribute_hidden; extern __typeof (__redirect_memmove) __memmove_thunderx2 attribute_hidden; extern __typeof (__redirect_memmove) __memmove_falkor attribute_hidden; @@ -40,7 +41,10 @@ libc_ifunc (__libc_memmove, ? __memmove_falkor : (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr) ? __memmove_thunderx2 - : __memmove_generic)))); + : (IS_NEOVERSE_N1 (midr) || IS_NEOVERSE_N2 (midr) + || IS_NEOVERSE_V1 (midr) + ? 
__memmove_simd + : __memmove_generic))))); # undef memmove strong_alias (__libc_memmove, memmove); diff --git a/sysdeps/aarch64/strcpy.S b/sysdeps/aarch64/strcpy.S index 548130e41..a8ff52c07 100644 --- a/sysdeps/aarch64/strcpy.S +++ b/sysdeps/aarch64/strcpy.S @@ -234,8 +234,13 @@ L(entry_no_page_cross): #endif /* calculate the loc value */ cmeq datav.16b, datav.16b, #0 +#ifdef __AARCH64EB__ + mov data1, datav.d[1] + mov data2, datav.d[0] +#else mov data1, datav.d[0] mov data2, datav.d[1] +#endif cmp data1, 0 csel data1, data1, data2, ne mov pos, 8 diff --git a/sysdeps/aarch64/strnlen.S b/sysdeps/aarch64/strnlen.S index 5981247dd..086a5c7e9 100644 --- a/sysdeps/aarch64/strnlen.S +++ b/sysdeps/aarch64/strnlen.S @@ -154,8 +154,13 @@ L(loop_end): byte. */ cmeq datav.16b, datav.16b, #0 +#ifdef __AARCH64EB__ + mov data1, datav.d[1] + mov data2, datav.d[0] +#else mov data1, datav.d[0] mov data2, datav.d[1] +#endif cmp data1, 0 csel data1, data1, data2, ne sub len, src, srcin diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h index 604c48917..f1feb19dc 100644 --- a/sysdeps/aarch64/sysdep.h +++ b/sysdeps/aarch64/sysdep.h @@ -45,7 +45,7 @@ #define ENTRY(name) \ .globl C_SYMBOL_NAME(name); \ .type C_SYMBOL_NAME(name),%function; \ - .align 4; \ + .p2align 6; \ C_LABEL(name) \ cfi_startproc; \ CALL_MCOUNT diff --git a/sysdeps/arm/armv7/multiarch/memcpy_impl.S b/sysdeps/arm/armv7/multiarch/memcpy_impl.S index bf4ac7077..379bb56fc 100644 --- a/sysdeps/arm/armv7/multiarch/memcpy_impl.S +++ b/sysdeps/arm/armv7/multiarch/memcpy_impl.S @@ -268,7 +268,7 @@ ENTRY(memcpy) mov dst, dstin /* Preserve dstin, we need to return it. */ cmp count, #64 - bge .Lcpy_not_short + bhs .Lcpy_not_short /* Deal with small copies quickly by dropping straight into the exit block. */ @@ -351,10 +351,10 @@ ENTRY(memcpy) 1: subs tmp2, count, #64 /* Use tmp2 for count. */ - blt .Ltail63aligned + blo .Ltail63aligned cmp tmp2, #512 - bge .Lcpy_body_long + bhs .Lcpy_body_long .Lcpy_body_medium: /* Count in tmp2. */ #ifdef USE_VFP @@ -378,7 +378,7 @@ ENTRY(memcpy) add src, src, #64 vstr d1, [dst, #56] add dst, dst, #64 - bge 1b + bhs 1b tst tmp2, #0x3f beq .Ldone @@ -412,7 +412,7 @@ ENTRY(memcpy) ldrd A_l, A_h, [src, #64]! strd A_l, A_h, [dst, #64]! 
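This memcpy_impl.S hunk and the arm/memcpy.S and arm/memmove.S hunks that follow systematically replace signed condition codes (ge, lt, mi, pl) with their unsigned counterparts (hs, lo): the byte count is unsigned, so a value with the top bit set must mean "huge", not "negative". The C analogue of the bug class, as a sketch for a two's-complement LP64 target:

#include <stddef.h>
#include <stdio.h>

int
main (void)
{
  size_t count = (size_t) -1;          /* huge count, top bit set */

  /* Signed view: looks negative, so a bge/blt-style loop is skipped.  */
  printf ("signed  : %d\n", (long) count >= 64);   /* prints 0 */
  /* Unsigned view: correctly "at least 64 bytes remain" (bhs/blo).  */
  printf ("unsigned: %d\n", count >= 64);          /* prints 1 */
  return 0;
}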
subs tmp2, tmp2, #64 - bge 1b + bhs 1b tst tmp2, #0x3f bne 1f ldr tmp2,[sp], #FRAME_SIZE @@ -482,7 +482,7 @@ ENTRY(memcpy) add src, src, #32 subs tmp2, tmp2, #prefetch_lines * 64 * 2 - blt 2f + blo 2f 1: cpy_line_vfp d3, 0 cpy_line_vfp d4, 64 @@ -494,7 +494,7 @@ ENTRY(memcpy) add dst, dst, #2 * 64 add src, src, #2 * 64 subs tmp2, tmp2, #prefetch_lines * 64 - bge 1b + bhs 1b 2: cpy_tail_vfp d3, 0 @@ -615,8 +615,8 @@ ENTRY(memcpy) 1: pld [src, #(3 * 64)] subs count, count, #64 - ldrmi tmp2, [sp], #FRAME_SIZE - bmi .Ltail63unaligned + ldrlo tmp2, [sp], #FRAME_SIZE + blo .Ltail63unaligned pld [src, #(4 * 64)] #ifdef USE_NEON @@ -633,7 +633,7 @@ ENTRY(memcpy) neon_load_multi d0-d3, src neon_load_multi d4-d7, src subs count, count, #64 - bmi 2f + blo 2f 1: pld [src, #(4 * 64)] neon_store_multi d0-d3, dst @@ -641,7 +641,7 @@ ENTRY(memcpy) neon_store_multi d4-d7, dst neon_load_multi d4-d7, src subs count, count, #64 - bpl 1b + bhs 1b 2: neon_store_multi d0-d3, dst neon_store_multi d4-d7, dst diff --git a/sysdeps/arm/be/nofpu/Implies b/sysdeps/arm/be/nofpu/Implies new file mode 100644 index 000000000..c90dd7fd5 --- /dev/null +++ b/sysdeps/arm/be/nofpu/Implies @@ -0,0 +1 @@ +arm/nofpu diff --git a/sysdeps/arm/le/nofpu/Implies b/sysdeps/arm/le/nofpu/Implies new file mode 100644 index 000000000..c90dd7fd5 --- /dev/null +++ b/sysdeps/arm/le/nofpu/Implies @@ -0,0 +1 @@ +arm/nofpu diff --git a/sysdeps/arm/memcpy.S b/sysdeps/arm/memcpy.S index 510e8adaf..bcfbc51d9 100644 --- a/sysdeps/arm/memcpy.S +++ b/sysdeps/arm/memcpy.S @@ -68,7 +68,7 @@ ENTRY(memcpy) cfi_remember_state subs r2, r2, #4 - blt 8f + blo 8f ands ip, r0, #3 PLD( pld [r1, #0] ) bne 9f @@ -82,7 +82,7 @@ ENTRY(memcpy) cfi_rel_offset (r6, 4) cfi_rel_offset (r7, 8) cfi_rel_offset (r8, 12) - blt 5f + blo 5f CALGN( ands ip, r1, #31 ) CALGN( rsb r3, ip, #32 ) @@ -98,9 +98,9 @@ ENTRY(memcpy) #endif PLD( pld [r1, #0] ) -2: PLD( subs r2, r2, #96 ) +2: PLD( cmp r2, #96 ) PLD( pld [r1, #28] ) - PLD( blt 4f ) + PLD( blo 4f ) PLD( pld [r1, #60] ) PLD( pld [r1, #92] ) @@ -108,9 +108,7 @@ ENTRY(memcpy) 4: ldmia r1!, {r3, r4, r5, r6, r7, r8, ip, lr} subs r2, r2, #32 stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr} - bge 3b - PLD( cmn r2, #96 ) - PLD( bge 4b ) + bhs 3b 5: ands ip, r2, #28 rsb ip, ip, #32 @@ -222,7 +220,7 @@ ENTRY(memcpy) strbge r4, [r0], #1 subs r2, r2, ip strb lr, [r0], #1 - blt 8b + blo 8b ands ip, r1, #3 beq 1b @@ -236,7 +234,7 @@ ENTRY(memcpy) .macro forward_copy_shift pull push subs r2, r2, #28 - blt 14f + blo 14f CALGN( ands ip, r1, #31 ) CALGN( rsb ip, ip, #32 ) @@ -253,9 +251,9 @@ ENTRY(memcpy) cfi_rel_offset (r10, 16) PLD( pld [r1, #0] ) - PLD( subs r2, r2, #96 ) + PLD( cmp r2, #96 ) PLD( pld [r1, #28] ) - PLD( blt 13f ) + PLD( blo 13f ) PLD( pld [r1, #60] ) PLD( pld [r1, #92] ) @@ -280,9 +278,7 @@ ENTRY(memcpy) mov ip, ip, PULL #\pull orr ip, ip, lr, PUSH #\push stmia r0!, {r3, r4, r5, r6, r7, r8, r10, ip} - bge 12b - PLD( cmn r2, #96 ) - PLD( bge 13b ) + bhs 12b pop {r5 - r8, r10} cfi_adjust_cfa_offset (-20) diff --git a/sysdeps/arm/memmove.S b/sysdeps/arm/memmove.S index 954037ef3..0d07b76ee 100644 --- a/sysdeps/arm/memmove.S +++ b/sysdeps/arm/memmove.S @@ -85,7 +85,7 @@ ENTRY(memmove) add r1, r1, r2 add r0, r0, r2 subs r2, r2, #4 - blt 8f + blo 8f ands ip, r0, #3 PLD( pld [r1, #-4] ) bne 9f @@ -99,7 +99,7 @@ ENTRY(memmove) cfi_rel_offset (r6, 4) cfi_rel_offset (r7, 8) cfi_rel_offset (r8, 12) - blt 5f + blo 5f CALGN( ands ip, r1, #31 ) CALGN( sbcsne r4, ip, r2 ) @ C is always set here @@ -114,9 +114,9 @@ ENTRY(memmove) #endif PLD( pld 
[r1, #-4] ) -2: PLD( subs r2, r2, #96 ) +2: PLD( cmp r2, #96 ) PLD( pld [r1, #-32] ) - PLD( blt 4f ) + PLD( blo 4f ) PLD( pld [r1, #-64] ) PLD( pld [r1, #-96] ) @@ -124,9 +124,7 @@ ENTRY(memmove) 4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} subs r2, r2, #32 stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} - bge 3b - PLD( cmn r2, #96 ) - PLD( bge 4b ) + bhs 3b 5: ands ip, r2, #28 rsb ip, ip, #32 @@ -237,7 +235,7 @@ ENTRY(memmove) strbge r4, [r0, #-1]! subs r2, r2, ip strb lr, [r0, #-1]! - blt 8b + blo 8b ands ip, r1, #3 beq 1b @@ -251,7 +249,7 @@ ENTRY(memmove) .macro backward_copy_shift push pull subs r2, r2, #28 - blt 14f + blo 14f CALGN( ands ip, r1, #31 ) CALGN( rsb ip, ip, #32 ) @@ -268,9 +266,9 @@ ENTRY(memmove) cfi_rel_offset (r10, 16) PLD( pld [r1, #-4] ) - PLD( subs r2, r2, #96 ) + PLD( cmp r2, #96 ) PLD( pld [r1, #-32] ) - PLD( blt 13f ) + PLD( blo 13f ) PLD( pld [r1, #-64] ) PLD( pld [r1, #-96] ) @@ -295,9 +293,7 @@ ENTRY(memmove) mov r4, r4, PUSH #\push orr r4, r4, r3, PULL #\pull stmdb r0!, {r4 - r8, r10, ip, lr} - bge 12b - PLD( cmn r2, #96 ) - PLD( bge 13b ) + bhs 12b pop {r5 - r8, r10} cfi_adjust_cfa_offset (-20) diff --git a/sysdeps/generic/unwind-arch.h b/sysdeps/generic/unwind-arch.h new file mode 100644 index 000000000..d712e5e11 --- /dev/null +++ b/sysdeps/generic/unwind-arch.h @@ -0,0 +1,30 @@ +/* Return backtrace of current program state. Arch-specific bits. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#ifndef _UNWIND_ARCH_H +#define _UNWIND_ARCH_H + +#include <unwind.h> + +static inline void * +unwind_arch_adjustment (void *prev, void *addr) +{ + return addr; +} + +#endif diff --git a/sysdeps/hppa/dl-fptr.c b/sysdeps/hppa/dl-fptr.c index 0a3739728..25ca8f846 100644 --- a/sysdeps/hppa/dl-fptr.c +++ b/sysdeps/hppa/dl-fptr.c @@ -172,8 +172,8 @@ make_fdesc (ElfW(Addr) ip, ElfW(Addr) gp) } install: - fdesc->ip = ip; fdesc->gp = gp; + fdesc->ip = ip; return (ElfW(Addr)) fdesc; } @@ -350,7 +350,9 @@ ElfW(Addr) _dl_lookup_address (const void *address) { ElfW(Addr) addr = (ElfW(Addr)) address; - unsigned int *desc, *gptr; + ElfW(Word) reloc_arg; + volatile unsigned int *desc; + unsigned int *gptr; /* Return ADDR if the least-significant two bits of ADDR are not consistent with ADDR being a linker defined function pointer. The normal value for @@ -367,7 +369,11 @@ _dl_lookup_address (const void *address) if (!_dl_read_access_allowed (desc)) return addr; - /* Load first word of candidate descriptor. It should be a pointer + /* First load the relocation offset. */ + reloc_arg = (ElfW(Word)) desc[1]; + atomic_full_barrier(); + + /* Then load first word of candidate descriptor. It should be a pointer with word alignment and point to memory that can be read.
*/ gptr = (unsigned int *) desc[0]; if (((unsigned int) gptr & 3) != 0 @@ -377,8 +383,8 @@ _dl_lookup_address (const void *address) /* See if descriptor requires resolution. The following trampoline is used in each global offset table for function resolution: - ldw 0(r20),r22 - bv r0(r22) + ldw 0(r20),r21 + bv r0(r21) ldw 4(r20),r21 tramp: b,l .-12,r20 depwi 0,31,2,r20 @@ -389,7 +395,15 @@ _dl_lookup_address (const void *address) if (gptr[0] == 0xea9f1fdd /* b,l .-12,r20 */ && gptr[1] == 0xd6801c1e /* depwi 0,31,2,r20 */ && (ElfW(Addr)) gptr[2] == elf_machine_resolve ()) - _dl_fixup ((struct link_map *) gptr[5], (ElfW(Word)) desc[1]); + { + struct link_map *l = (struct link_map *) gptr[5]; + + /* If gp has been resolved, we need to hunt for relocation offset. */ + if (!(reloc_arg & PA_GP_RELOC)) + reloc_arg = _dl_fix_reloc_arg (addr, l); + + _dl_fixup (l, reloc_arg); + } return (ElfW(Addr)) desc[0]; } diff --git a/sysdeps/hppa/dl-machine.h b/sysdeps/hppa/dl-machine.h index 9e98366ea..8ecff9770 100644 --- a/sysdeps/hppa/dl-machine.h +++ b/sysdeps/hppa/dl-machine.h @@ -48,6 +48,14 @@ #define GOT_FROM_PLT_STUB (4*4) #define PLT_ENTRY_SIZE (2*4) +/* The gp slot in the function descriptor contains the relocation offset + before resolution. To distinguish between a resolved gp value and an + unresolved relocation offset we set an unused bit in the relocation + offset. This would allow us to do a synchronized two word update + using this bit (interlocked update), but instead of waiting for the + update we simply recompute the gp value given that we know the ip. */ +#define PA_GP_RELOC 1 + /* Initialize the function descriptor table before relocations */ static inline void __hppa_init_bootstrap_fdesc_table (struct link_map *map) @@ -117,10 +125,28 @@ elf_machine_fixup_plt (struct link_map *map, lookup_t t, volatile Elf32_Addr *rfdesc = reloc_addr; /* map is the link_map for the caller, t is the link_map for the object being called */ - rfdesc[1] = value.gp; - /* Need to ensure that the gp is visible before the code - entry point is updated */ - rfdesc[0] = value.ip; + + /* We would like the function descriptor to be double word aligned. This + helps performance (ip and gp then reside on the same cache line) and + we can update the pair atomically with a single store. The linker + now ensures this alignment but we still have to handle old code. */ + if ((unsigned int)reloc_addr & 7) + { + /* Need to ensure that the gp is visible before the code + entry point is updated */ + rfdesc[1] = value.gp; + atomic_full_barrier(); + rfdesc[0] = value.ip; + } + else + { + /* Update pair atomically with floating point store. */ + union { ElfW(Word) v[2]; double d; } u; + + u.v[0] = value.ip; + u.v[1] = value.gp; + *(volatile double *)rfdesc = u.d; + } return value; } @@ -265,7 +291,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) here. The trampoline code will load the proper LTP and pass the reloc offset to the fixup function. */ - fptr->gp = iplt - jmprel; + fptr->gp = (iplt - jmprel) | PA_GP_RELOC; } /* r_sym != 0 */ else { diff --git a/sysdeps/hppa/dl-runtime.c b/sysdeps/hppa/dl-runtime.c new file mode 100644 index 000000000..885a3f183 --- /dev/null +++ b/sysdeps/hppa/dl-runtime.c @@ -0,0 +1,58 @@ +/* On-demand PLT fixup for shared objects. HPPA version. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library.
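The tagging trick behind PA_GP_RELOC is easier to see outside the loader. Below is a minimal stand-alone sketch (hypothetical helper names, not glibc code), assuming as above that relocation offsets are multiples of sizeof (Elf32_Rela) and therefore always even, leaving bit 0 free as a marker:

    #include <stdint.h>

    #define PA_GP_RELOC 1u  /* spare low bit, as defined in the hunk above */

    /* Mark a relocation offset before it is stored in the descriptor's
       gp slot.  Offsets into .rela.plt are multiples of 12
       (sizeof (Elf32_Rela)), hence even, so bit 0 never carries offset
       data.  */
    static inline uint32_t
    tag_reloc_offset (uint32_t offset)
    {
      return offset | PA_GP_RELOC;
    }

    /* A gp slot with the bit set still holds a tagged offset; with the
       bit clear it holds a resolved gp value.  */
    static inline int
    gp_slot_is_unresolved (uint32_t gp_slot)
    {
      return (gp_slot & PA_GP_RELOC) != 0;
    }

    static inline uint32_t
    gp_slot_reloc_offset (uint32_t gp_slot)
    {
      return gp_slot & ~PA_GP_RELOC;
    }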
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* Clear PA_GP_RELOC bit in relocation offset. */ +#define reloc_offset (reloc_arg & ~PA_GP_RELOC) +#define reloc_index (reloc_arg & ~PA_GP_RELOC) / sizeof (PLTREL) + +#include <elf/dl-runtime.c> + +/* The caller has encountered a partially relocated function descriptor. + The gp of the descriptor has been updated, but not the ip. We find + the function descriptor again and compute the relocation offset and + return that to the caller. The caller will continue on to call + _dl_fixup with the relocation offset. */ + +ElfW(Word) +attribute_hidden __attribute ((noinline)) ARCH_FIXUP_ATTRIBUTE +_dl_fix_reloc_arg (struct fdesc *fptr, struct link_map *l) +{ + Elf32_Addr l_addr, iplt, jmprel, end_jmprel, r_type; + const Elf32_Rela *reloc; + + l_addr = l->l_addr; + jmprel = D_PTR(l, l_info[DT_JMPREL]); + end_jmprel = jmprel + l->l_info[DT_PLTRELSZ]->d_un.d_val; + + /* Look for the entry... */ + for (iplt = jmprel; iplt < end_jmprel; iplt += sizeof (Elf32_Rela)) + { + reloc = (const Elf32_Rela *) iplt; + r_type = ELF32_R_TYPE (reloc->r_info); + + if (__builtin_expect (r_type == R_PARISC_IPLT, 1) + && fptr == (struct fdesc *) (reloc->r_offset + l_addr)) + /* Found entry. Return the reloc offset. */ + return iplt - jmprel; + } + + /* Crash if we weren't passed a valid function pointer. */ + ABORT_INSTRUCTION; + return 0; +} diff --git a/sysdeps/hppa/dl-trampoline.S b/sysdeps/hppa/dl-trampoline.S index 0114ca8b1..d0804b30c 100644 --- a/sysdeps/hppa/dl-trampoline.S +++ b/sysdeps/hppa/dl-trampoline.S @@ -31,7 +31,7 @@ slow down __cffc when it attempts to call fixup to resolve function descriptor references. Please refer to gcc/gcc/config/pa/fptr.c - Enter with r19 = reloc offset, r20 = got-8, r21 = fixup ltp. */ + Enter with r19 = reloc offset, r20 = got-8, r21 = fixup ltp, r22 = fp. */ /* RELOCATION MARKER: bl to provide gcc's __cffc with fixup loc. */ .text @@ -61,17 +61,20 @@ _dl_runtime_resolve: copy %sp, %r1 /* Copy previous sp */ /* Save function result address (on entry) */ stwm %r28,128(%sp) - /* Fillin some frame info to follow ABI */ + /* Fill in some frame info to follow ABI */ stw %r1,-4(%sp) /* Previous sp */ stw %r21,-32(%sp) /* PIC register value */ /* Save input floating point registers. This must be done in the new frame since the previous frame doesn't have enough space */ - ldo -56(%sp),%r1 + ldo -64(%sp),%r1 fstd,ma %fr4,-8(%r1) fstd,ma %fr5,-8(%r1) fstd,ma %fr6,-8(%r1) + + /* Test PA_GP_RELOC bit. */ + bb,>= %r19,31,2f /* branch if not reloc offset */ fstd,ma %fr7,-8(%r1) /* Set up args to fixup func, needs only two arguments */ @@ -79,7 +82,7 @@ _dl_runtime_resolve: copy %r19,%r25 /* (2) reloc offset */ /* Call the real address resolver.
*/ - bl _dl_fixup,%rp +3: bl _dl_fixup,%rp copy %r21,%r19 /* set fixup func ltp */ /* While the linker will set a function pointer to NULL when it @@ -102,7 +105,7 @@ _dl_runtime_resolve: copy %r29, %r19 /* Reload arguments fp args */ - ldo -56(%sp),%r1 + ldo -64(%sp),%r1 fldd,ma -8(%r1),%fr4 fldd,ma -8(%r1),%fr5 fldd,ma -8(%r1),%fr6 @@ -129,6 +132,25 @@ _dl_runtime_resolve: bv %r0(%rp) ldo -128(%sp),%sp +2: + /* Set up args for _dl_fix_reloc_arg. */ + copy %r22,%r26 /* (1) function pointer */ + depi 0,31,2,%r26 /* clear least significant bits */ + ldw 8+4(%r20),%r25 /* (2) got[1] == struct link_map */ + + /* Save ltp and link map arg for _dl_fixup. */ + stw %r21,-56(%sp) /* ltp */ + stw %r25,-60(%sp) /* struct link map */ + + /* Find reloc offset. */ + bl _dl_fix_reloc_arg,%rp + copy %r21,%r19 /* set func ltp */ + + /* Set up args for _dl_fixup. */ + ldw -56(%sp),%r21 /* ltp */ + ldw -60(%sp),%r26 /* (1) struct link map */ + b 3b + copy %ret0,%r25 /* (2) reloc offset */ .EXIT .PROCEND cfi_endproc @@ -153,7 +175,7 @@ _dl_runtime_profile: copy %sp, %r1 /* Copy previous sp */ /* Save function result address (on entry) */ stwm %r28,192(%sp) - /* Fillin some frame info to follow ABI */ + /* Fill in some frame info to follow ABI */ stw %r1,-4(%sp) /* Previous sp */ stw %r21,-32(%sp) /* PIC register value */ @@ -181,10 +203,11 @@ _dl_runtime_profile: fstd,ma %fr5,8(%r1) fstd,ma %fr6,8(%r1) fstd,ma %fr7,8(%r1) - /* 32-bit stack pointer and return register */ - stw %sp,-56(%sp) - stw %r2,-52(%sp) + /* Test PA_GP_RELOC bit. */ + bb,>= %r19,31,2f /* branch if not reloc offset */ + /* 32-bit stack pointer */ + stw %sp,-56(%sp) /* Set up args to fixup func, needs five arguments */ ldw 8+4(%r20),%r26 /* (1) got[1] == struct link_map */ @@ -197,7 +220,7 @@ _dl_runtime_profile: stw %r1, -52(%sp) /* (5) long int *framesizep */ /* Call the real address resolver. */ - bl _dl_profile_fixup,%rp +3: bl _dl_profile_fixup,%rp copy %r21,%r19 /* set fixup func ltp */ /* Load up the returned function descriptor */ @@ -215,7 +238,9 @@ _dl_runtime_profile: fldd,ma 8(%r1),%fr5 fldd,ma 8(%r1),%fr6 fldd,ma 8(%r1),%fr7 - ldw -52(%sp),%rp + + /* Reload rp register -(192+20) without adjusting stack */ + ldw -212(%sp),%rp /* Reload static link register -(192+16) without adjusting stack */ ldw -208(%sp),%r29 @@ -303,6 +328,33 @@ L(cont): ldw -20(%sp),%rp /* Return */ bv,n 0(%r2) + +2: + /* Set up args for _dl_fix_reloc_arg. */ + copy %r22,%r26 /* (1) function pointer */ + depi 0,31,2,%r26 /* clear least significant bits */ + ldw 8+4(%r20),%r25 /* (2) got[1] == struct link_map */ + + /* Save ltp and link map arg for _dl_fixup. */ + stw %r21,-92(%sp) /* ltp */ + stw %r25,-116(%sp) /* struct link map */ + + /* Find reloc offset. */ + bl _dl_fix_reloc_arg,%rp + copy %r21,%r19 /* set func ltp */ + + /* Restore fixup ltp. 
*/ + ldw -92(%sp),%r21 /* ltp */ + + /* Set up args to fixup func, needs five arguments */ + ldw -116(%sp),%r26 /* (1) struct link map */ + copy %ret0,%r25 /* (2) reloc offset */ + stw %r25,-120(%sp) /* Save reloc offset */ + ldw -212(%sp),%r24 /* (3) profile_fixup needs rp */ + ldo -56(%sp),%r23 /* (4) La_hppa_regs */ + ldo -112(%sp), %r1 + b 3b + stw %r1, -52(%sp) /* (5) long int *framesizep */ .EXIT .PROCEND cfi_endproc diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h index 8af0789a9..4334ade2a 100644 --- a/sysdeps/i386/dl-machine.h +++ b/sysdeps/i386/dl-machine.h @@ -338,16 +338,22 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc, { # ifndef RTLD_BOOTSTRAP if (sym_map != map - && sym_map->l_type != lt_executable && !sym_map->l_relocated) { const char *strtab = (const char *) D_PTR (map, l_info[DT_STRTAB]); - _dl_error_printf ("\ + if (sym_map->l_type == lt_executable) + _dl_fatal_printf ("\ +%s: IFUNC symbol '%s' referenced in '%s' is defined in the executable \ +and creates an unsatisfiable circular dependency.\n", + RTLD_PROGNAME, strtab + refsym->st_name, + map->l_name); + else + _dl_error_printf ("\ %s: Relink `%s' with `%s' for IFUNC symbol `%s'\n", - RTLD_PROGNAME, map->l_name, - sym_map->l_name, - strtab + refsym->st_name); + RTLD_PROGNAME, map->l_name, + sym_map->l_name, + strtab + refsym->st_name); } # endif value = ((Elf32_Addr (*) (void)) value) (); diff --git a/sysdeps/i386/sysdep.h b/sysdeps/i386/sysdep.h index b4bcd8fb6..6094af8fe 100644 --- a/sysdeps/i386/sysdep.h +++ b/sysdeps/i386/sysdep.h @@ -61,7 +61,7 @@ lose: SYSCALL_PIC_SETUP \ # define SETUP_PIC_REG(reg) \ .ifndef GET_PC_THUNK(reg); \ - .section .gnu.linkonce.t.GET_PC_THUNK(reg),"ax",@progbits; \ + .section .text.GET_PC_THUNK(reg),"axG",@progbits,GET_PC_THUNK(reg),comdat; \ .globl GET_PC_THUNK(reg); \ .hidden GET_PC_THUNK(reg); \ .p2align 4; \ @@ -97,7 +97,8 @@ GET_PC_THUNK(reg): \ # define SETUP_PIC_REG_STR(reg) \ ".ifndef " GET_PC_THUNK_STR (reg) "\n" \ - ".section .gnu.linkonce.t." GET_PC_THUNK_STR (reg) ",\"ax\",@progbits\n" \ + ".section .text." GET_PC_THUNK_STR (reg) ",\"axG\",@progbits," \ + GET_PC_THUNK_STR (reg) ",comdat\n" \ ".globl " GET_PC_THUNK_STR (reg) "\n" \ ".hidden " GET_PC_THUNK_STR (reg) "\n" \ ".p2align 4\n" \ diff --git a/sysdeps/ieee754/ldbl-96/Makefile b/sysdeps/ieee754/ldbl-96/Makefile index 995e90d6d..6030adf7e 100644 --- a/sysdeps/ieee754/ldbl-96/Makefile +++ b/sysdeps/ieee754/ldbl-96/Makefile @@ -17,5 +17,8 @@ # <https://www.gnu.org/licenses/>. ifeq ($(subdir),math) -tests += test-canonical-ldbl-96 test-totalorderl-ldbl-96 +tests += test-canonical-ldbl-96 test-totalorderl-ldbl-96 test-sinl-pseudo +ifeq ($(have-ssp),yes) +CFLAGS-test-sinl-pseudo.c += -fstack-protector-all endif +endif # $(subdir) == math diff --git a/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c b/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c index 5f742321a..bcdf20179 100644 --- a/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c +++ b/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c @@ -210,6 +210,18 @@ __ieee754_rem_pio2l (long double x, long double *y) return 0; } + if ((i0 & 0x80000000) == 0) + { + /* Pseudo-zero and unnormal representations are not valid + representations of long double. We need to avoid stack + corruption in __kernel_rem_pio2, which expects input in a + particular normal form, but those representations do not need + to be consistently handled like any particular floating-point + value. */ + y[1] = y[0] = __builtin_nanl (""); + return 0; + } + /* Split the 64 bits of the mantissa into three 24-bit integers stored in a double array.
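For context on the (i0 & 0x80000000) test just added: in the ldbl-96 format the top bit of the high mantissa word is the explicit integer bit. A rough classification sketch (assumed field layout; illustration only, not glibc code):

    #include <stdint.h>

    /* exponent is the 15-bit biased exponent; hi/lo are the high and
       low 32 mantissa bits, with bit 31 of hi the explicit integer
       bit.  */
    static int
    is_unnormal (uint16_t exponent, uint32_t hi, uint32_t lo)
    {
      /* Nonzero exponent with a clear integer bit is not a valid long
         double; __kernel_rem_pio2 must never see such an input.  */
      return (exponent & 0x7fff) != 0 && (hi & 0x80000000u) == 0;
    }

    static int
    is_pseudo_zero (uint16_t exponent, uint32_t hi, uint32_t lo)
    {
      /* The all-zero-mantissa subcase of the above.  */
      return (exponent & 0x7fff) != 0 && hi == 0 && lo == 0;
    }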
*/ exp = j0 - 23; diff --git a/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c b/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c new file mode 100644 index 000000000..f59b97769 --- /dev/null +++ b/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c @@ -0,0 +1,41 @@ +/* Test sinl for pseudo-zeros and unnormals for ldbl-96 (bug 25487). + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <math.h> +#include <math_ldbl.h> +#include <stdint.h> + +static int +do_test (void) +{ + for (int i = 0; i < 64; i++) + { + uint64_t sig = i == 63 ? 0 : 1ULL << i; + long double ld; + SET_LDOUBLE_WORDS (ld, 0x4141, + sig >> 32, sig & 0xffffffffULL); + /* The requirement is that no stack overflow occurs when the + pseudo-zero or unnormal goes through range reduction. */ + volatile long double ldr; + ldr = sinl (ld); + (void) ldr; + } + return 0; +} + +#include <support/test-driver.c> diff --git a/sysdeps/posix/getcwd.c b/sysdeps/posix/getcwd.c index f00b337a1..839d78d7b 100644 --- a/sysdeps/posix/getcwd.c +++ b/sysdeps/posix/getcwd.c @@ -241,6 +241,14 @@ __getcwd (char *buf, size_t size) char *path; #ifndef NO_ALLOCATION size_t allocated = size; + + /* A size of 1 byte is never useful. */ + if (allocated == 1) + { + __set_errno (ERANGE); + return NULL; + } + if (size == 0) { if (buf != NULL) diff --git a/sysdeps/posix/system.c b/sysdeps/posix/system.c index e613e6a34..a03f478fc 100644 --- a/sysdeps/posix/system.c +++ b/sysdeps/posix/system.c @@ -101,7 +101,8 @@ cancel_handler (void *arg) static int do_system (const char *line) { - int status; + int status = -1; + int ret; pid_t pid; struct sigaction sa; #ifndef _LIBC_REENTRANT @@ -144,14 +145,14 @@ do_system (const char *line) __posix_spawnattr_setflags (&spawn_attr, POSIX_SPAWN_SETSIGDEF | POSIX_SPAWN_SETSIGMASK); - status = __posix_spawn (&pid, SHELL_PATH, 0, &spawn_attr, - (char *const[]){ (char*) SHELL_NAME, - (char*) "-c", - (char *) line, NULL }, - __environ); + ret = __posix_spawn (&pid, SHELL_PATH, 0, &spawn_attr, - (char *const[]){ (char *) SHELL_NAME, + (char *const[]){ (char *) SHELL_NAME, + (char *) "-c", + (char *) line, NULL }, + __environ); __posix_spawnattr_destroy (&spawn_attr); - if (status == 0) + if (ret == 0) { /* Cancellation results in cleanup handlers running as exceptions in the block where they were installed, so it is safe to reference @@ -186,6 +187,9 @@ do_system (const char *line) } DO_UNLOCK (); + if (ret != 0) + __set_errno (ret); + return status; } diff --git a/sysdeps/powerpc/powerpc32/sysdep.h b/sysdeps/powerpc/powerpc32/sysdep.h index 2ba009e91..829eec266 100644 --- a/sysdeps/powerpc/powerpc32/sysdep.h +++ b/sysdeps/powerpc/powerpc32/sysdep.h @@ -179,8 +179,8 @@ GOT_LABEL: ; \ #else /* Position-dependent code does not require access to the GOT.
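The do_system change above separates posix_spawn's error code (ret) from the child's wait status and forwards spawn failures through errno. A caller-side sketch of the resulting contract, using only standard POSIX interfaces:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/wait.h>

    int
    main (void)
    {
      int status = system ("exit 42");
      if (status == -1)
        /* The shell could not be spawned; errno now carries the
           posix_spawn error instead of leaking it as a fake status.  */
        perror ("system");
      else if (WIFEXITED (status))
        printf ("shell exited with %d\n", WEXITSTATUS (status)); /* 42 */
      return 0;
    }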
*/ # define __GLRO(rOUT, rGOT, member, offset) \ - lis rOUT,(member+LOWORD)@ha; \ - lwz rOUT,(member+LOWORD)@l(rOUT) + lis rOUT,(member)@ha; \ + lwz rOUT,(member)@l(rOUT) #endif /* PIC */ #endif /* __ASSEMBLER__ */ diff --git a/sysdeps/powerpc/powerpc64/backtrace.c b/sysdeps/powerpc/powerpc64/backtrace.c index 8a53a1088..362a2b713 100644 --- a/sysdeps/powerpc/powerpc64/backtrace.c +++ b/sysdeps/powerpc/powerpc64/backtrace.c @@ -54,11 +54,22 @@ struct signal_frame_64 { /* We don't care about the rest, since the IP value is at 'uc' field. */ }; +/* Test if the address is inside the trampoline code. + Up to and including kernel 5.8, returning from an interrupt or syscall to a + signal handler starts execution directly at the handler's entry point, with + LR set to the address of the sigreturn trampoline (the vDSO symbol). + Newer kernels branch to the signal handler from the trampoline instead, so + checking the stacktrace against the vDSO entry point does not work in that + case. + The vDSO branches with a 'bctrl' instruction, so checking both the + vDSO address itself and the next instruction should cover all kernel + versions. */ static inline bool is_sigtramp_address (void *nip) { #ifdef HAVE_SIGTRAMP_RT64 - if (nip == GLRO (dl_vdso_sigtramp_rt64)) + if (nip == GLRO (dl_vdso_sigtramp_rt64) || + nip == GLRO (dl_vdso_sigtramp_rt64) + 4) return true; #endif return false; diff --git a/sysdeps/s390/configure b/sysdeps/s390/configure index fa46e9e35..e7f576338 100644 --- a/sysdeps/s390/configure +++ b/sysdeps/s390/configure @@ -123,7 +123,9 @@ void testinsn (char *buf) __asm__ (".machine \"arch13\" \n\t" ".machinemode \"zarch_nohighgprs\" \n\t" "lghi %%r0,16 \n\t" - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); + "mvcrl 0(%0),32(%0) \n\t" + "vstrs %%v20,%%v20,%%v20,%%v20,0,2" + : : "a" (buf) : "memory", "r0"); } EOF if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c @@ -271,7 +273,9 @@ else void testinsn (char *buf) { __asm__ ("lghi %%r0,16 \n\t" - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); + "mvcrl 0(%0),32(%0) \n\t" + "vstrs %%v20,%%v20,%%v20,%%v20,0,2" + : : "a" (buf) : "memory", "r0"); } EOF if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c diff --git a/sysdeps/s390/configure.ac b/sysdeps/s390/configure.ac index 3ed5a8ef8..5c3479e8c 100644 --- a/sysdeps/s390/configure.ac +++ b/sysdeps/s390/configure.ac @@ -88,7 +88,9 @@ void testinsn (char *buf) __asm__ (".machine \"arch13\" \n\t" ".machinemode \"zarch_nohighgprs\" \n\t" "lghi %%r0,16 \n\t" - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); + "mvcrl 0(%0),32(%0) \n\t" + "vstrs %%v20,%%v20,%%v20,%%v20,0,2" + : : "a" (buf) : "memory", "r0"); } EOF dnl test, if assembler supports S390 arch13 instructions @@ -195,7 +197,9 @@ cat > conftest.c <<\EOF void testinsn (char *buf) { __asm__ ("lghi %%r0,16 \n\t" - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); + "mvcrl 0(%0),32(%0) \n\t" + "vstrs %%v20,%%v20,%%v20,%%v20,0,2" + : : "a" (buf) : "memory", "r0"); } EOF dnl test, if assembler supports S390 arch13 zarch instructions as default diff --git a/sysdeps/s390/memmove.c b/sysdeps/s390/memmove.c index 5fc85e129..ee59b5de1 100644 --- a/sysdeps/s390/memmove.c +++ b/sysdeps/s390/memmove.c @@ -43,7 +43,7 @@ extern __typeof (__redirect_memmove) MEMMOVE_ARCH13 attribute_hidden; s390_libc_ifunc_expr (__redirect_memmove, memmove, ({ s390_libc_ifunc_expr_stfle_init (); - (HAVE_MEMMOVE_ARCH13 + (HAVE_MEMMOVE_ARCH13 && (hwcap & HWCAP_S390_VXRS_EXT2) && S390_IS_ARCH13_MIE3 (stfle_bits))
? MEMMOVE_ARCH13 : (HAVE_MEMMOVE_Z13 && (hwcap & HWCAP_S390_VX)) diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c index e6195c6e2..17c0cc395 100644 --- a/sysdeps/s390/multiarch/ifunc-impl-list.c +++ b/sysdeps/s390/multiarch/ifunc-impl-list.c @@ -171,7 +171,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, memmove, # if HAVE_MEMMOVE_ARCH13 IFUNC_IMPL_ADD (array, i, memmove, - S390_IS_ARCH13_MIE3 (stfle_bits), + ((dl_hwcap & HWCAP_S390_VXRS_EXT2) + && S390_IS_ARCH13_MIE3 (stfle_bits)), MEMMOVE_ARCH13) # endif # if HAVE_MEMMOVE_Z13 diff --git a/sysdeps/sh/be/sh4/fpu/Implies b/sysdeps/sh/be/sh4/fpu/Implies new file mode 100644 index 000000000..71b28ee1a --- /dev/null +++ b/sysdeps/sh/be/sh4/fpu/Implies @@ -0,0 +1 @@ +sh/sh4/fpu diff --git a/sysdeps/sh/le/sh4/fpu/Implies b/sysdeps/sh/le/sh4/fpu/Implies new file mode 100644 index 000000000..71b28ee1a --- /dev/null +++ b/sysdeps/sh/le/sh4/fpu/Implies @@ -0,0 +1 @@ +sh/sh4/fpu diff --git a/sysdeps/unix/make-syscalls.sh b/sysdeps/unix/make-syscalls.sh index c07626677..4f6c3490a 100644 --- a/sysdeps/unix/make-syscalls.sh +++ b/sysdeps/unix/make-syscalls.sh @@ -30,6 +30,7 @@ # P: optionally-NULL pointer to typed object (e.g., 3rd argument to sigaction) # s: non-NULL string (e.g., 1st arg to open) # S: optionally-NULL string (e.g., 1st arg to acct) +# U: unsigned long int (32-bit types are zero-extended to 64-bit types) # v: vararg scalar (e.g., optional 3rd arg to open) # V: byte-per-page vector (3rd arg to mincore) # W: wait status, optionally-NULL pointer to int (e.g., 2nd arg of wait4) @@ -184,6 +185,27 @@ while read file srcfile caller syscall args strong weak; do ?:?????????) nargs=9;; esac + # Derive the unsigned long int arguments from the argument signature + ulong_arg_1=0 + ulong_arg_2=0 + ulong_count=0 + for U in $(echo $args | sed -e "s/.*:/:/" | grep -ob U) + do + ulong_count=$(expr $ulong_count + 1) + ulong_arg=$(echo $U | sed -e "s/:U//") + case $ulong_count in + 1) + ulong_arg_1=$ulong_arg + ;; + 2) + ulong_arg_2=$ulong_arg + ;; + *) + echo >&2 "$0: Too many unsigned long int arguments for syscall ($strong $weak)" + exit 2 + esac + done + # Make sure only the first syscall rule is used, if multiple dirs # define the same syscall. echo '' @@ -245,6 +267,8 @@ while read file srcfile caller syscall args strong weak; do \$(make-target-directory) (echo '#define SYSCALL_NAME $syscall'; \\ echo '#define SYSCALL_NARGS $nargs'; \\ + echo '#define SYSCALL_ULONG_ARG_1 $ulong_arg_1'; \\ + echo '#define SYSCALL_ULONG_ARG_2 $ulong_arg_2'; \\ echo '#define SYSCALL_SYMBOL $strong'; \\ echo '#define SYSCALL_NOERRNO $noerrno'; \\ echo '#define SYSCALL_ERRVAL $errval'; \\ diff --git a/sysdeps/unix/syscall-template.S b/sysdeps/unix/syscall-template.S index cf6c7a58f..f807a8603 100644 --- a/sysdeps/unix/syscall-template.S +++ b/sysdeps/unix/syscall-template.S @@ -25,6 +25,12 @@ defining a few macros: SYSCALL_NAME syscall name SYSCALL_NARGS number of arguments this call takes + SYSCALL_ULONG_ARG_1 the first unsigned long int argument this + call takes. 0 means that there are no + unsigned long int arguments. + SYSCALL_ULONG_ARG_2 the second unsigned long int argument this + call takes. 0 means that there is at most + one unsigned long int argument. 
SYSCALL_SYMBOL primary symbol name SYSCALL_NOERRNO 1 to define a no-errno version (see below) SYSCALL_ERRVAL 1 to define an error-value version (see below) @@ -44,9 +50,31 @@ /* This indirection is needed so that SYMBOL gets macro-expanded. */ #define syscall_hidden_def(SYMBOL) hidden_def (SYMBOL) -#define T_PSEUDO(SYMBOL, NAME, N) PSEUDO (SYMBOL, NAME, N) -#define T_PSEUDO_NOERRNO(SYMBOL, NAME, N) PSEUDO_NOERRNO (SYMBOL, NAME, N) -#define T_PSEUDO_ERRVAL(SYMBOL, NAME, N) PSEUDO_ERRVAL (SYMBOL, NAME, N) +/* If PSEUDOS_HAVE_ULONG_INDICES is defined, PSEUDO and T_PSEUDO macros + have 2 extra arguments for unsigned long int arguments: + Extra argument 1: Position of the first unsigned long int argument. + Extra argument 2: Position of the second unsigned long int argument. + */ +#ifndef PSEUDOS_HAVE_ULONG_INDICES +# undef SYSCALL_ULONG_ARG_1 +# define SYSCALL_ULONG_ARG_1 0 +#endif + +#if SYSCALL_ULONG_ARG_1 +# define T_PSEUDO(SYMBOL, NAME, N, U1, U2) \ + PSEUDO (SYMBOL, NAME, N, U1, U2) +# define T_PSEUDO_NOERRNO(SYMBOL, NAME, N, U1, U2) \ + PSEUDO_NOERRNO (SYMBOL, NAME, N, U1, U2) +# define T_PSEUDO_ERRVAL(SYMBOL, NAME, N, U1, U2) \ + PSEUDO_ERRVAL (SYMBOL, NAME, N, U1, U2) +#else +# define T_PSEUDO(SYMBOL, NAME, N) \ + PSEUDO (SYMBOL, NAME, N) +# define T_PSEUDO_NOERRNO(SYMBOL, NAME, N) \ + PSEUDO_NOERRNO (SYMBOL, NAME, N) +# define T_PSEUDO_ERRVAL(SYMBOL, NAME, N) \ + PSEUDO_ERRVAL (SYMBOL, NAME, N) +#endif #define T_PSEUDO_END(SYMBOL) PSEUDO_END (SYMBOL) #define T_PSEUDO_END_NOERRNO(SYMBOL) PSEUDO_END_NOERRNO (SYMBOL) #define T_PSEUDO_END_ERRVAL(SYMBOL) PSEUDO_END_ERRVAL (SYMBOL) @@ -56,7 +84,12 @@ /* This kind of system call stub never returns an error. We return the return value register to the caller unexamined. */ +# if SYSCALL_ULONG_ARG_1 +T_PSEUDO_NOERRNO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS, + SYSCALL_ULONG_ARG_1, SYSCALL_ULONG_ARG_2) +# else T_PSEUDO_NOERRNO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS) +# endif ret_NOERRNO T_PSEUDO_END_NOERRNO (SYSCALL_SYMBOL) @@ -66,7 +99,12 @@ T_PSEUDO_END_NOERRNO (SYSCALL_SYMBOL) value, or zero for success. We may massage the kernel's return value to meet that ABI, but we never set errno here. */ +# if SYSCALL_ULONG_ARG_1 +T_PSEUDO_ERRVAL (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS, + SYSCALL_ULONG_ARG_1, SYSCALL_ULONG_ARG_2) +# else T_PSEUDO_ERRVAL (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS) +# endif ret_ERRVAL T_PSEUDO_END_ERRVAL (SYSCALL_SYMBOL) @@ -75,7 +113,12 @@ T_PSEUDO_END_ERRVAL (SYSCALL_SYMBOL) /* This is a "normal" system call stub: if there is an error, it returns -1 and sets errno. 
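The SYSCALL_ULONG_ARG machinery above exists because ILP32 ABIs on 64-bit kernels (x32, MIPS n32) pass 32-bit unsigned long int values in 64-bit registers whose upper halves the kernel reads in full. A hedged illustration of the zero-extension the per-port PSEUDO macros must perform (hypothetical helper, not the actual macro expansion):

    #include <stdint.h>

    /* A 32-bit size_t handed to the kernel in a 64-bit register must
       not carry garbage in bits 32-63.  On x86-64 a 32-bit mov such as
       "movl %esi, %esi" clears the upper half; the C conversion below
       is the same operation.  */
    static inline uint64_t
    zero_extend_syscall_arg (uint32_t arg)
    {
      return (uint64_t) arg;  /* upper 32 bits are guaranteed zero */
    }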
*/ +# if SYSCALL_ULONG_ARG_1 +T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS, + SYSCALL_ULONG_ARG_1, SYSCALL_ULONG_ARG_2) +# else T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS) +# endif ret T_PSEUDO_END (SYSCALL_SYMBOL) diff --git a/sysdeps/unix/syscalls.list b/sysdeps/unix/syscalls.list index e28e801c7..6b22b2cb4 100644 --- a/sysdeps/unix/syscalls.list +++ b/sysdeps/unix/syscalls.list @@ -39,27 +39,27 @@ kill - kill i:ii __kill kill link - link i:ss __link link listen - listen i:ii __listen listen lseek - lseek i:iii __libc_lseek __lseek lseek -madvise - madvise i:pii __madvise madvise +madvise - madvise i:pUi __madvise madvise mkdir - mkdir i:si __mkdir mkdir -mmap - mmap b:aniiii __mmap mmap -mprotect - mprotect i:aii __mprotect mprotect -munmap - munmap i:ai __munmap munmap +mmap - mmap b:aUiiii __mmap mmap +mprotect - mprotect i:aUi __mprotect mprotect +munmap - munmap i:aU __munmap munmap open - open Ci:siv __libc_open __open open profil - profil i:piii __profil profil ptrace - ptrace i:iiii ptrace -read - read Ci:ibn __libc_read __read read -readlink - readlink i:spi __readlink readlink +read - read Ci:ibU __libc_read __read read +readlink - readlink i:spU __readlink readlink readv - readv Ci:ipi __readv readv reboot - reboot i:i reboot -recv - recv Ci:ibni __libc_recv recv -recvfrom - recvfrom Ci:ibniBN __libc_recvfrom __recvfrom recvfrom +recv - recv Ci:ibUi __libc_recv recv +recvfrom - recvfrom Ci:ibUiBN __libc_recvfrom __recvfrom recvfrom recvmsg - recvmsg Ci:ipi __libc_recvmsg __recvmsg recvmsg rename - rename i:ss rename rmdir - rmdir i:s __rmdir rmdir select - select Ci:iPPPP __select __libc_select select -send - send Ci:ibni __libc_send __send send +send - send Ci:ibUi __libc_send __send send sendmsg - sendmsg Ci:ipi __libc_sendmsg __sendmsg sendmsg -sendto - sendto Ci:ibnibn __libc_sendto __sendto sendto +sendto - sendto Ci:ibUibn __libc_sendto __sendto sendto setdomain - setdomainname i:si setdomainname setegid - setegid i:i __setegid setegid seteuid - seteuid i:i __seteuid seteuid @@ -94,5 +94,5 @@ uname - uname i:p __uname uname unlink - unlink i:s __unlink unlink utimes - utimes i:sp __utimes utimes vhangup - vhangup i:i vhangup -write - write Ci:ibn __libc_write __write write +write - write Ci:ibU __libc_write __write write writev - writev Ci:ipi __writev writev diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile index f12b7b1a2..0a0da0015 100644 --- a/sysdeps/unix/sysv/linux/Makefile +++ b/sysdeps/unix/sysv/linux/Makefile @@ -60,7 +60,9 @@ sysdep_routines += adjtimex clone umount umount2 readahead \ setfsuid setfsgid epoll_pwait signalfd \ eventfd eventfd_read eventfd_write prlimit \ personality epoll_wait tee vmsplice splice \ - open_by_handle_at mlock2 pkey_mprotect pkey_set pkey_get + open_by_handle_at mlock2 pkey_mprotect pkey_set pkey_get \ + prctl \ + process_vm_readv process_vm_writev CFLAGS-gethostid.c = -fexceptions CFLAGS-tee.c = -fexceptions -fasynchronous-unwind-tables @@ -273,7 +275,7 @@ sysdep_routines += xstatconv internal_statvfs internal_statvfs64 \ sysdep_headers += bits/fcntl-linux.h -tests += tst-fallocate tst-fallocate64 +tests += tst-fallocate tst-fallocate64 tst-getcwd-smallbuff endif ifeq ($(subdir),elf) diff --git a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h index 937838774..c8471947b 100644 --- a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h @@ -17,6 +17,7 @@ #define __NR_clock_nanosleep 115 
#define __NR_clock_settime 112 #define __NR_clone 220 +#define __NR_clone3 435 #define __NR_close 57 #define __NR_connect 203 #define __NR_copy_file_range 285 diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h index 1389cea1b..346d045fb 100644 --- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h +++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h @@ -51,8 +51,12 @@ #define IS_PHECDA(midr) (MIDR_IMPLEMENTOR(midr) == 'h' \ && MIDR_PARTNUM(midr) == 0x000) -#define IS_ARES(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ - && MIDR_PARTNUM(midr) == 0xd0c) +#define IS_NEOVERSE_N1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ + && MIDR_PARTNUM(midr) == 0xd0c) +#define IS_NEOVERSE_N2(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ + && MIDR_PARTNUM(midr) == 0xd49) +#define IS_NEOVERSE_V1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ + && MIDR_PARTNUM(midr) == 0xd40) #define IS_EMAG(midr) (MIDR_IMPLEMENTOR(midr) == 'P' \ && MIDR_PARTNUM(midr) == 0x000) diff --git a/sysdeps/unix/sysv/linux/aarch64/localplt.data b/sysdeps/unix/sysv/linux/aarch64/localplt.data index a60053b91..08af68b5e 100644 --- a/sysdeps/unix/sysv/linux/aarch64/localplt.data +++ b/sysdeps/unix/sysv/linux/aarch64/localplt.data @@ -7,6 +7,9 @@ libc.so: malloc libc.so: memalign libc.so: realloc libm.so: matherr +# If outline atomics are used, libgcc (built outside of glibc) may +# call __getauxval using the PLT. +libc.so: __getauxval ? # The dynamic loader needs __tls_get_addr for TLS. ld.so: __tls_get_addr # The main malloc is interposed into the dynamic linker, for diff --git a/sysdeps/unix/sysv/linux/getpt.c b/sysdeps/unix/sysv/linux/getpt.c index 1803b232c..3cc745e11 100644 --- a/sysdeps/unix/sysv/linux/getpt.c +++ b/sysdeps/unix/sysv/linux/getpt.c @@ -16,69 +16,18 @@ License along with the GNU C Library; if not, see . */ -#include #include -#include #include #include -#include - -#include "linux_fsinfo.h" /* Path to the master pseudo terminal cloning device. */ #define _PATH_DEVPTMX _PATH_DEV "ptmx" -/* Directory containing the UNIX98 pseudo terminals. */ -#define _PATH_DEVPTS _PATH_DEV "pts" - -/* Prototype for function that opens BSD-style master pseudo-terminals. */ -extern int __bsd_getpt (void) attribute_hidden; /* Open a master pseudo terminal and return its file descriptor. */ int __posix_openpt (int oflag) { - static int have_no_dev_ptmx; - int fd; - - if (!have_no_dev_ptmx) - { - fd = __open (_PATH_DEVPTMX, oflag); - if (fd != -1) - { - struct statfs fsbuf; - static int devpts_mounted; - - /* Check that the /dev/pts filesystem is mounted - or if /dev is a devfs filesystem (this implies /dev/pts). */ - if (devpts_mounted - || (__statfs (_PATH_DEVPTS, &fsbuf) == 0 - && fsbuf.f_type == DEVPTS_SUPER_MAGIC) - || (__statfs (_PATH_DEV, &fsbuf) == 0 - && fsbuf.f_type == DEVFS_SUPER_MAGIC)) - { - /* Everything is ok. */ - devpts_mounted = 1; - return fd; - } - - /* If /dev/pts is not mounted then the UNIX98 pseudo terminals - are not usable. 
*/ - __close (fd); - have_no_dev_ptmx = 1; - __set_errno (ENOENT); - } - else - { - if (errno == ENOENT || errno == ENODEV) - have_no_dev_ptmx = 1; - else - return -1; - } - } - else - __set_errno (ENOENT); - - return -1; + return __open (_PATH_DEVPTMX, oflag); } weak_alias (__posix_openpt, posix_openpt) @@ -86,16 +35,6 @@ weak_alias (__posix_openpt, posix_openpt) int __getpt (void) { - int fd = __posix_openpt (O_RDWR); - if (fd == -1) - fd = __bsd_getpt (); - return fd; + return __posix_openpt (O_RDWR); } - - -#define PTYNAME1 "pqrstuvwxyzabcde"; -#define PTYNAME2 "0123456789abcdef"; - -#define __getpt __bsd_getpt -#define HAVE_POSIX_OPENPT -#include +weak_alias (__getpt, getpt) diff --git a/sysdeps/unix/sysv/linux/grantpt.c b/sysdeps/unix/sysv/linux/grantpt.c index 2030e07fa..43122f9a7 100644 --- a/sysdeps/unix/sysv/linux/grantpt.c +++ b/sysdeps/unix/sysv/linux/grantpt.c @@ -1,44 +1,41 @@ -#include -#include -#include -#include -#include -#include -#include -#include +/* grantpt implementation for Linux. + Copyright (C) 1998-2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Zack Weinberg , 1998. -#include + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. -#include "pty-private.h" + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. -#if HAVE_PT_CHOWN -/* Close all file descriptors except the one specified. */ -static void -close_all_fds (void) -{ - DIR *dir = __opendir ("/proc/self/fd"); - if (dir != NULL) - { - struct dirent64 *d; - while ((d = __readdir64 (dir)) != NULL) - if (isdigit (d->d_name[0])) - { - char *endp; - long int fd = strtol (d->d_name, &endp, 10); - if (*endp == '\0' && fd != PTY_FILENO && fd != dirfd (dir)) - __close_nocancel_nostatus (fd); - } + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include - __closedir (dir); +int +grantpt (int fd) +{ + /* Without pt_chown on Linux, we have delegated the creation of the + pty node with the right group and permission mode to the kernel, and + non-root users are unlikely to be able to change it. Therefore let's + consider that POSIX enforcement is the responsibility of the whole + system and not only the GNU libc. */ - int nullfd = __open_nocancel (_PATH_DEVNULL, O_RDONLY); - assert (nullfd == STDIN_FILENO); - nullfd = __open_nocancel (_PATH_DEVNULL, O_WRONLY); - assert (nullfd == STDOUT_FILENO); - __dup2 (STDOUT_FILENO, STDERR_FILENO); - } + /* Verify that fd refers to a ptmx descriptor. */ + unsigned int ptyno; + int ret = __ioctl (fd, TIOCGPTN, &ptyno); + if (ret != 0 && errno == ENOTTY) + /* POSIX requires EINVAL instead of ENOTTY provided by the kernel. 
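With grantpt reduced on Linux to a TIOCGPTN validity check, the portable caller sequence is unchanged. A usage sketch with standard POSIX calls:

    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    /* Open a pseudo-terminal master and prepare its slave side.  */
    static int
    open_pty_master (void)
    {
      int fd = posix_openpt (O_RDWR | O_NOCTTY);
      if (fd == -1)
        return -1;
      /* On Linux, grantpt now only verifies fd refers to a pty master;
         the kernel already created the slave node with the right
         ownership and mode.  */
      if (grantpt (fd) != 0 || unlockpt (fd) != 0)
        {
          close (fd);
          return -1;
        }
      printf ("slave side: %s\n", ptsname (fd)); /* e.g. /dev/pts/3 */
      return fd;
    }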
*/ + __set_errno (EINVAL); + return ret; } -# define CLOSE_ALL_FDS() close_all_fds() -#endif - -#include diff --git a/sysdeps/unix/sysv/linux/hppa/atomic-machine.h b/sysdeps/unix/sysv/linux/hppa/atomic-machine.h index 9d8ffbe86..bf61b66b7 100644 --- a/sysdeps/unix/sysv/linux/hppa/atomic-machine.h +++ b/sysdeps/unix/sysv/linux/hppa/atomic-machine.h @@ -36,9 +36,37 @@ typedef uintptr_t uatomicptr_t; typedef intmax_t atomic_max_t; typedef uintmax_t uatomic_max_t; +#define atomic_full_barrier() __sync_synchronize () + #define __HAVE_64B_ATOMICS 0 #define USE_ATOMIC_COMPILER_BUILTINS 0 +/* We use the compiler atomic load and store builtins as the generic + defines are not atomic. In particular, we need to use compare and + exchange for stores as the implementation is synthesized. */ +void __atomic_link_error (void); +#define __atomic_check_size_ls(mem) \ + if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && sizeof (*mem) != 4) \ + __atomic_link_error (); + +#define atomic_load_relaxed(mem) \ + ({ __atomic_check_size_ls((mem)); \ + __atomic_load_n ((mem), __ATOMIC_RELAXED); }) +#define atomic_load_acquire(mem) \ + ({ __atomic_check_size_ls((mem)); \ + __atomic_load_n ((mem), __ATOMIC_ACQUIRE); }) + +#define atomic_store_relaxed(mem, val) \ + do { \ + __atomic_check_size_ls((mem)); \ + __atomic_store_n ((mem), (val), __ATOMIC_RELAXED); \ + } while (0) +#define atomic_store_release(mem, val) \ + do { \ + __atomic_check_size_ls((mem)); \ + __atomic_store_n ((mem), (val), __ATOMIC_RELEASE); \ + } while (0) + /* XXX Is this actually correct? */ #define ATOMIC_EXCHANGE_USES_CAS 1 diff --git a/sysdeps/unix/sysv/linux/microblaze/sysdep.h b/sysdeps/unix/sysv/linux/microblaze/sysdep.h index ed873d9dd..796663a23 100644 --- a/sysdeps/unix/sysv/linux/microblaze/sysdep.h +++ b/sysdeps/unix/sysv/linux/microblaze/sysdep.h @@ -209,8 +209,8 @@ SYSCALL_ERROR_LABEL_DCL: \ # define inline_syscall0(name,dummy) \ ({ \ - register long __ret __asm__("r3"); \ - register long __r12 __asm__("r12") = name; \ + register long int __ret __asm__("r3"); \ + register long int __r12 __asm__("r12") = name; \ __asm__ __volatile__( "brki r14,8; nop;" \ : "=r"(__ret) \ : "r"(__r12) \ @@ -219,9 +219,10 @@ SYSCALL_ERROR_LABEL_DCL: \ # define inline_syscall1(name,arg1) \ ({ \ - register long __ret __asm__("r3"); \ - register long __r12 __asm__("r12") = name; \ - register long __r5 __asm__("r5") = (long)(arg1); \ + long int __arg1 = (long int) (arg1); \ + register long int __ret __asm__("r3"); \ + register long int __r12 __asm__("r12") = name; \ + register long int __r5 __asm__("r5") = __arg1; \ __asm__ __volatile__( "brki r14,8; nop;" \ : "=r"(__ret) \ : "r"(__r5), "r"(__r12) \ @@ -230,10 +231,12 @@ SYSCALL_ERROR_LABEL_DCL: \ # define inline_syscall2(name,arg1,arg2) \ ({ \ - register long __ret __asm__("r3"); \ - register long __r12 __asm__("r12") = name; \ - register long __r5 __asm__("r5") = (long)(arg1); \ - register long __r6 __asm__("r6") = (long)(arg2); \ + long int __arg1 = (long int) (arg1); \ + long int __arg2 = (long int) (arg2); \ + register long int __ret __asm__("r3"); \ + register long int __r12 __asm__("r12") = name; \ + register long int __r5 __asm__("r5") = __arg1; \ + register long int __r6 __asm__("r6") = __arg2; \ __asm__ __volatile__( "brki r14,8; nop;" \ : "=r"(__ret) \ : "r"(__r5), "r"(__r6), "r"(__r12) \ @@ -243,11 +246,14 @@ SYSCALL_ERROR_LABEL_DCL: \ # define inline_syscall3(name,arg1,arg2,arg3) \ ({ \ - register long __ret __asm__("r3"); \ - register long __r12 __asm__("r12") = name; \ - register long __r5 
__asm__("r5") = (long)(arg1); \ - register long __r6 __asm__("r6") = (long)(arg2); \ - register long __r7 __asm__("r7") = (long)(arg3); \ + long int __arg1 = (long int) (arg1); \ + long int __arg2 = (long int) (arg2); \ + long int __arg3 = (long int) (arg3); \ + register long int __ret __asm__("r3"); \ + register long int __r12 __asm__("r12") = name; \ + register long int __r5 __asm__("r5") = __arg1; \ + register long int __r6 __asm__("r6") = __arg2; \ + register long int __r7 __asm__("r7") = __arg3; \ __asm__ __volatile__( "brki r14,8; nop;" \ : "=r"(__ret) \ : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r12) \ @@ -257,12 +263,16 @@ SYSCALL_ERROR_LABEL_DCL: \ # define inline_syscall4(name,arg1,arg2,arg3,arg4) \ ({ \ - register long __ret __asm__("r3"); \ - register long __r12 __asm__("r12") = name; \ - register long __r5 __asm__("r5") = (long)(arg1); \ - register long __r6 __asm__("r6") = (long)(arg2); \ - register long __r7 __asm__("r7") = (long)(arg3); \ - register long __r8 __asm__("r8") = (long)(arg4); \ + long int __arg1 = (long int) (arg1); \ + long int __arg2 = (long int) (arg2); \ + long int __arg3 = (long int) (arg3); \ + long int __arg4 = (long int) (arg4); \ + register long int __ret __asm__("r3"); \ + register long int __r12 __asm__("r12") = name; \ + register long int __r5 __asm__("r5") = __arg1; \ + register long int __r6 __asm__("r6") = __arg2; \ + register long int __r7 __asm__("r7") = __arg3; \ + register long int __r8 __asm__("r8") = __arg4; \ __asm__ __volatile__( "brki r14,8; nop;" \ : "=r"(__ret) \ : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r8),"r"(__r12) \ @@ -272,13 +282,18 @@ SYSCALL_ERROR_LABEL_DCL: \ # define inline_syscall5(name,arg1,arg2,arg3,arg4,arg5) \ ({ \ - register long __ret __asm__("r3"); \ - register long __r12 __asm__("r12") = name; \ - register long __r5 __asm__("r5") = (long)(arg1); \ - register long __r6 __asm__("r6") = (long)(arg2); \ - register long __r7 __asm__("r7") = (long)(arg3); \ - register long __r8 __asm__("r8") = (long)(arg4); \ - register long __r9 __asm__("r9") = (long)(arg5); \ + long int __arg1 = (long int) (arg1); \ + long int __arg2 = (long int) (arg2); \ + long int __arg3 = (long int) (arg3); \ + long int __arg4 = (long int) (arg4); \ + long int __arg5 = (long int) (arg5); \ + register long int __ret __asm__("r3"); \ + register long int __r12 __asm__("r12") = name; \ + register long int __r5 __asm__("r5") = __arg1; \ + register long int __r6 __asm__("r6") = __arg2; \ + register long int __r7 __asm__("r7") = __arg3; \ + register long int __r8 __asm__("r8") = __arg4; \ + register long int __r9 __asm__("r9") = __arg5; \ __asm__ __volatile__( "brki r14,8; nop;" \ : "=r"(__ret) \ : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r8),"r"(__r9), "r"(__r12) \ @@ -288,14 +303,20 @@ SYSCALL_ERROR_LABEL_DCL: \ # define inline_syscall6(name,arg1,arg2,arg3,arg4,arg5,arg6) \ ({ \ - register long __ret __asm__("r3"); \ - register long __r12 __asm__("r12") = name; \ - register long __r5 __asm__("r5") = (long)(arg1); \ - register long __r6 __asm__("r6") = (long)(arg2); \ - register long __r7 __asm__("r7") = (long)(arg3); \ - register long __r8 __asm__("r8") = (long)(arg4); \ - register long __r9 __asm__("r9") = (long)(arg5); \ - register long __r10 __asm__("r10") = (long)(arg6); \ + long int __arg1 = (long int) (arg1); \ + long int __arg2 = (long int) (arg2); \ + long int __arg3 = (long int) (arg3); \ + long int __arg4 = (long int) (arg4); \ + long int __arg5 = (long int) (arg5); \ + long int __arg6 = (long int) (arg6); \ + register long int __ret __asm__("r3"); 
\ + register long int __r12 __asm__("r12") = name; \ + register long int __r5 __asm__("r5") = __arg1; \ + register long int __r6 __asm__("r6") = __arg2; \ + register long int __r7 __asm__("r7") = __arg3; \ + register long int __r8 __asm__("r8") = __arg4; \ + register long int __r9 __asm__("r9") = __arg5; \ + register long int __r10 __asm__("r10") = __arg6; \ __asm__ __volatile__( "brki r14,8; nop;" \ : "=r"(__ret) \ : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r8),"r"(__r9), "r"(__r10), \ diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S index b2bbf1018..ff445a540 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S @@ -22,9 +22,9 @@ .text .set nomips16 -/* long long __mips_syscall5 (long arg1, long arg2, long arg3, long arg4, - long arg5, - long number) */ +/* long long int __mips_syscall5 (long int arg1, long int arg2, long int arg3, + long int arg4, long int arg5, + long int number) */ ENTRY(__mips_syscall5) lw v0, 20(sp) diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S index 572d7c113..2b4a3117d 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S @@ -22,9 +22,9 @@ .text .set nomips16 -/* long long __mips_syscall6 (long arg1, long arg2, long arg3, long arg4, - long arg5, long arg6, - long number) */ +/* long long int __mips_syscall6 (long int arg1, long int arg2, long int arg3, + long int arg4, long int arg5, long int arg6, + long int number) */ ENTRY(__mips_syscall6) lw v0, 24(sp) diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S index 05164cb25..2723bbb13 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S @@ -22,9 +22,10 @@ .text .set nomips16 -/* long long __mips_syscall7 (long arg1, long arg2, long arg3, long arg4, - long arg5, long arg6, long arg7, - long number) */ +/* long long int __mips_syscall7 (long int arg1, long int arg2, long int arg3, + long int arg4, long int arg5, long int arg6, + long int arg7, + long int number) */ ENTRY(__mips_syscall7) lw v0, 28(sp) diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h index 9bf551ace..f23ede025 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h @@ -19,51 +19,57 @@ #ifndef MIPS16_SYSCALL_H #define MIPS16_SYSCALL_H 1 -long long __nomips16 __mips16_syscall0 (long number); +long long int __nomips16 __mips16_syscall0 (long int number); #define __mips16_syscall0(dummy, number) \ - __mips16_syscall0 ((long) (number)) + __mips16_syscall0 ((long int) (number)) -long long __nomips16 __mips16_syscall1 (long a0, - long number); +long long int __nomips16 __mips16_syscall1 (long int a0, + long int number); #define __mips16_syscall1(a0, number) \ - __mips16_syscall1 ((long) (a0), \ - (long) (number)) + __mips16_syscall1 ((long int) (a0), \ + (long int) (number)) -long long __nomips16 __mips16_syscall2 (long a0, long a1, - long number); +long long int __nomips16 __mips16_syscall2 (long int a0, long int a1, + long int number); #define __mips16_syscall2(a0, a1, number) \ - __mips16_syscall2 ((long) (a0), (long) (a1), \ - (long) (number)) + __mips16_syscall2 ((long int) (a0), 
(long int) (a1), \ + (long int) (number)) -long long __nomips16 __mips16_syscall3 (long a0, long a1, long a2, - long number); +long long int __nomips16 __mips16_syscall3 (long int a0, long int a1, + long int a2, + long int number); #define __mips16_syscall3(a0, a1, a2, number) \ - __mips16_syscall3 ((long) (a0), (long) (a1), (long) (a2), \ - (long) (number)) + __mips16_syscall3 ((long int) (a0), (long int) (a1), \ + (long int) (a2), \ + (long int) (number)) -long long __nomips16 __mips16_syscall4 (long a0, long a1, long a2, long a3, - long number); +long long int __nomips16 __mips16_syscall4 (long int a0, long int a1, + long int a2, long int a3, + long int number); #define __mips16_syscall4(a0, a1, a2, a3, number) \ - __mips16_syscall4 ((long) (a0), (long) (a1), (long) (a2), \ - (long) (a3), \ - (long) (number)) + __mips16_syscall4 ((long int) (a0), (long int) (a1), \ + (long int) (a2), (long int) (a3), \ + (long int) (number)) /* The remaining ones use regular MIPS wrappers. */ #define __mips16_syscall5(a0, a1, a2, a3, a4, number) \ - __mips_syscall5 ((long) (a0), (long) (a1), (long) (a2), \ - (long) (a3), (long) (a4), \ - (long) (number)) + __mips_syscall5 ((long int) (a0), (long int) (a1), \ + (long int) (a2), (long int) (a3), \ + (long int) (a4), \ + (long int) (number)) #define __mips16_syscall6(a0, a1, a2, a3, a4, a5, number) \ - __mips_syscall6 ((long) (a0), (long) (a1), (long) (a2), \ - (long) (a3), (long) (a4), (long) (a5), \ - (long) (number)) + __mips_syscall6 ((long int) (a0), (long int) (a1), \ + (long int) (a2), (long int) (a3), \ + (long int) (a4), (long int) (a5), \ + (long int) (number)) #define __mips16_syscall7(a0, a1, a2, a3, a4, a5, a6, number) \ - __mips_syscall7 ((long) (a0), (long) (a1), (long) (a2), \ - (long) (a3), (long) (a4), (long) (a5), \ - (long) (a6), \ - (long) (number)) + __mips_syscall7 ((long int) (a0), (long int) (a1), \ + (long int) (a2), (long int) (a3), \ + (long int) (a4), (long int) (a5), \ + (long int) (a6), \ + (long int) (number)) #endif diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c index 92f16e272..43c05f805 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c @@ -20,8 +20,8 @@ #undef __mips16_syscall0 -long long __nomips16 -__mips16_syscall0 (long number) +long long int __nomips16 +__mips16_syscall0 (long int number) { union __mips_syscall_return ret; ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 0); diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c index fa985a96e..16a567e83 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c @@ -20,9 +20,9 @@ #undef __mips16_syscall1 -long long __nomips16 -__mips16_syscall1 (long a0, - long number) +long long int __nomips16 +__mips16_syscall1 (long int a0, + long int number) { union __mips_syscall_return ret; ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 1, diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c index f042ac815..c0a856c34 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c @@ -20,9 +20,9 @@ #undef __mips16_syscall2 -long long __nomips16 
-__mips16_syscall2 (long a0, long a1, - long number) +long long int __nomips16 +__mips16_syscall2 (long int a0, long int a1, + long int number) { union __mips_syscall_return ret; ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 2, diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c index dfe2f7feb..042768ebf 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c @@ -20,9 +20,9 @@ #undef __mips16_syscall3 -long long __nomips16 -__mips16_syscall3 (long a0, long a1, long a2, - long number) +long long int __nomips16 +__mips16_syscall3 (long int a0, long int a1, long int a2, + long int number) { union __mips_syscall_return ret; ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 3, diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c index 39de51035..8658d822a 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c @@ -20,9 +20,9 @@ #undef __mips16_syscall4 -long long __nomips16 -__mips16_syscall4 (long a0, long a1, long a2, long a3, - long number) +long long int __nomips16 +__mips16_syscall4 (long int a0, long int a1, long int a2, long int a3, + long int number) { union __mips_syscall_return ret; ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 4, diff --git a/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h index beefcf284..0c6a83e9b 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h +++ b/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h @@ -52,7 +52,7 @@ #undef INLINE_SYSCALL #define INLINE_SYSCALL(name, nr, args...) \ ({ INTERNAL_SYSCALL_DECL (_sc_err); \ - long result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ + long int result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ if ( INTERNAL_SYSCALL_ERROR_P (result_var, _sc_err) ) \ { \ __set_errno (INTERNAL_SYSCALL_ERRNO (result_var, _sc_err)); \ @@ -61,10 +61,10 @@ result_var; }) #undef INTERNAL_SYSCALL_DECL -#define INTERNAL_SYSCALL_DECL(err) long err __attribute__ ((unused)) +#define INTERNAL_SYSCALL_DECL(err) long int err __attribute__ ((unused)) #undef INTERNAL_SYSCALL_ERROR_P -#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long) (err)) +#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long int) (err)) #undef INTERNAL_SYSCALL_ERRNO #define INTERNAL_SYSCALL_ERRNO(val, err) ((void) (err), val) @@ -103,11 +103,11 @@ union __mips_syscall_return { - long long val; + long long int val; struct { - long v0; - long v1; + long int v0; + long int v1; } reg; }; @@ -152,13 +152,13 @@ union __mips_syscall_return #define internal_syscall0(v0_init, input, number, err, dummy...) 
\ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a3 asm ("$7"); \ + register long int __v0 asm ("$2"); \ + register long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -175,14 +175,15 @@ union __mips_syscall_return #define internal_syscall1(v0_init, input, number, err, arg1) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + long int _arg1 = (long int) (arg1); \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a0 asm ("$4") = (long) (arg1); \ - register long __a3 asm ("$7"); \ + register long int __v0 asm ("$2"); \ + register long int __a0 asm ("$4") = _arg1; \ + register long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -199,15 +200,17 @@ union __mips_syscall_return #define internal_syscall2(v0_init, input, number, err, arg1, arg2) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a0 asm ("$4") = (long) (arg1); \ - register long __a1 asm ("$5") = (long) (arg2); \ - register long __a3 asm ("$7"); \ + register long int __v0 asm ("$2"); \ + register long int __a0 asm ("$4") = _arg1; \ + register long int __a1 asm ("$5") = _arg2; \ + register long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -225,16 +228,19 @@ union __mips_syscall_return #define internal_syscall3(v0_init, input, number, err, \ arg1, arg2, arg3) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + long int _arg3 = (long int) (arg3); \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a0 asm ("$4") = (long) (arg1); \ - register long __a1 asm ("$5") = (long) (arg2); \ - register long __a2 asm ("$6") = (long) (arg3); \ - register long __a3 asm ("$7"); \ + register long int __v0 asm ("$2"); \ + register long int __a0 asm ("$4") = _arg1; \ + register long int __a1 asm ("$5") = _arg2; \ + register long int __a2 asm ("$6") = _arg3; \ + register long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -252,16 +258,20 @@ union __mips_syscall_return #define internal_syscall4(v0_init, input, number, err, \ arg1, arg2, arg3, arg4) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + long int _arg3 = (long int) (arg3); \ + long int _arg4 = (long int) (arg4); \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a0 asm ("$4") = (long) (arg1); \ - register long __a1 asm ("$5") = (long) (arg2); \ - register long __a2 asm ("$6") = (long) (arg3); \ - register long __a3 asm ("$7") = (long) (arg4); \ + register long int __v0 asm ("$2"); 
\ + register long int __a0 asm ("$4") = _arg1; \ + register long int __a1 asm ("$5") = _arg2; \ + register long int __a2 asm ("$6") = _arg3; \ + register long int __a3 asm ("$7") = _arg4; \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -285,63 +295,66 @@ union __mips_syscall_return compiler specifics required for the stack arguments to be pushed, which would be the case if these syscalls were inlined. */ -long long __nomips16 __mips_syscall5 (long arg1, long arg2, long arg3, - long arg4, long arg5, - long number); +long long int __nomips16 __mips_syscall5 (long int arg1, long int arg2, + long int arg3, long int arg4, + long int arg5, + long int number); libc_hidden_proto (__mips_syscall5, nomips16) #define internal_syscall5(v0_init, input, number, err, \ arg1, arg2, arg3, arg4, arg5) \ ({ \ union __mips_syscall_return _sc_ret; \ - _sc_ret.val = __mips_syscall5 ((long) (arg1), \ - (long) (arg2), \ - (long) (arg3), \ - (long) (arg4), \ - (long) (arg5), \ - (long) (number)); \ + _sc_ret.val = __mips_syscall5 ((long int) (arg1), \ + (long int) (arg2), \ + (long int) (arg3), \ + (long int) (arg4), \ + (long int) (arg5), \ + (long int) (number)); \ err = _sc_ret.reg.v1; \ _sc_ret.reg.v0; \ }) -long long __nomips16 __mips_syscall6 (long arg1, long arg2, long arg3, - long arg4, long arg5, long arg6, - long number); +long long int __nomips16 __mips_syscall6 (long int arg1, long int arg2, + long int arg3, long int arg4, + long int arg5, long int arg6, + long int number); libc_hidden_proto (__mips_syscall6, nomips16) #define internal_syscall6(v0_init, input, number, err, \ arg1, arg2, arg3, arg4, arg5, arg6) \ ({ \ union __mips_syscall_return _sc_ret; \ - _sc_ret.val = __mips_syscall6 ((long) (arg1), \ - (long) (arg2), \ - (long) (arg3), \ - (long) (arg4), \ - (long) (arg5), \ - (long) (arg6), \ - (long) (number)); \ + _sc_ret.val = __mips_syscall6 ((long int) (arg1), \ + (long int) (arg2), \ + (long int) (arg3), \ + (long int) (arg4), \ + (long int) (arg5), \ + (long int) (arg6), \ + (long int) (number)); \ err = _sc_ret.reg.v1; \ _sc_ret.reg.v0; \ }) -long long __nomips16 __mips_syscall7 (long arg1, long arg2, long arg3, - long arg4, long arg5, long arg6, - long arg7, - long number); +long long int __nomips16 __mips_syscall7 (long int arg1, long int arg2, + long int arg3, long int arg4, + long int arg5, long int arg6, + long int arg7, + long int number); libc_hidden_proto (__mips_syscall7, nomips16) #define internal_syscall7(v0_init, input, number, err, \ arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ ({ \ union __mips_syscall_return _sc_ret; \ - _sc_ret.val = __mips_syscall7 ((long) (arg1), \ - (long) (arg2), \ - (long) (arg3), \ - (long) (arg4), \ - (long) (arg5), \ - (long) (arg6), \ - (long) (arg7), \ - (long) (number)); \ + _sc_ret.val = __mips_syscall7 ((long int) (arg1), \ + (long int) (arg2), \ + (long int) (arg3), \ + (long int) (arg4), \ + (long int) (arg5), \ + (long int) (arg6), \ + (long int) (arg7), \ + (long int) (number)); \ err = _sc_ret.reg.v1; \ _sc_ret.reg.v0; \ }) diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h index f96636538..4a9d7054f 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h +++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h @@ -47,14 +47,14 @@ /* Convert X to a long long, without losing any bits if it is one already or warning if it is a 32-bit pointer. 
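   As an illustration (assuming the ILP32 n32 model, where pointers and
   ptrdiff_t are both 32 bits), for a pointer argument P the expression
   (P) - (P) has type ptrdiff_t, so ARGIFY (P) expands to

     ((long long int) (ptrdiff_t) (P))

   converting the pointer to an integer of its own width first, which
   avoids the different-size cast warning, and widening it to 64 bits
   only afterwards; for an integer argument X, (X) - (X) has X's
   promoted type, so no value bits are lost.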
*/ -#define ARGIFY(X) ((long long) (__typeof__ ((X) - (X))) (X)) +#define ARGIFY(X) ((long long int) (__typeof__ ((X) - (X))) (X)) /* Define a macro which expands into the inline wrapper code for a system call. */ #undef INLINE_SYSCALL #define INLINE_SYSCALL(name, nr, args...) \ ({ INTERNAL_SYSCALL_DECL (_sc_err); \ - long result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ + long int result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ if ( INTERNAL_SYSCALL_ERROR_P (result_var, _sc_err) ) \ { \ __set_errno (INTERNAL_SYSCALL_ERRNO (result_var, _sc_err)); \ @@ -63,10 +63,10 @@ result_var; }) #undef INTERNAL_SYSCALL_DECL -#define INTERNAL_SYSCALL_DECL(err) long err __attribute__ ((unused)) +#define INTERNAL_SYSCALL_DECL(err) long int err __attribute__ ((unused)) #undef INTERNAL_SYSCALL_ERROR_P -#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long) (err)) +#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long int) (err)) #undef INTERNAL_SYSCALL_ERRNO #define INTERNAL_SYSCALL_ERRNO(val, err) ((void) (err), val) @@ -112,13 +112,13 @@ #define internal_syscall0(v0_init, input, number, err, dummy...) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + register long long int __s0 asm ("$16") __attribute__ ((unused))\ = (number); \ - register long long __v0 asm ("$2"); \ - register long long __a3 asm ("$7"); \ + register long long int __v0 asm ("$2"); \ + register long long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -135,14 +135,15 @@ #define internal_syscall1(v0_init, input, number, err, arg1) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + long long int _arg1 = ARGIFY (arg1); \ + register long long int __s0 asm ("$16") __attribute__ ((unused))\ = (number); \ - register long long __v0 asm ("$2"); \ - register long long __a0 asm ("$4") = ARGIFY (arg1); \ - register long long __a3 asm ("$7"); \ + register long long int __v0 asm ("$2"); \ + register long long int __a0 asm ("$4") = _arg1; \ + register long long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -159,15 +160,17 @@ #define internal_syscall2(v0_init, input, number, err, arg1, arg2) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + long long int _arg1 = ARGIFY (arg1); \ + long long int _arg2 = ARGIFY (arg2); \ + register long long int __s0 asm ("$16") __attribute__ ((unused))\ = (number); \ - register long long __v0 asm ("$2"); \ - register long long __a0 asm ("$4") = ARGIFY (arg1); \ - register long long __a1 asm ("$5") = ARGIFY (arg2); \ - register long long __a3 asm ("$7"); \ + register long long int __v0 asm ("$2"); \ + register long long int __a0 asm ("$4") = _arg1; \ + register long long int __a1 asm ("$5") = _arg2; \ + register long long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -185,16 +188,19 @@ #define internal_syscall3(v0_init, input, number, err, \ arg1, arg2, arg3) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + long long int _arg1 = ARGIFY (arg1); \ + long long int _arg2 = ARGIFY (arg2); \ + long long int _arg3 = ARGIFY (arg3); \ + register long long int __s0 asm ("$16") __attribute__ ((unused))\ = (number); \ - register long long __v0 asm ("$2"); \ - register long long __a0 asm 
("$4") = ARGIFY (arg1); \ - register long long __a1 asm ("$5") = ARGIFY (arg2); \ - register long long __a2 asm ("$6") = ARGIFY (arg3); \ - register long long __a3 asm ("$7"); \ + register long long int __v0 asm ("$2"); \ + register long long int __a0 asm ("$4") = _arg1; \ + register long long int __a1 asm ("$5") = _arg2; \ + register long long int __a2 asm ("$6") = _arg3; \ + register long long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -212,16 +218,20 @@ #define internal_syscall4(v0_init, input, number, err, \ arg1, arg2, arg3, arg4) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + long long int _arg1 = ARGIFY (arg1); \ + long long int _arg2 = ARGIFY (arg2); \ + long long int _arg3 = ARGIFY (arg3); \ + long long int _arg4 = ARGIFY (arg4); \ + register long long int __s0 asm ("$16") __attribute__ ((unused))\ = (number); \ - register long long __v0 asm ("$2"); \ - register long long __a0 asm ("$4") = ARGIFY (arg1); \ - register long long __a1 asm ("$5") = ARGIFY (arg2); \ - register long long __a2 asm ("$6") = ARGIFY (arg3); \ - register long long __a3 asm ("$7") = ARGIFY (arg4); \ + register long long int __v0 asm ("$2"); \ + register long long int __a0 asm ("$4") = _arg1; \ + register long long int __a1 asm ("$5") = _arg2; \ + register long long int __a2 asm ("$6") = _arg3; \ + register long long int __a3 asm ("$7") = _arg4; \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -239,17 +249,22 @@ #define internal_syscall5(v0_init, input, number, err, \ arg1, arg2, arg3, arg4, arg5) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + long long int _arg1 = ARGIFY (arg1); \ + long long int _arg2 = ARGIFY (arg2); \ + long long int _arg3 = ARGIFY (arg3); \ + long long int _arg4 = ARGIFY (arg4); \ + long long int _arg5 = ARGIFY (arg5); \ + register long long int __s0 asm ("$16") __attribute__ ((unused))\ = (number); \ - register long long __v0 asm ("$2"); \ - register long long __a0 asm ("$4") = ARGIFY (arg1); \ - register long long __a1 asm ("$5") = ARGIFY (arg2); \ - register long long __a2 asm ("$6") = ARGIFY (arg3); \ - register long long __a3 asm ("$7") = ARGIFY (arg4); \ - register long long __a4 asm ("$8") = ARGIFY (arg5); \ + register long long int __v0 asm ("$2"); \ + register long long int __a0 asm ("$4") = _arg1; \ + register long long int __a1 asm ("$5") = _arg2; \ + register long long int __a2 asm ("$6") = _arg3; \ + register long long int __a3 asm ("$7") = _arg4; \ + register long long int __a4 asm ("$8") = _arg5; \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -267,18 +282,24 @@ #define internal_syscall6(v0_init, input, number, err, \ arg1, arg2, arg3, arg4, arg5, arg6) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + long long int _arg1 = ARGIFY (arg1); \ + long long int _arg2 = ARGIFY (arg2); \ + long long int _arg3 = ARGIFY (arg3); \ + long long int _arg4 = ARGIFY (arg4); \ + long long int _arg5 = ARGIFY (arg5); \ + long long int _arg6 = ARGIFY (arg6); \ + register long long int __s0 asm ("$16") __attribute__ ((unused))\ = (number); \ - register long long __v0 asm ("$2"); \ - register long long __a0 asm ("$4") = ARGIFY (arg1); \ - register long long __a1 asm ("$5") = ARGIFY (arg2); \ - register long long __a2 asm ("$6") = ARGIFY (arg3); \ - register long long __a3 asm ("$7") = ARGIFY (arg4); 
\ - register long long __a4 asm ("$8") = ARGIFY (arg5); \ - register long long __a5 asm ("$9") = ARGIFY (arg6); \ + register long long int __v0 asm ("$2"); \ + register long long int __a0 asm ("$4") = _arg1; \ + register long long int __a1 asm ("$5") = _arg2; \ + register long long int __a2 asm ("$6") = _arg3; \ + register long long int __a3 asm ("$7") = _arg4; \ + register long long int __a4 asm ("$8") = _arg5; \ + register long long int __a5 asm ("$9") = _arg6; \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h index 9d30291f8..3e1f1cc3c 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h +++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h @@ -50,7 +50,7 @@ #undef INLINE_SYSCALL #define INLINE_SYSCALL(name, nr, args...) \ ({ INTERNAL_SYSCALL_DECL (_sc_err); \ - long result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ + long int result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ if ( INTERNAL_SYSCALL_ERROR_P (result_var, _sc_err) ) \ { \ __set_errno (INTERNAL_SYSCALL_ERRNO (result_var, _sc_err)); \ @@ -59,10 +59,10 @@ result_var; }) #undef INTERNAL_SYSCALL_DECL -#define INTERNAL_SYSCALL_DECL(err) long err __attribute__ ((unused)) +#define INTERNAL_SYSCALL_DECL(err) long int err __attribute__ ((unused)) #undef INTERNAL_SYSCALL_ERROR_P -#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long) (err)) +#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long int) (err)) #undef INTERNAL_SYSCALL_ERRNO #define INTERNAL_SYSCALL_ERRNO(val, err) ((void) (err), val) @@ -108,13 +108,13 @@ #define internal_syscall0(v0_init, input, number, err, dummy...) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a3 asm ("$7"); \ + register long int __v0 asm ("$2"); \ + register long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -131,14 +131,15 @@ #define internal_syscall1(v0_init, input, number, err, arg1) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + long int _arg1 = (long int) (arg1); \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a0 asm ("$4") = (long) (arg1); \ - register long __a3 asm ("$7"); \ + register long int __v0 asm ("$2"); \ + register long int __a0 asm ("$4") = _arg1; \ + register long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -155,15 +156,17 @@ #define internal_syscall2(v0_init, input, number, err, arg1, arg2) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a0 asm ("$4") = (long) (arg1); \ - register long __a1 asm ("$5") = (long) (arg2); \ - register long __a3 asm ("$7"); \ + register long int __v0 asm ("$2"); \ + register long int __a0 asm ("$4") = _arg1; \ + register long int __a1 asm ("$5") = _arg2; \ + register long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -181,16 +184,19 @@ #define 
internal_syscall3(v0_init, input, number, err, \ arg1, arg2, arg3) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + long int _arg3 = (long int) (arg3); \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a0 asm ("$4") = (long) (arg1); \ - register long __a1 asm ("$5") = (long) (arg2); \ - register long __a2 asm ("$6") = (long) (arg3); \ - register long __a3 asm ("$7"); \ + register long int __v0 asm ("$2"); \ + register long int __a0 asm ("$4") = _arg1; \ + register long int __a1 asm ("$5") = _arg2; \ + register long int __a2 asm ("$6") = _arg3; \ + register long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -208,16 +214,20 @@ #define internal_syscall4(v0_init, input, number, err, \ arg1, arg2, arg3, arg4) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + long int _arg3 = (long int) (arg3); \ + long int _arg4 = (long int) (arg4); \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a0 asm ("$4") = (long) (arg1); \ - register long __a1 asm ("$5") = (long) (arg2); \ - register long __a2 asm ("$6") = (long) (arg3); \ - register long __a3 asm ("$7") = (long) (arg4); \ + register long int __v0 asm ("$2"); \ + register long int __a0 asm ("$4") = _arg1; \ + register long int __a1 asm ("$5") = _arg2; \ + register long int __a2 asm ("$6") = _arg3; \ + register long int __a3 asm ("$7") = _arg4; \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -235,17 +245,22 @@ #define internal_syscall5(v0_init, input, number, err, \ arg1, arg2, arg3, arg4, arg5) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + long int _arg3 = (long int) (arg3); \ + long int _arg4 = (long int) (arg4); \ + long int _arg5 = (long int) (arg5); \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a0 asm ("$4") = (long) (arg1); \ - register long __a1 asm ("$5") = (long) (arg2); \ - register long __a2 asm ("$6") = (long) (arg3); \ - register long __a3 asm ("$7") = (long) (arg4); \ - register long __a4 asm ("$8") = (long) (arg5); \ + register long int __v0 asm ("$2"); \ + register long int __a0 asm ("$4") = _arg1; \ + register long int __a1 asm ("$5") = _arg2; \ + register long int __a2 asm ("$6") = _arg3; \ + register long int __a3 asm ("$7") = _arg4; \ + register long int __a4 asm ("$8") = _arg5; \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -263,18 +278,24 @@ #define internal_syscall6(v0_init, input, number, err, \ arg1, arg2, arg3, arg4, arg5, arg6) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + long int _arg3 = (long int) (arg3); \ + long int _arg4 = (long int) (arg4); \ + long int _arg5 = (long int) (arg5); \ + long int _arg6 = (long int) (arg6); \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - 
register long __v0 asm ("$2"); \ - register long __a0 asm ("$4") = (long) (arg1); \ - register long __a1 asm ("$5") = (long) (arg2); \ - register long __a2 asm ("$6") = (long) (arg3); \ - register long __a3 asm ("$7") = (long) (arg4); \ - register long __a4 asm ("$8") = (long) (arg5); \ - register long __a5 asm ("$9") = (long) (arg6); \ + register long int __v0 asm ("$2"); \ + register long int __a0 asm ("$4") = _arg1; \ + register long int __a1 asm ("$5") = _arg2; \ + register long int __a2 asm ("$6") = _arg3; \ + register long int __a3 asm ("$7") = _arg4; \ + register long int __a4 asm ("$8") = _arg5; \ + register long int __a5 asm ("$9") = _arg6; \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ diff --git a/sysdeps/unix/sysv/linux/mips/mips64/syscall.S b/sysdeps/unix/sysv/linux/mips/mips64/syscall.S index 26adf2cd0..a9baff3c1 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/syscall.S +++ b/sysdeps/unix/sysv/linux/mips/mips64/syscall.S @@ -20,7 +20,7 @@ #include /* Usage: - long syscall (syscall_number, arg1, arg2, arg3, arg4, arg5, arg6, arg7) + long int syscall (syscall_number, arg1, arg2, arg3, arg4, arg5, arg6, arg7) We need to do some arg shifting, syscall_number will be in v0. */ diff --git a/sysdeps/unix/sysv/linux/mips/sysdep.h b/sysdeps/unix/sysv/linux/mips/sysdep.h index cdfc0b1b5..a4cf1540f 100644 --- a/sysdeps/unix/sysv/linux/mips/sysdep.h +++ b/sysdeps/unix/sysv/linux/mips/sysdep.h @@ -36,8 +36,8 @@ the INTERNAL_SYSCALL_{ERROR_P,ERRNO} macros work correctly. */ #define INTERNAL_VSYSCALL_CALL(funcptr, err, nr, args...) \ ({ \ - long _ret = funcptr (args); \ - err = ((unsigned long) (_ret) >= (unsigned long) -4095L); \ + long int _ret = funcptr (args); \ + err = ((unsigned long int) (_ret) >= (unsigned long int) -4095L); \ if (err) \ _ret = -_ret; \ _ret; \ diff --git a/sysdeps/unix/sysv/linux/mips/unwind-arch.h b/sysdeps/unix/sysv/linux/mips/unwind-arch.h new file mode 100644 index 000000000..a00989998 --- /dev/null +++ b/sysdeps/unix/sysv/linux/mips/unwind-arch.h @@ -0,0 +1,67 @@ +/* Return backtrace of current program state. Arch-specific bits. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _UNWIND_ARCH_H +#define _UNWIND_ARCH_H + +#include + +/* MIPS fallback code handles a frame whose FDE cannot be obtained + (for instance a signal frame) by reading the kernel-allocated signal frame + and adding '2' to the value of 'sc_pc' [1]. The added value is used to + recognize the end of an EH region on mips16 [2]. + + The idea here is to adjust the obtained signal frame ADDR value and remove + the value libgcc added, by checking whether the previous frame is a signal + frame. + + [1] libgcc/config/mips/linux-unwind.h from gcc code. + [2] gcc/config/mips/mips.h from gcc code.
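+
+   As a worked example of the matching below (an illustration assuming
+   the 4000-based o32 syscall numbering): __NR_rt_sigreturn is 4193,
+   i.e. 0x1061, and 'li v0, imm' assembles to the word 0x24020000 | imm,
+   so the first trampoline word is
+
+     0x24020000 | 0x1061 == 0x24021061
+
+   and the adjustment is undone only when that word is followed by
+   0x0000000c (syscall).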
*/ + +static inline void * +unwind_arch_adjustment (void *prev, void *addr) +{ + uint32_t *pc = (uint32_t *) prev; + + if (pc == NULL) + return addr; + + /* For a MIPS16 or microMIPS frame, libgcc makes no adjustment. */ + if ((uintptr_t) pc & 0x3) + return addr; + + /* The vDSO contains either + + 24021061 li v0, 0x1061 (rt_sigreturn) + 0000000c syscall + or + 24021017 li v0, 0x1017 (sigreturn) + 0000000c syscall */ + if (pc[1] != 0x0000000c) + return addr; +#if _MIPS_SIM == _ABIO32 + if (pc[0] == (0x24020000 | __NR_sigreturn)) + return (void *) ((uintptr_t) addr - 2); +#endif + if (pc[0] == (0x24020000 | __NR_rt_sigreturn)) + return (void *) ((uintptr_t) addr - 2); + + return addr; +} + +#endif diff --git a/sysdeps/unix/sysv/linux/msgctl.c b/sysdeps/unix/sysv/linux/msgctl.c index 27879e76c..fd46aec1a 100644 --- a/sysdeps/unix/sysv/linux/msgctl.c +++ b/sysdeps/unix/sysv/linux/msgctl.c @@ -21,6 +21,7 @@ #include #include #include +#include /* For __kernel_mode_t. */ #ifndef DEFAULT_VERSION # ifndef __ASSUME_SYSVIPC_BROKEN_MODE_T @@ -61,7 +62,6 @@ __new_msgctl (int msqid, int cmd, struct msqid_ds *buf) int ret = msgctl_syscall (msqid, cmd, buf); -#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T if (ret >= 0) { switch (cmd) @@ -69,10 +69,16 @@ __new_msgctl (int msqid, int cmd, struct msqid_ds *buf) case IPC_STAT: case MSG_STAT: case MSG_STAT_ANY: +#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T buf->msg_perm.mode >>= 16; +#else + /* Old Linux kernel versions might not clear the mode padding. */ + if (sizeof ((struct msqid_ds){0}.msg_perm.mode) + != sizeof (__kernel_mode_t)) + buf->msg_perm.mode &= 0xFFFF; +#endif } } -#endif return ret; } diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h b/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h index 725dfafde..ffc150851 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h @@ -134,47 +134,47 @@ # define LOADARGS_0(name, dummy) \ r0 = name # define LOADARGS_1(name, __arg1) \ - long int arg1 = (long int) (__arg1); \ + long int _arg1 = (long int) (__arg1); \ LOADARGS_0(name, 0); \ extern void __illegally_sized_syscall_arg1 (void); \ if (__builtin_classify_type (__arg1) != 5 && sizeof (__arg1) > 4) \ __illegally_sized_syscall_arg1 (); \ - r3 = arg1 + r3 = _arg1 # define LOADARGS_2(name, __arg1, __arg2) \ - long int arg2 = (long int) (__arg2); \ + long int _arg2 = (long int) (__arg2); \ LOADARGS_1(name, __arg1); \ extern void __illegally_sized_syscall_arg2 (void); \ if (__builtin_classify_type (__arg2) != 5 && sizeof (__arg2) > 4) \ __illegally_sized_syscall_arg2 (); \ - r4 = arg2 + r4 = _arg2 # define LOADARGS_3(name, __arg1, __arg2, __arg3) \ - long int arg3 = (long int) (__arg3); \ + long int _arg3 = (long int) (__arg3); \ LOADARGS_2(name, __arg1, __arg2); \ extern void __illegally_sized_syscall_arg3 (void); \ if (__builtin_classify_type (__arg3) != 5 && sizeof (__arg3) > 4) \ __illegally_sized_syscall_arg3 (); \ - r5 = arg3 + r5 = _arg3 # define LOADARGS_4(name, __arg1, __arg2, __arg3, __arg4) \ - long int arg4 = (long int) (__arg4); \ + long int _arg4 = (long int) (__arg4); \ LOADARGS_3(name, __arg1, __arg2, __arg3); \ extern void __illegally_sized_syscall_arg4 (void); \ if (__builtin_classify_type (__arg4) != 5 && sizeof (__arg4) > 4) \ __illegally_sized_syscall_arg4 (); \ - r6 = arg4 + r6 = _arg4 # define LOADARGS_5(name, __arg1, __arg2, __arg3, __arg4, __arg5) \ - long int arg5 = (long int) (__arg5); \ + long int _arg5 = (long int) (__arg5); \ LOADARGS_4(name, __arg1, __arg2, __arg3,
__arg4); \ extern void __illegally_sized_syscall_arg5 (void); \ if (__builtin_classify_type (__arg5) != 5 && sizeof (__arg5) > 4) \ __illegally_sized_syscall_arg5 (); \ - r7 = arg5 + r7 = _arg5 # define LOADARGS_6(name, __arg1, __arg2, __arg3, __arg4, __arg5, __arg6) \ - long int arg6 = (long int) (__arg6); \ + long int _arg6 = (long int) (__arg6); \ LOADARGS_5(name, __arg1, __arg2, __arg3, __arg4, __arg5); \ extern void __illegally_sized_syscall_arg6 (void); \ if (__builtin_classify_type (__arg6) != 5 && sizeof (__arg6) > 4) \ __illegally_sized_syscall_arg6 (); \ - r8 = arg6 + r8 = _arg6 # define ASM_INPUT_0 "0" (r0) # define ASM_INPUT_1 ASM_INPUT_0, "1" (r3) diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h b/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h index ee7f43653..8a3f1c43e 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h @@ -139,47 +139,47 @@ #define LOADARGS_0(name, dummy) \ r0 = name #define LOADARGS_1(name, __arg1) \ - long int arg1 = (long int) (__arg1); \ + long int _arg1 = (long int) (__arg1); \ LOADARGS_0(name, 0); \ extern void __illegally_sized_syscall_arg1 (void); \ if (__builtin_classify_type (__arg1) != 5 && sizeof (__arg1) > 8) \ __illegally_sized_syscall_arg1 (); \ - r3 = arg1 + r3 = _arg1 #define LOADARGS_2(name, __arg1, __arg2) \ - long int arg2 = (long int) (__arg2); \ + long int _arg2 = (long int) (__arg2); \ LOADARGS_1(name, __arg1); \ extern void __illegally_sized_syscall_arg2 (void); \ if (__builtin_classify_type (__arg2) != 5 && sizeof (__arg2) > 8) \ __illegally_sized_syscall_arg2 (); \ - r4 = arg2 + r4 = _arg2 #define LOADARGS_3(name, __arg1, __arg2, __arg3) \ - long int arg3 = (long int) (__arg3); \ + long int _arg3 = (long int) (__arg3); \ LOADARGS_2(name, __arg1, __arg2); \ extern void __illegally_sized_syscall_arg3 (void); \ if (__builtin_classify_type (__arg3) != 5 && sizeof (__arg3) > 8) \ __illegally_sized_syscall_arg3 (); \ - r5 = arg3 + r5 = _arg3 #define LOADARGS_4(name, __arg1, __arg2, __arg3, __arg4) \ - long int arg4 = (long int) (__arg4); \ + long int _arg4 = (long int) (__arg4); \ LOADARGS_3(name, __arg1, __arg2, __arg3); \ extern void __illegally_sized_syscall_arg4 (void); \ if (__builtin_classify_type (__arg4) != 5 && sizeof (__arg4) > 8) \ __illegally_sized_syscall_arg4 (); \ - r6 = arg4 + r6 = _arg4 #define LOADARGS_5(name, __arg1, __arg2, __arg3, __arg4, __arg5) \ - long int arg5 = (long int) (__arg5); \ + long int _arg5 = (long int) (__arg5); \ LOADARGS_4(name, __arg1, __arg2, __arg3, __arg4); \ extern void __illegally_sized_syscall_arg5 (void); \ if (__builtin_classify_type (__arg5) != 5 && sizeof (__arg5) > 8) \ __illegally_sized_syscall_arg5 (); \ - r7 = arg5 + r7 = _arg5 #define LOADARGS_6(name, __arg1, __arg2, __arg3, __arg4, __arg5, __arg6) \ - long int arg6 = (long int) (__arg6); \ + long int _arg6 = (long int) (__arg6); \ LOADARGS_5(name, __arg1, __arg2, __arg3, __arg4, __arg5); \ extern void __illegally_sized_syscall_arg6 (void); \ if (__builtin_classify_type (__arg6) != 5 && sizeof (__arg6) > 8) \ __illegally_sized_syscall_arg6 (); \ - r8 = arg6 + r8 = _arg6 #define ASM_INPUT_0 "0" (r0) #define ASM_INPUT_1 ASM_INPUT_0, "1" (r3) diff --git a/sysdeps/unix/sysv/linux/prctl.c b/sysdeps/unix/sysv/linux/prctl.c new file mode 100644 index 000000000..d5725f14c --- /dev/null +++ b/sysdeps/unix/sysv/linux/prctl.c @@ -0,0 +1,42 @@ +/* prctl - Linux specific syscall. + Copyright (C) 2020 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include + +/* Unconditionally read all potential arguments. This may pass + garbage values to the kernel, but avoids the need for teaching + glibc the argument counts of individual options (including ones + that are added to the kernel in the future). */ + +int +__prctl (int option, ...) +{ + va_list arg; + va_start (arg, option); + unsigned long int arg2 = va_arg (arg, unsigned long int); + unsigned long int arg3 = va_arg (arg, unsigned long int); + unsigned long int arg4 = va_arg (arg, unsigned long int); + unsigned long int arg5 = va_arg (arg, unsigned long int); + va_end (arg); + return INLINE_SYSCALL_CALL (prctl, option, arg2, arg3, arg4, arg5); +} + +libc_hidden_def (__prctl) +weak_alias (__prctl, prctl) diff --git a/sysdeps/unix/sysv/linux/process_vm_readv.c b/sysdeps/unix/sysv/linux/process_vm_readv.c new file mode 100644 index 000000000..e1377f7e5 --- /dev/null +++ b/sysdeps/unix/sysv/linux/process_vm_readv.c @@ -0,0 +1,32 @@ +/* process_vm_readv - Linux specific syscall. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +ssize_t +process_vm_readv (pid_t pid, const struct iovec *local_iov, + unsigned long int liovcnt, + const struct iovec *remote_iov, + unsigned long int riovcnt, unsigned long int flags) +{ + return INLINE_SYSCALL_CALL (process_vm_readv, pid, local_iov, + liovcnt, remote_iov, riovcnt, flags); +} diff --git a/sysdeps/unix/sysv/linux/process_vm_writev.c b/sysdeps/unix/sysv/linux/process_vm_writev.c new file mode 100644 index 000000000..944ab9b7f --- /dev/null +++ b/sysdeps/unix/sysv/linux/process_vm_writev.c @@ -0,0 +1,32 @@ +/* process_vm_writev - Linux specific syscall. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +ssize_t +process_vm_writev (pid_t pid, const struct iovec *local_iov, + unsigned long int liovcnt, + const struct iovec *remote_iov, + unsigned long int riovcnt, unsigned long int flags) +{ + return INLINE_SYSCALL_CALL (process_vm_writev, pid, local_iov, + liovcnt, remote_iov, riovcnt, flags); +} diff --git a/sysdeps/unix/sysv/linux/ptsname.c b/sysdeps/unix/sysv/linux/ptsname.c index 81d9d26f1..3e9be3f0d 100644 --- a/sysdeps/unix/sysv/linux/ptsname.c +++ b/sysdeps/unix/sysv/linux/ptsname.c @@ -21,39 +21,14 @@ #include #include #include -#include -#include #include #include #include <_itoa.h> -/* Check if DEV corresponds to a master pseudo terminal device. */ -#define MASTER_P(Dev) \ - (__gnu_dev_major ((Dev)) == 2 \ - || (__gnu_dev_major ((Dev)) == 4 \ - && __gnu_dev_minor ((Dev)) >= 128 && __gnu_dev_minor ((Dev)) < 192) \ - || (__gnu_dev_major ((Dev)) >= 128 && __gnu_dev_major ((Dev)) < 136)) - -/* Check if DEV corresponds to a slave pseudo terminal device. */ -#define SLAVE_P(Dev) \ - (__gnu_dev_major ((Dev)) == 3 \ - || (__gnu_dev_major ((Dev)) == 4 \ - && __gnu_dev_minor ((Dev)) >= 192 && __gnu_dev_minor ((Dev)) < 256) \ - || (__gnu_dev_major ((Dev)) >= 136 && __gnu_dev_major ((Dev)) < 144)) - -/* Note that major number 4 corresponds to the old BSD style pseudo - terminal devices. As of Linux 2.1.115 these are no longer - supported. They have been replaced by major numbers 2 (masters) - and 3 (slaves). */ - /* Directory where we can find the slave pty nodes. */ #define _PATH_DEVPTS "/dev/pts/" -/* The are declared in getpt.c. */ -extern const char __libc_ptyname1[] attribute_hidden; -extern const char __libc_ptyname2[] attribute_hidden; - /* Static buffer for `ptsname'. */ static char buffer[sizeof (_PATH_DEVPTS) + 20]; @@ -68,19 +43,15 @@ ptsname (int fd) } +/* Store at most BUFLEN characters of the pathname of the slave pseudo + terminal associated with the master FD is open on in BUF. + Return 0 on success, otherwise an error number. */ int -__ptsname_internal (int fd, char *buf, size_t buflen, struct stat64 *stp) +__ptsname_r (int fd, char *buf, size_t buflen) { int save_errno = errno; unsigned int ptyno; - if (!__isatty (fd)) - { - __set_errno (ENOTTY); - return ENOTTY; - } - -#ifdef TIOCGPTN if (__ioctl (fd, TIOCGPTN, &ptyno) == 0) { /* Buffer we use to print the number in. For a maximum size for @@ -101,67 +72,11 @@ __ptsname_internal (int fd, char *buf, size_t buflen, struct stat64 *stp) memcpy (__stpcpy (buf, devpts), p, &numbuf[sizeof (numbuf)] - p); } - else if (errno != EINVAL) - return errno; else -#endif - { - char *p; - - if (buflen < strlen (_PATH_TTY) + 3) - { - __set_errno (ERANGE); - return ERANGE; - } - - if (__fxstat64 (_STAT_VER, fd, stp) < 0) - return errno; - - /* Check if FD really is a master pseudo terminal. */ - if (! 
MASTER_P (stp->st_rdev)) - { - __set_errno (ENOTTY); - return ENOTTY; - } - - ptyno = __gnu_dev_minor (stp->st_rdev); - - if (ptyno / 16 >= strlen (__libc_ptyname1)) - { - __set_errno (ENOTTY); - return ENOTTY; - } - - p = __stpcpy (buf, _PATH_TTY); - p[0] = __libc_ptyname1[ptyno / 16]; - p[1] = __libc_ptyname2[ptyno % 16]; - p[2] = '\0'; - } - - if (__xstat64 (_STAT_VER, buf, stp) < 0) + /* Bad file descriptor, or not a ptmx descriptor. */ return errno; - /* Check if the name we're about to return really corresponds to a - slave pseudo terminal. */ - if (! S_ISCHR (stp->st_mode) || ! SLAVE_P (stp->st_rdev)) - { - /* This really is a configuration problem. */ - __set_errno (ENOTTY); - return ENOTTY; - } - __set_errno (save_errno); return 0; } - - -/* Store at most BUFLEN characters of the pathname of the slave pseudo - terminal associated with the master FD is open on in BUF. - Return 0 on success, otherwise an error number. */ -int -__ptsname_r (int fd, char *buf, size_t buflen) -{ - struct stat64 st; - return __ptsname_internal (fd, buf, buflen, &st); -} weak_alias (__ptsname_r, ptsname_r) diff --git a/sysdeps/unix/sysv/linux/riscv/sysdep.h b/sysdeps/unix/sysv/linux/riscv/sysdep.h index 201bf9a91..2bd9b16f3 100644 --- a/sysdeps/unix/sysv/linux/riscv/sysdep.h +++ b/sysdeps/unix/sysv/linux/riscv/sysdep.h @@ -176,10 +176,11 @@ # define internal_syscall1(number, err, arg0) \ ({ \ long int _sys_result; \ + long int _arg0 = (long int) (arg0); \ \ { \ register long int __a7 asm ("a7") = number; \ - register long int __a0 asm ("a0") = (long int) (arg0); \ + register long int __a0 asm ("a0") = _arg0; \ __asm__ volatile ( \ "scall\n\t" \ : "+r" (__a0) \ @@ -193,11 +194,13 @@ # define internal_syscall2(number, err, arg0, arg1) \ ({ \ long int _sys_result; \ + long int _arg0 = (long int) (arg0); \ + long int _arg1 = (long int) (arg1); \ \ { \ register long int __a7 asm ("a7") = number; \ - register long int __a0 asm ("a0") = (long int) (arg0); \ - register long int __a1 asm ("a1") = (long int) (arg1); \ + register long int __a0 asm ("a0") = _arg0; \ + register long int __a1 asm ("a1") = _arg1; \ __asm__ volatile ( \ "scall\n\t" \ : "+r" (__a0) \ @@ -211,12 +214,15 @@ # define internal_syscall3(number, err, arg0, arg1, arg2) \ ({ \ long int _sys_result; \ + long int _arg0 = (long int) (arg0); \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ \ { \ register long int __a7 asm ("a7") = number; \ - register long int __a0 asm ("a0") = (long int) (arg0); \ - register long int __a1 asm ("a1") = (long int) (arg1); \ - register long int __a2 asm ("a2") = (long int) (arg2); \ + register long int __a0 asm ("a0") = _arg0; \ + register long int __a1 asm ("a1") = _arg1; \ + register long int __a2 asm ("a2") = _arg2; \ __asm__ volatile ( \ "scall\n\t" \ : "+r" (__a0) \ @@ -230,13 +236,17 @@ # define internal_syscall4(number, err, arg0, arg1, arg2, arg3) \ ({ \ long int _sys_result; \ + long int _arg0 = (long int) (arg0); \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + long int _arg3 = (long int) (arg3); \ \ { \ register long int __a7 asm ("a7") = number; \ - register long int __a0 asm ("a0") = (long int) (arg0); \ - register long int __a1 asm ("a1") = (long int) (arg1); \ - register long int __a2 asm ("a2") = (long int) (arg2); \ - register long int __a3 asm ("a3") = (long int) (arg3); \ + register long int __a0 asm ("a0") = _arg0; \ + register long int __a1 asm ("a1") = _arg1; \ + register long int __a2 asm ("a2") = _arg2; \ + register long int 
__a3 asm ("a3") = _arg3; \ __asm__ volatile ( \ "scall\n\t" \ : "+r" (__a0) \ @@ -250,14 +260,19 @@ # define internal_syscall5(number, err, arg0, arg1, arg2, arg3, arg4) \ ({ \ long int _sys_result; \ + long int _arg0 = (long int) (arg0); \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + long int _arg3 = (long int) (arg3); \ + long int _arg4 = (long int) (arg4); \ \ { \ register long int __a7 asm ("a7") = number; \ - register long int __a0 asm ("a0") = (long int) (arg0); \ - register long int __a1 asm ("a1") = (long int) (arg1); \ - register long int __a2 asm ("a2") = (long int) (arg2); \ - register long int __a3 asm ("a3") = (long int) (arg3); \ - register long int __a4 asm ("a4") = (long int) (arg4); \ + register long int __a0 asm ("a0") = _arg0; \ + register long int __a1 asm ("a1") = _arg1; \ + register long int __a2 asm ("a2") = _arg2; \ + register long int __a3 asm ("a3") = _arg3; \ + register long int __a4 asm ("a4") = _arg4; \ __asm__ volatile ( \ "scall\n\t" \ : "+r" (__a0) \ @@ -271,15 +286,21 @@ # define internal_syscall6(number, err, arg0, arg1, arg2, arg3, arg4, arg5) \ ({ \ long int _sys_result; \ + long int _arg0 = (long int) (arg0); \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + long int _arg3 = (long int) (arg3); \ + long int _arg4 = (long int) (arg4); \ + long int _arg5 = (long int) (arg5); \ \ { \ register long int __a7 asm ("a7") = number; \ - register long int __a0 asm ("a0") = (long int) (arg0); \ - register long int __a1 asm ("a1") = (long int) (arg1); \ - register long int __a2 asm ("a2") = (long int) (arg2); \ - register long int __a3 asm ("a3") = (long int) (arg3); \ - register long int __a4 asm ("a4") = (long int) (arg4); \ - register long int __a5 asm ("a5") = (long int) (arg5); \ + register long int __a0 asm ("a0") = _arg0; \ + register long int __a1 asm ("a1") = _arg1; \ + register long int __a2 asm ("a2") = _arg2; \ + register long int __a3 asm ("a3") = _arg3; \ + register long int __a4 asm ("a4") = _arg4; \ + register long int __a5 asm ("a5") = _arg5; \ __asm__ volatile ( \ "scall\n\t" \ : "+r" (__a0) \ @@ -294,16 +315,23 @@ # define internal_syscall7(number, err, arg0, arg1, arg2, arg3, arg4, arg5, arg6) \ ({ \ long int _sys_result; \ + long int _arg0 = (long int) (arg0); \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + long int _arg3 = (long int) (arg3); \ + long int _arg4 = (long int) (arg4); \ + long int _arg5 = (long int) (arg5); \ + long int _arg6 = (long int) (arg6); \ \ { \ register long int __a7 asm ("a7") = number; \ - register long int __a0 asm ("a0") = (long int) (arg0); \ - register long int __a1 asm ("a1") = (long int) (arg1); \ - register long int __a2 asm ("a2") = (long int) (arg2); \ - register long int __a3 asm ("a3") = (long int) (arg3); \ - register long int __a4 asm ("a4") = (long int) (arg4); \ - register long int __a5 asm ("a5") = (long int) (arg5); \ - register long int __a6 asm ("a6") = (long int) (arg6); \ + register long int __a0 asm ("a0") = _arg0; \ + register long int __a1 asm ("a1") = _arg1; \ + register long int __a2 asm ("a2") = _arg2; \ + register long int __a3 asm ("a3") = _arg3; \ + register long int __a4 asm ("a4") = _arg4; \ + register long int __a5 asm ("a5") = _arg5; \ + register long int __a6 asm ("a6") = _arg6; \ __asm__ volatile ( \ "scall\n\t" \ : "+r" (__a0) \ diff --git a/sysdeps/unix/sysv/linux/semctl.c b/sysdeps/unix/sysv/linux/semctl.c index 0c3eb0932..30571af49 100644 --- 
a/sysdeps/unix/sysv/linux/semctl.c +++ b/sysdeps/unix/sysv/linux/semctl.c @@ -22,6 +22,7 @@ #include #include #include +#include /* For __kernel_mode_t. */ /* Define a `union semun' suitable for Linux here. */ union semun @@ -92,7 +93,6 @@ __new_semctl (int semid, int semnum, int cmd, ...) int ret = semctl_syscall (semid, semnum, cmd, arg); -#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T if (ret >= 0) { switch (cmd) @@ -100,10 +100,16 @@ __new_semctl (int semid, int semnum, int cmd, ...) case IPC_STAT: case SEM_STAT: case SEM_STAT_ANY: +#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T arg.buf->sem_perm.mode >>= 16; +#else + /* Old Linux kernel versions might not clear the mode padding. */ + if (sizeof ((struct semid_ds){0}.sem_perm.mode) + != sizeof (__kernel_mode_t)) + arg.buf->sem_perm.mode &= 0xFFFF; +#endif } } -#endif return ret; } diff --git a/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies b/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies new file mode 100644 index 000000000..7eeaf15a5 --- /dev/null +++ b/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies @@ -0,0 +1 @@ +unix/sysv/linux/sh/sh4/fpu diff --git a/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies b/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies new file mode 100644 index 000000000..7eeaf15a5 --- /dev/null +++ b/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies @@ -0,0 +1 @@ +unix/sysv/linux/sh/sh4/fpu diff --git a/sysdeps/unix/sysv/linux/shmctl.c b/sysdeps/unix/sysv/linux/shmctl.c index 39fa861e1..f41b359b8 100644 --- a/sysdeps/unix/sysv/linux/shmctl.c +++ b/sysdeps/unix/sysv/linux/shmctl.c @@ -22,6 +22,7 @@ #include #include #include +#include /* For __kernel_mode_t. */ #ifndef DEFAULT_VERSION # ifndef __ASSUME_SYSVIPC_BROKEN_MODE_T @@ -63,7 +64,6 @@ __new_shmctl (int shmid, int cmd, struct shmid_ds *buf) int ret = shmctl_syscall (shmid, cmd, buf); -#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T if (ret >= 0) { switch (cmd) @@ -71,10 +71,16 @@ __new_shmctl (int shmid, int cmd, struct shmid_ds *buf) case IPC_STAT: case SHM_STAT: case SHM_STAT_ANY: +#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T buf->shm_perm.mode >>= 16; +#else + /* Old Linux kernel versions might not clear the mode padding. */ + if (sizeof ((struct shmid_ds){0}.shm_perm.mode) + != sizeof (__kernel_mode_t)) + buf->shm_perm.mode &= 0xFFFF; +#endif } } -#endif return ret; } diff --git a/sysdeps/unix/sysv/linux/sparc/Makefile b/sysdeps/unix/sysv/linux/sparc/Makefile index b0d182a43..147503967 100644 --- a/sysdeps/unix/sysv/linux/sparc/Makefile +++ b/sysdeps/unix/sysv/linux/sparc/Makefile @@ -11,8 +11,12 @@ ifeq ($(subdir),sysvipc) sysdep_routines += getshmlba endif +ifeq ($(subdir),signal) +sysdep_routines += sigreturn_stub +endif + ifeq ($(subdir),nptl) # pull in __syscall_error routine -libpthread-routines += sysdep -libpthread-shared-only-routines += sysdep +libpthread-routines += sysdep sigreturn_stub +libpthread-shared-only-routines += sysdep sigreturn_stub endif diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/sigaction.c b/sysdeps/unix/sysv/linux/sparc/sparc32/sigaction.c index 6b2f66422..938aa7aa8 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/sigaction.c +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/sigaction.c @@ -24,8 +24,8 @@ #include #include -static void __rt_sigreturn_stub (void); -static void __sigreturn_stub (void); +void __rt_sigreturn_stub (void); +void __sigreturn_stub (void); #define STUB(act, sigsetsize) \ (act) ? 
((unsigned long)((act->sa_flags & SA_SIGINFO) @@ -35,25 +35,3 @@ static void __sigreturn_stub (void); (sigsetsize) #include - -static -inhibit_stack_protector -void -__rt_sigreturn_stub (void) -{ - __asm__ ("mov %0, %%g1\n\t" - "ta 0x10\n\t" - : /* no outputs */ - : "i" (__NR_rt_sigreturn)); -} - -static -inhibit_stack_protector -void -__sigreturn_stub (void) -{ - __asm__ ("mov %0, %%g1\n\t" - "ta 0x10\n\t" - : /* no outputs */ - : "i" (__NR_sigreturn)); -} diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/sigreturn_stub.S b/sysdeps/unix/sysv/linux/sparc/sparc32/sigreturn_stub.S new file mode 100644 index 000000000..727cc9473 --- /dev/null +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/sigreturn_stub.S @@ -0,0 +1,34 @@ +/* Sigreturn stub function used in the sa_restorer field. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +/* These functions must not change the register window or the stack + pointer [1]. + + [1] https://lkml.org/lkml/2016/5/27/465 */ + +ENTRY (__rt_sigreturn_stub) + mov __NR_rt_sigreturn, %g1 + ta 0x10 +END (__rt_sigreturn_stub) + +ENTRY (__sigreturn_stub) + mov __NR_sigreturn, %g1 + ta 0x10 +END (__sigreturn_stub) diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/sigaction.c b/sysdeps/unix/sysv/linux/sparc/sparc64/sigaction.c index 9c0dc2a63..4e2617232 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/sigaction.c +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/sigaction.c @@ -22,21 +22,11 @@ #include #include -static void __rt_sigreturn_stub (void); +/* Defined in sigreturn_stub.S. */ +void __rt_sigreturn_stub (void); #define STUB(act, sigsetsize) \ (((unsigned long) &__rt_sigreturn_stub) - 8), \ (sigsetsize) #include - -static -inhibit_stack_protector -void -__rt_sigreturn_stub (void) -{ - __asm__ ("mov %0, %%g1\n\t" - "ta 0x6d\n\t" - : /* no outputs */ - : "i" (__NR_rt_sigreturn)); -} diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/sigreturn_stub.S b/sysdeps/unix/sysv/linux/sparc/sparc64/sigreturn_stub.S new file mode 100644 index 000000000..add476683 --- /dev/null +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/sigreturn_stub.S @@ -0,0 +1,29 @@ +/* Sigreturn stub function used in the sa_restorer field. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +/* This function must not change the register window or the stack + pointer [1]. + + [1] https://lkml.org/lkml/2016/5/27/465 */ + +ENTRY (__rt_sigreturn_stub) + mov __NR_rt_sigreturn, %g1 + ta 0x6d +END (__rt_sigreturn_stub) diff --git a/sysdeps/unix/sysv/linux/syscall-names.list b/sysdeps/unix/sysv/linux/syscall-names.list index 36e087d8f..3d8981400 100644 --- a/sysdeps/unix/sysv/linux/syscall-names.list +++ b/sysdeps/unix/sysv/linux/syscall-names.list @@ -21,8 +21,8 @@ # This file can list all potential system calls. The names are only # used if the installed kernel headers also provide them. -# The list of system calls is current as of Linux 5.4. -kernel 5.4 +# The list of system calls is current as of Linux 5.5. +kernel 5.5 FAST_atomic_update FAST_cmpxchg diff --git a/sysdeps/unix/sysv/linux/syscalls.list b/sysdeps/unix/sysv/linux/syscalls.list index 5f1352ad4..52e6dafc8 100644 --- a/sysdeps/unix/sysv/linux/syscalls.list +++ b/sysdeps/unix/sysv/linux/syscalls.list @@ -28,25 +28,24 @@ inotify_add_watch EXTRA inotify_add_watch i:isi inotify_add_watch inotify_init EXTRA inotify_init i: inotify_init inotify_init1 EXTRA inotify_init1 i:I inotify_init1 inotify_rm_watch EXTRA inotify_rm_watch i:ii inotify_rm_watch -ioperm - ioperm i:iii ioperm +ioperm - ioperm i:UUi ioperm iopl - iopl i:i iopl klogctl EXTRA syslog i:isi klogctl lchown - lchown i:sii __lchown lchown -mincore - mincore i:anV mincore -mlock - mlock i:bn mlock +mincore - mincore i:aUV mincore +mlock - mlock i:bU mlock mlockall - mlockall i:i mlockall -mount EXTRA mount i:sssip __mount mount -mremap EXTRA mremap b:ainip __mremap mremap -munlock - munlock i:ai munlock +mount EXTRA mount i:sssUp __mount mount +mremap EXTRA mremap b:aUUip __mremap mremap +munlock - munlock i:aU munlock munlockall - munlockall i: munlockall nfsservctl EXTRA nfsservctl i:ipp __compat_nfsservctl nfsservctl@GLIBC_2.0:GLIBC_2.28 pipe - pipe i:f __pipe pipe pipe2 - pipe2 i:fi __pipe2 pipe2 pivot_root EXTRA pivot_root i:ss pivot_root -prctl EXTRA prctl i:iiiii __prctl prctl query_module EXTRA query_module i:sipip __compat_query_module query_module@GLIBC_2.0:GLIBC_2.23 quotactl EXTRA quotactl i:isip quotactl -remap_file_pages - remap_file_pages i:piiii __remap_file_pages remap_file_pages +remap_file_pages - remap_file_pages i:pUiUi __remap_file_pages remap_file_pages sched_getp - sched_getparam i:ip __sched_getparam sched_getparam sched_gets - sched_getscheduler i:i __sched_getscheduler sched_getscheduler sched_primax - sched_get_priority_max i:i __sched_get_priority_max sched_get_priority_max @@ -55,8 +54,8 @@ sched_rr_gi - sched_rr_get_interval i:ip __sched_rr_get_interval sched_rr_get_in sched_setp - sched_setparam i:ip __sched_setparam sched_setparam sched_sets - sched_setscheduler i:iip __sched_setscheduler sched_setscheduler sched_yield - sched_yield i: __sched_yield sched_yield -sendfile - sendfile i:iipi sendfile -sendfile64 - sendfile64 i:iipi sendfile64 +sendfile - sendfile i:iipU sendfile +sendfile64 - sendfile64 i:iipU sendfile64 setfsgid EXTRA setfsgid i:i setfsgid setfsuid EXTRA setfsuid i:i setfsuid setpgid - setpgid i:ii __setpgid setpgid @@ -73,19 +72,19 @@ chown - chown i:sii __libc_chown __chown chown fchownat - fchownat i:isiii fchownat linkat - linkat i:isisi linkat mkdirat - mkdirat i:isi mkdirat -readlinkat - readlinkat i:issi readlinkat +readlinkat - readlinkat i:issU readlinkat symlinkat - 
symlinkat i:sis symlinkat unlinkat - unlinkat i:isi unlinkat -setxattr - setxattr i:sspii setxattr -lsetxattr - lsetxattr i:sspii lsetxattr -fsetxattr - fsetxattr i:ispii fsetxattr -getxattr - getxattr i:sspi getxattr -lgetxattr - lgetxattr i:sspi lgetxattr -fgetxattr - fgetxattr i:ispi fgetxattr -listxattr - listxattr i:ssi listxattr -llistxattr - llistxattr i:ssi llistxattr -flistxattr - flistxattr i:isi flistxattr +setxattr - setxattr i:sspUi setxattr +lsetxattr - lsetxattr i:sspUi lsetxattr +fsetxattr - fsetxattr i:ispUi fsetxattr +getxattr - getxattr i:sspU getxattr +lgetxattr - lgetxattr i:sspU lgetxattr +fgetxattr - fgetxattr i:ispU fgetxattr +listxattr - listxattr i:ssU listxattr +llistxattr - llistxattr i:ssU llistxattr +flistxattr - flistxattr i:isU flistxattr removexattr - removexattr i:ss removexattr lremovexattr - lremovexattr i:ss lremovexattr fremovexattr - fremovexattr i:is fremovexattr @@ -102,8 +101,6 @@ name_to_handle_at EXTRA name_to_handle_at i:isppi name_to_handle_at setns EXTRA setns i:ii setns -process_vm_readv EXTRA process_vm_readv i:ipipii process_vm_readv -process_vm_writev EXTRA process_vm_writev i:ipipii process_vm_writev memfd_create EXTRA memfd_create i:si memfd_create pkey_alloc EXTRA pkey_alloc i:ii pkey_alloc pkey_free EXTRA pkey_free i:i pkey_free diff --git a/sysdeps/unix/sysv/linux/tst-getcwd-smallbuff.c b/sysdeps/unix/sysv/linux/tst-getcwd-smallbuff.c new file mode 100644 index 000000000..55362f606 --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-getcwd-smallbuff.c @@ -0,0 +1,259 @@ +/* Verify that getcwd returns ERANGE for size 1 byte and does not underflow + buffer when the CWD is too long and is also a mount target of /. See bug + #28769 or CVE-2021-3999 for more context. + Copyright The GNU Toolchain Authors. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +static char *base; +#define BASENAME "tst-getcwd-smallbuff" +#define MOUNT_NAME "mpoint" +static int sockfd[2]; + +static void +do_cleanup (void) +{ + support_chdir_toolong_temp_directory (base); + TEST_VERIFY_EXIT (rmdir (MOUNT_NAME) == 0); + free (base); +} + +static void +send_fd (const int sock, const int fd) +{ + struct msghdr msg = {0}; + union + { + struct cmsghdr hdr; + char buf[CMSG_SPACE (sizeof (int))]; + } cmsgbuf = {0}; + struct cmsghdr *cmsg; + struct iovec vec; + char ch = 'A'; + ssize_t n; + + msg.msg_control = &cmsgbuf.buf; + msg.msg_controllen = sizeof (cmsgbuf.buf); + + cmsg = CMSG_FIRSTHDR (&msg); + cmsg->cmsg_len = CMSG_LEN (sizeof (int)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy (CMSG_DATA (cmsg), &fd, sizeof (fd)); + + vec.iov_base = &ch; + vec.iov_len = 1; + msg.msg_iov = &vec; + msg.msg_iovlen = 1; + + while ((n = sendmsg (sock, &msg, 0)) == -1 && errno == EINTR); + + TEST_VERIFY_EXIT (n == 1); +} + +static int +recv_fd (const int sock) +{ + struct msghdr msg = {0}; + union + { + struct cmsghdr hdr; + char buf[CMSG_SPACE(sizeof(int))]; + } cmsgbuf = {0}; + struct cmsghdr *cmsg; + struct iovec vec; + ssize_t n; + char ch = '\0'; + int fd = -1; + + vec.iov_base = &ch; + vec.iov_len = 1; + msg.msg_iov = &vec; + msg.msg_iovlen = 1; + + msg.msg_control = &cmsgbuf.buf; + msg.msg_controllen = sizeof (cmsgbuf.buf); + + while ((n = recvmsg (sock, &msg, 0)) == -1 && errno == EINTR); + if (n != 1 || ch != 'A') + return -1; + + cmsg = CMSG_FIRSTHDR (&msg); + if (cmsg == NULL) + return -1; + if (cmsg->cmsg_type != SCM_RIGHTS) + return -1; + memcpy (&fd, CMSG_DATA (cmsg), sizeof (fd)); + if (fd < 0) + return -1; + return fd; +} + +static int +child_func (void * const arg) +{ + xclose (sockfd[0]); + const int sock = sockfd[1]; + char ch; + + TEST_VERIFY_EXIT (read (sock, &ch, 1) == 1); + TEST_VERIFY_EXIT (ch == '1'); + + if (mount ("/", MOUNT_NAME, NULL, MS_BIND | MS_REC, NULL)) + FAIL_EXIT1 ("mount failed: %m\n"); + const int fd = xopen ("mpoint", + O_RDONLY | O_PATH | O_DIRECTORY | O_NOFOLLOW, 0); + + send_fd (sock, fd); + xclose (fd); + + TEST_VERIFY_EXIT (read (sock, &ch, 1) == 1); + TEST_VERIFY_EXIT (ch == 'a'); + + xclose (sock); + return 0; +} + +static void +update_map (char * const mapping, const char * const map_file) +{ + const size_t map_len = strlen (mapping); + + const int fd = xopen (map_file, O_WRONLY, 0); + xwrite (fd, mapping, map_len); + xclose (fd); +} + +static void +proc_setgroups_write (const long child_pid, const char * const str) +{ + const size_t str_len = strlen(str); + + char setgroups_path[sizeof ("/proc//setgroups") + INT_STRLEN_BOUND (long)]; + + snprintf (setgroups_path, sizeof (setgroups_path), + "/proc/%ld/setgroups", child_pid); + + const int fd = open (setgroups_path, O_WRONLY); + + if (fd < 0) + { + TEST_VERIFY_EXIT (errno == ENOENT); + FAIL_UNSUPPORTED ("/proc/%ld/setgroups not found\n", child_pid); + } + + xwrite (fd, str, str_len); + xclose(fd); +} + +static char child_stack[1024 * 1024]; + +int +do_test (void) +{ + base = support_create_and_chdir_toolong_temp_directory (BASENAME); + + xmkdir (MOUNT_NAME, S_IRWXU); + atexit (do_cleanup); + + /* Check whether user namespaces are supported. 
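+     A forked child attempts unshare (CLONE_NEWUSER | CLONE_NEWNS) and
+     exits 0 on success or EXIT_UNSUPPORTED on failure, so the test is
+     skipped on kernels built without user namespace support.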
*/ + { + pid_t pid = xfork (); + if (pid == 0) + { + if (unshare (CLONE_NEWUSER | CLONE_NEWNS) != 0) + _exit (EXIT_UNSUPPORTED); + else + _exit (0); + } + int status; + xwaitpid (pid, &status, 0); + TEST_VERIFY_EXIT (WIFEXITED (status)); + if (WEXITSTATUS (status) != 0) + return WEXITSTATUS (status); + } + + TEST_VERIFY_EXIT (socketpair (AF_UNIX, SOCK_STREAM, 0, sockfd) == 0); + pid_t child_pid = xclone (child_func, NULL, child_stack, + sizeof (child_stack), + CLONE_NEWUSER | CLONE_NEWNS | SIGCHLD); + + xclose (sockfd[1]); + const int sock = sockfd[0]; + + char map_path[sizeof ("/proc//uid_map") + INT_STRLEN_BOUND (long)]; + char map_buf[sizeof ("0 1") + INT_STRLEN_BOUND (long)]; + + snprintf (map_path, sizeof (map_path), "/proc/%ld/uid_map", + (long) child_pid); + snprintf (map_buf, sizeof (map_buf), "0 %ld 1", (long) getuid()); + update_map (map_buf, map_path); + + proc_setgroups_write ((long) child_pid, "deny"); + snprintf (map_path, sizeof (map_path), "/proc/%ld/gid_map", + (long) child_pid); + snprintf (map_buf, sizeof (map_buf), "0 %ld 1", (long) getgid()); + update_map (map_buf, map_path); + + TEST_VERIFY_EXIT (send (sock, "1", 1, MSG_NOSIGNAL) == 1); + const int fd = recv_fd (sock); + TEST_VERIFY_EXIT (fd >= 0); + TEST_VERIFY_EXIT (fchdir (fd) == 0); + + static char buf[2 * 10 + 1]; + memset (buf, 'A', sizeof (buf)); + + /* Finally, call getcwd and check if it resulted in a buffer underflow. */ + char * cwd = getcwd (buf + sizeof (buf) / 2, 1); + TEST_VERIFY (cwd == NULL); + TEST_VERIFY (errno == ERANGE); + + for (int i = 0; i < sizeof (buf); i++) + if (buf[i] != 'A') + { + printf ("buf[%d] = %02x\n", i, (unsigned int) buf[i]); + support_record_failure (); + } + + TEST_VERIFY_EXIT (send (sock, "a", 1, MSG_NOSIGNAL) == 1); + xclose (sock); + TEST_VERIFY_EXIT (xwaitpid (child_pid, NULL, 0) == child_pid); + + return 0; +} + +#define CLEANUP_HANDLER do_cleanup +#include diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h index c2eb37e57..c7f740a1d 100644 --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h @@ -61,13 +61,31 @@ # define SYSCALL_ERROR_LABEL syscall_error # endif +/* PSEUDO and T_PSEUDO macros have 2 extra arguments for unsigned long + int arguments. 
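+   Each extra argument gives the 1-based position of an unsigned long
+   int syscall argument that needs zero extension on x32; 0 means no
+   argument at that slot needs extension.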
*/ +# define PSEUDOS_HAVE_ULONG_INDICES 1 + +# ifndef SYSCALL_ULONG_ARG_1 +# define SYSCALL_ULONG_ARG_1 0 +# define SYSCALL_ULONG_ARG_2 0 +# endif + # undef PSEUDO -# define PSEUDO(name, syscall_name, args) \ - .text; \ - ENTRY (name) \ - DO_CALL (syscall_name, args); \ - cmpq $-4095, %rax; \ +# if SYSCALL_ULONG_ARG_1 +# define PSEUDO(name, syscall_name, args, ulong_arg_1, ulong_arg_2) \ + .text; \ + ENTRY (name) \ + DO_CALL (syscall_name, args, ulong_arg_1, ulong_arg_2); \ + cmpq $-4095, %rax; \ jae SYSCALL_ERROR_LABEL +# else +# define PSEUDO(name, syscall_name, args) \ + .text; \ + ENTRY (name) \ + DO_CALL (syscall_name, args, 0, 0); \ + cmpq $-4095, %rax; \ + jae SYSCALL_ERROR_LABEL +# endif # undef PSEUDO_END # define PSEUDO_END(name) \ @@ -75,10 +93,17 @@ END (name) # undef PSEUDO_NOERRNO -# define PSEUDO_NOERRNO(name, syscall_name, args) \ - .text; \ - ENTRY (name) \ - DO_CALL (syscall_name, args) +# if SYSCALL_ULONG_ARG_1 +# define PSEUDO_NOERRNO(name, syscall_name, args, ulong_arg_1, ulong_arg_2) \ + .text; \ + ENTRY (name) \ + DO_CALL (syscall_name, args, ulong_arg_1, ulong_arg_2) +# else +# define PSEUDO_NOERRNO(name, syscall_name, args) \ + .text; \ + ENTRY (name) \ + DO_CALL (syscall_name, args, 0, 0) +# endif # undef PSEUDO_END_NOERRNO # define PSEUDO_END_NOERRNO(name) \ @@ -87,11 +112,19 @@ # define ret_NOERRNO ret # undef PSEUDO_ERRVAL -# define PSEUDO_ERRVAL(name, syscall_name, args) \ - .text; \ - ENTRY (name) \ - DO_CALL (syscall_name, args); \ +# if SYSCALL_ULONG_ARG_1 +# define PSEUDO_ERRVAL(name, syscall_name, args, ulong_arg_1, ulong_arg_2) \ + .text; \ + ENTRY (name) \ + DO_CALL (syscall_name, args, ulong_arg_1, ulong_arg_2); \ + negq %rax +# else +# define PSEUDO_ERRVAL(name, syscall_name, args) \ + .text; \ + ENTRY (name) \ + DO_CALL (syscall_name, args, 0, 0); \ negq %rax +# endif # undef PSEUDO_END_ERRVAL # define PSEUDO_END_ERRVAL(name) \ @@ -163,8 +196,10 @@ Syscalls of more than 6 arguments are not supported. */ # undef DO_CALL -# define DO_CALL(syscall_name, args) \ +# define DO_CALL(syscall_name, args, ulong_arg_1, ulong_arg_2) \ DOARGS_##args \ + ZERO_EXTEND_##ulong_arg_1 \ + ZERO_EXTEND_##ulong_arg_2 \ movl $SYS_ify (syscall_name), %eax; \ syscall; @@ -176,6 +211,14 @@ # define DOARGS_5 DOARGS_4 # define DOARGS_6 DOARGS_5 +# define ZERO_EXTEND_0 /* nothing */ +# define ZERO_EXTEND_1 /* nothing */ +# define ZERO_EXTEND_2 /* nothing */ +# define ZERO_EXTEND_3 /* nothing */ +# define ZERO_EXTEND_4 /* nothing */ +# define ZERO_EXTEND_5 /* nothing */ +# define ZERO_EXTEND_6 /* nothing */ + #else /* !__ASSEMBLER__ */ /* Define a macro which expands inline into the wrapper code for a system call. */ @@ -210,12 +253,15 @@ /* Registers clobbered by syscall. */ # define REGISTERS_CLOBBERED_BY_SYSCALL "cc", "r11", "cx" -/* Create a variable 'name' based on type 'X' to avoid explicit types. - This is mainly used set use 64-bits arguments in x32. */ -#define TYPEFY(X, name) __typeof__ ((X) - (X)) name -/* Explicit cast the argument to avoid integer from pointer warning on - x32. */ -#define ARGIFY(X) ((__typeof__ ((X) - (X))) (X)) +/* NB: This also works when X is an array. For an array X, type of + (X) - (X) is ptrdiff_t, which is signed, since size of ptrdiff_t + == size of pointer, cast is a NOP. */ +#define TYPEFY1(X) __typeof__ ((X) - (X)) +/* Explicit cast the argument. */ +#define ARGIFY(X) ((TYPEFY1 (X)) (X)) +/* Create a variable 'name' based on type of variable 'X' to avoid + explicit types. 
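+   For example, TYPEFY (buf, __arg1) = ARGIFY (buf); declares __arg1
+   with a type derived from buf that works for pointers, arrays, and
+   integers alike, so the caller never spells out the type.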
*/ +#define TYPEFY(X, name) __typeof__ (ARGIFY (X)) name #undef INTERNAL_SYSCALL #define INTERNAL_SYSCALL(name, err, nr, args...) \ diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h index 5bf9eed80..62e6f8fe1 100644 --- a/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h +++ b/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h @@ -26,4 +26,39 @@ #undef LO_HI_LONG #define LO_HI_LONG(val) (val) +#ifdef __ASSEMBLER__ +/* Zero-extend 32-bit unsigned long int arguments to 64 bits. */ +# undef ZERO_EXTEND_1 +# define ZERO_EXTEND_1 movl %edi, %edi; +# undef ZERO_EXTEND_2 +# define ZERO_EXTEND_2 movl %esi, %esi; +# undef ZERO_EXTEND_3 +# define ZERO_EXTEND_3 movl %edx, %edx; +# if SYSCALL_ULONG_ARG_1 == 4 || SYSCALL_ULONG_ARG_2 == 4 +# undef DOARGS_4 +# define DOARGS_4 movl %ecx, %r10d; +# else +# undef ZERO_EXTEND_4 +# define ZERO_EXTEND_4 movl %r10d, %r10d; +# endif +# undef ZERO_EXTEND_5 +# define ZERO_EXTEND_5 movl %r8d, %r8d; +# undef ZERO_EXTEND_6 +# define ZERO_EXTEND_6 movl %r9d, %r9d; +#else /* !__ASSEMBLER__ */ +# undef ARGIFY +/* Enforce zero-extension for pointers and array system call arguments. + For integer types, extend to int64_t (the full register) using a + regular cast, resulting in zero or sign extension based on the + signedness of the original type. */ +# define ARGIFY(X) \ + ({ \ + _Pragma ("GCC diagnostic push"); \ + _Pragma ("GCC diagnostic ignored \"-Wpointer-to-int-cast\""); \ + (__builtin_classify_type (X) == 5 \ + ? (uintptr_t) (X) : (int64_t) (X)); \ + _Pragma ("GCC diagnostic pop"); \ + }) +#endif /* __ASSEMBLER__ */ + #endif /* linux/x86_64/x32/sysdep.h */ diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile index 95182a508..b7aec5df2 100644 --- a/sysdeps/x86/Makefile +++ b/sysdeps/x86/Makefile @@ -12,6 +12,42 @@ endif ifeq ($(subdir),setjmp) gen-as-const-headers += jmp_buf-ssp.sym sysdep_routines += __longjmp_cancel +ifneq ($(enable-cet),no) +ifneq ($(have-tunables),no) +tests += tst-setjmp-cet +tst-setjmp-cet-ENV = GLIBC_TUNABLES=glibc.cpu.x86_ibt=on:glibc.cpu.x86_shstk=on +endif +endif +endif + +ifeq ($(subdir),string) +sysdep_routines += cacheinfo + +tests += \ + tst-memchr-rtm \ + tst-memcmp-rtm \ + tst-memmove-rtm \ + tst-memrchr-rtm \ + tst-memset-rtm \ + tst-strchr-rtm \ + tst-strcpy-rtm \ + tst-strlen-rtm \ + tst-strncmp-rtm \ + tst-strrchr-rtm \ + tst-wcsncmp-rtm \ +# tests + +CFLAGS-tst-memchr-rtm.c += -mrtm +CFLAGS-tst-memcmp-rtm.c += -mrtm +CFLAGS-tst-memmove-rtm.c += -mrtm +CFLAGS-tst-memrchr-rtm.c += -mrtm +CFLAGS-tst-memset-rtm.c += -mrtm +CFLAGS-tst-strchr-rtm.c += -mrtm +CFLAGS-tst-strcpy-rtm.c += -mrtm +CFLAGS-tst-strlen-rtm.c += -mrtm +CFLAGS-tst-strncmp-rtm.c += -mrtm -Wno-error +CFLAGS-tst-strrchr-rtm.c += -mrtm +CFLAGS-tst-wcsncmp-rtm.c += -mrtm -Wno-error endif ifeq ($(enable-cet),yes) diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c index e3e8ef27b..39c13b719 100644 --- a/sysdeps/x86/cacheinfo.c +++ b/sysdeps/x86/cacheinfo.c @@ -722,7 +722,7 @@ intel_bug_no_cache_info: threads = 1 << ((ecx >> 12) & 0x0f); } - if (threads == 0) + if (threads == 0 || cpu_features->basic.family >= 0x17) { /* If APIC ID width is not available, use logical processor count. */ @@ -737,8 +737,22 @@ intel_bug_no_cache_info: if (threads > 0) shared /= threads; - /* Account for exclusive L2 and L3 caches. */ - shared += core; + /* Get shared cache per ccx for Zen architectures. */ + if (cpu_features->basic.family >= 0x17) + { + unsigned int eax; + + /* Get number of threads share the L3 cache in CCX. 
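+	     Bits 25:14 of EAX hold that count minus one, hence the
+	     mask and the + 1 below.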
*/ + __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx); + + unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1; + shared *= threads_per_ccx; + } + else + { + /* Account for exclusive L2 and L3 caches. */ + shared += core; + } } #ifndef DISABLE_PREFETCHW @@ -778,14 +792,20 @@ intel_bug_no_cache_info: __x86_shared_cache_size = shared; } - /* The large memcpy micro benchmark in glibc shows that 6 times of - shared cache size is the approximate value above which non-temporal - store becomes faster on a 8-core processor. This is the 3/4 of the - total shared cache size. */ + /* The default setting for the non_temporal threshold is 3/4 of one + thread's share of the chip's cache. For most Intel and AMD processors + with an initial release date between 2017 and 2020, a thread's typical + share of the cache is from 500 KBytes to 2 MBytes. Using the 3/4 + threshold leaves 125 KBytes to 500 KBytes of the thread's data + in cache after a maximum temporal copy, which will maintain + in cache a reasonable portion of the thread's stack and other + active data. If the threshold is set higher than one thread's + share of the cache, it has a substantial risk of negatively + impacting the performance of other threads running on the chip. */ __x86_shared_non_temporal_threshold = (cpu_features->non_temporal_threshold != 0 ? cpu_features->non_temporal_threshold - : __x86_shared_cache_size * threads * 3 / 4); + : __x86_shared_cache_size * 3 / 4); } #endif diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 81a170a81..e1c22e3e5 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -333,6 +333,9 @@ init_cpu_features (struct cpu_features *cpu_features) get_extended_indices (cpu_features); + if (CPU_FEATURES_CPU_P (cpu_features, RTM_ALWAYS_ABORT)) + cpu_features->cpuid[index_cpu_RTM].reg_RTM &= ~bit_cpu_RTM; + if (family == 0x06) { model += extended_model; @@ -394,11 +397,42 @@ init_cpu_features (struct cpu_features *cpu_features) break; } - /* Disable TSX on some Haswell processors to avoid TSX on kernels that - weren't updated with the latest microcode package (which disables - broken feature by default). */ + /* Disable TSX on some processors to avoid TSX on kernels that + weren't updated with the latest microcode package (which + disables broken feature by default). */ switch (model) { + case 0x55: + if (stepping <= 5) + goto disable_tsx; + break; + case 0x8e: + /* NB: Although the errata documents that for model == 0x8e, + only 0xb stepping or lower are impacted, the intention of + the errata was to disable TSX on all client processors on + all steppings. Include 0xc stepping which is an Intel + Core i7-8665U, a client mobile processor. */ + case 0x9e: + if (stepping > 0xc) + break; + /* Fall through. */ + case 0x4e: + case 0x5e: + { + /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for + processors listed in: + +https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html + */ +disable_tsx: + cpu_features->cpuid[index_cpu_HLE].reg_HLE + &= ~bit_cpu_HLE; + cpu_features->cpuid[index_cpu_RTM].reg_RTM + &= ~bit_cpu_RTM; + cpu_features->cpuid[index_cpu_RTM_ALWAYS_ABORT].reg_RTM_ALWAYS_ABORT + |= bit_cpu_RTM_ALWAYS_ABORT; + } + break; case 0x3f: /* Xeon E7 v3 with stepping >= 4 has working TSX. 
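	      Earlier steppings fall through to the disable path.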
*/ if (stepping >= 4) @@ -424,8 +458,24 @@ init_cpu_features (struct cpu_features *cpu_features) cpu_features->feature[index_arch_Prefer_No_VZEROUPPER] |= bit_arch_Prefer_No_VZEROUPPER; else - cpu_features->feature[index_arch_Prefer_No_AVX512] - |= bit_arch_Prefer_No_AVX512; + { + cpu_features->feature[index_arch_Prefer_No_AVX512] + |= bit_arch_Prefer_No_AVX512; + + /* Avoid RTM abort triggered by VZEROUPPER inside a + transactionally executing RTM region. */ + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + cpu_features->feature[index_arch_Prefer_No_VZEROUPPER] + |= bit_arch_Prefer_No_VZEROUPPER; + + /* Since to compare 2 32-byte strings, 256-bit EVEX strcmp + requires 2 loads, 3 VPCMPs and 2 KORDs while AVX2 strcmp + requires 1 load, 2 VPCMPEQs, 1 VPMINU and 1 VPMOVMSKB, + AVX2 strcmp is faster than EVEX strcmp. */ + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)) + cpu_features->feature[index_arch_Prefer_AVX2_STRCMP] + |= bit_arch_Prefer_AVX2_STRCMP; + } } /* This spells out "AuthenticAMD" or "HygonGenuine". */ else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h index aea83e6e3..9fb97907b 100644 --- a/sysdeps/x86/cpu-features.h +++ b/sysdeps/x86/cpu-features.h @@ -499,6 +499,7 @@ extern const struct cpu_features *__get_cpu_features (void) #define bit_cpu_AVX512_4VNNIW (1u << 2) #define bit_cpu_AVX512_4FMAPS (1u << 3) #define bit_cpu_FSRM (1u << 4) +#define bit_cpu_RTM_ALWAYS_ABORT (1u << 11) #define bit_cpu_PCONFIG (1u << 18) #define bit_cpu_IBT (1u << 20) #define bit_cpu_IBRS_IBPB (1u << 26) @@ -667,6 +668,7 @@ extern const struct cpu_features *__get_cpu_features (void) #define index_cpu_AVX512_4VNNIW COMMON_CPUID_INDEX_7 #define index_cpu_AVX512_4FMAPS COMMON_CPUID_INDEX_7 #define index_cpu_FSRM COMMON_CPUID_INDEX_7 +#define index_cpu_RTM_ALWAYS_ABORT COMMON_CPUID_INDEX_7 #define index_cpu_PCONFIG COMMON_CPUID_INDEX_7 #define index_cpu_IBT COMMON_CPUID_INDEX_7 #define index_cpu_IBRS_IBPB COMMON_CPUID_INDEX_7 @@ -835,6 +837,7 @@ extern const struct cpu_features *__get_cpu_features (void) #define reg_AVX512_4VNNIW edx #define reg_AVX512_4FMAPS edx #define reg_FSRM edx +#define reg_RTM_ALWAYS_ABORT edx #define reg_PCONFIG edx #define reg_IBT edx #define reg_IBRS_IBPB edx @@ -897,6 +900,7 @@ extern const struct cpu_features *__get_cpu_features (void) #define bit_arch_Prefer_FSRM (1u << 13) #define bit_arch_Prefer_No_AVX512 (1u << 14) #define bit_arch_MathVec_Prefer_No_AVX512 (1u << 15) +#define bit_arch_Prefer_AVX2_STRCMP (1u << 16) #define index_arch_Fast_Rep_String FEATURE_INDEX_2 #define index_arch_Fast_Copy_Backward FEATURE_INDEX_2 @@ -914,6 +918,7 @@ extern const struct cpu_features *__get_cpu_features (void) #define index_arch_Prefer_No_AVX512 FEATURE_INDEX_2 #define index_arch_MathVec_Prefer_No_AVX512 FEATURE_INDEX_2 #define index_arch_Prefer_FSRM FEATURE_INDEX_2 +#define index_arch_Prefer_AVX2_STRCMP FEATURE_INDEX_2 /* XCR0 Feature flags. 
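   Each bit names a processor state component that the OS must enable
   in XCR0 before the corresponding registers can be used.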
*/ #define bit_XMM_state (1u << 1) diff --git a/sysdeps/x86/cpu-tunables.c b/sysdeps/x86/cpu-tunables.c index 861bd7bca..cb83ecc3b 100644 --- a/sysdeps/x86/cpu-tunables.c +++ b/sysdeps/x86/cpu-tunables.c @@ -282,6 +282,9 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features, Fast_Copy_Backward, disable, 18); + CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH + (n, cpu_features, Prefer_AVX2_STRCMP, AVX2_Usable, + disable, 18); } break; case 19: diff --git a/sysdeps/x86/dl-cet.c b/sysdeps/x86/dl-cet.c index ca3b5849b..8ffaf94a0 100644 --- a/sysdeps/x86/dl-cet.c +++ b/sysdeps/x86/dl-cet.c @@ -105,7 +105,11 @@ dl_cet_check (struct link_map *m, const char *program) /* No legacy object check if both IBT and SHSTK are always on. */ if (enable_ibt_type == CET_ALWAYS_ON && enable_shstk_type == CET_ALWAYS_ON) - return; + { + THREAD_SETMEM (THREAD_SELF, header.feature_1, + GL(dl_x86_feature_1)[0]); + return; + } /* Check if IBT is enabled by kernel. */ bool ibt_enabled diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c index 0f55987ae..bbb5cd356 100644 --- a/sysdeps/x86/tst-get-cpu-features.c +++ b/sysdeps/x86/tst-get-cpu-features.c @@ -176,6 +176,7 @@ do_test (void) CHECK_CPU_FEATURE (AVX512_4VNNIW); CHECK_CPU_FEATURE (AVX512_4FMAPS); CHECK_CPU_FEATURE (FSRM); + CHECK_CPU_FEATURE (RTM_ALWAYS_ABORT); CHECK_CPU_FEATURE (PCONFIG); CHECK_CPU_FEATURE (IBT); CHECK_CPU_FEATURE (IBRS_IBPB); diff --git a/sysdeps/x86/tst-memchr-rtm.c b/sysdeps/x86/tst-memchr-rtm.c new file mode 100644 index 000000000..e47494011 --- /dev/null +++ b/sysdeps/x86/tst-memchr-rtm.c @@ -0,0 +1,54 @@ +/* Test case for memchr inside a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#define LOOP 3000 +#define STRING_SIZE 1024 +char string1[STRING_SIZE]; + +__attribute__ ((noinline, noclone)) +static int +prepare (void) +{ + memset (string1, 'a', STRING_SIZE); + string1[100] = 'c'; + string1[STRING_SIZE - 100] = 'c'; + char *p = memchr (string1, 'c', STRING_SIZE); + if (p == &string1[100]) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; +} + +__attribute__ ((noinline, noclone)) +static int +function (void) +{ + char *p = memchr (string1, 'c', STRING_SIZE); + if (p == &string1[100]) + return 0; + else + return 1; +} + +static int +do_test (void) +{ + return do_test_1 ("memchr", LOOP, prepare, function); +} diff --git a/sysdeps/x86/tst-memcmp-rtm.c b/sysdeps/x86/tst-memcmp-rtm.c new file mode 100644 index 000000000..e4c8a623b --- /dev/null +++ b/sysdeps/x86/tst-memcmp-rtm.c @@ -0,0 +1,52 @@ +/* Test case for memcmp inside a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#define LOOP 3000 +#define STRING_SIZE 1024 +char string1[STRING_SIZE]; +char string2[STRING_SIZE]; + +__attribute__ ((noinline, noclone)) +static int +prepare (void) +{ + memset (string1, 'a', STRING_SIZE); + memset (string2, 'a', STRING_SIZE); + if (memcmp (string1, string2, STRING_SIZE) == 0) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; +} + +__attribute__ ((noinline, noclone)) +static int +function (void) +{ + if (memcmp (string1, string2, STRING_SIZE) == 0) + return 0; + else + return 1; +} + +static int +do_test (void) +{ + return do_test_1 ("memcmp", LOOP, prepare, function); +} diff --git a/sysdeps/x86/tst-memmove-rtm.c b/sysdeps/x86/tst-memmove-rtm.c new file mode 100644 index 000000000..4bf97ef1e --- /dev/null +++ b/sysdeps/x86/tst-memmove-rtm.c @@ -0,0 +1,53 @@ +/* Test case for memmove inside a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#define LOOP 3000 +#define STRING_SIZE 1024 +char string1[STRING_SIZE]; +char string2[STRING_SIZE]; + +__attribute__ ((noinline, noclone)) +static int +prepare (void) +{ + memset (string1, 'a', STRING_SIZE); + if (memmove (string2, string1, STRING_SIZE) == string2 + && memcmp (string2, string1, STRING_SIZE) == 0) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; +} + +__attribute__ ((noinline, noclone)) +static int +function (void) +{ + if (memmove (string2, string1, STRING_SIZE) == string2 + && memcmp (string2, string1, STRING_SIZE) == 0) + return 0; + else + return 1; +} + +static int +do_test (void) +{ + return do_test_1 ("memmove", LOOP, prepare, function); +} diff --git a/sysdeps/x86/tst-memrchr-rtm.c b/sysdeps/x86/tst-memrchr-rtm.c new file mode 100644 index 000000000..a57a5a8eb --- /dev/null +++ b/sysdeps/x86/tst-memrchr-rtm.c @@ -0,0 +1,54 @@ +/* Test case for memrchr inside a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#define LOOP 3000 +#define STRING_SIZE 1024 +char string1[STRING_SIZE]; + +__attribute__ ((noinline, noclone)) +static int +prepare (void) +{ + memset (string1, 'a', STRING_SIZE); + string1[100] = 'c'; + string1[STRING_SIZE - 100] = 'c'; + char *p = memrchr (string1, 'c', STRING_SIZE); + if (p == &string1[STRING_SIZE - 100]) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; +} + +__attribute__ ((noinline, noclone)) +static int +function (void) +{ + char *p = memrchr (string1, 'c', STRING_SIZE); + if (p == &string1[STRING_SIZE - 100]) + return 0; + else + return 1; +} + +static int +do_test (void) +{ + return do_test_1 ("memrchr", LOOP, prepare, function); +} diff --git a/sysdeps/x86/tst-memset-rtm.c b/sysdeps/x86/tst-memset-rtm.c new file mode 100644 index 000000000..bf343a4da --- /dev/null +++ b/sysdeps/x86/tst-memset-rtm.c @@ -0,0 +1,45 @@ +/* Test case for memset inside a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#define LOOP 3000 +#define STRING_SIZE 1024 +char string1[STRING_SIZE]; + +__attribute__ ((noinline, noclone)) +static int +prepare (void) +{ + memset (string1, 'a', STRING_SIZE); + return EXIT_SUCCESS; +} + +__attribute__ ((noinline, noclone)) +static int +function (void) +{ + memset (string1, 'a', STRING_SIZE); + return 0; +} + +static int +do_test (void) +{ + return do_test_1 ("memset", LOOP, prepare, function); +} diff --git a/sysdeps/x86/tst-setjmp-cet.c b/sysdeps/x86/tst-setjmp-cet.c new file mode 100644 index 000000000..42c795d2a --- /dev/null +++ b/sysdeps/x86/tst-setjmp-cet.c @@ -0,0 +1 @@ +#include diff --git a/sysdeps/x86/tst-strchr-rtm.c b/sysdeps/x86/tst-strchr-rtm.c new file mode 100644 index 000000000..a82e29c07 --- /dev/null +++ b/sysdeps/x86/tst-strchr-rtm.c @@ -0,0 +1,54 @@ +/* Test case for strchr inside a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#define LOOP 3000 +#define STRING_SIZE 1024 +char string1[STRING_SIZE]; + +__attribute__ ((noinline, noclone)) +static int +prepare (void) +{ + memset (string1, 'a', STRING_SIZE - 1); + string1[100] = 'c'; + string1[STRING_SIZE - 100] = 'c'; + char *p = strchr (string1, 'c'); + if (p == &string1[100]) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; +} + +__attribute__ ((noinline, noclone)) +static int +function (void) +{ + char *p = strchr (string1, 'c'); + if (p == &string1[100]) + return 0; + else + return 1; +} + +static int +do_test (void) +{ + return do_test_1 ("strchr", LOOP, prepare, function); +} diff --git a/sysdeps/x86/tst-strcpy-rtm.c b/sysdeps/x86/tst-strcpy-rtm.c new file mode 100644 index 000000000..2b2a583fb --- /dev/null +++ b/sysdeps/x86/tst-strcpy-rtm.c @@ -0,0 +1,53 @@ +/* Test case for strcpy inside a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#define LOOP 3000 +#define STRING_SIZE 1024 +char string1[STRING_SIZE]; +char string2[STRING_SIZE]; + +__attribute__ ((noinline, noclone)) +static int +prepare (void) +{ + memset (string1, 'a', STRING_SIZE - 1); + if (strcpy (string2, string1) == string2 + && strcmp (string2, string1) == 0) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; +} + +__attribute__ ((noinline, noclone)) +static int +function (void) +{ + if (strcpy (string2, string1) == string2 + && strcmp (string2, string1) == 0) + return 0; + else + return 1; +} + +static int +do_test (void) +{ + return do_test_1 ("strcpy", LOOP, prepare, function); +} diff --git a/sysdeps/x86/tst-string-rtm.h b/sysdeps/x86/tst-string-rtm.h new file mode 100644 index 000000000..6ed9eca01 --- /dev/null +++ b/sysdeps/x86/tst-string-rtm.h @@ -0,0 +1,72 @@ +/* Test string function in a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include + +static int +do_test_1 (const char *name, unsigned int loop, int (*prepare) (void), + int (*function) (void)) +{ + if (!CPU_FEATURE_USABLE (RTM)) + return EXIT_UNSUPPORTED; + + int status = prepare (); + if (status != EXIT_SUCCESS) + return status; + + unsigned int i; + unsigned int naborts = 0; + unsigned int failed = 0; + for (i = 0; i < loop; i++) + { + failed |= function (); + if (_xbegin() == _XBEGIN_STARTED) + { + failed |= function (); + _xend(); + } + else + { + failed |= function (); + ++naborts; + } + } + + if (failed) + FAIL_EXIT1 ("%s() failed", name); + + if (naborts) + { + /* NB: Low single digit (<= 5%) noise-level aborts are normal for + TSX. */ + double rate = 100 * ((double) naborts) / ((double) loop); + if (rate > 5) + FAIL_EXIT1 ("TSX abort rate: %.2f%% (%d out of %d)", + rate, naborts, loop); + } + + return EXIT_SUCCESS; +} + +static int do_test (void); + +#include diff --git a/sysdeps/x86/tst-strlen-rtm.c b/sysdeps/x86/tst-strlen-rtm.c new file mode 100644 index 000000000..0dcf14db8 --- /dev/null +++ b/sysdeps/x86/tst-strlen-rtm.c @@ -0,0 +1,53 @@ +/* Test case for strlen inside a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#define LOOP 3000 +#define STRING_SIZE 1024 +char string1[STRING_SIZE]; + +__attribute__ ((noinline, noclone)) +static int +prepare (void) +{ + memset (string1, 'a', STRING_SIZE - 1); + string1[STRING_SIZE - 100] = '\0'; + size_t len = strlen (string1); + if (len == STRING_SIZE - 100) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; +} + +__attribute__ ((noinline, noclone)) +static int +function (void) +{ + size_t len = strlen (string1); + if (len == STRING_SIZE - 100) + return 0; + else + return 1; +} + +static int +do_test (void) +{ + return do_test_1 ("strlen", LOOP, prepare, function); +} diff --git a/sysdeps/x86/tst-strncmp-rtm.c b/sysdeps/x86/tst-strncmp-rtm.c new file mode 100644 index 000000000..aef9866cf --- /dev/null +++ b/sysdeps/x86/tst-strncmp-rtm.c @@ -0,0 +1,81 @@ +/* Test case for strncmp inside a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include + +#ifdef WIDE +# define CHAR wchar_t +# define MEMSET wmemset +# define STRNCMP wcsncmp +# define TEST_NAME "wcsncmp" +#else /* !WIDE */ +# define CHAR char +# define MEMSET memset +# define STRNCMP strncmp +# define TEST_NAME "strncmp" +#endif /* !WIDE */ + + + +#define LOOP 3000 +#define STRING_SIZE 1024 +CHAR string1[STRING_SIZE]; +CHAR string2[STRING_SIZE]; + +__attribute__ ((noinline, noclone)) +static int +prepare (void) +{ + MEMSET (string1, 'a', STRING_SIZE - 1); + MEMSET (string2, 'a', STRING_SIZE - 1); + if (STRNCMP (string1, string2, STRING_SIZE) == 0) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; +} + +__attribute__ ((noinline, noclone)) +static int +function (void) +{ + if (STRNCMP (string1, string2, STRING_SIZE) == 0) + return 0; + else + return 1; +} + +__attribute__ ((noinline, noclone)) +static int +function_overflow (void) +{ + if (STRNCMP (string1, string2, SIZE_MAX) == 0) + return 0; + else + return 1; +} + +static int +do_test (void) +{ + int status = do_test_1 (TEST_NAME, LOOP, prepare, function); + if (status != EXIT_SUCCESS) + return status; + status = do_test_1 (TEST_NAME, LOOP, prepare, function_overflow); + return status; +} diff --git a/sysdeps/x86/tst-strrchr-rtm.c b/sysdeps/x86/tst-strrchr-rtm.c new file mode 100644 index 000000000..e32bfaf5f --- /dev/null +++ b/sysdeps/x86/tst-strrchr-rtm.c @@ -0,0 +1,53 @@ +/* Test case for strrchr inside a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include + +#define LOOP 3000 +#define STRING_SIZE 1024 +char string1[STRING_SIZE]; + +__attribute__ ((noinline, noclone)) +static int +prepare (void) +{ + memset (string1, 'a', STRING_SIZE - 1); + string1[STRING_SIZE - 100] = 'c'; + char *p = strrchr (string1, 'c'); + if (p == &string1[STRING_SIZE - 100]) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; +} + +__attribute__ ((noinline, noclone)) +static int +function (void) +{ + char *p = strrchr (string1, 'c'); + if (p == &string1[STRING_SIZE - 100]) + return 0; + else + return 1; +} + +static int +do_test (void) +{ + return do_test_1 ("strrchr", LOOP, prepare, function); +} diff --git a/sysdeps/unix/sysv/linux/nios2/kernel-features.h b/sysdeps/x86/tst-wcsncmp-rtm.c similarity index 74% rename from sysdeps/unix/sysv/linux/nios2/kernel-features.h rename to sysdeps/x86/tst-wcsncmp-rtm.c index d68d11498..bad3b8637 100644 --- a/sysdeps/unix/sysv/linux/nios2/kernel-features.h +++ b/sysdeps/x86/tst-wcsncmp-rtm.c @@ -1,6 +1,5 @@ -/* Set flags signalling availability of kernel features based on given - kernel version number. NIOS2 version. - Copyright (C) 2019-2020 Free Software Foundation, Inc. +/* Test case for wcsncmp inside a transactionally executing RTM region. + Copyright (C) 2022 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,6 +16,6 @@ License along with the GNU C Library; if not, see . */ -#include_next - -#undef __ASSUME_SYSVIPC_DEFAULT_IPC_64 +#define WIDE 1 +#include +#include "tst-strncmp-rtm.c" diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile index d51cf03ac..b1951adce 100644 --- a/sysdeps/x86_64/Makefile +++ b/sysdeps/x86_64/Makefile @@ -20,6 +20,8 @@ endif ifeq ($(subdir),string) sysdep_routines += cacheinfo strcasecmp_l-nonascii strncase_l-nonascii gen-as-const-headers += locale-defines.sym +tests += \ + tst-rsi-strlen endif ifeq ($(subdir),elf) @@ -150,6 +152,11 @@ ifeq ($(subdir),csu) gen-as-const-headers += tlsdesc.sym rtld-offsets.sym endif +ifeq ($(subdir),wcsmbs) +tests += \ + tst-rsi-wcslen +endif + $(objpfx)x86_64/tst-x86_64mod-1.os: $(objpfx)tst-x86_64mod-1.os $(make-target-directory) rm -f $@ diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure old mode 100644 new mode 100755 index 84f82c240..fc1840e23 --- a/sysdeps/x86_64/configure +++ b/sysdeps/x86_64/configure @@ -107,39 +107,6 @@ if test x"$build_mathvec" = xnotset; then build_mathvec=yes fi -if test "$static_pie" = yes; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for linker static PIE support" >&5 -$as_echo_n "checking for linker static PIE support... " >&6; } -if ${libc_cv_ld_static_pie+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat > conftest.s <<\EOF - .text - .global _start - .weak foo -_start: - leaq foo(%rip), %rax -EOF - libc_cv_pie_option="-Wl,-pie" - if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&5' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; }; then - libc_cv_ld_static_pie=yes - else - libc_cv_ld_static_pie=no - fi -rm -f conftest* -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ld_static_pie" >&5 -$as_echo "$libc_cv_ld_static_pie" >&6; } - if test "$libc_cv_ld_static_pie" != yes; then - as_fn_error $? 
"linker support for static PIE needed" "$LINENO" 5 - fi -fi - $as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac index cdaba0c07..611a7d9ba 100644 --- a/sysdeps/x86_64/configure.ac +++ b/sysdeps/x86_64/configure.ac @@ -53,31 +53,6 @@ if test x"$build_mathvec" = xnotset; then build_mathvec=yes fi -dnl Check if linker supports static PIE with the fix for -dnl -dnl https://sourceware.org/bugzilla/show_bug.cgi?id=21782 -dnl -if test "$static_pie" = yes; then - AC_CACHE_CHECK(for linker static PIE support, libc_cv_ld_static_pie, [dnl -cat > conftest.s <<\EOF - .text - .global _start - .weak foo -_start: - leaq foo(%rip), %rax -EOF - libc_cv_pie_option="-Wl,-pie" - if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&AS_MESSAGE_LOG_FD); then - libc_cv_ld_static_pie=yes - else - libc_cv_ld_static_pie=no - fi -rm -f conftest*]) - if test "$libc_cv_ld_static_pie" != yes; then - AC_MSG_ERROR([linker support for static PIE needed]) - fi -fi - dnl It is always possible to access static and hidden symbols in an dnl position independent way. AC_DEFINE(PI_STATIC_AND_HIDDEN) diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h index 8e9baffeb..74029871d 100644 --- a/sysdeps/x86_64/dl-machine.h +++ b/sysdeps/x86_64/dl-machine.h @@ -315,16 +315,22 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, { # ifndef RTLD_BOOTSTRAP if (sym_map != map - && sym_map->l_type != lt_executable && !sym_map->l_relocated) { const char *strtab = (const char *) D_PTR (map, l_info[DT_STRTAB]); - _dl_error_printf ("\ + if (sym_map->l_type == lt_executable) + _dl_fatal_printf ("\ +%s: IFUNC symbol '%s' referenced in '%s' is defined in the executable \ +and creates an unsatisfiable circular dependency.\n", + RTLD_PROGNAME, strtab + refsym->st_name, + map->l_name); + else + _dl_error_printf ("\ %s: Relink `%s' with `%s' for IFUNC symbol `%s'\n", - RTLD_PROGNAME, map->l_name, - sym_map->l_name, - strtab + refsym->st_name); + RTLD_PROGNAME, map->l_name, + sym_map->l_name, + strtab + refsym->st_name); } # endif value = ((ElfW(Addr) (*) (void)) value) (); diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S index a5c879d2a..070e5ef90 100644 --- a/sysdeps/x86_64/memchr.S +++ b/sysdeps/x86_64/memchr.S @@ -21,9 +21,11 @@ #ifdef USE_AS_WMEMCHR # define MEMCHR wmemchr # define PCMPEQ pcmpeqd +# define CHAR_PER_VEC 4 #else # define MEMCHR memchr # define PCMPEQ pcmpeqb +# define CHAR_PER_VEC 16 #endif /* fast SSE2 version with using pmaxub and 64 byte loop */ @@ -33,15 +35,14 @@ ENTRY(MEMCHR) movd %esi, %xmm1 mov %edi, %ecx +#ifdef __ILP32__ + /* Clear the upper 32 bits. */ + movl %edx, %edx +#endif #ifdef USE_AS_WMEMCHR test %RDX_LP, %RDX_LP jz L(return_null) - shl $2, %RDX_LP #else -# ifdef __ILP32__ - /* Clear the upper 32 bits. */ - movl %edx, %edx -# endif punpcklbw %xmm1, %xmm1 test %RDX_LP, %RDX_LP jz L(return_null) @@ -60,13 +61,16 @@ ENTRY(MEMCHR) test %eax, %eax jnz L(matches_1) - sub $16, %rdx + sub $CHAR_PER_VEC, %rdx jbe L(return_null) add $16, %rdi and $15, %ecx and $-16, %rdi +#ifdef USE_AS_WMEMCHR + shr $2, %ecx +#endif add %rcx, %rdx - sub $64, %rdx + sub $(CHAR_PER_VEC * 4), %rdx jbe L(exit_loop) jmp L(loop_prolog) @@ -77,16 +81,21 @@ L(crosscache): movdqa (%rdi), %xmm0 PCMPEQ %xmm1, %xmm0 -/* Check if there is a match. */ + /* Check if there is a match. */ pmovmskb %xmm0, %eax -/* Remove the leading bytes. 
*/ + /* Remove the leading bytes. */ sar %cl, %eax test %eax, %eax je L(unaligned_no_match) -/* Check which byte is a match. */ + /* Check which byte is a match. */ bsf %eax, %eax - +#ifdef USE_AS_WMEMCHR + mov %eax, %esi + shr $2, %esi + sub %rsi, %rdx +#else sub %rax, %rdx +#endif jbe L(return_null) add %rdi, %rax add %rcx, %rax @@ -94,15 +103,18 @@ L(crosscache): .p2align 4 L(unaligned_no_match): - /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using + /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using "rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to void possible addition overflow. */ neg %rcx add $16, %rcx +#ifdef USE_AS_WMEMCHR + shr $2, %ecx +#endif sub %rcx, %rdx jbe L(return_null) add $16, %rdi - sub $64, %rdx + sub $(CHAR_PER_VEC * 4), %rdx jbe L(exit_loop) .p2align 4 @@ -135,7 +147,7 @@ L(loop_prolog): test $0x3f, %rdi jz L(align64_loop) - sub $64, %rdx + sub $(CHAR_PER_VEC * 4), %rdx jbe L(exit_loop) movdqa (%rdi), %xmm0 @@ -167,11 +179,14 @@ L(loop_prolog): mov %rdi, %rcx and $-64, %rdi and $63, %ecx +#ifdef USE_AS_WMEMCHR + shr $2, %ecx +#endif add %rcx, %rdx .p2align 4 L(align64_loop): - sub $64, %rdx + sub $(CHAR_PER_VEC * 4), %rdx jbe L(exit_loop) movdqa (%rdi), %xmm0 movdqa 16(%rdi), %xmm2 @@ -218,7 +233,7 @@ L(align64_loop): .p2align 4 L(exit_loop): - add $32, %edx + add $(CHAR_PER_VEC * 2), %edx jle L(exit_loop_32) movdqa (%rdi), %xmm0 @@ -238,7 +253,7 @@ L(exit_loop): pmovmskb %xmm3, %eax test %eax, %eax jnz L(matches32_1) - sub $16, %edx + sub $CHAR_PER_VEC, %edx jle L(return_null) PCMPEQ 48(%rdi), %xmm1 @@ -250,13 +265,13 @@ L(exit_loop): .p2align 4 L(exit_loop_32): - add $32, %edx + add $(CHAR_PER_VEC * 2), %edx movdqa (%rdi), %xmm0 PCMPEQ %xmm1, %xmm0 pmovmskb %xmm0, %eax test %eax, %eax jnz L(matches_1) - sub $16, %edx + sub $CHAR_PER_VEC, %edx jbe L(return_null) PCMPEQ 16(%rdi), %xmm1 @@ -293,7 +308,13 @@ L(matches32): .p2align 4 L(matches_1): bsf %eax, %eax +#ifdef USE_AS_WMEMCHR + mov %eax, %esi + shr $2, %esi + sub %rsi, %rdx +#else sub %rax, %rdx +#endif jbe L(return_null) add %rdi, %rax ret @@ -301,7 +322,13 @@ L(matches_1): .p2align 4 L(matches16_1): bsf %eax, %eax +#ifdef USE_AS_WMEMCHR + mov %eax, %esi + shr $2, %esi + sub %rsi, %rdx +#else sub %rax, %rdx +#endif jbe L(return_null) lea 16(%rdi, %rax), %rax ret @@ -309,7 +336,13 @@ L(matches16_1): .p2align 4 L(matches32_1): bsf %eax, %eax +#ifdef USE_AS_WMEMCHR + mov %eax, %esi + shr $2, %esi + sub %rsi, %rdx +#else sub %rax, %rdx +#endif jbe L(return_null) lea 32(%rdi, %rax), %rax ret @@ -317,7 +350,13 @@ L(matches32_1): .p2align 4 L(matches48_1): bsf %eax, %eax +#ifdef USE_AS_WMEMCHR + mov %eax, %esi + shr $2, %esi + sub %rsi, %rdx +#else sub %rax, %rdx +#endif jbe L(return_null) lea 48(%rdi, %rax), %rax ret diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 395e432c0..da1446d73 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -43,7 +43,45 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c \ memmove-avx512-unaligned-erms \ memset-sse2-unaligned-erms \ memset-avx2-unaligned-erms \ - memset-avx512-unaligned-erms + memset-avx512-unaligned-erms \ + memchr-avx2-rtm \ + memcmp-avx2-movbe-rtm \ + memmove-avx-unaligned-erms-rtm \ + memrchr-avx2-rtm \ + memset-avx2-unaligned-erms-rtm \ + rawmemchr-avx2-rtm \ + strchr-avx2-rtm \ + strcmp-avx2-rtm \ + strchrnul-avx2-rtm \ + stpcpy-avx2-rtm \ + stpncpy-avx2-rtm \ + strcat-avx2-rtm \ + strcpy-avx2-rtm \ + strlen-avx2-rtm \ + strncat-avx2-rtm \ + strncmp-avx2-rtm \ + 
strncpy-avx2-rtm \ + strnlen-avx2-rtm \ + strrchr-avx2-rtm \ + memchr-evex \ + memcmp-evex-movbe \ + memmove-evex-unaligned-erms \ + memrchr-evex \ + memset-evex-unaligned-erms \ + rawmemchr-evex \ + stpcpy-evex \ + stpncpy-evex \ + strcat-evex \ + strchr-evex \ + strchrnul-evex \ + strcmp-evex \ + strcpy-evex \ + strlen-evex \ + strncat-evex \ + strncmp-evex \ + strncpy-evex \ + strnlen-evex \ + strrchr-evex CFLAGS-varshift.c += -msse4 CFLAGS-strcspn-c.c += -msse4 CFLAGS-strpbrk-c.c += -msse4 @@ -59,8 +97,24 @@ sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \ wcscpy-ssse3 wcscpy-c \ wcschr-sse2 wcschr-avx2 \ wcsrchr-sse2 wcsrchr-avx2 \ - wcsnlen-sse4_1 wcsnlen-c \ - wcslen-sse2 wcslen-avx2 wcsnlen-avx2 + wcslen-sse2 wcslen-sse4_1 wcslen-avx2 \ + wcsnlen-c wcsnlen-sse4_1 wcsnlen-avx2 \ + wcschr-avx2-rtm \ + wcscmp-avx2-rtm \ + wcslen-avx2-rtm \ + wcsncmp-avx2-rtm \ + wcsnlen-avx2-rtm \ + wcsrchr-avx2-rtm \ + wmemchr-avx2-rtm \ + wmemcmp-avx2-movbe-rtm \ + wcschr-evex \ + wcscmp-evex \ + wcslen-evex \ + wcsncmp-evex \ + wcsnlen-evex \ + wcsrchr-evex \ + wmemchr-evex \ + wmemcmp-evex-movbe endif ifeq ($(subdir),debug) diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h index 69f30398a..925e5b61e 100644 --- a/sysdeps/x86_64/multiarch/ifunc-avx2.h +++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h @@ -21,16 +21,28 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; static inline void * IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + && CPU_FEATURES_CPU_P (cpu_features, BMI2) && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) - return OPTIMIZE (avx2); + { + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + return OPTIMIZE (evex); + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + return OPTIMIZE (avx2_rtm); + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE (avx2); + } return OPTIMIZE (sse2); } diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index ce7eb1eec..e712b148f 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -41,8 +41,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/memchr.c. */ IFUNC_IMPL (i, name, memchr, IFUNC_IMPL_ADD (array, i, memchr, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __memchr_avx2) + IFUNC_IMPL_ADD (array, i, memchr, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __memchr_avx2_rtm) + IFUNC_IMPL_ADD (array, i, memchr, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __memchr_evex) IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_sse2)) /* Support sysdeps/x86_64/multiarch/memcmp.c. 
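     The AVX2 and EVEX variants additionally require MOVBE, which the
     implementations use when computing the ordering of a mismatching
     chunk.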
*/ @@ -51,6 +62,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, (HAS_ARCH_FEATURE (AVX2_Usable) && HAS_CPU_FEATURE (MOVBE)), __memcmp_avx2_movbe) + IFUNC_IMPL_ADD (array, i, memcmp, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (MOVBE) + && HAS_CPU_FEATURE (RTM)), + __memcmp_avx2_movbe_rtm) + IFUNC_IMPL_ADD (array, i, memcmp, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (MOVBE)), + __memcmp_evex_movbe) IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSE4_1), __memcmp_sse4_1) IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSSE3), @@ -64,10 +85,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_ARCH_FEATURE (AVX512F_Usable), __memmove_chk_avx512_no_vzeroupper) IFUNC_IMPL_ADD (array, i, __memmove_chk, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __memmove_chk_avx512_unaligned) IFUNC_IMPL_ADD (array, i, __memmove_chk, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __memmove_chk_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_ARCH_FEATURE (AVX_Usable), @@ -75,6 +96,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_ARCH_FEATURE (AVX_Usable), __memmove_chk_avx_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memmove_chk, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __memmove_chk_avx_unaligned_rtm) + IFUNC_IMPL_ADD (array, i, __memmove_chk, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __memmove_chk_avx_unaligned_erms_rtm) + IFUNC_IMPL_ADD (array, i, __memmove_chk, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memmove_chk_evex_unaligned) + IFUNC_IMPL_ADD (array, i, __memmove_chk, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memmove_chk_evex_unaligned_erms) IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_CPU_FEATURE (SSSE3), __memmove_chk_ssse3_back) @@ -97,14 +132,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memmove, HAS_ARCH_FEATURE (AVX_Usable), __memmove_avx_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memmove, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __memmove_avx_unaligned_rtm) + IFUNC_IMPL_ADD (array, i, memmove, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __memmove_avx_unaligned_erms_rtm) + IFUNC_IMPL_ADD (array, i, memmove, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memmove_evex_unaligned) + IFUNC_IMPL_ADD (array, i, memmove, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memmove_evex_unaligned_erms) IFUNC_IMPL_ADD (array, i, memmove, HAS_ARCH_FEATURE (AVX512F_Usable), __memmove_avx512_no_vzeroupper) IFUNC_IMPL_ADD (array, i, memmove, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __memmove_avx512_unaligned) IFUNC_IMPL_ADD (array, i, memmove, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __memmove_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3), __memmove_ssse3_back) @@ -119,8 +168,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/memrchr.c. 
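     The AVX2 variant now also requires BMI2, matching the updated
     selector in ifunc-avx2.h above.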
*/ IFUNC_IMPL (i, name, memrchr, IFUNC_IMPL_ADD (array, i, memrchr, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __memrchr_avx2) + IFUNC_IMPL_ADD (array, i, memrchr, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __memrchr_avx2_rtm) + IFUNC_IMPL_ADD (array, i, memrchr, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __memrchr_evex) + IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_sse2)) #ifdef SHARED @@ -139,10 +200,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_ARCH_FEATURE (AVX2_Usable), __memset_chk_avx2_unaligned_erms) IFUNC_IMPL_ADD (array, i, __memset_chk, - HAS_ARCH_FEATURE (AVX512F_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __memset_chk_avx2_unaligned_rtm) + IFUNC_IMPL_ADD (array, i, __memset_chk, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __memset_chk_avx2_unaligned_erms_rtm) + IFUNC_IMPL_ADD (array, i, __memset_chk, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __memset_chk_evex_unaligned) + IFUNC_IMPL_ADD (array, i, __memset_chk, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __memset_chk_evex_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memset_chk, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), __memset_chk_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, __memset_chk, - HAS_ARCH_FEATURE (AVX512F_Usable), + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), __memset_chk_avx512_unaligned) IFUNC_IMPL_ADD (array, i, __memset_chk, HAS_ARCH_FEATURE (AVX512F_Usable), @@ -164,10 +243,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_ARCH_FEATURE (AVX2_Usable), __memset_avx2_unaligned_erms) IFUNC_IMPL_ADD (array, i, memset, - HAS_ARCH_FEATURE (AVX512F_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __memset_avx2_unaligned_rtm) + IFUNC_IMPL_ADD (array, i, memset, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __memset_avx2_unaligned_erms_rtm) + IFUNC_IMPL_ADD (array, i, memset, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __memset_evex_unaligned) + IFUNC_IMPL_ADD (array, i, memset, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __memset_evex_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memset, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), __memset_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, memset, - HAS_ARCH_FEATURE (AVX512F_Usable), + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), __memset_avx512_unaligned) IFUNC_IMPL_ADD (array, i, memset, HAS_ARCH_FEATURE (AVX512F_Usable), @@ -177,22 +274,55 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/rawmemchr.c. 
*/ IFUNC_IMPL (i, name, rawmemchr, IFUNC_IMPL_ADD (array, i, rawmemchr, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __rawmemchr_avx2) + IFUNC_IMPL_ADD (array, i, rawmemchr, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __rawmemchr_avx2_rtm) + IFUNC_IMPL_ADD (array, i, rawmemchr, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __rawmemchr_evex) IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_sse2)) /* Support sysdeps/x86_64/multiarch/strlen.c. */ IFUNC_IMPL (i, name, strlen, IFUNC_IMPL_ADD (array, i, strlen, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __strlen_avx2) + IFUNC_IMPL_ADD (array, i, strlen, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __strlen_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strlen, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __strlen_evex) IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2)) /* Support sysdeps/x86_64/multiarch/strnlen.c. */ IFUNC_IMPL (i, name, strnlen, IFUNC_IMPL_ADD (array, i, strnlen, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __strnlen_avx2) + IFUNC_IMPL_ADD (array, i, strnlen, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __strnlen_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strnlen, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __strnlen_evex) IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2)) /* Support sysdeps/x86_64/multiarch/stpncpy.c. 
*/ @@ -201,6 +331,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __stpncpy_ssse3) IFUNC_IMPL_ADD (array, i, stpncpy, HAS_ARCH_FEATURE (AVX2_Usable), __stpncpy_avx2) + IFUNC_IMPL_ADD (array, i, stpncpy, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __stpncpy_avx2_rtm) + IFUNC_IMPL_ADD (array, i, stpncpy, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __stpncpy_evex) IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2)) @@ -211,6 +349,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __stpcpy_ssse3) IFUNC_IMPL_ADD (array, i, stpcpy, HAS_ARCH_FEATURE (AVX2_Usable), __stpcpy_avx2) + IFUNC_IMPL_ADD (array, i, stpcpy, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __stpcpy_avx2_rtm) + IFUNC_IMPL_ADD (array, i, stpcpy, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __stpcpy_evex) IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2)) @@ -245,6 +391,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, strcat, IFUNC_IMPL_ADD (array, i, strcat, HAS_ARCH_FEATURE (AVX2_Usable), __strcat_avx2) + IFUNC_IMPL_ADD (array, i, strcat, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __strcat_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strcat, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __strcat_evex) IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSSE3), __strcat_ssse3) IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2_unaligned) @@ -253,23 +407,56 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/strchr.c. */ IFUNC_IMPL (i, name, strchr, IFUNC_IMPL_ADD (array, i, strchr, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __strchr_avx2) + IFUNC_IMPL_ADD (array, i, strchr, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __strchr_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strchr, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __strchr_evex) IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf) IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2)) /* Support sysdeps/x86_64/multiarch/strchrnul.c. */ IFUNC_IMPL (i, name, strchrnul, IFUNC_IMPL_ADD (array, i, strchrnul, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __strchrnul_avx2) + IFUNC_IMPL_ADD (array, i, strchrnul, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __strchrnul_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strchrnul, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __strchrnul_evex) IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2)) /* Support sysdeps/x86_64/multiarch/strrchr.c. 
*/ IFUNC_IMPL (i, name, strrchr, IFUNC_IMPL_ADD (array, i, strrchr, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __strrchr_avx2) + IFUNC_IMPL_ADD (array, i, strrchr, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __strrchr_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strrchr, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __strrchr_evex) IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2)) /* Support sysdeps/x86_64/multiarch/strcmp.c. */ @@ -277,6 +464,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, strcmp, HAS_ARCH_FEATURE (AVX2_Usable), __strcmp_avx2) + IFUNC_IMPL_ADD (array, i, strcmp, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __strcmp_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strcmp, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __strcmp_evex) IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2), __strcmp_sse42) IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSSE3), @@ -288,6 +484,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, strcpy, IFUNC_IMPL_ADD (array, i, strcpy, HAS_ARCH_FEATURE (AVX2_Usable), __strcpy_avx2) + IFUNC_IMPL_ADD (array, i, strcpy, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __strcpy_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strcpy, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __strcpy_evex) IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSSE3), __strcpy_ssse3) IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_sse2_unaligned) @@ -331,6 +535,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, strncat, IFUNC_IMPL_ADD (array, i, strncat, HAS_ARCH_FEATURE (AVX2_Usable), __strncat_avx2) + IFUNC_IMPL_ADD (array, i, strncat, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __strncat_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strncat, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __strncat_evex) IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSSE3), __strncat_ssse3) IFUNC_IMPL_ADD (array, i, strncat, 1, @@ -341,6 +553,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, strncpy, IFUNC_IMPL_ADD (array, i, strncpy, HAS_ARCH_FEATURE (AVX2_Usable), __strncpy_avx2) + IFUNC_IMPL_ADD (array, i, strncpy, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __strncpy_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strncpy, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __strncpy_evex) IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSSE3), __strncpy_ssse3) IFUNC_IMPL_ADD (array, i, strncpy, 1, @@ -368,29 +588,73 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/wcschr.c. 
*/ IFUNC_IMPL (i, name, wcschr, IFUNC_IMPL_ADD (array, i, wcschr, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __wcschr_avx2) + IFUNC_IMPL_ADD (array, i, wcschr, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __wcschr_avx2_rtm) + IFUNC_IMPL_ADD (array, i, wcschr, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __wcschr_evex) IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2)) /* Support sysdeps/x86_64/multiarch/wcsrchr.c. */ IFUNC_IMPL (i, name, wcsrchr, IFUNC_IMPL_ADD (array, i, wcsrchr, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __wcsrchr_avx2) + IFUNC_IMPL_ADD (array, i, wcsrchr, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __wcsrchr_avx2_rtm) + IFUNC_IMPL_ADD (array, i, wcsrchr, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __wcsrchr_evex) IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2)) /* Support sysdeps/x86_64/multiarch/wcscmp.c. */ IFUNC_IMPL (i, name, wcscmp, IFUNC_IMPL_ADD (array, i, wcscmp, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __wcscmp_avx2) + IFUNC_IMPL_ADD (array, i, wcscmp, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __wcscmp_avx2_rtm) + IFUNC_IMPL_ADD (array, i, wcscmp, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __wcscmp_evex) IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_sse2)) /* Support sysdeps/x86_64/multiarch/wcsncmp.c. */ IFUNC_IMPL (i, name, wcsncmp, IFUNC_IMPL_ADD (array, i, wcsncmp, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __wcsncmp_avx2) + IFUNC_IMPL_ADD (array, i, wcsncmp, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __wcsncmp_avx2_rtm) + IFUNC_IMPL_ADD (array, i, wcsncmp, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __wcsncmp_evex) IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_sse2)) /* Support sysdeps/x86_64/multiarch/wcscpy.c. */ @@ -402,15 +666,40 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/wcslen.c. */ IFUNC_IMPL (i, name, wcslen, IFUNC_IMPL_ADD (array, i, wcslen, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __wcslen_avx2) + IFUNC_IMPL_ADD (array, i, wcslen, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __wcslen_avx2_rtm) + IFUNC_IMPL_ADD (array, i, wcslen, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __wcslen_evex) + IFUNC_IMPL_ADD (array, i, wcslen, + CPU_FEATURE_USABLE (SSE4_1), + __wcslen_sse4_1) IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_sse2)) /* Support sysdeps/x86_64/multiarch/wcsnlen.c. 
*/ IFUNC_IMPL (i, name, wcsnlen, IFUNC_IMPL_ADD (array, i, wcsnlen, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __wcsnlen_avx2) + IFUNC_IMPL_ADD (array, i, wcsnlen, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __wcsnlen_avx2_rtm) + IFUNC_IMPL_ADD (array, i, wcsnlen, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __wcsnlen_evex) IFUNC_IMPL_ADD (array, i, wcsnlen, HAS_CPU_FEATURE (SSE4_1), __wcsnlen_sse4_1) @@ -419,8 +708,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/wmemchr.c. */ IFUNC_IMPL (i, name, wmemchr, IFUNC_IMPL_ADD (array, i, wmemchr, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __wmemchr_avx2) + IFUNC_IMPL_ADD (array, i, wmemchr, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __wmemchr_avx2_rtm) + IFUNC_IMPL_ADD (array, i, wmemchr, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __wmemchr_evex) IFUNC_IMPL_ADD (array, i, wmemchr, 1, __wmemchr_sse2)) /* Support sysdeps/x86_64/multiarch/wmemcmp.c. */ @@ -429,6 +729,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, (HAS_ARCH_FEATURE (AVX2_Usable) && HAS_CPU_FEATURE (MOVBE)), __wmemcmp_avx2_movbe) + IFUNC_IMPL_ADD (array, i, wmemcmp, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (MOVBE) + && HAS_CPU_FEATURE (RTM)), + __wmemcmp_avx2_movbe_rtm) + IFUNC_IMPL_ADD (array, i, wmemcmp, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (MOVBE)), + __wmemcmp_evex_movbe) IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSE4_1), __wmemcmp_sse4_1) IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSSE3), @@ -443,7 +753,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_ARCH_FEATURE (AVX2_Usable), __wmemset_avx2_unaligned) IFUNC_IMPL_ADD (array, i, wmemset, - HAS_ARCH_FEATURE (AVX512F_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __wmemset_avx2_unaligned_rtm) + IFUNC_IMPL_ADD (array, i, wmemset, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __wmemset_evex_unaligned) + IFUNC_IMPL_ADD (array, i, wmemset, + HAS_ARCH_FEATURE (AVX512VL_Usable), __wmemset_avx512_unaligned)) #ifdef SHARED @@ -453,10 +770,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_ARCH_FEATURE (AVX512F_Usable), __memcpy_chk_avx512_no_vzeroupper) IFUNC_IMPL_ADD (array, i, __memcpy_chk, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __memcpy_chk_avx512_unaligned) IFUNC_IMPL_ADD (array, i, __memcpy_chk, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __memcpy_chk_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_ARCH_FEATURE (AVX_Usable), @@ -464,6 +781,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_ARCH_FEATURE (AVX_Usable), __memcpy_chk_avx_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __memcpy_chk_avx_unaligned_rtm) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __memcpy_chk_avx_unaligned_erms_rtm) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + 
HAS_ARCH_FEATURE (AVX512VL_Usable), + __memcpy_chk_evex_unaligned) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memcpy_chk_evex_unaligned_erms) IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_CPU_FEATURE (SSSE3), __memcpy_chk_ssse3_back) @@ -486,6 +817,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memcpy, HAS_ARCH_FEATURE (AVX_Usable), __memcpy_avx_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memcpy, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __memcpy_avx_unaligned_rtm) + IFUNC_IMPL_ADD (array, i, memcpy, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __memcpy_avx_unaligned_erms_rtm) + IFUNC_IMPL_ADD (array, i, memcpy, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memcpy_evex_unaligned) + IFUNC_IMPL_ADD (array, i, memcpy, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memcpy_evex_unaligned_erms) IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3), __memcpy_ssse3_back) IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3), @@ -494,10 +839,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_ARCH_FEATURE (AVX512F_Usable), __memcpy_avx512_no_vzeroupper) IFUNC_IMPL_ADD (array, i, memcpy, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __memcpy_avx512_unaligned) IFUNC_IMPL_ADD (array, i, memcpy, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __memcpy_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, memcpy, 1, @@ -511,10 +856,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_ARCH_FEATURE (AVX512F_Usable), __mempcpy_chk_avx512_no_vzeroupper) IFUNC_IMPL_ADD (array, i, __mempcpy_chk, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __mempcpy_chk_avx512_unaligned) IFUNC_IMPL_ADD (array, i, __mempcpy_chk, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __mempcpy_chk_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_ARCH_FEATURE (AVX_Usable), @@ -522,6 +867,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_ARCH_FEATURE (AVX_Usable), __mempcpy_chk_avx_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __mempcpy_chk_avx_unaligned_rtm) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __mempcpy_chk_avx_unaligned_erms_rtm) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __mempcpy_chk_evex_unaligned) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __mempcpy_chk_evex_unaligned_erms) IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_CPU_FEATURE (SSSE3), __mempcpy_chk_ssse3_back) @@ -542,10 +901,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_ARCH_FEATURE (AVX512F_Usable), __mempcpy_avx512_no_vzeroupper) IFUNC_IMPL_ADD (array, i, mempcpy, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __mempcpy_avx512_unaligned) IFUNC_IMPL_ADD (array, i, mempcpy, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __mempcpy_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, mempcpy, HAS_ARCH_FEATURE (AVX_Usable), @@ -553,6 +912,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, 
IFUNC_IMPL_ADD (array, i, mempcpy, HAS_ARCH_FEATURE (AVX_Usable), __mempcpy_avx_unaligned_erms) + IFUNC_IMPL_ADD (array, i, mempcpy, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __mempcpy_avx_unaligned_rtm) + IFUNC_IMPL_ADD (array, i, mempcpy, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __mempcpy_avx_unaligned_erms_rtm) + IFUNC_IMPL_ADD (array, i, mempcpy, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __mempcpy_evex_unaligned) + IFUNC_IMPL_ADD (array, i, mempcpy, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __mempcpy_evex_unaligned_erms) IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3), __mempcpy_ssse3_back) IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3), @@ -568,6 +941,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, strncmp, HAS_ARCH_FEATURE (AVX2_Usable), __strncmp_avx2) + IFUNC_IMPL_ADD (array, i, strncmp, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __strncmp_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strncmp, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __strncmp_evex) IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSE4_2), __strncmp_sse42) IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSSE3), @@ -582,6 +963,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, __wmemset_chk, HAS_ARCH_FEATURE (AVX2_Usable), __wmemset_chk_avx2_unaligned) + IFUNC_IMPL_ADD (array, i, __wmemset_chk, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __wmemset_chk_evex_unaligned) IFUNC_IMPL_ADD (array, i, __wmemset_chk, HAS_ARCH_FEATURE (AVX512F_Usable), __wmemset_chk_avx512_unaligned)) diff --git a/sysdeps/x86_64/multiarch/ifunc-memcmp.h b/sysdeps/x86_64/multiarch/ifunc-memcmp.h index c14db39cf..ebbb0c01c 100644 --- a/sysdeps/x86_64/multiarch/ifunc-memcmp.h +++ b/sysdeps/x86_64/multiarch/ifunc-memcmp.h @@ -23,17 +23,28 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe_rtm) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_movbe) attribute_hidden; static inline void * IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) && CPU_FEATURES_CPU_P (cpu_features, MOVBE) && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) - return OPTIMIZE (avx2_movbe); + { + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + return OPTIMIZE (evex_movbe); + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + return OPTIMIZE (avx2_movbe_rtm); + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE (avx2_movbe); + } if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1)) return OPTIMIZE (sse4_1); diff --git a/sysdeps/x86_64/multiarch/ifunc-memmove.h b/sysdeps/x86_64/multiarch/ifunc-memmove.h index 81673d201..dfc5a2848 100644 --- a/sysdeps/x86_64/multiarch/ifunc-memmove.h +++ b/sysdeps/x86_64/multiarch/ifunc-memmove.h @@ -29,6 +29,14 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3_back) attribute_hidden; extern __typeof (REDIRECT_NAME) 
OPTIMIZE (avx_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_rtm) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms_rtm) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms) + attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms) @@ -48,21 +56,42 @@ IFUNC_SELECTOR (void) if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) { - if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) - return OPTIMIZE (avx512_no_vzeroupper); + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (avx512_unaligned_erms); - if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) - return OPTIMIZE (avx512_unaligned_erms); + return OPTIMIZE (avx512_unaligned); + } - return OPTIMIZE (avx512_unaligned); + return OPTIMIZE (avx512_no_vzeroupper); } if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) { - if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) - return OPTIMIZE (avx_unaligned_erms); + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (evex_unaligned_erms); + + return OPTIMIZE (evex_unaligned); + } + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (avx_unaligned_erms_rtm); + + return OPTIMIZE (avx_unaligned_rtm); + } + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (avx_unaligned_erms); - return OPTIMIZE (avx_unaligned); + return OPTIMIZE (avx_unaligned); + } } if (!CPU_FEATURES_CPU_P (cpu_features, SSSE3) diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h index d69029338..48fdb24b0 100644 --- a/sysdeps/x86_64/multiarch/ifunc-memset.h +++ b/sysdeps/x86_64/multiarch/ifunc-memset.h @@ -27,6 +27,14 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms) extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms_rtm) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms) + attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms) @@ -45,21 +53,44 @@ IFUNC_SELECTOR (void) if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) { - if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) - return OPTIMIZE (avx512_no_vzeroupper); + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (avx512_unaligned_erms); - if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) - return OPTIMIZE (avx512_unaligned_erms); + return 
OPTIMIZE (avx512_unaligned); + } - return OPTIMIZE (avx512_unaligned); + return OPTIMIZE (avx512_no_vzeroupper); } if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)) { - if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) - return OPTIMIZE (avx2_unaligned_erms); - else - return OPTIMIZE (avx2_unaligned); + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (evex_unaligned_erms); + + return OPTIMIZE (evex_unaligned); + } + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (avx2_unaligned_erms_rtm); + + return OPTIMIZE (avx2_unaligned_rtm); + } + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (avx2_unaligned_erms); + + return OPTIMIZE (avx2_unaligned); + } } if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h index ae4f45180..f38a3b750 100644 --- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h +++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h @@ -25,16 +25,27 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; static inline void * IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) - return OPTIMIZE (avx2); + { + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + return OPTIMIZE (evex); + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + return OPTIMIZE (avx2_rtm); + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE (avx2); + } if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load)) return OPTIMIZE (sse2_unaligned); diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h new file mode 100644 index 000000000..564cc8cbe --- /dev/null +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h @@ -0,0 +1,52 @@ +/* Common definition for ifunc selections for wcslen and wcsnlen + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017-2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#include + +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + && CPU_FEATURES_CPU_P (cpu_features, BMI2) + && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + { + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + return OPTIMIZE (evex); + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + return OPTIMIZE (avx2_rtm); + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE (avx2); + } + + if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1)) + return OPTIMIZE (sse4_1); + + return OPTIMIZE (sse2); +} diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h index 583f6310a..0ce29a229 100644 --- a/sysdeps/x86_64/multiarch/ifunc-wmemset.h +++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h @@ -20,6 +20,9 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden; static inline void * @@ -27,14 +30,21 @@ IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) { - if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) - && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) - return OPTIMIZE (avx512_unaligned); - else + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)) + { + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) + return OPTIMIZE (avx512_unaligned); + + return OPTIMIZE (evex_unaligned); + } + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + return OPTIMIZE (avx2_unaligned_rtm); + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) return OPTIMIZE (avx2_unaligned); } diff --git a/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S new file mode 100644 index 000000000..87b076c7c --- /dev/null +++ b/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S @@ -0,0 +1,12 @@ +#ifndef MEMCHR +# define MEMCHR __memchr_avx2_rtm +#endif + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + +#define SECTION(p) p##.avx.rtm + +#include "memchr-avx2.S" diff --git a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S index e5a9abd21..0987616a1 100644 --- a/sysdeps/x86_64/multiarch/memchr-avx2.S +++ b/sysdeps/x86_64/multiarch/memchr-avx2.S @@ -26,319 +26,407 @@ # ifdef USE_AS_WMEMCHR # define VPCMPEQ vpcmpeqd +# define VPBROADCAST vpbroadcastd +# define CHAR_SIZE 4 # else # define VPCMPEQ vpcmpeqb +# define VPBROADCAST vpbroadcastb +# define CHAR_SIZE 1 
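The twelve-line memchr-avx2-rtm.S wrapper above is the whole RTM strategy: reuse memchr-avx2.S unchanged, but place it in a .avx.rtm text section and swap the return macro so that VZEROUPPER is never executed inside a hardware transaction, where it would force an abort. A hedged C model of that return macro follows; the _xtest, _mm256_zeroall and _mm256_zeroupper intrinsics are standard <immintrin.h>, but building with -mrtm -mavx and the exact abort behaviour are assumptions of this sketch.

#include <immintrin.h>

/* Model of ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST as redefined by the
   *-rtm wrappers.  */
static inline void
zero_upper_on_return (void)
{
  if (_xtest ())		/* Inside an RTM transaction?  */
    _mm256_zeroall ();		/* VZEROALL does not abort it.  */
  else
    _mm256_zeroupper ();	/* Usual cure for AVX/SSE transition
				   penalties.  */
}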
+# endif
+
+# ifdef USE_AS_RAWMEMCHR
+# define ERAW_PTR_REG	ecx
+# define RRAW_PTR_REG	rcx
+# define ALGN_PTR_REG	rdi
+# else
+# define ERAW_PTR_REG	edi
+# define RRAW_PTR_REG	rdi
+# define ALGN_PTR_REG	rcx
 # endif
 
 # ifndef VZEROUPPER
 # define VZEROUPPER	vzeroupper
 # endif
 
+# ifndef SECTION
+# define SECTION(p)	p##.avx
+# endif
+
 # define VEC_SIZE 32
+# define PAGE_SIZE 4096
+# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
 
-	.section .text.avx,"ax",@progbits
+	.section SECTION(.text),"ax",@progbits
 ENTRY (MEMCHR)
 # ifndef USE_AS_RAWMEMCHR
 	/* Check for zero length.  */
+# ifdef __ILP32__
+	/* Clear upper bits.  */
+	and	%RDX_LP, %RDX_LP
+# else
 	test	%RDX_LP, %RDX_LP
+# endif
 	jz	L(null)
 # endif
-	movl	%edi, %ecx
-	/* Broadcast CHAR to YMM0.  */
+	/* Broadcast CHAR to YMMMATCH.  */
 	vmovd	%esi, %xmm0
-# ifdef USE_AS_WMEMCHR
-	shl	$2, %RDX_LP
-	vpbroadcastd %xmm0, %ymm0
-# else
-# ifdef __ILP32__
-	/* Clear the upper 32 bits.  */
-	movl	%edx, %edx
-# endif
-	vpbroadcastb %xmm0, %ymm0
-# endif
+	VPBROADCAST %xmm0, %ymm0
 	/* Check if we may cross page boundary with one vector load.  */
-	andl	$(2 * VEC_SIZE - 1), %ecx
-	cmpl	$VEC_SIZE, %ecx
-	ja	L(cros_page_boundary)
+	movl	%edi, %eax
+	andl	$(PAGE_SIZE - 1), %eax
+	cmpl	$(PAGE_SIZE - VEC_SIZE), %eax
+	ja	L(cross_page_boundary)
 
 	/* Check the first VEC_SIZE bytes.  */
-	VPCMPEQ (%rdi), %ymm0, %ymm1
+	VPCMPEQ	(%rdi), %ymm0, %ymm1
 	vpmovmskb %ymm1, %eax
-	testl	%eax, %eax
-
 # ifndef USE_AS_RAWMEMCHR
-	jnz	L(first_vec_x0_check)
-	/* Adjust length and check the end of data.  */
-	subq	$VEC_SIZE, %rdx
-	jbe	L(zero)
-# else
-	jnz	L(first_vec_x0)
+	/* If length < CHAR_PER_VEC handle special.  */
+	cmpq	$CHAR_PER_VEC, %rdx
+	jbe	L(first_vec_x0)
 # endif
-
-	/* Align data for aligned loads in the loop.  */
-	addq	$VEC_SIZE, %rdi
-	andl	$(VEC_SIZE - 1), %ecx
-	andq	$-VEC_SIZE, %rdi
+	testl	%eax, %eax
+	jz	L(aligned_more)
+	tzcntl	%eax, %eax
+	addq	%rdi, %rax
+	VZEROUPPER_RETURN
 
 # ifndef USE_AS_RAWMEMCHR
-	/* Adjust length.  */
-	addq	%rcx, %rdx
+	.p2align 5
+L(first_vec_x0):
+	/* Check if first match was before length.  */
+	tzcntl	%eax, %eax
+# ifdef USE_AS_WMEMCHR
+	/* NB: Multiply length by 4 to get byte count.  */
+	sall	$2, %edx
+# endif
+	xorl	%ecx, %ecx
+	cmpl	%eax, %edx
+	leaq	(%rdi, %rax), %rax
+	cmovle	%rcx, %rax
+	VZEROUPPER_RETURN
 
-	subq	$(VEC_SIZE * 4), %rdx
-	jbe	L(last_4x_vec_or_less)
+L(null):
+	xorl	%eax, %eax
+	ret
 # endif
-	jmp	L(more_4x_vec)
-
 	.p2align 4
-L(cros_page_boundary):
-	andl	$(VEC_SIZE - 1), %ecx
-	andq	$-VEC_SIZE, %rdi
-	VPCMPEQ (%rdi), %ymm0, %ymm1
+L(cross_page_boundary):
+	/* Save pointer before aligning as its original value is
+	   necessary for computing the return address if a byte is
+	   found, or for adjusting the length if it is not and this
+	   is memchr.  */
+	movq	%rdi, %rcx
+	/* Align data to VEC_SIZE - 1. ALGN_PTR_REG is rcx for memchr
+	   and rdi for rawmemchr.  */
+	orq	$(VEC_SIZE - 1), %ALGN_PTR_REG
+	VPCMPEQ	-(VEC_SIZE - 1)(%ALGN_PTR_REG), %ymm0, %ymm1
 	vpmovmskb %ymm1, %eax
+# ifndef USE_AS_RAWMEMCHR
+	/* Calculate length until end of page (length checked for a
+	   match).  */
+	leaq	1(%ALGN_PTR_REG), %rsi
+	subq	%RRAW_PTR_REG, %rsi
+# ifdef USE_AS_WMEMCHR
+	/* NB: Divide bytes by 4 to get wchar_t count.  */
+	shrl	$2, %esi
+# endif
+# endif
 	/* Remove the leading bytes.  */
-	sarl	%cl, %eax
-	testl	%eax, %eax
-	jz	L(aligned_more)
-	tzcntl	%eax, %eax
+	sarxl	%ERAW_PTR_REG, %eax, %eax
 # ifndef USE_AS_RAWMEMCHR
 	/* Check the end of data.
*/ - cmpq %rax, %rdx - jbe L(zero) + cmpq %rsi, %rdx + jbe L(first_vec_x0) # endif + testl %eax, %eax + jz L(cross_page_continue) + tzcntl %eax, %eax + addq %RRAW_PTR_REG, %rax +L(return_vzeroupper): + ZERO_UPPER_VEC_REGISTERS_RETURN + + .p2align 4 +L(first_vec_x1): + tzcntl %eax, %eax + incq %rdi addq %rdi, %rax - addq %rcx, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 -L(aligned_more): -# ifndef USE_AS_RAWMEMCHR - /* Calculate "rdx + rcx - VEC_SIZE" with "rdx - (VEC_SIZE - rcx)" - instead of "(rdx + rcx) - VEC_SIZE" to void possible addition - overflow. */ - negq %rcx - addq $VEC_SIZE, %rcx +L(first_vec_x2): + tzcntl %eax, %eax + addq $(VEC_SIZE + 1), %rdi + addq %rdi, %rax + VZEROUPPER_RETURN - /* Check the end of data. */ - subq %rcx, %rdx - jbe L(zero) -# endif + .p2align 4 +L(first_vec_x3): + tzcntl %eax, %eax + addq $(VEC_SIZE * 2 + 1), %rdi + addq %rdi, %rax + VZEROUPPER_RETURN - addq $VEC_SIZE, %rdi -# ifndef USE_AS_RAWMEMCHR - subq $(VEC_SIZE * 4), %rdx - jbe L(last_4x_vec_or_less) -# endif + .p2align 4 +L(first_vec_x4): + tzcntl %eax, %eax + addq $(VEC_SIZE * 3 + 1), %rdi + addq %rdi, %rax + VZEROUPPER_RETURN -L(more_4x_vec): + .p2align 4 +L(aligned_more): /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time since data is only aligned to VEC_SIZE. */ - VPCMPEQ (%rdi), %ymm0, %ymm1 - vpmovmskb %ymm1, %eax - testl %eax, %eax - jnz L(first_vec_x0) - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 +# ifndef USE_AS_RAWMEMCHR +L(cross_page_continue): + /* Align data to VEC_SIZE - 1. */ + xorl %ecx, %ecx + subl %edi, %ecx + orq $(VEC_SIZE - 1), %rdi + /* esi is for adjusting length to see if near the end. */ + leal (VEC_SIZE * 4 + 1)(%rdi, %rcx), %esi +# ifdef USE_AS_WMEMCHR + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %esi +# endif +# else + orq $(VEC_SIZE - 1), %rdi +L(cross_page_continue): +# endif + /* Load first VEC regardless. */ + VPCMPEQ 1(%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax +# ifndef USE_AS_RAWMEMCHR + /* Adjust length. If near end handle specially. */ + subq %rsi, %rdx + jbe L(last_4x_vec_or_less) +# endif testl %eax, %eax jnz L(first_vec_x1) - VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1 + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax testl %eax, %eax jnz L(first_vec_x2) - VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1 + VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax testl %eax, %eax jnz L(first_vec_x3) - addq $(VEC_SIZE * 4), %rdi - -# ifndef USE_AS_RAWMEMCHR - subq $(VEC_SIZE * 4), %rdx - jbe L(last_4x_vec_or_less) -# endif - - /* Align data to 4 * VEC_SIZE. */ - movq %rdi, %rcx - andl $(4 * VEC_SIZE - 1), %ecx - andq $-(4 * VEC_SIZE), %rdi + VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + testl %eax, %eax + jnz L(first_vec_x4) # ifndef USE_AS_RAWMEMCHR - /* Adjust length. */ + /* Check if at last VEC_SIZE * 4 length. */ + subq $(CHAR_PER_VEC * 4), %rdx + jbe L(last_4x_vec_or_less_cmpeq) + /* Align data to VEC_SIZE * 4 - 1 for the loop and readjust + length. */ + incq %rdi + movl %edi, %ecx + orq $(VEC_SIZE * 4 - 1), %rdi + andl $(VEC_SIZE * 4 - 1), %ecx +# ifdef USE_AS_WMEMCHR + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %ecx +# endif addq %rcx, %rdx +# else + /* Align data to VEC_SIZE * 4 - 1 for loop. */ + incq %rdi + orq $(VEC_SIZE * 4 - 1), %rdi # endif + /* Compare 4 * VEC at a time forward. */ .p2align 4 L(loop_4x_vec): - /* Compare 4 * VEC at a time forward. 
*/ - VPCMPEQ (%rdi), %ymm0, %ymm1 - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm2 - VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm3 - VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm4 - + VPCMPEQ 1(%rdi), %ymm0, %ymm1 + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm2 + VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm3 + VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm4 vpor %ymm1, %ymm2, %ymm5 vpor %ymm3, %ymm4, %ymm6 vpor %ymm5, %ymm6, %ymm5 - vpmovmskb %ymm5, %eax - testl %eax, %eax - jnz L(4x_vec_end) - - addq $(VEC_SIZE * 4), %rdi - + vpmovmskb %ymm5, %ecx # ifdef USE_AS_RAWMEMCHR - jmp L(loop_4x_vec) + subq $-(VEC_SIZE * 4), %rdi + testl %ecx, %ecx + jz L(loop_4x_vec) # else - subq $(VEC_SIZE * 4), %rdx - ja L(loop_4x_vec) + testl %ecx, %ecx + jnz L(loop_4x_vec_end) -L(last_4x_vec_or_less): - /* Less than 4 * VEC and aligned to VEC_SIZE. */ - addl $(VEC_SIZE * 2), %edx - jle L(last_2x_vec) + subq $-(VEC_SIZE * 4), %rdi - VPCMPEQ (%rdi), %ymm0, %ymm1 - vpmovmskb %ymm1, %eax - testl %eax, %eax - jnz L(first_vec_x0) + subq $(CHAR_PER_VEC * 4), %rdx + ja L(loop_4x_vec) - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 + /* Fall through into less than 4 remaining vectors of length + case. */ + VPCMPEQ (VEC_SIZE * 0 + 1)(%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax + .p2align 4 +L(last_4x_vec_or_less): +# ifdef USE_AS_WMEMCHR + /* NB: Multiply length by 4 to get byte count. */ + sall $2, %edx +# endif + /* Check if first VEC contained match. */ testl %eax, %eax - jnz L(first_vec_x1) + jnz L(first_vec_x1_check) - VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1 - vpmovmskb %ymm1, %eax - testl %eax, %eax + /* If remaining length > VEC_SIZE * 2. */ + addl $(VEC_SIZE * 2), %edx + jg L(last_4x_vec) - jnz L(first_vec_x2_check) - subl $VEC_SIZE, %edx - jle L(zero) +L(last_2x_vec): + /* If remaining length < VEC_SIZE. */ + addl $VEC_SIZE, %edx + jle L(zero_end) - VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1 + /* Check VEC2 and compare any match with remaining length. */ + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax - testl %eax, %eax - - jnz L(first_vec_x3_check) - xorl %eax, %eax - VZEROUPPER - ret + tzcntl %eax, %eax + cmpl %eax, %edx + jbe L(set_zero_end) + addq $(VEC_SIZE + 1), %rdi + addq %rdi, %rax +L(zero_end): + VZEROUPPER_RETURN .p2align 4 -L(last_2x_vec): - addl $(VEC_SIZE * 2), %edx - VPCMPEQ (%rdi), %ymm0, %ymm1 +L(loop_4x_vec_end): +# endif + /* rawmemchr will fall through into this if match was found in + loop. */ + vpmovmskb %ymm1, %eax testl %eax, %eax + jnz L(last_vec_x1_return) - jnz L(first_vec_x0_check) - subl $VEC_SIZE, %edx - jle L(zero) - - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 - vpmovmskb %ymm1, %eax + vpmovmskb %ymm2, %eax testl %eax, %eax - jnz L(first_vec_x1_check) - xorl %eax, %eax - VZEROUPPER - ret + jnz L(last_vec_x2_return) - .p2align 4 -L(first_vec_x0_check): - tzcntl %eax, %eax - /* Check the end of data. */ - cmpq %rax, %rdx - jbe L(zero) + vpmovmskb %ymm3, %eax + /* Combine VEC3 matches (eax) with VEC4 matches (ecx). */ + salq $32, %rcx + orq %rcx, %rax + tzcntq %rax, %rax +# ifdef USE_AS_RAWMEMCHR + subq $(VEC_SIZE * 2 - 1), %rdi +# else + subq $-(VEC_SIZE * 2 + 1), %rdi +# endif addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN +# ifndef USE_AS_RAWMEMCHR .p2align 4 L(first_vec_x1_check): tzcntl %eax, %eax - /* Check the end of data. */ - cmpq %rax, %rdx - jbe L(zero) - addq $VEC_SIZE, %rax + /* Adjust length. */ + subl $-(VEC_SIZE * 4), %edx + /* Check if match within remaining length. 
*/ + cmpl %eax, %edx + jbe L(set_zero_end) + incq %rdi addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN + .p2align 4 +L(set_zero_end): + xorl %eax, %eax + VZEROUPPER_RETURN +# endif .p2align 4 -L(first_vec_x2_check): +L(last_vec_x1_return): tzcntl %eax, %eax - /* Check the end of data. */ - cmpq %rax, %rdx - jbe L(zero) - addq $(VEC_SIZE * 2), %rax +# ifdef USE_AS_RAWMEMCHR + subq $(VEC_SIZE * 4 - 1), %rdi +# else + incq %rdi +# endif addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 -L(first_vec_x3_check): +L(last_vec_x2_return): tzcntl %eax, %eax - /* Check the end of data. */ - cmpq %rax, %rdx - jbe L(zero) - addq $(VEC_SIZE * 3), %rax +# ifdef USE_AS_RAWMEMCHR + subq $(VEC_SIZE * 3 - 1), %rdi +# else + subq $-(VEC_SIZE + 1), %rdi +# endif addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN +# ifndef USE_AS_RAWMEMCHR .p2align 4 -L(zero): - VZEROUPPER -L(null): - xorl %eax, %eax - ret -# endif +L(last_4x_vec_or_less_cmpeq): + VPCMPEQ (VEC_SIZE * 4 + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax +# ifdef USE_AS_WMEMCHR + /* NB: Multiply length by 4 to get byte count. */ + sall $2, %edx +# endif + subq $-(VEC_SIZE * 4), %rdi + /* Check first VEC regardless. */ + testl %eax, %eax + jnz L(first_vec_x1_check) + /* If remaining length <= CHAR_PER_VEC * 2. */ + addl $(VEC_SIZE * 2), %edx + jle L(last_2x_vec) .p2align 4 -L(first_vec_x0): - tzcntl %eax, %eax - addq %rdi, %rax - VZEROUPPER - ret +L(last_4x_vec): + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + testl %eax, %eax + jnz L(last_vec_x2_return) - .p2align 4 -L(first_vec_x1): - tzcntl %eax, %eax - addq $VEC_SIZE, %rax - addq %rdi, %rax - VZEROUPPER - ret + VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax - .p2align 4 -L(first_vec_x2): + /* Create mask for possible matches within remaining length. */ + movq $-1, %rcx + bzhiq %rdx, %rcx, %rcx + + /* Test matches in data against length match. */ + andl %ecx, %eax + jnz L(last_vec_x3) + + /* if remaining length <= VEC_SIZE * 3 (Note this is after + remaining length was found to be > VEC_SIZE * 2. */ + subl $VEC_SIZE, %edx + jbe L(zero_end2) + + VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + /* Shift remaining length mask for last VEC. */ + shrq $32, %rcx + andl %ecx, %eax + jz L(zero_end2) tzcntl %eax, %eax - addq $(VEC_SIZE * 2), %rax + addq $(VEC_SIZE * 3 + 1), %rdi addq %rdi, %rax - VZEROUPPER - ret +L(zero_end2): + VZEROUPPER_RETURN .p2align 4 -L(4x_vec_end): - vpmovmskb %ymm1, %eax - testl %eax, %eax - jnz L(first_vec_x0) - vpmovmskb %ymm2, %eax - testl %eax, %eax - jnz L(first_vec_x1) - vpmovmskb %ymm3, %eax - testl %eax, %eax - jnz L(first_vec_x2) - vpmovmskb %ymm4, %eax - testl %eax, %eax -L(first_vec_x3): +L(last_vec_x3): tzcntl %eax, %eax - addq $(VEC_SIZE * 3), %rax + subq $-(VEC_SIZE * 2 + 1), %rdi addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN +# endif END (MEMCHR) #endif diff --git a/sysdeps/x86_64/multiarch/memchr-evex.S b/sysdeps/x86_64/multiarch/memchr-evex.S new file mode 100644 index 000000000..f3fdad4fd --- /dev/null +++ b/sysdeps/x86_64/multiarch/memchr-evex.S @@ -0,0 +1,478 @@ +/* memchr/wmemchr optimized with 256-bit EVEX instructions. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
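Both the AVX2 body that just ended and the EVEX version that follows open with the same guard before the first unaligned vector load. A small C model of that test, assuming 4 KiB pages as the assembly does:

#include <stdint.h>

#define PAGE_SIZE 4096
#define VEC_SIZE  32

static inline int
may_cross_page (const void *s)
{
  /* Offset of s within its page.  If s sits in the last VEC_SIZE - 1
     bytes of the page, an unaligned 32-byte load could touch the next
     (possibly unmapped) page, so the slow aligned path is taken.  */
  return ((uintptr_t) s & (PAGE_SIZE - 1)) > PAGE_SIZE - VEC_SIZE;
}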
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# ifndef MEMCHR
+# define MEMCHR	__memchr_evex
+# endif
+
+# ifdef USE_AS_WMEMCHR
+# define VPBROADCAST	vpbroadcastd
+# define VPMINU	vpminud
+# define VPCMP	vpcmpd
+# define VPCMPEQ	vpcmpeqd
+# define CHAR_SIZE	4
+# else
+# define VPBROADCAST	vpbroadcastb
+# define VPMINU	vpminub
+# define VPCMP	vpcmpb
+# define VPCMPEQ	vpcmpeqb
+# define CHAR_SIZE	1
+# endif
+
+# ifdef USE_AS_RAWMEMCHR
+# define RAW_PTR_REG	rcx
+# define ALGN_PTR_REG	rdi
+# else
+# define RAW_PTR_REG	rdi
+# define ALGN_PTR_REG	rcx
+# endif
+
+# define XMMZERO	xmm23
+# define YMMZERO	ymm23
+# define XMMMATCH	xmm16
+# define YMMMATCH	ymm16
+# define YMM1		ymm17
+# define YMM2		ymm18
+# define YMM3		ymm19
+# define YMM4		ymm20
+# define YMM5		ymm21
+# define YMM6		ymm22
+
+# define VEC_SIZE 32
+# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
+# define PAGE_SIZE 4096
+
+	.section .text.evex,"ax",@progbits
+ENTRY (MEMCHR)
+# ifndef USE_AS_RAWMEMCHR
+	/* Check for zero length.  */
+	test	%RDX_LP, %RDX_LP
+	jz	L(zero)
+
+# ifdef __ILP32__
+	/* Clear the upper 32 bits.  */
+	movl	%edx, %edx
+# endif
+# endif
+	/* Broadcast CHAR to YMMMATCH.  */
+	VPBROADCAST %esi, %YMMMATCH
+	/* Check if we may cross page boundary with one vector load.  */
+	movl	%edi, %eax
+	andl	$(PAGE_SIZE - 1), %eax
+	cmpl	$(PAGE_SIZE - VEC_SIZE), %eax
+	ja	L(cross_page_boundary)
+
+	/* Check the first VEC_SIZE bytes.  */
+	VPCMP	$0, (%rdi), %YMMMATCH, %k0
+	kmovd	%k0, %eax
+# ifndef USE_AS_RAWMEMCHR
+	/* If length < CHAR_PER_VEC handle special.  */
+	cmpq	$CHAR_PER_VEC, %rdx
+	jbe	L(first_vec_x0)
+# endif
+	testl	%eax, %eax
+	jz	L(aligned_more)
+	tzcntl	%eax, %eax
+# ifdef USE_AS_WMEMCHR
+	/* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count.  */
+	leaq	(%rdi, %rax, CHAR_SIZE), %rax
+# else
+	addq	%rdi, %rax
+# endif
+	ret
+
+# ifndef USE_AS_RAWMEMCHR
+L(zero):
+	xorl	%eax, %eax
+	ret
+
+	.p2align 5
+L(first_vec_x0):
+	/* Check if first match was before length.  */
+	tzcntl	%eax, %eax
+	xorl	%ecx, %ecx
+	cmpl	%eax, %edx
+	leaq	(%rdi, %rax, CHAR_SIZE), %rax
+	cmovle	%rcx, %rax
+	ret
+# else
+	/* NB: first_vec_x0 is 17 bytes which will leave
+	   cross_page_boundary (which is relatively cold) close enough
+	   to ideal alignment. So only realign L(cross_page_boundary) if
+	   rawmemchr.  */
+	.p2align 4
+# endif
+L(cross_page_boundary):
+	/* Save pointer before aligning as its original value is
+	   necessary for computing the return address if a byte is
+	   found, or for adjusting the length if it is not and this
+	   is memchr.  */
+	movq	%rdi, %rcx
+	/* Align data to VEC_SIZE. ALGN_PTR_REG is rcx for memchr and rdi
+	   for rawmemchr.  */
+	andq	$-VEC_SIZE, %ALGN_PTR_REG
+	VPCMP	$0, (%ALGN_PTR_REG), %YMMMATCH, %k0
+	kmovd	%k0, %r8d
+# ifdef USE_AS_WMEMCHR
+	/* NB: Divide shift count by 4 since each bit in K0 represents
+	   4 bytes.  */
+	sarl	$2, %eax
+# endif
+# ifndef USE_AS_RAWMEMCHR
+	movl	$(PAGE_SIZE / CHAR_SIZE), %esi
+	subl	%eax, %esi
+# endif
+# ifdef USE_AS_WMEMCHR
+	andl	$(CHAR_PER_VEC - 1), %eax
+# endif
+	/* Remove the leading bytes.  */
+	sarxl	%eax, %r8d, %eax
+# ifndef USE_AS_RAWMEMCHR
+	/* Check the end of data.  */
+	cmpq	%rsi, %rdx
+	jbe	L(first_vec_x0)
+# endif
+	testl	%eax, %eax
+	jz	L(cross_page_continue)
+	tzcntl	%eax, %eax
+# ifdef USE_AS_WMEMCHR
+	/* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count.  */
+	leaq	(%RAW_PTR_REG, %rax, CHAR_SIZE), %rax
+# else
+	addq	%RAW_PTR_REG, %rax
+# endif
+	ret
+
+	.p2align 4
+L(first_vec_x1):
+	tzcntl	%eax, %eax
+	leaq	VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax
+	ret
+
+	.p2align 4
+L(first_vec_x2):
+	tzcntl	%eax, %eax
+	leaq	(VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
+	ret
+
+	.p2align 4
+L(first_vec_x3):
+	tzcntl	%eax, %eax
+	leaq	(VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
+	ret
+
+	.p2align 4
+L(first_vec_x4):
+	tzcntl	%eax, %eax
+	leaq	(VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax
+	ret
+
+	.p2align 5
+L(aligned_more):
+	/* Check the first 4 * VEC_SIZE.  Only one VEC_SIZE at a time
+	   since data is only aligned to VEC_SIZE.  */
+
+# ifndef USE_AS_RAWMEMCHR
+	/* Align data to VEC_SIZE.  */
+L(cross_page_continue):
+	xorl	%ecx, %ecx
+	subl	%edi, %ecx
+	andq	$-VEC_SIZE, %rdi
+	/* esi is for adjusting length to see if near the end.  */
+	leal	(VEC_SIZE * 5)(%rdi, %rcx), %esi
+# ifdef USE_AS_WMEMCHR
+	/* NB: Divide bytes by 4 to get the wchar_t count.  */
+	sarl	$2, %esi
+# endif
+# else
+	andq	$-VEC_SIZE, %rdi
+L(cross_page_continue):
+# endif
+	/* Load first VEC regardless.  */
+	VPCMP	$0, (VEC_SIZE)(%rdi), %YMMMATCH, %k0
+	kmovd	%k0, %eax
+# ifndef USE_AS_RAWMEMCHR
+	/* Adjust length. If near end handle specially.  */
+	subq	%rsi, %rdx
+	jbe	L(last_4x_vec_or_less)
+# endif
+	testl	%eax, %eax
+	jnz	L(first_vec_x1)
+
+	VPCMP	$0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0
+	kmovd	%k0, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x2)
+
+	VPCMP	$0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k0
+	kmovd	%k0, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x3)
+
+	VPCMP	$0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0
+	kmovd	%k0, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x4)
+
+
+# ifndef USE_AS_RAWMEMCHR
+	/* Check if at last CHAR_PER_VEC * 4 length.  */
+	subq	$(CHAR_PER_VEC * 4), %rdx
+	jbe	L(last_4x_vec_or_less_cmpeq)
+	addq	$VEC_SIZE, %rdi
+
+	/* Align data to VEC_SIZE * 4 for the loop and readjust length.
+	 */
+# ifdef USE_AS_WMEMCHR
+	movl	%edi, %ecx
+	andq	$-(4 * VEC_SIZE), %rdi
+	andl	$(VEC_SIZE * 4 - 1), %ecx
+	/* NB: Divide bytes by 4 to get the wchar_t count.  */
+	sarl	$2, %ecx
+	addq	%rcx, %rdx
+# else
+	addq	%rdi, %rdx
+	andq	$-(4 * VEC_SIZE), %rdi
+	subq	%rdi, %rdx
+# endif
+# else
+	addq	$VEC_SIZE, %rdi
+	andq	$-(4 * VEC_SIZE), %rdi
+# endif
+
+	vpxorq	%XMMZERO, %XMMZERO, %XMMZERO
+
+	/* Compare 4 * VEC at a time forward.  */
+	.p2align 4
+L(loop_4x_vec):
+	/* It would be possible to save some instructions using 4x VPCMP
+	   but a bottleneck on port 5 makes it not worth it.  */
+	VPCMP	$4, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k1
+	/* xor will set bytes that match esi to zero.  */
+	vpxorq	(VEC_SIZE * 5)(%rdi), %YMMMATCH, %YMM2
+	vpxorq	(VEC_SIZE * 6)(%rdi), %YMMMATCH, %YMM3
+	VPCMP	$0, (VEC_SIZE * 7)(%rdi), %YMMMATCH, %k3
+	/* Reduce VEC2 / VEC3 with min and VEC1 with zero mask.
*/ + VPMINU %YMM2, %YMM3, %YMM3{%k1}{z} + VPCMP $0, %YMM3, %YMMZERO, %k2 +# ifdef USE_AS_RAWMEMCHR + subq $-(VEC_SIZE * 4), %rdi + kortestd %k2, %k3 + jz L(loop_4x_vec) +# else + kortestd %k2, %k3 + jnz L(loop_4x_vec_end) + + subq $-(VEC_SIZE * 4), %rdi + + subq $(CHAR_PER_VEC * 4), %rdx + ja L(loop_4x_vec) + + /* Fall through into less than 4 remaining vectors of length case. + */ + VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0 + kmovd %k0, %eax + addq $(VEC_SIZE * 3), %rdi + .p2align 4 +L(last_4x_vec_or_less): + /* Check if first VEC contained match. */ + testl %eax, %eax + jnz L(first_vec_x1_check) + + /* If remaining length > CHAR_PER_VEC * 2. */ + addl $(CHAR_PER_VEC * 2), %edx + jg L(last_4x_vec) + +L(last_2x_vec): + /* If remaining length < CHAR_PER_VEC. */ + addl $CHAR_PER_VEC, %edx + jle L(zero_end) + + /* Check VEC2 and compare any match with remaining length. */ + VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0 + kmovd %k0, %eax + tzcntl %eax, %eax + cmpl %eax, %edx + jbe L(set_zero_end) + leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax +L(zero_end): + ret + + + .p2align 4 +L(first_vec_x1_check): + tzcntl %eax, %eax + /* Adjust length. */ + subl $-(CHAR_PER_VEC * 4), %edx + /* Check if match within remaining length. */ + cmpl %eax, %edx + jbe L(set_zero_end) + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax + ret +L(set_zero_end): + xorl %eax, %eax + ret + + .p2align 4 +L(loop_4x_vec_end): +# endif + /* rawmemchr will fall through into this if match was found in + loop. */ + + /* k1 has not of matches with VEC1. */ + kmovd %k1, %eax +# ifdef USE_AS_WMEMCHR + subl $((1 << CHAR_PER_VEC) - 1), %eax +# else + incl %eax +# endif + jnz L(last_vec_x1_return) + + VPCMP $0, %YMM2, %YMMZERO, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(last_vec_x2_return) + + kmovd %k2, %eax + testl %eax, %eax + jnz L(last_vec_x3_return) + + kmovd %k3, %eax + tzcntl %eax, %eax +# ifdef USE_AS_RAWMEMCHR + leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax +# else + leaq (VEC_SIZE * 7)(%rdi, %rax, CHAR_SIZE), %rax +# endif + ret + + .p2align 4 +L(last_vec_x1_return): + tzcntl %eax, %eax +# ifdef USE_AS_RAWMEMCHR +# ifdef USE_AS_WMEMCHR + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + leaq (%rdi, %rax, CHAR_SIZE), %rax +# else + addq %rdi, %rax +# endif +# else + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax +# endif + ret + + .p2align 4 +L(last_vec_x2_return): + tzcntl %eax, %eax +# ifdef USE_AS_RAWMEMCHR + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax +# else + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + leaq (VEC_SIZE * 5)(%rdi, %rax, CHAR_SIZE), %rax +# endif + ret + + .p2align 4 +L(last_vec_x3_return): + tzcntl %eax, %eax +# ifdef USE_AS_RAWMEMCHR + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax +# else + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + leaq (VEC_SIZE * 6)(%rdi, %rax, CHAR_SIZE), %rax +# endif + ret + + +# ifndef USE_AS_RAWMEMCHR +L(last_4x_vec_or_less_cmpeq): + VPCMP $0, (VEC_SIZE * 5)(%rdi), %YMMMATCH, %k0 + kmovd %k0, %eax + subq $-(VEC_SIZE * 4), %rdi + /* Check first VEC regardless. */ + testl %eax, %eax + jnz L(first_vec_x1_check) + + /* If remaining length <= CHAR_PER_VEC * 2. 
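The loop above folds three of the four vector compares into a single zero test: the not-equal mask from VEC1 zero-masks the unsigned min of the XOR results for VEC2 and VEC3, so a zero lane means a match in any of the three, and one kortest then covers VEC4's mask (k3) as well. A per-lane scalar model of that reduction, offered as a sketch rather than the vector code itself:

static inline int
lane_matches_vec123 (unsigned char v1, unsigned char v2,
		     unsigned char v3, unsigned char c)
{
  unsigned char x2 = v2 ^ c;	/* 0 exactly when v2 == c.  */
  unsigned char x3 = v3 ^ c;	/* 0 exactly when v3 == c.  */
  /* k1 carries v1 != c; zero-masking turns a VEC1 match into 0 too,
     since min of unsigned bytes is 0 iff one operand is 0.  */
  unsigned char folded = (v1 != c) ? (x2 < x3 ? x2 : x3) : 0;
  return folded == 0;		/* Match somewhere in v1, v2 or v3.  */
}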
*/ + addl $(CHAR_PER_VEC * 2), %edx + jle L(last_2x_vec) + + .p2align 4 +L(last_4x_vec): + VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(last_vec_x2) + + + VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k0 + kmovd %k0, %eax + /* Create mask for possible matches within remaining length. */ +# ifdef USE_AS_WMEMCHR + movl $((1 << (CHAR_PER_VEC * 2)) - 1), %ecx + bzhil %edx, %ecx, %ecx +# else + movq $-1, %rcx + bzhiq %rdx, %rcx, %rcx +# endif + /* Test matches in data against the length mask. */ + andl %ecx, %eax + jnz L(last_vec_x3) + + /* If remaining length <= CHAR_PER_VEC * 3 (note this is after the + remaining length was found to be > CHAR_PER_VEC * 2). */ + subl $CHAR_PER_VEC, %edx + jbe L(zero_end2) + + + VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0 + kmovd %k0, %eax + /* Shift remaining length mask for last VEC. */ +# ifdef USE_AS_WMEMCHR + shrl $CHAR_PER_VEC, %ecx +# else + shrq $CHAR_PER_VEC, %rcx +# endif + andl %ecx, %eax + jz L(zero_end2) + tzcntl %eax, %eax + leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax +L(zero_end2): + ret + +L(last_vec_x2): + tzcntl %eax, %eax + leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax + ret + + .p2align 4 +L(last_vec_x3): + tzcntl %eax, %eax + leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax + ret +# endif + +END (MEMCHR) +#endif diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S new file mode 100644 index 000000000..cf4eff5d4 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S @@ -0,0 +1,12 @@ +#ifndef MEMCMP +# define MEMCMP __memcmp_avx2_movbe_rtm +#endif + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + +#define SECTION(p) p##.avx.rtm + +#include "memcmp-avx2-movbe.S" diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S index 67fc575b5..87f9478ea 100644 --- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S +++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S @@ -47,6 +47,10 @@ # define VZEROUPPER vzeroupper # endif +# ifndef SECTION +# define SECTION(p) p##.avx +# endif + # define VEC_SIZE 32 # define VEC_MASK ((1 << VEC_SIZE) - 1) @@ -55,7 +59,7 @@ memcmp has to use UNSIGNED comparison for elements.
*/ - .section .text.avx,"ax",@progbits + .section SECTION(.text),"ax",@progbits ENTRY (MEMCMP) # ifdef USE_AS_WMEMCMP shl $2, %RDX_LP @@ -123,8 +127,8 @@ ENTRY (MEMCMP) vptest %ymm0, %ymm5 jnc L(4x_vec_end) xorl %eax, %eax - VZEROUPPER - ret +L(return_vzeroupper): + ZERO_UPPER_VEC_REGISTERS_RETURN .p2align 4 L(last_2x_vec): @@ -144,8 +148,7 @@ L(last_vec): vpmovmskb %ymm2, %eax subl $VEC_MASK, %eax jnz L(first_vec) - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(first_vec): @@ -164,8 +167,7 @@ L(wmemcmp_return): movzbl (%rsi, %rcx), %edx sub %edx, %eax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN # ifdef USE_AS_WMEMCMP .p2align 4 @@ -367,8 +369,7 @@ L(last_4x_vec): vpmovmskb %ymm2, %eax subl $VEC_MASK, %eax jnz L(first_vec) - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(4x_vec_end): @@ -394,8 +395,7 @@ L(4x_vec_end): movzbl (VEC_SIZE * 3)(%rsi, %rcx), %edx sub %edx, %eax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(first_vec_x1): @@ -410,8 +410,7 @@ L(first_vec_x1): movzbl VEC_SIZE(%rsi, %rcx), %edx sub %edx, %eax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(first_vec_x2): @@ -426,7 +425,6 @@ L(first_vec_x2): movzbl (VEC_SIZE * 2)(%rsi, %rcx), %edx sub %edx, %eax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN END (MEMCMP) #endif diff --git a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S new file mode 100644 index 000000000..9c093972e --- /dev/null +++ b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S @@ -0,0 +1,440 @@ +/* memcmp/wmemcmp optimized with 256-bit EVEX instructions. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +/* memcmp/wmemcmp is implemented as: + 1. For size from 2 to 7 bytes, load as big endian with movbe and bswap + to avoid branches. + 2. Use overlapping compare to avoid branch. + 3. Use vector compare when size >= 4 bytes for memcmp or size >= 8 + bytes for wmemcmp. + 4. If size is 8 * VEC_SIZE or less, unroll the loop. + 5. Compare 4 * VEC_SIZE at a time with the aligned first memory + area. + 6. Use 2 vector compares when size is 2 * VEC_SIZE or less. + 7. Use 4 vector compares when size is 4 * VEC_SIZE or less. + 8. Use 8 vector compares when size is 8 * VEC_SIZE or less. */ + +# include + +# ifndef MEMCMP +# define MEMCMP __memcmp_evex_movbe +# endif + +# define VMOVU vmovdqu64 + +# ifdef USE_AS_WMEMCMP +# define VPCMPEQ vpcmpeqd +# else +# define VPCMPEQ vpcmpeqb +# endif + +# define XMM1 xmm17 +# define XMM2 xmm18 +# define YMM1 ymm17 +# define YMM2 ymm18 +# define YMM3 ymm19 +# define YMM4 ymm20 +# define YMM5 ymm21 +# define YMM6 ymm22 + +# define VEC_SIZE 32 +# ifdef USE_AS_WMEMCMP +# define VEC_MASK 0xff +# define XMM_MASK 0xf +# else +# define VEC_MASK 0xffffffff +# define XMM_MASK 0xffff +# endif + +/* Warning! + wmemcmp has to use SIGNED comparison for elements. 
+ memcmp has to use UNSIGNED comparison for elements. +*/ + + .section .text.evex,"ax",@progbits +ENTRY (MEMCMP) +# ifdef USE_AS_WMEMCMP + shl $2, %RDX_LP +# elif defined __ILP32__ + /* Clear the upper 32 bits. */ + movl %edx, %edx +# endif + cmp $VEC_SIZE, %RDX_LP + jb L(less_vec) + + /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */ + VMOVU (%rsi), %YMM2 + VPCMPEQ (%rdi), %YMM2, %k1 + kmovd %k1, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) + + cmpq $(VEC_SIZE * 2), %rdx + jbe L(last_vec) + + /* More than 2 * VEC. */ + cmpq $(VEC_SIZE * 8), %rdx + ja L(more_8x_vec) + cmpq $(VEC_SIZE * 4), %rdx + jb L(last_4x_vec) + + /* From 4 * VEC to 8 * VEC, inclusively. */ + VMOVU (%rsi), %YMM1 + VPCMPEQ (%rdi), %YMM1, %k1 + + VMOVU VEC_SIZE(%rsi), %YMM2 + VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2 + + VMOVU (VEC_SIZE * 2)(%rsi), %YMM3 + VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3 + + VMOVU (VEC_SIZE * 3)(%rsi), %YMM4 + VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4 + + kandd %k1, %k2, %k5 + kandd %k3, %k4, %k6 + kandd %k5, %k6, %k6 + + kmovd %k6, %eax + cmpl $VEC_MASK, %eax + jne L(4x_vec_end) + + leaq -(4 * VEC_SIZE)(%rdi, %rdx), %rdi + leaq -(4 * VEC_SIZE)(%rsi, %rdx), %rsi + VMOVU (%rsi), %YMM1 + VPCMPEQ (%rdi), %YMM1, %k1 + + VMOVU VEC_SIZE(%rsi), %YMM2 + VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2 + kandd %k1, %k2, %k5 + + VMOVU (VEC_SIZE * 2)(%rsi), %YMM3 + VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3 + kandd %k3, %k5, %k5 + + VMOVU (VEC_SIZE * 3)(%rsi), %YMM4 + VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4 + kandd %k4, %k5, %k5 + + kmovd %k5, %eax + cmpl $VEC_MASK, %eax + jne L(4x_vec_end) + xorl %eax, %eax + ret + + .p2align 4 +L(last_2x_vec): + /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */ + VMOVU (%rsi), %YMM2 + VPCMPEQ (%rdi), %YMM2, %k2 + kmovd %k2, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) + +L(last_vec): + /* Use overlapping loads to avoid branches. */ + leaq -VEC_SIZE(%rdi, %rdx), %rdi + leaq -VEC_SIZE(%rsi, %rdx), %rsi + VMOVU (%rsi), %YMM2 + VPCMPEQ (%rdi), %YMM2, %k2 + kmovd %k2, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) + ret + + .p2align 4 +L(first_vec): + /* A byte or int32 is different within 16 or 32 bytes. */ + tzcntl %eax, %ecx +# ifdef USE_AS_WMEMCMP + xorl %eax, %eax + movl (%rdi, %rcx, 4), %edx + cmpl (%rsi, %rcx, 4), %edx +L(wmemcmp_return): + setl %al + negl %eax + orl $1, %eax +# else + movzbl (%rdi, %rcx), %eax + movzbl (%rsi, %rcx), %edx + sub %edx, %eax +# endif + ret + +# ifdef USE_AS_WMEMCMP + .p2align 4 +L(4): + xorl %eax, %eax + movl (%rdi), %edx + cmpl (%rsi), %edx + jne L(wmemcmp_return) + ret +# else + .p2align 4 +L(between_4_7): + /* Load as big endian with overlapping movbe to avoid branches. */ + movbe (%rdi), %eax + movbe (%rsi), %ecx + shlq $32, %rax + shlq $32, %rcx + movbe -4(%rdi, %rdx), %edi + movbe -4(%rsi, %rdx), %esi + orq %rdi, %rax + orq %rsi, %rcx + subq %rcx, %rax + je L(exit) + sbbl %eax, %eax + orl $1, %eax + ret + + .p2align 4 +L(exit): + ret + + .p2align 4 +L(between_2_3): + /* Load as big endian to avoid branches. */ + movzwl (%rdi), %eax + movzwl (%rsi), %ecx + shll $8, %eax + shll $8, %ecx + bswap %eax + bswap %ecx + movb -1(%rdi, %rdx), %al + movb -1(%rsi, %rdx), %cl + /* Subtraction is okay because the upper 8 bits are zero. */ + subl %ecx, %eax + ret + + .p2align 4 +L(1): + movzbl (%rdi), %eax + movzbl (%rsi), %ecx + subl %ecx, %eax + ret +# endif + + .p2align 4 +L(zero): + xorl %eax, %eax + ret + + .p2align 4 +L(less_vec): +# ifdef USE_AS_WMEMCMP + /* It can only be 0, 4, 8, 12, 16, 20, 24, 28 bytes.
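+ because the byte count was produced by scaling a wchar_t count by 4, so no other values below VEC_SIZE are possible here.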
*/ + cmpb $4, %dl + je L(4) + jb L(zero) +# else + cmpb $1, %dl + je L(1) + jb L(zero) + cmpb $4, %dl + jb L(between_2_3) + cmpb $8, %dl + jb L(between_4_7) +# endif + cmpb $16, %dl + jae L(between_16_31) + /* It is between 8 and 15 bytes. */ + vmovq (%rdi), %XMM1 + vmovq (%rsi), %XMM2 + VPCMPEQ %XMM1, %XMM2, %k2 + kmovw %k2, %eax + subl $XMM_MASK, %eax + jnz L(first_vec) + /* Use overlapping loads to avoid branches. */ + leaq -8(%rdi, %rdx), %rdi + leaq -8(%rsi, %rdx), %rsi + vmovq (%rdi), %XMM1 + vmovq (%rsi), %XMM2 + VPCMPEQ %XMM1, %XMM2, %k2 + kmovw %k2, %eax + subl $XMM_MASK, %eax + jnz L(first_vec) + ret + + .p2align 4 +L(between_16_31): + /* From 16 to 31 bytes. No branch when size == 16. */ + VMOVU (%rsi), %XMM2 + VPCMPEQ (%rdi), %XMM2, %k2 + kmovw %k2, %eax + subl $XMM_MASK, %eax + jnz L(first_vec) + + /* Use overlapping loads to avoid branches. */ + leaq -16(%rdi, %rdx), %rdi + leaq -16(%rsi, %rdx), %rsi + VMOVU (%rsi), %XMM2 + VPCMPEQ (%rdi), %XMM2, %k2 + kmovw %k2, %eax + subl $XMM_MASK, %eax + jnz L(first_vec) + ret + + .p2align 4 +L(more_8x_vec): + /* More than 8 * VEC. Check the first VEC. */ + VMOVU (%rsi), %YMM2 + VPCMPEQ (%rdi), %YMM2, %k2 + kmovd %k2, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) + + /* Align the first memory area for aligned loads in the loop. + Compute how much the first memory area is misaligned. */ + movq %rdi, %rcx + andl $(VEC_SIZE - 1), %ecx + /* Get the negative of offset for alignment. */ + subq $VEC_SIZE, %rcx + /* Adjust the second memory area. */ + subq %rcx, %rsi + /* Adjust the first memory area which should be aligned now. */ + subq %rcx, %rdi + /* Adjust length. */ + addq %rcx, %rdx + +L(loop_4x_vec): + /* Compare 4 * VEC at a time forward. */ + VMOVU (%rsi), %YMM1 + VPCMPEQ (%rdi), %YMM1, %k1 + + VMOVU VEC_SIZE(%rsi), %YMM2 + VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2 + kandd %k2, %k1, %k5 + + VMOVU (VEC_SIZE * 2)(%rsi), %YMM3 + VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3 + kandd %k3, %k5, %k5 + + VMOVU (VEC_SIZE * 3)(%rsi), %YMM4 + VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4 + kandd %k4, %k5, %k5 + + kmovd %k5, %eax + cmpl $VEC_MASK, %eax + jne L(4x_vec_end) + + addq $(VEC_SIZE * 4), %rdi + addq $(VEC_SIZE * 4), %rsi + + subq $(VEC_SIZE * 4), %rdx + cmpq $(VEC_SIZE * 4), %rdx + jae L(loop_4x_vec) + + /* Less than 4 * VEC. */ + cmpq $VEC_SIZE, %rdx + jbe L(last_vec) + cmpq $(VEC_SIZE * 2), %rdx + jbe L(last_2x_vec) + +L(last_4x_vec): + /* From 2 * VEC to 4 * VEC. */ + VMOVU (%rsi), %YMM2 + VPCMPEQ (%rdi), %YMM2, %k2 + kmovd %k2, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) + + addq $VEC_SIZE, %rdi + addq $VEC_SIZE, %rsi + VMOVU (%rsi), %YMM2 + VPCMPEQ (%rdi), %YMM2, %k2 + kmovd %k2, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) + + /* Use overlapping loads to avoid branches. 
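+ The last two vectors are re-anchored at the end of the buffer, so the tail is fully covered even when it overlaps bytes that were already compared.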
*/ + leaq -(3 * VEC_SIZE)(%rdi, %rdx), %rdi + leaq -(3 * VEC_SIZE)(%rsi, %rdx), %rsi + VMOVU (%rsi), %YMM2 + VPCMPEQ (%rdi), %YMM2, %k2 + kmovd %k2, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) + + addq $VEC_SIZE, %rdi + addq $VEC_SIZE, %rsi + VMOVU (%rsi), %YMM2 + VPCMPEQ (%rdi), %YMM2, %k2 + kmovd %k2, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) + ret + + .p2align 4 +L(4x_vec_end): + kmovd %k1, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) + kmovd %k2, %eax + subl $VEC_MASK, %eax + jnz L(first_vec_x1) + kmovd %k3, %eax + subl $VEC_MASK, %eax + jnz L(first_vec_x2) + kmovd %k4, %eax + subl $VEC_MASK, %eax + tzcntl %eax, %ecx +# ifdef USE_AS_WMEMCMP + xorl %eax, %eax + movl (VEC_SIZE * 3)(%rdi, %rcx, 4), %edx + cmpl (VEC_SIZE * 3)(%rsi, %rcx, 4), %edx + jmp L(wmemcmp_return) +# else + movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax + movzbl (VEC_SIZE * 3)(%rsi, %rcx), %edx + sub %edx, %eax +# endif + ret + + .p2align 4 +L(first_vec_x1): + tzcntl %eax, %ecx +# ifdef USE_AS_WMEMCMP + xorl %eax, %eax + movl VEC_SIZE(%rdi, %rcx, 4), %edx + cmpl VEC_SIZE(%rsi, %rcx, 4), %edx + jmp L(wmemcmp_return) +# else + movzbl VEC_SIZE(%rdi, %rcx), %eax + movzbl VEC_SIZE(%rsi, %rcx), %edx + sub %edx, %eax +# endif + ret + + .p2align 4 +L(first_vec_x2): + tzcntl %eax, %ecx +# ifdef USE_AS_WMEMCMP + xorl %eax, %eax + movl (VEC_SIZE * 2)(%rdi, %rcx, 4), %edx + cmpl (VEC_SIZE * 2)(%rsi, %rcx, 4), %edx + jmp L(wmemcmp_return) +# else + movzbl (VEC_SIZE * 2)(%rdi, %rcx), %eax + movzbl (VEC_SIZE * 2)(%rsi, %rcx), %edx + sub %edx, %eax +# endif + ret +END (MEMCMP) +#endif diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S new file mode 100644 index 000000000..1ec1962e8 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S @@ -0,0 +1,17 @@ +#if IS_IN (libc) +# define VEC_SIZE 32 +# define VEC(i) ymm##i +# define VMOVNT vmovntdq +# define VMOVU vmovdqu +# define VMOVA vmovdqa + +# define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +# define VZEROUPPER_RETURN jmp L(return) + +# define SECTION(p) p##.avx.rtm +# define MEMMOVE_SYMBOL(p,s) p##_avx_##s##_rtm + +# include "memmove-vec-unaligned-erms.S" +#endif diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S index aac1515cf..7dad1ad74 100644 --- a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S +++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S @@ -1,11 +1,25 @@ #if IS_IN (libc) # define VEC_SIZE 64 -# define VEC(i) zmm##i +# define XMM0 xmm16 +# define XMM1 xmm17 +# define YMM0 ymm16 +# define YMM1 ymm17 +# define VEC0 zmm16 +# define VEC1 zmm17 +# define VEC2 zmm18 +# define VEC3 zmm19 +# define VEC4 zmm20 +# define VEC5 zmm21 +# define VEC6 zmm22 +# define VEC7 zmm23 +# define VEC8 zmm24 +# define VEC(i) VEC##i # define VMOVNT vmovntdq # define VMOVU vmovdqu64 # define VMOVA vmovdqa64 +# define VZEROUPPER -# define SECTION(p) p##.avx512 +# define SECTION(p) p##.evex512 # define MEMMOVE_SYMBOL(p,s) p##_avx512_##s # include "memmove-vec-unaligned-erms.S" diff --git a/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S new file mode 100644 index 000000000..b879007e8 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S @@ -0,0 +1,26 @@ +#if IS_IN (libc) +# define VEC_SIZE 32 +# define XMM0 xmm16 +# define XMM1 xmm17 +# define YMM0 ymm16 +# define YMM1 ymm17 +# 
define VEC0 ymm16 +# define VEC1 ymm17 +# define VEC2 ymm18 +# define VEC3 ymm19 +# define VEC4 ymm20 +# define VEC5 ymm21 +# define VEC6 ymm22 +# define VEC7 ymm23 +# define VEC8 ymm24 +# define VEC(i) VEC##i +# define VMOVNT vmovntdq +# define VMOVU vmovdqu64 +# define VMOVA vmovdqa64 +# define VZEROUPPER + +# define SECTION(p) p##.evex +# define MEMMOVE_SYMBOL(p,s) p##_evex_##s + +# include "memmove-vec-unaligned-erms.S" +#endif diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S index c763b7d87..d13d23d6c 100644 --- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S +++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S @@ -48,6 +48,14 @@ # define MEMMOVE_CHK_SYMBOL(p,s) MEMMOVE_SYMBOL(p, s) #endif +#ifndef XMM0 +# define XMM0 xmm0 +#endif + +#ifndef YMM0 +# define YMM0 ymm0 +#endif + #ifndef VZEROUPPER # if VEC_SIZE > 16 # define VZEROUPPER vzeroupper @@ -67,6 +75,13 @@ # define REP_MOVSB_THRESHOLD (2048 * (VEC_SIZE / 16)) #endif +/* Avoid short distance rep movsb only with non-SSE vector. */ +#ifndef AVOID_SHORT_DISTANCE_REP_MOVSB +# define AVOID_SHORT_DISTANCE_REP_MOVSB (VEC_SIZE > 16) +#else +# define AVOID_SHORT_DISTANCE_REP_MOVSB 0 +#endif + #ifndef PREFETCH # define PREFETCH(addr) prefetcht0 addr #endif @@ -143,11 +158,12 @@ L(last_2x_vec): VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(1) VMOVU %VEC(0), (%rdi) VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx) - VZEROUPPER #if !defined USE_MULTIARCH || !IS_IN (libc) L(nop): -#endif ret +#else + VZEROUPPER_RETURN +#endif #if defined USE_MULTIARCH && IS_IN (libc) END (MEMMOVE_SYMBOL (__memmove, unaligned)) @@ -240,11 +256,14 @@ L(last_2x_vec): VMOVU %VEC(0), (%rdi) VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx) L(return): - VZEROUPPER +#if VEC_SIZE > 16 + ZERO_UPPER_VEC_REGISTERS_RETURN +#else ret +#endif L(movsb): - cmpq __x86_shared_non_temporal_threshold(%rip), %rdx + cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP jae L(more_8x_vec) cmpq %rsi, %rdi jb 1f @@ -257,7 +276,21 @@ L(movsb): # error Unsupported REP_MOVSB_THRESHOLD and VEC_SIZE! # endif jb L(more_8x_vec_backward) +# if AVOID_SHORT_DISTANCE_REP_MOVSB + movq %rdi, %rcx + subq %rsi, %rcx + jmp 2f +# endif 1: +# if AVOID_SHORT_DISTANCE_REP_MOVSB + movq %rsi, %rcx + subq %rdi, %rcx +2: +/* Avoid "rep movsb" if RCX, the distance between source and destination, + is N*4GB + [1..63] with N >= 0. */ + cmpl $63, %ecx + jbe L(more_2x_vec) /* Avoid "rep movsb" if ECX <= 63. */ +# endif mov %RDX_LP, %RCX_LP rep movsb L(nop): @@ -291,21 +324,20 @@ L(less_vec): #if VEC_SIZE > 32 L(between_32_63): /* From 32 to 63. No branch when size == 32. */ - vmovdqu (%rsi), %ymm0 - vmovdqu -32(%rsi,%rdx), %ymm1 - vmovdqu %ymm0, (%rdi) - vmovdqu %ymm1, -32(%rdi,%rdx) - VZEROUPPER - ret + VMOVU (%rsi), %YMM0 + VMOVU -32(%rsi,%rdx), %YMM1 + VMOVU %YMM0, (%rdi) + VMOVU %YMM1, -32(%rdi,%rdx) + VZEROUPPER_RETURN #endif #if VEC_SIZE > 16 /* From 16 to 31. No branch when size == 16. */ L(between_16_31): - vmovdqu (%rsi), %xmm0 - vmovdqu -16(%rsi,%rdx), %xmm1 - vmovdqu %xmm0, (%rdi) - vmovdqu %xmm1, -16(%rdi,%rdx) - ret + VMOVU (%rsi), %XMM0 + VMOVU -16(%rsi,%rdx), %XMM1 + VMOVU %XMM0, (%rdi) + VMOVU %XMM1, -16(%rdi,%rdx) + VZEROUPPER_RETURN #endif L(between_8_15): /* From 8 to 15. No branch when size == 8. */ @@ -358,8 +390,7 @@ L(more_2x_vec): VMOVU %VEC(5), -(VEC_SIZE * 2)(%rdi,%rdx) VMOVU %VEC(6), -(VEC_SIZE * 3)(%rdi,%rdx) VMOVU %VEC(7), -(VEC_SIZE * 4)(%rdi,%rdx) - VZEROUPPER - ret + VZEROUPPER_RETURN L(last_4x_vec): /* Copy from 2 * VEC to 4 * VEC. 
*/ VMOVU (%rsi), %VEC(0) @@ -370,8 +401,7 @@ L(last_4x_vec): VMOVU %VEC(1), VEC_SIZE(%rdi) VMOVU %VEC(2), -VEC_SIZE(%rdi,%rdx) VMOVU %VEC(3), -(VEC_SIZE * 2)(%rdi,%rdx) - VZEROUPPER - ret + VZEROUPPER_RETURN L(more_8x_vec): cmpq %rsi, %rdi @@ -402,7 +432,7 @@ L(more_8x_vec): addq %r8, %rdx #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc) /* Check non-temporal store threshold. */ - cmpq __x86_shared_non_temporal_threshold(%rip), %rdx + cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP ja L(large_forward) #endif L(loop_4x_vec_forward): @@ -427,8 +457,7 @@ L(loop_4x_vec_forward): VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx) /* Store the first VEC. */ VMOVU %VEC(4), (%r11) - VZEROUPPER - ret + VZEROUPPER_RETURN L(more_8x_vec_backward): /* Load the first 4 * VEC and last VEC to support overlapping @@ -454,7 +483,7 @@ L(more_8x_vec_backward): subq %r8, %rdx #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc) /* Check non-temporal store threshold. */ - cmpq __x86_shared_non_temporal_threshold(%rip), %rdx + cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP ja L(large_backward) #endif L(loop_4x_vec_backward): @@ -479,8 +508,7 @@ L(loop_4x_vec_backward): VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi) /* Store the last VEC. */ VMOVU %VEC(8), (%r11) - VZEROUPPER - ret + VZEROUPPER_RETURN #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc) L(large_forward): @@ -515,8 +543,7 @@ L(loop_large_forward): VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx) /* Store the first VEC. */ VMOVU %VEC(4), (%r11) - VZEROUPPER - ret + VZEROUPPER_RETURN L(large_backward): /* Don't use non-temporal store if there is overlap between @@ -550,8 +577,7 @@ L(loop_large_backward): VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi) /* Store the last VEC. */ VMOVU %VEC(8), (%r11) - VZEROUPPER - ret + VZEROUPPER_RETURN #endif END (MEMMOVE_SYMBOL (__memmove, unaligned_erms)) diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S new file mode 100644 index 000000000..cea2d2a72 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S @@ -0,0 +1,12 @@ +#ifndef MEMRCHR +# define MEMRCHR __memrchr_avx2_rtm +#endif + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + +#define SECTION(p) p##.avx.rtm + +#include "memrchr-avx2.S" diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S index f5437b54d..c8d54c08d 100644 --- a/sysdeps/x86_64/multiarch/memrchr-avx2.S +++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S @@ -20,14 +20,22 @@ # include +# ifndef MEMRCHR +# define MEMRCHR __memrchr_avx2 +# endif + # ifndef VZEROUPPER # define VZEROUPPER vzeroupper # endif +# ifndef SECTION +# define SECTION(p) p##.avx +# endif + # define VEC_SIZE 32 - .section .text.avx,"ax",@progbits -ENTRY (__memrchr_avx2) + .section SECTION(.text),"ax",@progbits +ENTRY (MEMRCHR) /* Broadcast CHAR to YMM0. 
*/ vmovd %esi, %xmm0 vpbroadcastb %xmm0, %ymm0 @@ -134,8 +142,8 @@ L(loop_4x_vec): vpmovmskb %ymm1, %eax bsrl %eax, %eax addq %rdi, %rax - VZEROUPPER - ret +L(return_vzeroupper): + ZERO_UPPER_VEC_REGISTERS_RETURN .p2align 4 L(last_4x_vec_or_less): @@ -169,8 +177,7 @@ L(last_4x_vec_or_less): addq %rax, %rdx jl L(zero) addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(last_2x_vec): @@ -191,31 +198,27 @@ L(last_2x_vec): jl L(zero) addl $(VEC_SIZE * 2), %eax addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(last_vec_x0): bsrl %eax, %eax addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(last_vec_x1): bsrl %eax, %eax addl $VEC_SIZE, %eax addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(last_vec_x2): bsrl %eax, %eax addl $(VEC_SIZE * 2), %eax addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(last_vec_x3): @@ -232,8 +235,7 @@ L(last_vec_x1_check): jl L(zero) addl $VEC_SIZE, %eax addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(last_vec_x3_check): @@ -243,12 +245,14 @@ L(last_vec_x3_check): jl L(zero) addl $(VEC_SIZE * 3), %eax addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(zero): - VZEROUPPER + xorl %eax, %eax + VZEROUPPER_RETURN + + .p2align 4 L(null): xorl %eax, %eax ret @@ -273,8 +277,7 @@ L(last_vec_or_less_aligned): bsrl %eax, %eax addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(last_vec_or_less): @@ -315,8 +318,7 @@ L(last_vec_or_less): bsrl %eax, %eax addq %rdi, %rax addq %r8, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(last_vec_2x_aligned): @@ -353,7 +355,6 @@ L(last_vec_2x_aligned): bsrl %eax, %eax addq %rdi, %rax addq %r8, %rax - VZEROUPPER - ret -END (__memrchr_avx2) + VZEROUPPER_RETURN +END (MEMRCHR) #endif diff --git a/sysdeps/x86_64/multiarch/memrchr-evex.S b/sysdeps/x86_64/multiarch/memrchr-evex.S new file mode 100644 index 000000000..16bf8e02b --- /dev/null +++ b/sysdeps/x86_64/multiarch/memrchr-evex.S @@ -0,0 +1,337 @@ +/* memrchr optimized with 256-bit EVEX instructions. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +# include + +# define VMOVA vmovdqa64 + +# define YMMMATCH ymm16 + +# define VEC_SIZE 32 + + .section .text.evex,"ax",@progbits +ENTRY (__memrchr_evex) + /* Broadcast CHAR to YMMMATCH. */ + vpbroadcastb %esi, %YMMMATCH + + sub $VEC_SIZE, %RDX_LP + jbe L(last_vec_or_less) + + add %RDX_LP, %RDI_LP + + /* Check the last VEC_SIZE bytes. */ + vpcmpb $0, (%rdi), %YMMMATCH, %k1 + kmovd %k1, %eax + testl %eax, %eax + jnz L(last_vec_x0) + + subq $(VEC_SIZE * 4), %rdi + movl %edi, %ecx + andl $(VEC_SIZE - 1), %ecx + jz L(aligned_more) + + /* Align data for aligned loads in the loop. 
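+ %rdi is rounded up to the next VEC_SIZE boundary, an advance of VEC_SIZE minus the misalignment held in %rcx, and %rdx grows by exactly the same amount, keeping the pointer and the remaining length in step for the backward scan.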
*/ + addq $VEC_SIZE, %rdi + addq $VEC_SIZE, %rdx + andq $-VEC_SIZE, %rdi + subq %rcx, %rdx + + .p2align 4 +L(aligned_more): + subq $(VEC_SIZE * 4), %rdx + jbe L(last_4x_vec_or_less) + + /* Check the last 4 * VEC_SIZE. Only one VEC_SIZE at a time + since data is only aligned to VEC_SIZE. */ + vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1 + kmovd %k1, %eax + testl %eax, %eax + jnz L(last_vec_x3) + + vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k2 + kmovd %k2, %eax + testl %eax, %eax + jnz L(last_vec_x2) + + vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k3 + kmovd %k3, %eax + testl %eax, %eax + jnz L(last_vec_x1) + + vpcmpb $0, (%rdi), %YMMMATCH, %k4 + kmovd %k4, %eax + testl %eax, %eax + jnz L(last_vec_x0) + + /* Align data to 4 * VEC_SIZE for loop with fewer branches. + There are some overlaps with above if data isn't aligned + to 4 * VEC_SIZE. */ + movl %edi, %ecx + andl $(VEC_SIZE * 4 - 1), %ecx + jz L(loop_4x_vec) + + addq $(VEC_SIZE * 4), %rdi + addq $(VEC_SIZE * 4), %rdx + andq $-(VEC_SIZE * 4), %rdi + subq %rcx, %rdx + + .p2align 4 +L(loop_4x_vec): + /* Compare 4 * VEC at a time backward. */ + subq $(VEC_SIZE * 4), %rdi + subq $(VEC_SIZE * 4), %rdx + jbe L(last_4x_vec_or_less) + + vpcmpb $0, (%rdi), %YMMMATCH, %k1 + vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k2 + kord %k1, %k2, %k5 + vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k3 + vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k4 + + kord %k3, %k4, %k6 + kortestd %k5, %k6 + jz L(loop_4x_vec) + + /* There is a match. */ + kmovd %k4, %eax + testl %eax, %eax + jnz L(last_vec_x3) + + kmovd %k3, %eax + testl %eax, %eax + jnz L(last_vec_x2) + + kmovd %k2, %eax + testl %eax, %eax + jnz L(last_vec_x1) + + kmovd %k1, %eax + bsrl %eax, %eax + addq %rdi, %rax + ret + + .p2align 4 +L(last_4x_vec_or_less): + addl $(VEC_SIZE * 4), %edx + cmpl $(VEC_SIZE * 2), %edx + jbe L(last_2x_vec) + + vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1 + kmovd %k1, %eax + testl %eax, %eax + jnz L(last_vec_x3) + + vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k2 + kmovd %k2, %eax + testl %eax, %eax + jnz L(last_vec_x2) + + vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k3 + kmovd %k3, %eax + testl %eax, %eax + jnz L(last_vec_x1_check) + cmpl $(VEC_SIZE * 3), %edx + jbe L(zero) + + vpcmpb $0, (%rdi), %YMMMATCH, %k4 + kmovd %k4, %eax + testl %eax, %eax + jz L(zero) + bsrl %eax, %eax + subq $(VEC_SIZE * 4), %rdx + addq %rax, %rdx + jl L(zero) + addq %rdi, %rax + ret + + .p2align 4 +L(last_2x_vec): + vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1 + kmovd %k1, %eax + testl %eax, %eax + jnz L(last_vec_x3_check) + cmpl $VEC_SIZE, %edx + jbe L(zero) + + vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k1 + kmovd %k1, %eax + testl %eax, %eax + jz L(zero) + bsrl %eax, %eax + subq $(VEC_SIZE * 2), %rdx + addq %rax, %rdx + jl L(zero) + addl $(VEC_SIZE * 2), %eax + addq %rdi, %rax + ret + + .p2align 4 +L(last_vec_x0): + bsrl %eax, %eax + addq %rdi, %rax + ret + + .p2align 4 +L(last_vec_x1): + bsrl %eax, %eax + addl $VEC_SIZE, %eax + addq %rdi, %rax + ret + + .p2align 4 +L(last_vec_x2): + bsrl %eax, %eax + addl $(VEC_SIZE * 2), %eax + addq %rdi, %rax + ret + + .p2align 4 +L(last_vec_x3): + bsrl %eax, %eax + addl $(VEC_SIZE * 3), %eax + addq %rdi, %rax + ret + + .p2align 4 +L(last_vec_x1_check): + bsrl %eax, %eax + subq $(VEC_SIZE * 3), %rdx + addq %rax, %rdx + jl L(zero) + addl $VEC_SIZE, %eax + addq %rdi, %rax + ret + + .p2align 4 +L(last_vec_x3_check): + bsrl %eax, %eax + subq $VEC_SIZE, %rdx + addq %rax, %rdx + jl L(zero) + addl $(VEC_SIZE * 3), %eax + addq %rdi, %rax + ret + + .p2align 4 +L(zero):
+ xorl %eax, %eax + ret + + .p2align 4 +L(last_vec_or_less_aligned): + movl %edx, %ecx + + vpcmpb $0, (%rdi), %YMMMATCH, %k1 + + movl $1, %edx + /* Support rdx << 32. */ + salq %cl, %rdx + subq $1, %rdx + + kmovd %k1, %eax + + /* Remove the trailing bytes. */ + andl %edx, %eax + testl %eax, %eax + jz L(zero) + + bsrl %eax, %eax + addq %rdi, %rax + ret + + .p2align 4 +L(last_vec_or_less): + addl $VEC_SIZE, %edx + + /* Check for zero length. */ + testl %edx, %edx + jz L(zero) + + movl %edi, %ecx + andl $(VEC_SIZE - 1), %ecx + jz L(last_vec_or_less_aligned) + + movl %ecx, %esi + movl %ecx, %r8d + addl %edx, %esi + andq $-VEC_SIZE, %rdi + + subl $VEC_SIZE, %esi + ja L(last_vec_2x_aligned) + + /* Check the last VEC. */ + vpcmpb $0, (%rdi), %YMMMATCH, %k1 + kmovd %k1, %eax + + /* Remove the leading and trailing bytes. */ + sarl %cl, %eax + movl %edx, %ecx + + movl $1, %edx + sall %cl, %edx + subl $1, %edx + + andl %edx, %eax + testl %eax, %eax + jz L(zero) + + bsrl %eax, %eax + addq %rdi, %rax + addq %r8, %rax + ret + + .p2align 4 +L(last_vec_2x_aligned): + movl %esi, %ecx + + /* Check the last VEC. */ + vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k1 + + movl $1, %edx + sall %cl, %edx + subl $1, %edx + + kmovd %k1, %eax + + /* Remove the trailing bytes. */ + andl %edx, %eax + + testl %eax, %eax + jnz L(last_vec_x1) + + /* Check the second last VEC. */ + vpcmpb $0, (%rdi), %YMMMATCH, %k1 + + movl %r8d, %ecx + + kmovd %k1, %eax + + /* Remove the leading bytes. Must use unsigned right shift for + bsrl below. */ + shrl %cl, %eax + testl %eax, %eax + jz L(zero) + + bsrl %eax, %eax + addq %rdi, %rax + addq %r8, %rax + ret +END (__memrchr_evex) +#endif diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S new file mode 100644 index 000000000..8ac3e479b --- /dev/null +++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S @@ -0,0 +1,10 @@ +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return) + +#define SECTION(p) p##.avx.rtm +#define MEMSET_SYMBOL(p,s) p##_avx2_##s##_rtm +#define WMEMSET_SYMBOL(p,s) p##_avx2_##s##_rtm + +#include "memset-avx2-unaligned-erms.S" diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S index 7ab3d8984..ae0860f36 100644 --- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S +++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S @@ -14,9 +14,15 @@ movq r, %rax; \ vpbroadcastd %xmm0, %ymm0 -# define SECTION(p) p##.avx -# define MEMSET_SYMBOL(p,s) p##_avx2_##s -# define WMEMSET_SYMBOL(p,s) p##_avx2_##s +# ifndef SECTION +# define SECTION(p) p##.avx +# endif +# ifndef MEMSET_SYMBOL +# define MEMSET_SYMBOL(p,s) p##_avx2_##s +# endif +# ifndef WMEMSET_SYMBOL +# define WMEMSET_SYMBOL(p,s) p##_avx2_##s +# endif # include "memset-vec-unaligned-erms.S" #endif diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S index 0783979ca..22e7b187c 100644 --- a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S +++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S @@ -1,22 +1,22 @@ #if IS_IN (libc) # define VEC_SIZE 64 -# define VEC(i) zmm##i +# define XMM0 xmm16 +# define YMM0 ymm16 +# define VEC0 zmm16 +# define VEC(i) VEC##i # define VMOVU vmovdqu64 # define VMOVA vmovdqa64 +# define VZEROUPPER # define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ - vmovd d, %xmm0; \ movq r, %rax; \ - 
vpbroadcastb %xmm0, %xmm0; \ - vpbroadcastq %xmm0, %zmm0 + vpbroadcastb d, %VEC0 # define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ - vmovd d, %xmm0; \ movq r, %rax; \ - vpbroadcastd %xmm0, %xmm0; \ - vpbroadcastq %xmm0, %zmm0 + vpbroadcastd d, %VEC0 -# define SECTION(p) p##.avx512 +# define SECTION(p) p##.evex512 # define MEMSET_SYMBOL(p,s) p##_avx512_##s # define WMEMSET_SYMBOL(p,s) p##_avx512_##s diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S new file mode 100644 index 000000000..ae0a4d6e4 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S @@ -0,0 +1,24 @@ +#if IS_IN (libc) +# define VEC_SIZE 32 +# define XMM0 xmm16 +# define YMM0 ymm16 +# define VEC0 ymm16 +# define VEC(i) VEC##i +# define VMOVU vmovdqu64 +# define VMOVA vmovdqa64 +# define VZEROUPPER + +# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ + movq r, %rax; \ + vpbroadcastb d, %VEC0 + +# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ + movq r, %rax; \ + vpbroadcastd d, %VEC0 + +# define SECTION(p) p##.evex +# define MEMSET_SYMBOL(p,s) p##_evex_##s +# define WMEMSET_SYMBOL(p,s) p##_evex_##s + +# include "memset-vec-unaligned-erms.S" +#endif diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S index af2299709..16bed6ec1 100644 --- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S @@ -34,20 +34,25 @@ # define WMEMSET_CHK_SYMBOL(p,s) WMEMSET_SYMBOL(p, s) #endif +#ifndef XMM0 +# define XMM0 xmm0 +#endif + +#ifndef YMM0 +# define YMM0 ymm0 +#endif + #ifndef VZEROUPPER # if VEC_SIZE > 16 # define VZEROUPPER vzeroupper +# define VZEROUPPER_SHORT_RETURN vzeroupper; ret # else # define VZEROUPPER # endif #endif #ifndef VZEROUPPER_SHORT_RETURN -# if VEC_SIZE > 16 -# define VZEROUPPER_SHORT_RETURN vzeroupper -# else -# define VZEROUPPER_SHORT_RETURN rep -# endif +# define VZEROUPPER_SHORT_RETURN rep; ret #endif #ifndef MOVQ @@ -77,7 +82,7 @@ ENTRY (__bzero) mov %RDI_LP, %RAX_LP /* Set return value. */ mov %RSI_LP, %RDX_LP /* Set n. */ - pxor %xmm0, %xmm0 + pxor %XMM0, %XMM0 jmp L(entry_from_bzero) END (__bzero) weak_alias (__bzero, bzero) @@ -119,8 +124,7 @@ L(entry_from_bzero): /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */ VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx) VMOVU %VEC(0), (%rdi) - VZEROUPPER - ret + VZEROUPPER_RETURN #if defined USE_MULTIARCH && IS_IN (libc) END (MEMSET_SYMBOL (__memset, unaligned)) @@ -143,14 +147,12 @@ ENTRY (__memset_erms) ENTRY (MEMSET_SYMBOL (__memset, erms)) # endif L(stosb): - /* Issue vzeroupper before rep stosb. */ - VZEROUPPER mov %RDX_LP, %RCX_LP movzbl %sil, %eax mov %RDI_LP, %RDX_LP rep stosb mov %RDX_LP, %RAX_LP - ret + VZEROUPPER_RETURN # if VEC_SIZE == 16 END (__memset_erms) # else @@ -177,8 +179,7 @@ ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms)) /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */ VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx) VMOVU %VEC(0), (%rdi) - VZEROUPPER - ret + VZEROUPPER_RETURN L(stosb_more_2x_vec): cmpq $REP_STOSB_THRESHOLD, %rdx @@ -192,8 +193,11 @@ L(more_2x_vec): VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx) VMOVU %VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx) L(return): - VZEROUPPER +#if VEC_SIZE > 16 + ZERO_UPPER_VEC_REGISTERS_RETURN +#else ret +#endif L(loop_start): leaq (VEC_SIZE * 4)(%rdi), %rcx @@ -219,7 +223,6 @@ L(loop): cmpq %rcx, %rdx jne L(loop) VZEROUPPER_SHORT_RETURN - ret L(less_vec): /* Less than 1 VEC. 
*/ # if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64 @@ -233,7 +236,7 @@ L(less_vec): cmpb $16, %dl jae L(between_16_31) # endif - MOVQ %xmm0, %rcx + MOVQ %XMM0, %rcx cmpb $8, %dl jae L(between_8_15) cmpb $4, %dl @@ -243,40 +246,34 @@ L(less_vec): jb 1f movb %cl, (%rdi) 1: - VZEROUPPER - ret + VZEROUPPER_RETURN # if VEC_SIZE > 32 /* From 32 to 63. No branch when size == 32. */ L(between_32_63): - vmovdqu %ymm0, -32(%rdi,%rdx) - vmovdqu %ymm0, (%rdi) - VZEROUPPER - ret + VMOVU %YMM0, -32(%rdi,%rdx) + VMOVU %YMM0, (%rdi) + VZEROUPPER_RETURN # endif # if VEC_SIZE > 16 /* From 16 to 31. No branch when size == 16. */ L(between_16_31): - vmovdqu %xmm0, -16(%rdi,%rdx) - vmovdqu %xmm0, (%rdi) - VZEROUPPER - ret + VMOVU %XMM0, -16(%rdi,%rdx) + VMOVU %XMM0, (%rdi) + VZEROUPPER_RETURN # endif /* From 8 to 15. No branch when size == 8. */ L(between_8_15): movq %rcx, -8(%rdi,%rdx) movq %rcx, (%rdi) - VZEROUPPER - ret + VZEROUPPER_RETURN L(between_4_7): /* From 4 to 7. No branch when size == 4. */ movl %ecx, -4(%rdi,%rdx) movl %ecx, (%rdi) - VZEROUPPER - ret + VZEROUPPER_RETURN L(between_2_3): /* From 2 to 3. No branch when size == 2. */ movw %cx, -2(%rdi,%rdx) movw %cx, (%rdi) - VZEROUPPER - ret + VZEROUPPER_RETURN END (MEMSET_SYMBOL (__memset, unaligned_erms)) diff --git a/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S new file mode 100644 index 000000000..acc5f6e2f --- /dev/null +++ b/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S @@ -0,0 +1,4 @@ +#define MEMCHR __rawmemchr_avx2_rtm +#define USE_AS_RAWMEMCHR 1 + +#include "memchr-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/rawmemchr-evex.S b/sysdeps/x86_64/multiarch/rawmemchr-evex.S new file mode 100644 index 000000000..ec942b77b --- /dev/null +++ b/sysdeps/x86_64/multiarch/rawmemchr-evex.S @@ -0,0 +1,4 @@ +#define MEMCHR __rawmemchr_evex +#define USE_AS_RAWMEMCHR 1 + +#include "memchr-evex.S" diff --git a/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S new file mode 100644 index 000000000..2b9c07a59 --- /dev/null +++ b/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S @@ -0,0 +1,3 @@ +#define USE_AS_STPCPY +#define STRCPY __stpcpy_avx2_rtm +#include "strcpy-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/stpcpy-evex.S b/sysdeps/x86_64/multiarch/stpcpy-evex.S new file mode 100644 index 000000000..7c6f26cd9 --- /dev/null +++ b/sysdeps/x86_64/multiarch/stpcpy-evex.S @@ -0,0 +1,3 @@ +#define USE_AS_STPCPY +#define STRCPY __stpcpy_evex +#include "strcpy-evex.S" diff --git a/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S new file mode 100644 index 000000000..60a2ccfe5 --- /dev/null +++ b/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S @@ -0,0 +1,4 @@ +#define USE_AS_STPCPY +#define USE_AS_STRNCPY +#define STRCPY __stpncpy_avx2_rtm +#include "strcpy-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/stpncpy-evex.S b/sysdeps/x86_64/multiarch/stpncpy-evex.S new file mode 100644 index 000000000..1570014d1 --- /dev/null +++ b/sysdeps/x86_64/multiarch/stpncpy-evex.S @@ -0,0 +1,4 @@ +#define USE_AS_STPCPY +#define USE_AS_STRNCPY +#define STRCPY __stpncpy_evex +#include "strcpy-evex.S" diff --git a/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S new file mode 100644 index 000000000..637fb557c --- /dev/null +++ b/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S @@ -0,0 +1,12 @@ +#ifndef STRCAT +# define STRCAT __strcat_avx2_rtm +#endif + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + 
ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + +#define SECTION(p) p##.avx.rtm + +#include "strcat-avx2.S" diff --git a/sysdeps/x86_64/multiarch/strcat-avx2.S b/sysdeps/x86_64/multiarch/strcat-avx2.S index a4143bf8f..1e6d4827e 100644 --- a/sysdeps/x86_64/multiarch/strcat-avx2.S +++ b/sysdeps/x86_64/multiarch/strcat-avx2.S @@ -30,7 +30,11 @@ /* Number of bytes in a vector register */ # define VEC_SIZE 32 - .section .text.avx,"ax",@progbits +# ifndef SECTION +# define SECTION(p) p##.avx +# endif + + .section SECTION(.text),"ax",@progbits ENTRY (STRCAT) mov %rdi, %r9 # ifdef USE_AS_STRNCAT diff --git a/sysdeps/x86_64/multiarch/strcat-evex.S b/sysdeps/x86_64/multiarch/strcat-evex.S new file mode 100644 index 000000000..97c3d85b6 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strcat-evex.S @@ -0,0 +1,283 @@ +/* strcat with 256-bit EVEX instructions. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +# include + +# ifndef STRCAT +# define STRCAT __strcat_evex +# endif + +# define VMOVU vmovdqu64 +# define VMOVA vmovdqa64 + +/* zero register */ +# define XMMZERO xmm16 +# define YMMZERO ymm16 +# define YMM0 ymm17 +# define YMM1 ymm18 + +# define USE_AS_STRCAT + +/* Number of bytes in a vector register */ +# define VEC_SIZE 32 + + .section .text.evex,"ax",@progbits +ENTRY (STRCAT) + mov %rdi, %r9 +# ifdef USE_AS_STRNCAT + mov %rdx, %r8 +# endif + + xor %eax, %eax + mov %edi, %ecx + and $((VEC_SIZE * 4) - 1), %ecx + vpxorq %XMMZERO, %XMMZERO, %XMMZERO + cmp $(VEC_SIZE * 3), %ecx + ja L(fourth_vector_boundary) + vpcmpb $0, (%rdi), %YMMZERO, %k0 + kmovd %k0, %edx + test %edx, %edx + jnz L(exit_null_on_first_vector) + mov %rdi, %rax + and $-VEC_SIZE, %rax + jmp L(align_vec_size_start) +L(fourth_vector_boundary): + mov %rdi, %rax + and $-VEC_SIZE, %rax + vpcmpb $0, (%rax), %YMMZERO, %k0 + mov $-1, %r10d + sub %rax, %rcx + shl %cl, %r10d + kmovd %k0, %edx + and %r10d, %edx + jnz L(exit) + +L(align_vec_size_start): + vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0 + kmovd %k0, %edx + test %edx, %edx + jnz L(exit_null_on_second_vector) + + vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1 + kmovd %k1, %edx + test %edx, %edx + jnz L(exit_null_on_third_vector) + + vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2 + kmovd %k2, %edx + test %edx, %edx + jnz L(exit_null_on_fourth_vector) + + vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3 + kmovd %k3, %edx + test %edx, %edx + jnz L(exit_null_on_fifth_vector) + + vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4 + add $(VEC_SIZE * 4), %rax + kmovd %k4, %edx + test %edx, %edx + jnz L(exit_null_on_second_vector) + + vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1 + kmovd %k1, %edx + test %edx, %edx + jnz L(exit_null_on_third_vector) + + vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2 + kmovd %k2, %edx + test %edx, %edx + jnz 
L(exit_null_on_fourth_vector) + + vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3 + kmovd %k3, %edx + test %edx, %edx + jnz L(exit_null_on_fifth_vector) + + vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4 + kmovd %k4, %edx + add $(VEC_SIZE * 4), %rax + test %edx, %edx + jnz L(exit_null_on_second_vector) + + vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1 + kmovd %k1, %edx + test %edx, %edx + jnz L(exit_null_on_third_vector) + + vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2 + kmovd %k2, %edx + test %edx, %edx + jnz L(exit_null_on_fourth_vector) + + vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3 + kmovd %k3, %edx + test %edx, %edx + jnz L(exit_null_on_fifth_vector) + + vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4 + add $(VEC_SIZE * 4), %rax + kmovd %k4, %edx + test %edx, %edx + jnz L(exit_null_on_second_vector) + + vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1 + kmovd %k1, %edx + test %edx, %edx + jnz L(exit_null_on_third_vector) + + vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2 + kmovd %k2, %edx + test %edx, %edx + jnz L(exit_null_on_fourth_vector) + + vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3 + kmovd %k3, %edx + test %edx, %edx + jnz L(exit_null_on_fifth_vector) + + test $((VEC_SIZE * 4) - 1), %rax + jz L(align_four_vec_loop) + + vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4 + add $(VEC_SIZE * 5), %rax + kmovd %k4, %edx + test %edx, %edx + jnz L(exit) + + test $((VEC_SIZE * 4) - 1), %rax + jz L(align_four_vec_loop) + + vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0 + add $VEC_SIZE, %rax + kmovd %k0, %edx + test %edx, %edx + jnz L(exit) + + test $((VEC_SIZE * 4) - 1), %rax + jz L(align_four_vec_loop) + + vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0 + add $VEC_SIZE, %rax + kmovd %k0, %edx + test %edx, %edx + jnz L(exit) + + test $((VEC_SIZE * 4) - 1), %rax + jz L(align_four_vec_loop) + + vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k1 + add $VEC_SIZE, %rax + kmovd %k1, %edx + test %edx, %edx + jnz L(exit) + + add $VEC_SIZE, %rax + + .p2align 4 +L(align_four_vec_loop): + VMOVA (%rax), %YMM0 + VMOVA (VEC_SIZE * 2)(%rax), %YMM1 + vpminub VEC_SIZE(%rax), %YMM0, %YMM0 + vpminub (VEC_SIZE * 3)(%rax), %YMM1, %YMM1 + vpminub %YMM0, %YMM1, %YMM0 + /* If K0 != 0, there is a null byte. 
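+ The three vpminub steps above fold four vectors into YMM0, so a zero byte anywhere in the 4 * VEC_SIZE window produces a zero byte in YMM0, which the vpcmpb against YMMZERO below records in k0.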
*/ + vpcmpb $0, %YMM0, %YMMZERO, %k0 + add $(VEC_SIZE * 4), %rax + ktestd %k0, %k0 + jz L(align_four_vec_loop) + + vpcmpb $0, -(VEC_SIZE * 4)(%rax), %YMMZERO, %k0 + sub $(VEC_SIZE * 5), %rax + kmovd %k0, %edx + test %edx, %edx + jnz L(exit_null_on_second_vector) + + vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1 + kmovd %k1, %edx + test %edx, %edx + jnz L(exit_null_on_third_vector) + + vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2 + kmovd %k2, %edx + test %edx, %edx + jnz L(exit_null_on_fourth_vector) + + vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3 + kmovd %k3, %edx + sub %rdi, %rax + bsf %rdx, %rdx + add %rdx, %rax + add $(VEC_SIZE * 4), %rax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit): + sub %rdi, %rax +L(exit_null_on_first_vector): + bsf %rdx, %rdx + add %rdx, %rax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_null_on_second_vector): + sub %rdi, %rax + bsf %rdx, %rdx + add %rdx, %rax + add $VEC_SIZE, %rax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_null_on_third_vector): + sub %rdi, %rax + bsf %rdx, %rdx + add %rdx, %rax + add $(VEC_SIZE * 2), %rax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_null_on_fourth_vector): + sub %rdi, %rax + bsf %rdx, %rdx + add %rdx, %rax + add $(VEC_SIZE * 3), %rax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_null_on_fifth_vector): + sub %rdi, %rax + bsf %rdx, %rdx + add %rdx, %rax + add $(VEC_SIZE * 4), %rax + + .p2align 4 +L(StartStrcpyPart): + lea (%r9, %rax), %rdi + mov %rsi, %rcx + mov %r9, %rax /* save result */ + +# ifdef USE_AS_STRNCAT + test %r8, %r8 + jz L(ExitZero) +# define USE_AS_STRNCPY +# endif + +# include "strcpy-evex.S" +#endif diff --git a/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S new file mode 100644 index 000000000..81f20d1d8 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S @@ -0,0 +1,12 @@ +#ifndef STRCHR +# define STRCHR __strchr_avx2_rtm +#endif + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + +#define SECTION(p) p##.avx.rtm + +#include "strchr-avx2.S" diff --git a/sysdeps/x86_64/multiarch/strchr-avx2.S b/sysdeps/x86_64/multiarch/strchr-avx2.S index 39fc69da7..0a5217514 100644 --- a/sysdeps/x86_64/multiarch/strchr-avx2.S +++ b/sysdeps/x86_64/multiarch/strchr-avx2.S @@ -38,9 +38,13 @@ # define VZEROUPPER vzeroupper # endif +# ifndef SECTION +# define SECTION(p) p##.avx +# endif + # define VEC_SIZE 32 - .section .text.avx,"ax",@progbits + .section SECTION(.text),"ax",@progbits ENTRY (STRCHR) movl %edi, %ecx /* Broadcast CHAR to YMM0. 
*/ @@ -93,8 +97,8 @@ L(cros_page_boundary): cmp (%rax), %CHAR_REG cmovne %rdx, %rax # endif - VZEROUPPER - ret +L(return_vzeroupper): + ZERO_UPPER_VEC_REGISTERS_RETURN .p2align 4 L(aligned_more): @@ -190,8 +194,7 @@ L(first_vec_x0): cmp (%rax), %CHAR_REG cmovne %rdx, %rax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(first_vec_x1): @@ -205,8 +208,7 @@ L(first_vec_x1): cmp (%rax), %CHAR_REG cmovne %rdx, %rax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(first_vec_x2): @@ -220,8 +222,7 @@ L(first_vec_x2): cmp (%rax), %CHAR_REG cmovne %rdx, %rax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(4x_vec_end): @@ -247,8 +248,7 @@ L(first_vec_x3): cmp (%rax), %CHAR_REG cmovne %rdx, %rax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN END (STRCHR) #endif diff --git a/sysdeps/x86_64/multiarch/strchr-evex.S b/sysdeps/x86_64/multiarch/strchr-evex.S new file mode 100644 index 000000000..ddc86a705 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strchr-evex.S @@ -0,0 +1,335 @@ +/* strchr/strchrnul optimized with 256-bit EVEX instructions. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +# include + +# ifndef STRCHR +# define STRCHR __strchr_evex +# endif + +# define VMOVU vmovdqu64 +# define VMOVA vmovdqa64 + +# ifdef USE_AS_WCSCHR +# define VPBROADCAST vpbroadcastd +# define VPCMP vpcmpd +# define VPMINU vpminud +# define CHAR_REG esi +# define SHIFT_REG r8d +# else +# define VPBROADCAST vpbroadcastb +# define VPCMP vpcmpb +# define VPMINU vpminub +# define CHAR_REG sil +# define SHIFT_REG ecx +# endif + +# define XMMZERO xmm16 + +# define YMMZERO ymm16 +# define YMM0 ymm17 +# define YMM1 ymm18 +# define YMM2 ymm19 +# define YMM3 ymm20 +# define YMM4 ymm21 +# define YMM5 ymm22 +# define YMM6 ymm23 +# define YMM7 ymm24 +# define YMM8 ymm25 + +# define VEC_SIZE 32 +# define PAGE_SIZE 4096 + + .section .text.evex,"ax",@progbits +ENTRY (STRCHR) + movl %edi, %ecx +# ifndef USE_AS_STRCHRNUL + xorl %edx, %edx +# endif + + /* Broadcast CHAR to YMM0. */ + VPBROADCAST %esi, %YMM0 + + vpxorq %XMMZERO, %XMMZERO, %XMMZERO + + /* Check if we cross page boundary with one vector load. */ + andl $(PAGE_SIZE - 1), %ecx + cmpl $(PAGE_SIZE - VEC_SIZE), %ecx + ja L(cross_page_boundary) + + /* Check the first VEC_SIZE bytes. Search for both CHAR and the + null bytes. */ + VMOVU (%rdi), %YMM1 + + /* Leaves only CHARS matching esi as 0. */ + vpxorq %YMM1, %YMM0, %YMM2 + VPMINU %YMM2, %YMM1, %YMM2 + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM2, %k0 + ktestd %k0, %k0 + jz L(more_vecs) + kmovd %k0, %eax + tzcntl %eax, %eax + /* Found CHAR or the null byte. */ +# ifdef USE_AS_WCSCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. 
*/ + leaq (%rdi, %rax, 4), %rax +# else + addq %rdi, %rax +# endif +# ifndef USE_AS_STRCHRNUL + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax +# endif + ret + + .p2align 4 +L(more_vecs): + /* Align data for aligned loads in the loop. */ + andq $-VEC_SIZE, %rdi +L(aligned_more): + + /* Check the next 4 * VEC_SIZE. Only one VEC_SIZE at a time + since data is only aligned to VEC_SIZE. */ + VMOVA VEC_SIZE(%rdi), %YMM1 + addq $VEC_SIZE, %rdi + + /* Leaves only CHARS matching esi as 0. */ + vpxorq %YMM1, %YMM0, %YMM2 + VPMINU %YMM2, %YMM1, %YMM2 + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM2, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(first_vec_x0) + + VMOVA VEC_SIZE(%rdi), %YMM1 + /* Leaves only CHARS matching esi as 0. */ + vpxorq %YMM1, %YMM0, %YMM2 + VPMINU %YMM2, %YMM1, %YMM2 + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM2, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(first_vec_x1) + + VMOVA (VEC_SIZE * 2)(%rdi), %YMM1 + /* Leaves only CHARS matching esi as 0. */ + vpxorq %YMM1, %YMM0, %YMM2 + VPMINU %YMM2, %YMM1, %YMM2 + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM2, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(first_vec_x2) + + VMOVA (VEC_SIZE * 3)(%rdi), %YMM1 + /* Leaves only CHARS matching esi as 0. */ + vpxorq %YMM1, %YMM0, %YMM2 + VPMINU %YMM2, %YMM1, %YMM2 + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM2, %k0 + ktestd %k0, %k0 + jz L(prep_loop_4x) + + kmovd %k0, %eax + tzcntl %eax, %eax + /* Found CHAR or the null byte. */ +# ifdef USE_AS_WCSCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + leaq (VEC_SIZE * 3)(%rdi, %rax, 4), %rax +# else + leaq (VEC_SIZE * 3)(%rdi, %rax), %rax +# endif +# ifndef USE_AS_STRCHRNUL + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax +# endif + ret + + .p2align 4 +L(first_vec_x0): + tzcntl %eax, %eax + /* Found CHAR or the null byte. */ +# ifdef USE_AS_WCSCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + leaq (%rdi, %rax, 4), %rax +# else + addq %rdi, %rax +# endif +# ifndef USE_AS_STRCHRNUL + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax +# endif + ret + + .p2align 4 +L(first_vec_x1): + tzcntl %eax, %eax + /* Found CHAR or the null byte. */ +# ifdef USE_AS_WCSCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + leaq VEC_SIZE(%rdi, %rax, 4), %rax +# else + leaq VEC_SIZE(%rdi, %rax), %rax +# endif +# ifndef USE_AS_STRCHRNUL + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax +# endif + ret + + .p2align 4 +L(first_vec_x2): + tzcntl %eax, %eax + /* Found CHAR or the null byte. */ +# ifdef USE_AS_WCSCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + leaq (VEC_SIZE * 2)(%rdi, %rax, 4), %rax +# else + leaq (VEC_SIZE * 2)(%rdi, %rax), %rax +# endif +# ifndef USE_AS_STRCHRNUL + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax +# endif + ret + +L(prep_loop_4x): + /* Align data to 4 * VEC_SIZE. */ + andq $-(VEC_SIZE * 4), %rdi + + .p2align 4 +L(loop_4x_vec): + /* Compare 4 * VEC at a time forward. */ + VMOVA (VEC_SIZE * 4)(%rdi), %YMM1 + VMOVA (VEC_SIZE * 5)(%rdi), %YMM2 + VMOVA (VEC_SIZE * 6)(%rdi), %YMM3 + VMOVA (VEC_SIZE * 7)(%rdi), %YMM4 + + /* Leaves only CHARS matching esi as 0. 
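+ The VPMINU with the original data then forces null-byte positions to zero as well, so a single VPCMP against YMMZERO detects either CHAR or the terminating null.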
*/ + vpxorq %YMM1, %YMM0, %YMM5 + vpxorq %YMM2, %YMM0, %YMM6 + vpxorq %YMM3, %YMM0, %YMM7 + vpxorq %YMM4, %YMM0, %YMM8 + + VPMINU %YMM5, %YMM1, %YMM5 + VPMINU %YMM6, %YMM2, %YMM6 + VPMINU %YMM7, %YMM3, %YMM7 + VPMINU %YMM8, %YMM4, %YMM8 + + VPMINU %YMM5, %YMM6, %YMM1 + VPMINU %YMM7, %YMM8, %YMM2 + + VPMINU %YMM1, %YMM2, %YMM1 + + /* Each bit in K0 represents a CHAR or a null byte. */ + VPCMP $0, %YMMZERO, %YMM1, %k0 + + addq $(VEC_SIZE * 4), %rdi + + ktestd %k0, %k0 + jz L(loop_4x_vec) + + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM5, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(first_vec_x0) + + /* Each bit in K1 represents a CHAR or a null byte in YMM2. */ + VPCMP $0, %YMMZERO, %YMM6, %k1 + kmovd %k1, %eax + testl %eax, %eax + jnz L(first_vec_x1) + + /* Each bit in K2 represents a CHAR or a null byte in YMM3. */ + VPCMP $0, %YMMZERO, %YMM7, %k2 + /* Each bit in K3 represents a CHAR or a null byte in YMM4. */ + VPCMP $0, %YMMZERO, %YMM8, %k3 + +# ifdef USE_AS_WCSCHR + /* NB: Each bit in K2/K3 represents 4-byte element. */ + kshiftlw $8, %k3, %k1 +# else + kshiftlq $32, %k3, %k1 +# endif + + /* Each bit in K1 represents a NULL or a mismatch. */ + korq %k1, %k2, %k1 + kmovq %k1, %rax + + tzcntq %rax, %rax +# ifdef USE_AS_WCSCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + leaq (VEC_SIZE * 2)(%rdi, %rax, 4), %rax +# else + leaq (VEC_SIZE * 2)(%rdi, %rax), %rax +# endif +# ifndef USE_AS_STRCHRNUL + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax +# endif + ret + + /* Cold case for crossing page with first load. */ + .p2align 4 +L(cross_page_boundary): + andq $-VEC_SIZE, %rdi + andl $(VEC_SIZE - 1), %ecx + + VMOVA (%rdi), %YMM1 + + /* Leaves only CHARS matching esi as 0. */ + vpxorq %YMM1, %YMM0, %YMM2 + VPMINU %YMM2, %YMM1, %YMM2 + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM2, %k0 + kmovd %k0, %eax + testl %eax, %eax + +# ifdef USE_AS_WCSCHR + /* NB: Divide shift count by 4 since each bit in K1 represent 4 + bytes. */ + movl %ecx, %SHIFT_REG + sarl $2, %SHIFT_REG +# endif + + /* Remove the leading bits. */ + sarxl %SHIFT_REG, %eax, %eax + testl %eax, %eax + + jz L(aligned_more) + tzcntl %eax, %eax + addq %rcx, %rdi +# ifdef USE_AS_WCSCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. 
*/ + leaq (%rdi, %rax, 4), %rax +# else + addq %rdi, %rax +# endif +# ifndef USE_AS_STRCHRNUL + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax +# endif + ret + +END (STRCHR) +# endif diff --git a/sysdeps/x86_64/multiarch/strchr.c b/sysdeps/x86_64/multiarch/strchr.c index f27980dd3..a04ac8eb1 100644 --- a/sysdeps/x86_64/multiarch/strchr.c +++ b/sysdeps/x86_64/multiarch/strchr.c @@ -29,16 +29,28 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; static inline void * IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) - return OPTIMIZE (avx2); + { + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable) + && CPU_FEATURES_CPU_P (cpu_features, BMI2)) + return OPTIMIZE (evex); + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + return OPTIMIZE (avx2_rtm); + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE (avx2); + } if (CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF)) return OPTIMIZE (sse2_no_bsf); diff --git a/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S b/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S new file mode 100644 index 000000000..cdcf818b9 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S @@ -0,0 +1,3 @@ +#define STRCHR __strchrnul_avx2_rtm +#define USE_AS_STRCHRNUL 1 +#include "strchr-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/strchrnul-evex.S b/sysdeps/x86_64/multiarch/strchrnul-evex.S new file mode 100644 index 000000000..064fe7ca9 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strchrnul-evex.S @@ -0,0 +1,3 @@ +#define STRCHR __strchrnul_evex +#define USE_AS_STRCHRNUL 1 +#include "strchr-evex.S" diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S new file mode 100644 index 000000000..aecd30d97 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S @@ -0,0 +1,12 @@ +#ifndef STRCMP +# define STRCMP __strcmp_avx2_rtm +#endif + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + +#define SECTION(p) p##.avx.rtm + +#include "strcmp-avx2.S" diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S index 48d03a9f4..4d434fd14 100644 --- a/sysdeps/x86_64/multiarch/strcmp-avx2.S +++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S @@ -55,6 +55,10 @@ # define VZEROUPPER vzeroupper # endif +# ifndef SECTION +# define SECTION(p) p##.avx +# endif + /* Warning! wcscmp/wcsncmp have to use SIGNED comparison for elements. strcmp/strncmp have to use UNSIGNED comparison for elements. @@ -75,7 +79,7 @@ the maximum offset is reached before a difference is found, zero is returned. */ - .section .text.avx,"ax",@progbits + .section SECTION(.text),"ax",@progbits ENTRY (STRCMP) # ifdef USE_AS_STRNCMP /* Check for simple cases (0 or 1) in offset. 
*/
@@ -83,6 +87,16 @@ ENTRY (STRCMP)
 je L(char0)
 jb L(zero)
 # ifdef USE_AS_WCSCMP
+# ifndef __ILP32__
+ movq %rdx, %rcx
+ /* Check if length could overflow when multiplied by
+ sizeof(wchar_t). Checking top 8 bits will cover all potential
+ overflow cases as well as redirect cases where it is impossible
+ for the length to bound a valid memory region. In these cases
+ just use 'wcscmp'. */
+ shrq $56, %rcx
+ jnz OVERFLOW_STRCMP
+# endif
 /* Convert units: from wide to byte char. */
 shl $2, %RDX_LP
 # endif
@@ -127,8 +141,8 @@ L(return):
 movzbl (%rsi, %rdx), %edx
 subl %edx, %eax
 # endif
- VZEROUPPER
- ret
+L(return_vzeroupper):
+ ZERO_UPPER_VEC_REGISTERS_RETURN
 .p2align 4
L(return_vec_size):
@@ -161,8 +175,7 @@ L(return_vec_size):
 subl %edx, %eax
 # endif
 # endif
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
 .p2align 4
L(return_2_vec_size):
@@ -195,8 +208,7 @@ L(return_2_vec_size):
 subl %edx, %eax
 # endif
 # endif
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
 .p2align 4
L(return_3_vec_size):
@@ -229,8 +241,7 @@ L(return_3_vec_size):
 subl %edx, %eax
 # endif
 # endif
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
 .p2align 4
L(next_3_vectors):
@@ -356,8 +367,7 @@ L(back_to_loop):
 subl %edx, %eax
 # endif
 # endif
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
 .p2align 4
L(test_vec):
@@ -400,8 +410,7 @@ L(test_vec):
 subl %edx, %eax
 # endif
 # endif
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
 .p2align 4
L(test_2_vec):
@@ -444,8 +453,7 @@ L(test_2_vec):
 subl %edx, %eax
 # endif
 # endif
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
 .p2align 4
L(test_3_vec):
@@ -486,8 +494,7 @@ L(test_3_vec):
 subl %edx, %eax
 # endif
 # endif
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
 .p2align 4
L(loop_cross_page):
@@ -556,8 +563,7 @@ L(loop_cross_page):
 subl %edx, %eax
 # endif
 # endif
- VZEROUPPER
- ret
+ VZEROUPPER_RETURN
 .p2align 4
L(loop_cross_page_2_vec):
@@ -591,7 +597,14 @@ L(loop_cross_page_2_vec):
 movl $(PAGE_SIZE / (VEC_SIZE * 4) - 1), %esi
 testq %rdi, %rdi
+# ifdef USE_AS_STRNCMP
+ /* At this point, if the %rdi value is 0, the VEC_SIZE*4+%r10
+ bytes starting from %rax have already been tested. This label
+ checks whether the strncmp maximum offset has been reached. */
+ je L(string_nbyte_offset_check)
+# else
 je L(back_to_loop)
+# endif
 tzcntq %rdi, %rcx
 addq %r10, %rcx
 /* Adjust for number of bytes skipped.
*/ @@ -624,8 +637,15 @@ L(loop_cross_page_2_vec): subl %edx, %eax # endif # endif - VZEROUPPER - ret + VZEROUPPER_RETURN + +# ifdef USE_AS_STRNCMP +L(string_nbyte_offset_check): + leaq (VEC_SIZE * 4)(%r10), %r10 + cmpq %r10, %r11 + jbe L(zero) + jmp L(back_to_loop) +# endif .p2align 4 L(cross_page_loop): @@ -659,8 +679,7 @@ L(cross_page_loop): # ifndef USE_AS_WCSCMP L(different): # endif - VZEROUPPER - ret + VZEROUPPER_RETURN # ifdef USE_AS_WCSCMP .p2align 4 @@ -670,16 +689,14 @@ L(different): setl %al negl %eax orl $1, %eax - VZEROUPPER - ret + VZEROUPPER_RETURN # endif # ifdef USE_AS_STRNCMP .p2align 4 L(zero): xorl %eax, %eax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(char0): @@ -693,8 +710,7 @@ L(char0): movzbl (%rdi), %eax subl %ecx, %eax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN # endif .p2align 4 @@ -719,8 +735,7 @@ L(last_vector): movzbl (%rsi, %rdx), %edx subl %edx, %eax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN /* Comparing on page boundary region requires special treatment: It must done one vector at the time, starting with the wider @@ -841,7 +856,6 @@ L(cross_page_4bytes): testl %eax, %eax jne L(cross_page_loop) subl %ecx, %eax - VZEROUPPER - ret + VZEROUPPER_RETURN END (STRCMP) #endif diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S new file mode 100644 index 000000000..459eeed09 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strcmp-evex.S @@ -0,0 +1,1043 @@ +/* strcmp/wcscmp/strncmp/wcsncmp optimized with 256-bit EVEX instructions. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +# include + +# ifndef STRCMP +# define STRCMP __strcmp_evex +# endif + +# define PAGE_SIZE 4096 + +/* VEC_SIZE = Number of bytes in a ymm register */ +# define VEC_SIZE 32 + +/* Shift for dividing by (VEC_SIZE * 4). */ +# define DIVIDE_BY_VEC_4_SHIFT 7 +# if (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT) +# error (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT) +# endif + +# define VMOVU vmovdqu64 +# define VMOVA vmovdqa64 + +# ifdef USE_AS_WCSCMP +/* Compare packed dwords. */ +# define VPCMP vpcmpd +# define SHIFT_REG32 r8d +# define SHIFT_REG64 r8 +/* 1 dword char == 4 bytes. */ +# define SIZE_OF_CHAR 4 +# else +/* Compare packed bytes. */ +# define VPCMP vpcmpb +# define SHIFT_REG32 ecx +# define SHIFT_REG64 rcx +/* 1 byte char == 1 byte. */ +# define SIZE_OF_CHAR 1 +# endif + +# define XMMZERO xmm16 +# define XMM0 xmm17 +# define XMM1 xmm18 + +# define YMMZERO ymm16 +# define YMM0 ymm17 +# define YMM1 ymm18 +# define YMM2 ymm19 +# define YMM3 ymm20 +# define YMM4 ymm21 +# define YMM5 ymm22 +# define YMM6 ymm23 +# define YMM7 ymm24 + +/* Warning! + wcscmp/wcsncmp have to use SIGNED comparison for elements. + strcmp/strncmp have to use UNSIGNED comparison for elements. 
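+ For example, for the bytes 0xff and 0x01 the unsigned byte compare
+ used by strcmp yields greater, while a signed compare would wrongly
+ yield less.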
+*/
+
+/* The main idea of the string comparison (byte or dword) using 256-bit
+ EVEX instructions consists of comparing (VPCMP) two ymm vectors. The
+ latter can be on either packed bytes or dwords depending on
+ USE_AS_WCSCMP. In order to check the null char, the algorithm keeps
+ the matched bytes/dwords, requiring 5 EVEX instructions (3 VPCMP and
+ 2 KORD). In general, the costs of comparing VEC_SIZE bytes (32 bytes)
+ are 3 VPCMP and 2 KORD instructions, together with VMOVU and ktestd
+ instructions. The main loop (away from the page boundary) compares 4
+ vectors at a time, effectively comparing 4 x VEC_SIZE bytes (128
+ bytes) on each loop.
+
+ The strncmp/wcsncmp logic (enabled by defining USE_AS_STRNCMP) is
+ the same as strcmp, except that a maximum offset is tracked. If
+ the maximum offset is reached before a difference is found, zero is
+ returned. */
+
+ .section .text.evex,"ax",@progbits
+ENTRY (STRCMP)
+# ifdef USE_AS_STRNCMP
+ /* Check for simple cases (0 or 1) in offset. */
+ cmp $1, %RDX_LP
+ je L(char0)
+ jb L(zero)
+# ifdef USE_AS_WCSCMP
+ /* Convert units: from wide to byte char. */
+ shl $2, %RDX_LP
+# endif
+ /* Register %r11 tracks the maximum offset. */
+ mov %RDX_LP, %R11_LP
+# endif
+ movl %edi, %eax
+ xorl %edx, %edx
+ /* Make %XMMZERO (%YMMZERO) all zeros in this function. */
+ vpxorq %XMMZERO, %XMMZERO, %XMMZERO
+ orl %esi, %eax
+ andl $(PAGE_SIZE - 1), %eax
+ cmpl $(PAGE_SIZE - (VEC_SIZE * 4)), %eax
+ jg L(cross_page)
+ /* Start comparing 4 vectors. */
+ VMOVU (%rdi), %YMM0
+ VMOVU (%rsi), %YMM1
+
+ /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */
+ VPCMP $4, %YMM0, %YMM1, %k0
+
+ /* Check for NULL in YMM0. */
+ VPCMP $0, %YMMZERO, %YMM0, %k1
+ /* Check for NULL in YMM1. */
+ VPCMP $0, %YMMZERO, %YMM1, %k2
+ /* Each bit in K1 represents a NULL in YMM0 or YMM1. */
+ kord %k1, %k2, %k1
+
+ /* Each bit in K1 represents:
+ 1. A mismatch in YMM0 and YMM1. Or
+ 2. A NULL in YMM0 or YMM1.
+ */
+ kord %k0, %k1, %k1
+
+ ktestd %k1, %k1
+ je L(next_3_vectors)
+ kmovd %k1, %ecx
+ tzcntl %ecx, %edx
+# ifdef USE_AS_WCSCMP
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
+ sall $2, %edx
+# endif
+# ifdef USE_AS_STRNCMP
+ /* Return 0 if the mismatched index (%rdx) is after the maximum
+ offset (%r11). */
+ cmpq %r11, %rdx
+ jae L(zero)
+# endif
+# ifdef USE_AS_WCSCMP
+ xorl %eax, %eax
+ movl (%rdi, %rdx), %ecx
+ cmpl (%rsi, %rdx), %ecx
+ je L(return)
+L(wcscmp_return):
+ setl %al
+ negl %eax
+ orl $1, %eax
+L(return):
+# else
+ movzbl (%rdi, %rdx), %eax
+ movzbl (%rsi, %rdx), %edx
+ subl %edx, %eax
+# endif
+ ret
+
+ .p2align 4
+L(return_vec_size):
+ kmovd %k1, %ecx
+ tzcntl %ecx, %edx
+# ifdef USE_AS_WCSCMP
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
+ sall $2, %edx
+# endif
+# ifdef USE_AS_STRNCMP
+ /* Return 0 if the mismatched index (%rdx + VEC_SIZE) is after
+ the maximum offset (%r11).
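+ The vector offset is folded into %rdx first, so a single cmpq
+ decides it.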
*/ + addq $VEC_SIZE, %rdx + cmpq %r11, %rdx + jae L(zero) +# ifdef USE_AS_WCSCMP + xorl %eax, %eax + movl (%rdi, %rdx), %ecx + cmpl (%rsi, %rdx), %ecx + jne L(wcscmp_return) +# else + movzbl (%rdi, %rdx), %eax + movzbl (%rsi, %rdx), %edx + subl %edx, %eax +# endif +# else +# ifdef USE_AS_WCSCMP + xorl %eax, %eax + movl VEC_SIZE(%rdi, %rdx), %ecx + cmpl VEC_SIZE(%rsi, %rdx), %ecx + jne L(wcscmp_return) +# else + movzbl VEC_SIZE(%rdi, %rdx), %eax + movzbl VEC_SIZE(%rsi, %rdx), %edx + subl %edx, %eax +# endif +# endif + ret + + .p2align 4 +L(return_2_vec_size): + kmovd %k1, %ecx + tzcntl %ecx, %edx +# ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %edx +# endif +# ifdef USE_AS_STRNCMP + /* Return 0 if the mismatched index (%rdx + 2 * VEC_SIZE) is + after the maximum offset (%r11). */ + addq $(VEC_SIZE * 2), %rdx + cmpq %r11, %rdx + jae L(zero) +# ifdef USE_AS_WCSCMP + xorl %eax, %eax + movl (%rdi, %rdx), %ecx + cmpl (%rsi, %rdx), %ecx + jne L(wcscmp_return) +# else + movzbl (%rdi, %rdx), %eax + movzbl (%rsi, %rdx), %edx + subl %edx, %eax +# endif +# else +# ifdef USE_AS_WCSCMP + xorl %eax, %eax + movl (VEC_SIZE * 2)(%rdi, %rdx), %ecx + cmpl (VEC_SIZE * 2)(%rsi, %rdx), %ecx + jne L(wcscmp_return) +# else + movzbl (VEC_SIZE * 2)(%rdi, %rdx), %eax + movzbl (VEC_SIZE * 2)(%rsi, %rdx), %edx + subl %edx, %eax +# endif +# endif + ret + + .p2align 4 +L(return_3_vec_size): + kmovd %k1, %ecx + tzcntl %ecx, %edx +# ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %edx +# endif +# ifdef USE_AS_STRNCMP + /* Return 0 if the mismatched index (%rdx + 3 * VEC_SIZE) is + after the maximum offset (%r11). */ + addq $(VEC_SIZE * 3), %rdx + cmpq %r11, %rdx + jae L(zero) +# ifdef USE_AS_WCSCMP + xorl %eax, %eax + movl (%rdi, %rdx), %ecx + cmpl (%rsi, %rdx), %ecx + jne L(wcscmp_return) +# else + movzbl (%rdi, %rdx), %eax + movzbl (%rsi, %rdx), %edx + subl %edx, %eax +# endif +# else +# ifdef USE_AS_WCSCMP + xorl %eax, %eax + movl (VEC_SIZE * 3)(%rdi, %rdx), %ecx + cmpl (VEC_SIZE * 3)(%rsi, %rdx), %ecx + jne L(wcscmp_return) +# else + movzbl (VEC_SIZE * 3)(%rdi, %rdx), %eax + movzbl (VEC_SIZE * 3)(%rsi, %rdx), %edx + subl %edx, %eax +# endif +# endif + ret + + .p2align 4 +L(next_3_vectors): + VMOVU VEC_SIZE(%rdi), %YMM0 + VMOVU VEC_SIZE(%rsi), %YMM1 + /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */ + VPCMP $4, %YMM0, %YMM1, %k0 + VPCMP $0, %YMMZERO, %YMM0, %k1 + VPCMP $0, %YMMZERO, %YMM1, %k2 + /* Each bit in K1 represents a NULL in YMM0 or YMM1. */ + kord %k1, %k2, %k1 + /* Each bit in K1 represents a NULL or a mismatch. */ + kord %k0, %k1, %k1 + ktestd %k1, %k1 + jne L(return_vec_size) + + VMOVU (VEC_SIZE * 2)(%rdi), %YMM2 + VMOVU (VEC_SIZE * 3)(%rdi), %YMM3 + VMOVU (VEC_SIZE * 2)(%rsi), %YMM4 + VMOVU (VEC_SIZE * 3)(%rsi), %YMM5 + + /* Each bit in K0 represents a mismatch in YMM2 and YMM4. */ + VPCMP $4, %YMM2, %YMM4, %k0 + VPCMP $0, %YMMZERO, %YMM2, %k1 + VPCMP $0, %YMMZERO, %YMM4, %k2 + /* Each bit in K1 represents a NULL in YMM2 or YMM4. */ + kord %k1, %k2, %k1 + /* Each bit in K1 represents a NULL or a mismatch. */ + kord %k0, %k1, %k1 + ktestd %k1, %k1 + jne L(return_2_vec_size) + + /* Each bit in K0 represents a mismatch in YMM3 and YMM5. */ + VPCMP $4, %YMM3, %YMM5, %k0 + VPCMP $0, %YMMZERO, %YMM3, %k1 + VPCMP $0, %YMMZERO, %YMM5, %k2 + /* Each bit in K1 represents a NULL in YMM3 or YMM5. */ + kord %k1, %k2, %k1 + /* Each bit in K1 represents a NULL or a mismatch. 
*/
+ kord %k0, %k1, %k1
+ ktestd %k1, %k1
+ jne L(return_3_vec_size)
+L(main_loop_header):
+ leaq (VEC_SIZE * 4)(%rdi), %rdx
+ movl $PAGE_SIZE, %ecx
+ /* Align load via RAX. */
+ andq $-(VEC_SIZE * 4), %rdx
+ subq %rdi, %rdx
+ leaq (%rdi, %rdx), %rax
+# ifdef USE_AS_STRNCMP
+ /* Starting from this point, the maximum offset, or simply the
+ 'offset', DECREASES by the same amount when base pointers are
+ moved forward. Return 0 when:
+ 1) On match: offset <= the matched vector index.
+ 2) On mismatch: offset is before the mismatched index.
+ */
+ subq %rdx, %r11
+ jbe L(zero)
+# endif
+ addq %rsi, %rdx
+ movq %rdx, %rsi
+ andl $(PAGE_SIZE - 1), %esi
+ /* Number of bytes before page crossing. */
+ subq %rsi, %rcx
+ /* Number of VEC_SIZE * 4 blocks before page crossing. */
+ shrq $DIVIDE_BY_VEC_4_SHIFT, %rcx
+ /* ESI: Number of VEC_SIZE * 4 blocks before page crossing. */
+ movl %ecx, %esi
+ jmp L(loop_start)
+
+ .p2align 4
+L(loop):
+# ifdef USE_AS_STRNCMP
+ /* Base pointers are moved forward by 4 * VEC_SIZE. Decrease
+ the maximum offset (%r11) by the same amount. */
+ subq $(VEC_SIZE * 4), %r11
+ jbe L(zero)
+# endif
+ addq $(VEC_SIZE * 4), %rax
+ addq $(VEC_SIZE * 4), %rdx
+L(loop_start):
+ testl %esi, %esi
+ leal -1(%esi), %esi
+ je L(loop_cross_page)
+L(back_to_loop):
+ /* Main loop, comparing 4 vectors at a time. */
+ VMOVA (%rax), %YMM0
+ VMOVA VEC_SIZE(%rax), %YMM2
+ VMOVA (VEC_SIZE * 2)(%rax), %YMM4
+ VMOVA (VEC_SIZE * 3)(%rax), %YMM6
+ VMOVU (%rdx), %YMM1
+ VMOVU VEC_SIZE(%rdx), %YMM3
+ VMOVU (VEC_SIZE * 2)(%rdx), %YMM5
+ VMOVU (VEC_SIZE * 3)(%rdx), %YMM7
+
+ VPCMP $4, %YMM0, %YMM1, %k0
+ VPCMP $0, %YMMZERO, %YMM0, %k1
+ VPCMP $0, %YMMZERO, %YMM1, %k2
+ kord %k1, %k2, %k1
+ /* Each bit in K4 represents a NULL or a mismatch in YMM0 and
+ YMM1. */
+ kord %k0, %k1, %k4
+
+ VPCMP $4, %YMM2, %YMM3, %k0
+ VPCMP $0, %YMMZERO, %YMM2, %k1
+ VPCMP $0, %YMMZERO, %YMM3, %k2
+ kord %k1, %k2, %k1
+ /* Each bit in K5 represents a NULL or a mismatch in YMM2 and
+ YMM3. */
+ kord %k0, %k1, %k5
+
+ VPCMP $4, %YMM4, %YMM5, %k0
+ VPCMP $0, %YMMZERO, %YMM4, %k1
+ VPCMP $0, %YMMZERO, %YMM5, %k2
+ kord %k1, %k2, %k1
+ /* Each bit in K6 represents a NULL or a mismatch in YMM4 and
+ YMM5. */
+ kord %k0, %k1, %k6
+
+ VPCMP $4, %YMM6, %YMM7, %k0
+ VPCMP $0, %YMMZERO, %YMM6, %k1
+ VPCMP $0, %YMMZERO, %YMM7, %k2
+ kord %k1, %k2, %k1
+ /* Each bit in K7 represents a NULL or a mismatch in YMM6 and
+ YMM7. */
+ kord %k0, %k1, %k7
+
+ kord %k4, %k5, %k0
+ kord %k6, %k7, %k1
+
+ /* Test each mask (32 bits) individually because for VEC_SIZE
+ == 32 it is not possible to OR the four masks and keep all
+ bits in a 64-bit integer register, differing from SSE2 strcmp
+ where ORing is possible. */
+ kortestd %k0, %k1
+ je L(loop)
+ ktestd %k4, %k4
+ je L(test_vec)
+ kmovd %k4, %edi
+ tzcntl %edi, %ecx
+# ifdef USE_AS_WCSCMP
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
+ sall $2, %ecx
+# endif
+# ifdef USE_AS_STRNCMP
+ cmpq %rcx, %r11
+ jbe L(zero)
+# ifdef USE_AS_WCSCMP
+ movq %rax, %rsi
+ xorl %eax, %eax
+ movl (%rsi, %rcx), %edi
+ cmpl (%rdx, %rcx), %edi
+ jne L(wcscmp_return)
+# else
+ movzbl (%rax, %rcx), %eax
+ movzbl (%rdx, %rcx), %edx
+ subl %edx, %eax
+# endif
+# else
+# ifdef USE_AS_WCSCMP
+ movq %rax, %rsi
+ xorl %eax, %eax
+ movl (%rsi, %rcx), %edi
+ cmpl (%rdx, %rcx), %edi
+ jne L(wcscmp_return)
+# else
+ movzbl (%rax, %rcx), %eax
+ movzbl (%rdx, %rcx), %edx
+ subl %edx, %eax
+# endif
+# endif
+ ret
+
+ .p2align 4
+L(test_vec):
+# ifdef USE_AS_STRNCMP
+ /* The first vector matched.
Return 0 if the maximum offset + (%r11) <= VEC_SIZE. */ + cmpq $VEC_SIZE, %r11 + jbe L(zero) +# endif + ktestd %k5, %k5 + je L(test_2_vec) + kmovd %k5, %ecx + tzcntl %ecx, %edi +# ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %edi +# endif +# ifdef USE_AS_STRNCMP + addq $VEC_SIZE, %rdi + cmpq %rdi, %r11 + jbe L(zero) +# ifdef USE_AS_WCSCMP + movq %rax, %rsi + xorl %eax, %eax + movl (%rsi, %rdi), %ecx + cmpl (%rdx, %rdi), %ecx + jne L(wcscmp_return) +# else + movzbl (%rax, %rdi), %eax + movzbl (%rdx, %rdi), %edx + subl %edx, %eax +# endif +# else +# ifdef USE_AS_WCSCMP + movq %rax, %rsi + xorl %eax, %eax + movl VEC_SIZE(%rsi, %rdi), %ecx + cmpl VEC_SIZE(%rdx, %rdi), %ecx + jne L(wcscmp_return) +# else + movzbl VEC_SIZE(%rax, %rdi), %eax + movzbl VEC_SIZE(%rdx, %rdi), %edx + subl %edx, %eax +# endif +# endif + ret + + .p2align 4 +L(test_2_vec): +# ifdef USE_AS_STRNCMP + /* The first 2 vectors matched. Return 0 if the maximum offset + (%r11) <= 2 * VEC_SIZE. */ + cmpq $(VEC_SIZE * 2), %r11 + jbe L(zero) +# endif + ktestd %k6, %k6 + je L(test_3_vec) + kmovd %k6, %ecx + tzcntl %ecx, %edi +# ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %edi +# endif +# ifdef USE_AS_STRNCMP + addq $(VEC_SIZE * 2), %rdi + cmpq %rdi, %r11 + jbe L(zero) +# ifdef USE_AS_WCSCMP + movq %rax, %rsi + xorl %eax, %eax + movl (%rsi, %rdi), %ecx + cmpl (%rdx, %rdi), %ecx + jne L(wcscmp_return) +# else + movzbl (%rax, %rdi), %eax + movzbl (%rdx, %rdi), %edx + subl %edx, %eax +# endif +# else +# ifdef USE_AS_WCSCMP + movq %rax, %rsi + xorl %eax, %eax + movl (VEC_SIZE * 2)(%rsi, %rdi), %ecx + cmpl (VEC_SIZE * 2)(%rdx, %rdi), %ecx + jne L(wcscmp_return) +# else + movzbl (VEC_SIZE * 2)(%rax, %rdi), %eax + movzbl (VEC_SIZE * 2)(%rdx, %rdi), %edx + subl %edx, %eax +# endif +# endif + ret + + .p2align 4 +L(test_3_vec): +# ifdef USE_AS_STRNCMP + /* The first 3 vectors matched. Return 0 if the maximum offset + (%r11) <= 3 * VEC_SIZE. */ + cmpq $(VEC_SIZE * 3), %r11 + jbe L(zero) +# endif + kmovd %k7, %esi + tzcntl %esi, %ecx +# ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %ecx +# endif +# ifdef USE_AS_STRNCMP + addq $(VEC_SIZE * 3), %rcx + cmpq %rcx, %r11 + jbe L(zero) +# ifdef USE_AS_WCSCMP + movq %rax, %rsi + xorl %eax, %eax + movl (%rsi, %rcx), %esi + cmpl (%rdx, %rcx), %esi + jne L(wcscmp_return) +# else + movzbl (%rax, %rcx), %eax + movzbl (%rdx, %rcx), %edx + subl %edx, %eax +# endif +# else +# ifdef USE_AS_WCSCMP + movq %rax, %rsi + xorl %eax, %eax + movl (VEC_SIZE * 3)(%rsi, %rcx), %esi + cmpl (VEC_SIZE * 3)(%rdx, %rcx), %esi + jne L(wcscmp_return) +# else + movzbl (VEC_SIZE * 3)(%rax, %rcx), %eax + movzbl (VEC_SIZE * 3)(%rdx, %rcx), %edx + subl %edx, %eax +# endif +# endif + ret + + .p2align 4 +L(loop_cross_page): + xorl %r10d, %r10d + movq %rdx, %rcx + /* Align load via RDX. We load the extra ECX bytes which should + be ignored. */ + andl $((VEC_SIZE * 4) - 1), %ecx + /* R10 is -RCX. */ + subq %rcx, %r10 + + /* This works only if VEC_SIZE * 2 == 64. */ +# if (VEC_SIZE * 2) != 64 +# error (VEC_SIZE * 2) != 64 +# endif + + /* Check if the first VEC_SIZE * 2 bytes should be ignored. 
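+ These bytes precede the current position and were already compared
+ by earlier iterations.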
*/ + cmpl $(VEC_SIZE * 2), %ecx + jge L(loop_cross_page_2_vec) + + VMOVU (%rax, %r10), %YMM2 + VMOVU VEC_SIZE(%rax, %r10), %YMM3 + VMOVU (%rdx, %r10), %YMM4 + VMOVU VEC_SIZE(%rdx, %r10), %YMM5 + + VPCMP $4, %YMM4, %YMM2, %k0 + VPCMP $0, %YMMZERO, %YMM2, %k1 + VPCMP $0, %YMMZERO, %YMM4, %k2 + kord %k1, %k2, %k1 + /* Each bit in K1 represents a NULL or a mismatch in YMM2 and + YMM4. */ + kord %k0, %k1, %k1 + + VPCMP $4, %YMM5, %YMM3, %k3 + VPCMP $0, %YMMZERO, %YMM3, %k4 + VPCMP $0, %YMMZERO, %YMM5, %k5 + kord %k4, %k5, %k4 + /* Each bit in K3 represents a NULL or a mismatch in YMM3 and + YMM5. */ + kord %k3, %k4, %k3 + +# ifdef USE_AS_WCSCMP + /* NB: Each bit in K1/K3 represents 4-byte element. */ + kshiftlw $8, %k3, %k2 + /* NB: Divide shift count by 4 since each bit in K1 represent 4 + bytes. */ + movl %ecx, %SHIFT_REG32 + sarl $2, %SHIFT_REG32 +# else + kshiftlq $32, %k3, %k2 +# endif + + /* Each bit in K1 represents a NULL or a mismatch. */ + korq %k1, %k2, %k1 + kmovq %k1, %rdi + + /* Since ECX < VEC_SIZE * 2, simply skip the first ECX bytes. */ + shrxq %SHIFT_REG64, %rdi, %rdi + testq %rdi, %rdi + je L(loop_cross_page_2_vec) + tzcntq %rdi, %rcx +# ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %ecx +# endif +# ifdef USE_AS_STRNCMP + cmpq %rcx, %r11 + jbe L(zero) +# ifdef USE_AS_WCSCMP + movq %rax, %rsi + xorl %eax, %eax + movl (%rsi, %rcx), %edi + cmpl (%rdx, %rcx), %edi + jne L(wcscmp_return) +# else + movzbl (%rax, %rcx), %eax + movzbl (%rdx, %rcx), %edx + subl %edx, %eax +# endif +# else +# ifdef USE_AS_WCSCMP + movq %rax, %rsi + xorl %eax, %eax + movl (%rsi, %rcx), %edi + cmpl (%rdx, %rcx), %edi + jne L(wcscmp_return) +# else + movzbl (%rax, %rcx), %eax + movzbl (%rdx, %rcx), %edx + subl %edx, %eax +# endif +# endif + ret + + .p2align 4 +L(loop_cross_page_2_vec): + /* The first VEC_SIZE * 2 bytes match or are ignored. */ + VMOVU (VEC_SIZE * 2)(%rax, %r10), %YMM0 + VMOVU (VEC_SIZE * 3)(%rax, %r10), %YMM1 + VMOVU (VEC_SIZE * 2)(%rdx, %r10), %YMM2 + VMOVU (VEC_SIZE * 3)(%rdx, %r10), %YMM3 + + VPCMP $4, %YMM0, %YMM2, %k0 + VPCMP $0, %YMMZERO, %YMM0, %k1 + VPCMP $0, %YMMZERO, %YMM2, %k2 + kord %k1, %k2, %k1 + /* Each bit in K1 represents a NULL or a mismatch in YMM0 and + YMM2. */ + kord %k0, %k1, %k1 + + VPCMP $4, %YMM1, %YMM3, %k3 + VPCMP $0, %YMMZERO, %YMM1, %k4 + VPCMP $0, %YMMZERO, %YMM3, %k5 + kord %k4, %k5, %k4 + /* Each bit in K3 represents a NULL or a mismatch in YMM1 and + YMM3. */ + kord %k3, %k4, %k3 + +# ifdef USE_AS_WCSCMP + /* NB: Each bit in K1/K3 represents 4-byte element. */ + kshiftlw $8, %k3, %k2 +# else + kshiftlq $32, %k3, %k2 +# endif + + /* Each bit in K1 represents a NULL or a mismatch. */ + korq %k1, %k2, %k1 + kmovq %k1, %rdi + + xorl %r8d, %r8d + /* If ECX > VEC_SIZE * 2, skip ECX - (VEC_SIZE * 2) bytes. */ + subl $(VEC_SIZE * 2), %ecx + jle 1f + /* R8 has number of bytes skipped. */ + movl %ecx, %r8d +# ifdef USE_AS_WCSCMP + /* NB: Divide shift count by 4 since each bit in K1 represent 4 + bytes. */ + sarl $2, %ecx +# endif + /* Skip ECX bytes. */ + shrq %cl, %rdi +1: + /* Before jumping back to the loop, set ESI to the number of + VEC_SIZE * 4 blocks before page crossing. */ + movl $(PAGE_SIZE / (VEC_SIZE * 4) - 1), %esi + + testq %rdi, %rdi +# ifdef USE_AS_STRNCMP + /* At this point, if %rdi value is 0, it already tested + VEC_SIZE*4+%r10 byte starting from %rax. This label + checks whether strncmp maximum offset reached or not. 
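+ It compares the bound in %r11 against VEC_SIZE * 4 plus the negative
+ skip offset in %r10, i.e. the bytes this iteration has already
+ covered, and returns 0 when the bound falls inside them.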
*/
+ je L(string_nbyte_offset_check)
+# else
+ je L(back_to_loop)
+# endif
+ tzcntq %rdi, %rcx
+# ifdef USE_AS_WCSCMP
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
+ sall $2, %ecx
+# endif
+ addq %r10, %rcx
+ /* Adjust for number of bytes skipped. */
+ addq %r8, %rcx
+# ifdef USE_AS_STRNCMP
+ addq $(VEC_SIZE * 2), %rcx
+ subq %rcx, %r11
+ jbe L(zero)
+# ifdef USE_AS_WCSCMP
+ movq %rax, %rsi
+ xorl %eax, %eax
+ movl (%rsi, %rcx), %edi
+ cmpl (%rdx, %rcx), %edi
+ jne L(wcscmp_return)
+# else
+ movzbl (%rax, %rcx), %eax
+ movzbl (%rdx, %rcx), %edx
+ subl %edx, %eax
+# endif
+# else
+# ifdef USE_AS_WCSCMP
+ movq %rax, %rsi
+ xorl %eax, %eax
+ movl (VEC_SIZE * 2)(%rsi, %rcx), %edi
+ cmpl (VEC_SIZE * 2)(%rdx, %rcx), %edi
+ jne L(wcscmp_return)
+# else
+ movzbl (VEC_SIZE * 2)(%rax, %rcx), %eax
+ movzbl (VEC_SIZE * 2)(%rdx, %rcx), %edx
+ subl %edx, %eax
+# endif
+# endif
+ ret
+
+# ifdef USE_AS_STRNCMP
+L(string_nbyte_offset_check):
+ leaq (VEC_SIZE * 4)(%r10), %r10
+ cmpq %r10, %r11
+ jbe L(zero)
+ jmp L(back_to_loop)
+# endif
+
+ .p2align 4
+L(cross_page_loop):
+ /* Check one byte/dword at a time. */
+# ifdef USE_AS_WCSCMP
+ cmpl %ecx, %eax
+# else
+ subl %ecx, %eax
+# endif
+ jne L(different)
+ addl $SIZE_OF_CHAR, %edx
+ cmpl $(VEC_SIZE * 4), %edx
+ je L(main_loop_header)
+# ifdef USE_AS_STRNCMP
+ cmpq %r11, %rdx
+ jae L(zero)
+# endif
+# ifdef USE_AS_WCSCMP
+ movl (%rdi, %rdx), %eax
+ movl (%rsi, %rdx), %ecx
+# else
+ movzbl (%rdi, %rdx), %eax
+ movzbl (%rsi, %rdx), %ecx
+# endif
+ /* Check null char. */
+ testl %eax, %eax
+ jne L(cross_page_loop)
+ /* Since %eax == 0, subtract is OK for both SIGNED and UNSIGNED
+ comparisons. */
+ subl %ecx, %eax
+# ifndef USE_AS_WCSCMP
+L(different):
+# endif
+ ret
+
+# ifdef USE_AS_WCSCMP
+ .p2align 4
+L(different):
+ /* Use movl to avoid modifying EFLAGS. */
+ movl $0, %eax
+ setl %al
+ negl %eax
+ orl $1, %eax
+ ret
+# endif
+
+# ifdef USE_AS_STRNCMP
+ .p2align 4
+L(zero):
+ xorl %eax, %eax
+ ret
+
+ .p2align 4
+L(char0):
+# ifdef USE_AS_WCSCMP
+ xorl %eax, %eax
+ movl (%rdi), %ecx
+ cmpl (%rsi), %ecx
+ jne L(wcscmp_return)
+# else
+ movzbl (%rsi), %ecx
+ movzbl (%rdi), %eax
+ subl %ecx, %eax
+# endif
+ ret
+# endif
+
+ .p2align 4
+L(last_vector):
+ addq %rdx, %rdi
+ addq %rdx, %rsi
+# ifdef USE_AS_STRNCMP
+ subq %rdx, %r11
+# endif
+ tzcntl %ecx, %edx
+# ifdef USE_AS_WCSCMP
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
+ sall $2, %edx
+# endif
+# ifdef USE_AS_STRNCMP
+ cmpq %r11, %rdx
+ jae L(zero)
+# endif
+# ifdef USE_AS_WCSCMP
+ xorl %eax, %eax
+ movl (%rdi, %rdx), %ecx
+ cmpl (%rsi, %rdx), %ecx
+ jne L(wcscmp_return)
+# else
+ movzbl (%rdi, %rdx), %eax
+ movzbl (%rsi, %rdx), %edx
+ subl %edx, %eax
+# endif
+ ret
+
+ /* Comparing on a page boundary region requires special treatment:
+ It must be done one vector at a time, starting with the wider
+ ymm vector if possible, if not, with xmm. If fetching 16 bytes
+ (xmm) still passes the boundary, byte comparison must be done.
+ */
+ .p2align 4
+L(cross_page):
+ /* Try one ymm vector at a time. */
+ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
+ jg L(cross_page_1_vector)
+L(loop_1_vector):
+ VMOVU (%rdi, %rdx), %YMM0
+ VMOVU (%rsi, %rdx), %YMM1
+
+ /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */
+ VPCMP $4, %YMM0, %YMM1, %k0
+ VPCMP $0, %YMMZERO, %YMM0, %k1
+ VPCMP $0, %YMMZERO, %YMM1, %k2
+ /* Each bit in K1 represents a NULL in YMM0 or YMM1. */
+ kord %k1, %k2, %k1
+ /* Each bit in K1 represents a NULL or a mismatch.
*/ + kord %k0, %k1, %k1 + kmovd %k1, %ecx + testl %ecx, %ecx + jne L(last_vector) + + addl $VEC_SIZE, %edx + + addl $VEC_SIZE, %eax +# ifdef USE_AS_STRNCMP + /* Return 0 if the current offset (%rdx) >= the maximum offset + (%r11). */ + cmpq %r11, %rdx + jae L(zero) +# endif + cmpl $(PAGE_SIZE - VEC_SIZE), %eax + jle L(loop_1_vector) +L(cross_page_1_vector): + /* Less than 32 bytes to check, try one xmm vector. */ + cmpl $(PAGE_SIZE - 16), %eax + jg L(cross_page_1_xmm) + VMOVU (%rdi, %rdx), %XMM0 + VMOVU (%rsi, %rdx), %XMM1 + + /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */ + VPCMP $4, %XMM0, %XMM1, %k0 + VPCMP $0, %XMMZERO, %XMM0, %k1 + VPCMP $0, %XMMZERO, %XMM1, %k2 + /* Each bit in K1 represents a NULL in XMM0 or XMM1. */ + korw %k1, %k2, %k1 + /* Each bit in K1 represents a NULL or a mismatch. */ + korw %k0, %k1, %k1 + kmovw %k1, %ecx + testl %ecx, %ecx + jne L(last_vector) + + addl $16, %edx +# ifndef USE_AS_WCSCMP + addl $16, %eax +# endif +# ifdef USE_AS_STRNCMP + /* Return 0 if the current offset (%rdx) >= the maximum offset + (%r11). */ + cmpq %r11, %rdx + jae L(zero) +# endif + +L(cross_page_1_xmm): +# ifndef USE_AS_WCSCMP + /* Less than 16 bytes to check, try 8 byte vector. NB: No need + for wcscmp nor wcsncmp since wide char is 4 bytes. */ + cmpl $(PAGE_SIZE - 8), %eax + jg L(cross_page_8bytes) + vmovq (%rdi, %rdx), %XMM0 + vmovq (%rsi, %rdx), %XMM1 + + /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */ + VPCMP $4, %XMM0, %XMM1, %k0 + VPCMP $0, %XMMZERO, %XMM0, %k1 + VPCMP $0, %XMMZERO, %XMM1, %k2 + /* Each bit in K1 represents a NULL in XMM0 or XMM1. */ + kord %k1, %k2, %k1 + /* Each bit in K1 represents a NULL or a mismatch. */ + kord %k0, %k1, %k1 + kmovd %k1, %ecx + +# ifdef USE_AS_WCSCMP + /* Only last 2 bits are valid. */ + andl $0x3, %ecx +# else + /* Only last 8 bits are valid. */ + andl $0xff, %ecx +# endif + + testl %ecx, %ecx + jne L(last_vector) + + addl $8, %edx + addl $8, %eax +# ifdef USE_AS_STRNCMP + /* Return 0 if the current offset (%rdx) >= the maximum offset + (%r11). */ + cmpq %r11, %rdx + jae L(zero) +# endif + +L(cross_page_8bytes): + /* Less than 8 bytes to check, try 4 byte vector. */ + cmpl $(PAGE_SIZE - 4), %eax + jg L(cross_page_4bytes) + vmovd (%rdi, %rdx), %XMM0 + vmovd (%rsi, %rdx), %XMM1 + + /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */ + VPCMP $4, %XMM0, %XMM1, %k0 + VPCMP $0, %XMMZERO, %XMM0, %k1 + VPCMP $0, %XMMZERO, %XMM1, %k2 + /* Each bit in K1 represents a NULL in XMM0 or XMM1. */ + kord %k1, %k2, %k1 + /* Each bit in K1 represents a NULL or a mismatch. */ + kord %k0, %k1, %k1 + kmovd %k1, %ecx + +# ifdef USE_AS_WCSCMP + /* Only the last bit is valid. */ + andl $0x1, %ecx +# else + /* Only last 4 bits are valid. */ + andl $0xf, %ecx +# endif + + testl %ecx, %ecx + jne L(last_vector) + + addl $4, %edx +# ifdef USE_AS_STRNCMP + /* Return 0 if the current offset (%rdx) >= the maximum offset + (%r11). */ + cmpq %r11, %rdx + jae L(zero) +# endif + +L(cross_page_4bytes): +# endif + /* Less than 4 bytes to check, try one byte/dword at a time. 
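+ This is the scalar tail: it loads a single byte/dword and re-enters
+ L(cross_page_loop) above until a null or a difference is found.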
*/ +# ifdef USE_AS_STRNCMP + cmpq %r11, %rdx + jae L(zero) +# endif +# ifdef USE_AS_WCSCMP + movl (%rdi, %rdx), %eax + movl (%rsi, %rdx), %ecx +# else + movzbl (%rdi, %rdx), %eax + movzbl (%rsi, %rdx), %ecx +# endif + testl %eax, %eax + jne L(cross_page_loop) + subl %ecx, %eax + ret +END (STRCMP) +#endif diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c index 4db7332ac..358fa9015 100644 --- a/sysdeps/x86_64/multiarch/strcmp.c +++ b/sysdeps/x86_64/multiarch/strcmp.c @@ -30,16 +30,29 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; static inline void * IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) - return OPTIMIZE (avx2); + { + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable) + && CPU_FEATURES_CPU_P (cpu_features, BMI2) + && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP)) + return OPTIMIZE (evex); + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + return OPTIMIZE (avx2_rtm); + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE (avx2); + } if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load)) return OPTIMIZE (sse2_unaligned); diff --git a/sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S new file mode 100644 index 000000000..c2c581ecf --- /dev/null +++ b/sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S @@ -0,0 +1,12 @@ +#ifndef STRCPY +# define STRCPY __strcpy_avx2_rtm +#endif + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + +#define SECTION(p) p##.avx.rtm + +#include "strcpy-avx2.S" diff --git a/sysdeps/x86_64/multiarch/strcpy-avx2.S b/sysdeps/x86_64/multiarch/strcpy-avx2.S index 3f2f9e817..1ce17253a 100644 --- a/sysdeps/x86_64/multiarch/strcpy-avx2.S +++ b/sysdeps/x86_64/multiarch/strcpy-avx2.S @@ -37,6 +37,10 @@ # define VZEROUPPER vzeroupper # endif +# ifndef SECTION +# define SECTION(p) p##.avx +# endif + /* zero register */ #define xmmZ xmm0 #define ymmZ ymm0 @@ -46,7 +50,7 @@ # ifndef USE_AS_STRCAT - .section .text.avx,"ax",@progbits + .section SECTION(.text),"ax",@progbits ENTRY (STRCPY) # ifdef USE_AS_STRNCPY mov %RDX_LP, %R8_LP @@ -369,8 +373,8 @@ L(CopyVecSizeExit): lea 1(%rdi), %rdi jnz L(StrncpyFillTailWithZero) # endif - VZEROUPPER - ret +L(return_vzeroupper): + ZERO_UPPER_VEC_REGISTERS_RETURN .p2align 4 L(CopyTwoVecSize1): @@ -553,8 +557,7 @@ L(Exit1): lea 2(%rdi), %rdi jnz L(StrncpyFillTailWithZero) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Exit2): @@ -569,8 +572,7 @@ L(Exit2): lea 3(%rdi), %rdi jnz L(StrncpyFillTailWithZero) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Exit3): @@ -584,8 +586,7 @@ L(Exit3): lea 4(%rdi), %rdi jnz L(StrncpyFillTailWithZero) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Exit4_7): @@ -602,8 +603,7 @@ 
L(Exit4_7): lea 1(%rdi, %rdx), %rdi jnz L(StrncpyFillTailWithZero) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Exit8_15): @@ -620,8 +620,7 @@ L(Exit8_15): lea 1(%rdi, %rdx), %rdi jnz L(StrncpyFillTailWithZero) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Exit16_31): @@ -638,8 +637,7 @@ L(Exit16_31): lea 1(%rdi, %rdx), %rdi jnz L(StrncpyFillTailWithZero) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Exit32_63): @@ -656,8 +654,7 @@ L(Exit32_63): lea 1(%rdi, %rdx), %rdi jnz L(StrncpyFillTailWithZero) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN # ifdef USE_AS_STRNCPY @@ -671,8 +668,7 @@ L(StrncpyExit1): # ifdef USE_AS_STRCAT movb $0, 1(%rdi) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(StrncpyExit2): @@ -684,8 +680,7 @@ L(StrncpyExit2): # ifdef USE_AS_STRCAT movb $0, 2(%rdi) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(StrncpyExit3_4): @@ -699,8 +694,7 @@ L(StrncpyExit3_4): # ifdef USE_AS_STRCAT movb $0, (%rdi, %r8) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(StrncpyExit5_8): @@ -714,8 +708,7 @@ L(StrncpyExit5_8): # ifdef USE_AS_STRCAT movb $0, (%rdi, %r8) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(StrncpyExit9_16): @@ -729,8 +722,7 @@ L(StrncpyExit9_16): # ifdef USE_AS_STRCAT movb $0, (%rdi, %r8) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(StrncpyExit17_32): @@ -744,8 +736,7 @@ L(StrncpyExit17_32): # ifdef USE_AS_STRCAT movb $0, (%rdi, %r8) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(StrncpyExit33_64): @@ -760,8 +751,7 @@ L(StrncpyExit33_64): # ifdef USE_AS_STRCAT movb $0, (%rdi, %r8) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(StrncpyExit65): @@ -778,50 +768,43 @@ L(StrncpyExit65): # ifdef USE_AS_STRCAT movb $0, 65(%rdi) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN # ifndef USE_AS_STRCAT .p2align 4 L(Fill1): mov %dl, (%rdi) - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Fill2): mov %dx, (%rdi) - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Fill3_4): mov %dx, (%rdi) mov %dx, -2(%rdi, %r8) - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Fill5_8): mov %edx, (%rdi) mov %edx, -4(%rdi, %r8) - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Fill9_16): mov %rdx, (%rdi) mov %rdx, -8(%rdi, %r8) - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Fill17_32): vmovdqu %xmmZ, (%rdi) vmovdqu %xmmZ, -16(%rdi, %r8) - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(CopyVecSizeUnalignedVec2): @@ -898,8 +881,7 @@ L(Fill): cmp $1, %r8d ja L(Fill2) je L(Fill1) - VZEROUPPER - ret + VZEROUPPER_RETURN /* end of ifndef USE_AS_STRCAT */ # endif @@ -929,8 +911,7 @@ L(UnalignedFourVecSizeLeaveCase3): # ifdef USE_AS_STRCAT movb $0, (VEC_SIZE * 4)(%rdi) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(UnalignedFourVecSizeLeaveCase2): @@ -1001,16 +982,14 @@ L(StrncpyExit): # ifdef USE_AS_STRCAT movb $0, (%rdi) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(ExitZero): # ifndef USE_AS_STRCAT mov %rdi, %rax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN # endif diff --git a/sysdeps/x86_64/multiarch/strcpy-evex.S b/sysdeps/x86_64/multiarch/strcpy-evex.S new file mode 100644 index 000000000..a343a1a69 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strcpy-evex.S @@ -0,0 +1,1003 @@ +/* strcpy with 256-bit EVEX instructions. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +# ifndef USE_AS_STRCAT +# include + +# ifndef STRCPY +# define STRCPY __strcpy_evex +# endif + +# endif + +# define VMOVU vmovdqu64 +# define VMOVA vmovdqa64 + +/* Number of bytes in a vector register */ +# ifndef VEC_SIZE +# define VEC_SIZE 32 +# endif + +# define XMM2 xmm18 +# define XMM3 xmm19 + +# define YMM2 ymm18 +# define YMM3 ymm19 +# define YMM4 ymm20 +# define YMM5 ymm21 +# define YMM6 ymm22 +# define YMM7 ymm23 + +# ifndef USE_AS_STRCAT + +/* zero register */ +# define XMMZERO xmm16 +# define YMMZERO ymm16 +# define YMM1 ymm17 + + .section .text.evex,"ax",@progbits +ENTRY (STRCPY) +# ifdef USE_AS_STRNCPY + mov %RDX_LP, %R8_LP + test %R8_LP, %R8_LP + jz L(ExitZero) +# endif + mov %rsi, %rcx +# ifndef USE_AS_STPCPY + mov %rdi, %rax /* save result */ +# endif + + vpxorq %XMMZERO, %XMMZERO, %XMMZERO +# endif + + and $((VEC_SIZE * 4) - 1), %ecx + cmp $(VEC_SIZE * 2), %ecx + jbe L(SourceStringAlignmentLessTwoVecSize) + + and $-VEC_SIZE, %rsi + and $(VEC_SIZE - 1), %ecx + + vpcmpb $0, (%rsi), %YMMZERO, %k0 + kmovd %k0, %edx + shr %cl, %rdx + +# ifdef USE_AS_STRNCPY +# if defined USE_AS_STPCPY || defined USE_AS_STRCAT + mov $VEC_SIZE, %r10 + sub %rcx, %r10 + cmp %r10, %r8 +# else + mov $(VEC_SIZE + 1), %r10 + sub %rcx, %r10 + cmp %r10, %r8 +# endif + jbe L(CopyVecSizeTailCase2OrCase3) +# endif + test %edx, %edx + jnz L(CopyVecSizeTail) + + vpcmpb $0, VEC_SIZE(%rsi), %YMMZERO, %k1 + kmovd %k1, %edx + +# ifdef USE_AS_STRNCPY + add $VEC_SIZE, %r10 + cmp %r10, %r8 + jbe L(CopyTwoVecSizeCase2OrCase3) +# endif + test %edx, %edx + jnz L(CopyTwoVecSize) + + VMOVU (%rsi, %rcx), %YMM2 /* copy VEC_SIZE bytes */ + VMOVU %YMM2, (%rdi) + +/* If source address alignment != destination address alignment */ + .p2align 4 +L(UnalignVecSizeBoth): + sub %rcx, %rdi +# ifdef USE_AS_STRNCPY + add %rcx, %r8 + sbb %rcx, %rcx + or %rcx, %r8 +# endif + mov $VEC_SIZE, %rcx + VMOVA (%rsi, %rcx), %YMM2 + VMOVU %YMM2, (%rdi, %rcx) + VMOVA VEC_SIZE(%rsi, %rcx), %YMM2 + vpcmpb $0, %YMM2, %YMMZERO, %k0 + kmovd %k0, %edx + add $VEC_SIZE, %rcx +# ifdef USE_AS_STRNCPY + sub $(VEC_SIZE * 3), %r8 + jbe L(CopyVecSizeCase2OrCase3) +# endif + test %edx, %edx +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + jnz L(CopyVecSizeUnalignedVec2) +# else + jnz L(CopyVecSize) +# endif + + VMOVU %YMM2, (%rdi, %rcx) + VMOVA VEC_SIZE(%rsi, %rcx), %YMM3 + vpcmpb $0, %YMM3, %YMMZERO, %k0 + kmovd %k0, %edx + add $VEC_SIZE, %rcx +# ifdef USE_AS_STRNCPY + sub $VEC_SIZE, %r8 + jbe L(CopyVecSizeCase2OrCase3) +# endif + test %edx, %edx +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + jnz L(CopyVecSizeUnalignedVec3) +# else + jnz L(CopyVecSize) +# endif + + VMOVU %YMM3, (%rdi, %rcx) + VMOVA VEC_SIZE(%rsi, %rcx), %YMM4 + vpcmpb $0, %YMM4, %YMMZERO, %k0 + kmovd %k0, %edx + add $VEC_SIZE, %rcx +# ifdef USE_AS_STRNCPY + sub $VEC_SIZE, %r8 + jbe L(CopyVecSizeCase2OrCase3) +# endif + 
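/* A nonzero %edx means the vector just scanned (YMM2) contains the
 null terminator. */
+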
test %edx, %edx +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + jnz L(CopyVecSizeUnalignedVec4) +# else + jnz L(CopyVecSize) +# endif + + VMOVU %YMM4, (%rdi, %rcx) + VMOVA VEC_SIZE(%rsi, %rcx), %YMM2 + vpcmpb $0, %YMM2, %YMMZERO, %k0 + kmovd %k0, %edx + add $VEC_SIZE, %rcx +# ifdef USE_AS_STRNCPY + sub $VEC_SIZE, %r8 + jbe L(CopyVecSizeCase2OrCase3) +# endif + test %edx, %edx +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + jnz L(CopyVecSizeUnalignedVec2) +# else + jnz L(CopyVecSize) +# endif + + VMOVU %YMM2, (%rdi, %rcx) + VMOVA VEC_SIZE(%rsi, %rcx), %YMM2 + vpcmpb $0, %YMM2, %YMMZERO, %k0 + kmovd %k0, %edx + add $VEC_SIZE, %rcx +# ifdef USE_AS_STRNCPY + sub $VEC_SIZE, %r8 + jbe L(CopyVecSizeCase2OrCase3) +# endif + test %edx, %edx +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + jnz L(CopyVecSizeUnalignedVec2) +# else + jnz L(CopyVecSize) +# endif + + VMOVA VEC_SIZE(%rsi, %rcx), %YMM3 + VMOVU %YMM2, (%rdi, %rcx) + vpcmpb $0, %YMM3, %YMMZERO, %k0 + kmovd %k0, %edx + add $VEC_SIZE, %rcx +# ifdef USE_AS_STRNCPY + sub $VEC_SIZE, %r8 + jbe L(CopyVecSizeCase2OrCase3) +# endif + test %edx, %edx +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + jnz L(CopyVecSizeUnalignedVec3) +# else + jnz L(CopyVecSize) +# endif + + VMOVU %YMM3, (%rdi, %rcx) + mov %rsi, %rdx + lea VEC_SIZE(%rsi, %rcx), %rsi + and $-(VEC_SIZE * 4), %rsi + sub %rsi, %rdx + sub %rdx, %rdi +# ifdef USE_AS_STRNCPY + lea (VEC_SIZE * 8)(%r8, %rdx), %r8 +# endif +L(UnalignedFourVecSizeLoop): + VMOVA (%rsi), %YMM4 + VMOVA VEC_SIZE(%rsi), %YMM5 + VMOVA (VEC_SIZE * 2)(%rsi), %YMM6 + VMOVA (VEC_SIZE * 3)(%rsi), %YMM7 + vpminub %YMM5, %YMM4, %YMM2 + vpminub %YMM7, %YMM6, %YMM3 + vpminub %YMM2, %YMM3, %YMM2 + /* If K7 != 0, there is a null byte. */ + vpcmpb $0, %YMM2, %YMMZERO, %k7 + kmovd %k7, %edx +# ifdef USE_AS_STRNCPY + sub $(VEC_SIZE * 4), %r8 + jbe L(UnalignedLeaveCase2OrCase3) +# endif + test %edx, %edx + jnz L(UnalignedFourVecSizeLeave) + +L(UnalignedFourVecSizeLoop_start): + add $(VEC_SIZE * 4), %rdi + add $(VEC_SIZE * 4), %rsi + VMOVU %YMM4, -(VEC_SIZE * 4)(%rdi) + VMOVA (%rsi), %YMM4 + VMOVU %YMM5, -(VEC_SIZE * 3)(%rdi) + VMOVA VEC_SIZE(%rsi), %YMM5 + vpminub %YMM5, %YMM4, %YMM2 + VMOVU %YMM6, -(VEC_SIZE * 2)(%rdi) + VMOVA (VEC_SIZE * 2)(%rsi), %YMM6 + VMOVU %YMM7, -VEC_SIZE(%rdi) + VMOVA (VEC_SIZE * 3)(%rsi), %YMM7 + vpminub %YMM7, %YMM6, %YMM3 + vpminub %YMM2, %YMM3, %YMM2 + /* If K7 != 0, there is a null byte. 
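+ The vpminub chain above reduces all four source vectors to one, so
+ this single compare covers VEC_SIZE * 4 bytes.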
*/ + vpcmpb $0, %YMM2, %YMMZERO, %k7 + kmovd %k7, %edx +# ifdef USE_AS_STRNCPY + sub $(VEC_SIZE * 4), %r8 + jbe L(UnalignedLeaveCase2OrCase3) +# endif + test %edx, %edx + jz L(UnalignedFourVecSizeLoop_start) + +L(UnalignedFourVecSizeLeave): + vpcmpb $0, %YMM4, %YMMZERO, %k1 + kmovd %k1, %edx + test %edx, %edx + jnz L(CopyVecSizeUnaligned_0) + + vpcmpb $0, %YMM5, %YMMZERO, %k2 + kmovd %k2, %ecx + test %ecx, %ecx + jnz L(CopyVecSizeUnaligned_16) + + vpcmpb $0, %YMM6, %YMMZERO, %k3 + kmovd %k3, %edx + test %edx, %edx + jnz L(CopyVecSizeUnaligned_32) + + vpcmpb $0, %YMM7, %YMMZERO, %k4 + kmovd %k4, %ecx + bsf %ecx, %edx + VMOVU %YMM4, (%rdi) + VMOVU %YMM5, VEC_SIZE(%rdi) + VMOVU %YMM6, (VEC_SIZE * 2)(%rdi) +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT +# ifdef USE_AS_STPCPY + lea (VEC_SIZE * 3)(%rdi, %rdx), %rax +# endif + VMOVU %YMM7, (VEC_SIZE * 3)(%rdi) + add $(VEC_SIZE - 1), %r8 + sub %rdx, %r8 + lea ((VEC_SIZE * 3) + 1)(%rdi, %rdx), %rdi + jmp L(StrncpyFillTailWithZero) +# else + add $(VEC_SIZE * 3), %rsi + add $(VEC_SIZE * 3), %rdi + jmp L(CopyVecSizeExit) +# endif + +/* If source address alignment == destination address alignment */ + +L(SourceStringAlignmentLessTwoVecSize): + VMOVU (%rsi), %YMM3 + VMOVU VEC_SIZE(%rsi), %YMM2 + vpcmpb $0, %YMM3, %YMMZERO, %k0 + kmovd %k0, %edx + +# ifdef USE_AS_STRNCPY +# if defined USE_AS_STPCPY || defined USE_AS_STRCAT + cmp $VEC_SIZE, %r8 +# else + cmp $(VEC_SIZE + 1), %r8 +# endif + jbe L(CopyVecSizeTail1Case2OrCase3) +# endif + test %edx, %edx + jnz L(CopyVecSizeTail1) + + VMOVU %YMM3, (%rdi) + vpcmpb $0, %YMM2, %YMMZERO, %k0 + kmovd %k0, %edx + +# ifdef USE_AS_STRNCPY +# if defined USE_AS_STPCPY || defined USE_AS_STRCAT + cmp $(VEC_SIZE * 2), %r8 +# else + cmp $((VEC_SIZE * 2) + 1), %r8 +# endif + jbe L(CopyTwoVecSize1Case2OrCase3) +# endif + test %edx, %edx + jnz L(CopyTwoVecSize1) + + and $-VEC_SIZE, %rsi + and $(VEC_SIZE - 1), %ecx + jmp L(UnalignVecSizeBoth) + +/*------End of main part with loops---------------------*/ + +/* Case1 */ + +# if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT) + .p2align 4 +L(CopyVecSize): + add %rcx, %rdi +# endif +L(CopyVecSizeTail): + add %rcx, %rsi +L(CopyVecSizeTail1): + bsf %edx, %edx +L(CopyVecSizeExit): + cmp $32, %edx + jae L(Exit32_63) + cmp $16, %edx + jae L(Exit16_31) + cmp $8, %edx + jae L(Exit8_15) + cmp $4, %edx + jae L(Exit4_7) + cmp $3, %edx + je L(Exit3) + cmp $1, %edx + ja L(Exit2) + je L(Exit1) + movb $0, (%rdi) +# ifdef USE_AS_STPCPY + lea (%rdi), %rax +# endif +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + sub $1, %r8 + lea 1(%rdi), %rdi + jnz L(StrncpyFillTailWithZero) +# endif + ret + + .p2align 4 +L(CopyTwoVecSize1): + add $VEC_SIZE, %rsi + add $VEC_SIZE, %rdi +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + sub $VEC_SIZE, %r8 +# endif + jmp L(CopyVecSizeTail1) + + .p2align 4 +L(CopyTwoVecSize): + bsf %edx, %edx + add %rcx, %rsi + add $VEC_SIZE, %edx + sub %ecx, %edx + jmp L(CopyVecSizeExit) + + .p2align 4 +L(CopyVecSizeUnaligned_0): + bsf %edx, %edx +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT +# ifdef USE_AS_STPCPY + lea (%rdi, %rdx), %rax +# endif + VMOVU %YMM4, (%rdi) + add $((VEC_SIZE * 4) - 1), %r8 + sub %rdx, %r8 + lea 1(%rdi, %rdx), %rdi + jmp L(StrncpyFillTailWithZero) +# else + jmp L(CopyVecSizeExit) +# endif + + .p2align 4 +L(CopyVecSizeUnaligned_16): + bsf %ecx, %edx + VMOVU %YMM4, (%rdi) +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT +# ifdef USE_AS_STPCPY + lea VEC_SIZE(%rdi, %rdx), %rax +# endif + VMOVU %YMM5, VEC_SIZE(%rdi) + 
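/* Compute how many tail bytes remain to be zero-filled after the
 copy. */
+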
add $((VEC_SIZE * 3) - 1), %r8 + sub %rdx, %r8 + lea (VEC_SIZE + 1)(%rdi, %rdx), %rdi + jmp L(StrncpyFillTailWithZero) +# else + add $VEC_SIZE, %rsi + add $VEC_SIZE, %rdi + jmp L(CopyVecSizeExit) +# endif + + .p2align 4 +L(CopyVecSizeUnaligned_32): + bsf %edx, %edx + VMOVU %YMM4, (%rdi) + VMOVU %YMM5, VEC_SIZE(%rdi) +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT +# ifdef USE_AS_STPCPY + lea (VEC_SIZE * 2)(%rdi, %rdx), %rax +# endif + VMOVU %YMM6, (VEC_SIZE * 2)(%rdi) + add $((VEC_SIZE * 2) - 1), %r8 + sub %rdx, %r8 + lea ((VEC_SIZE * 2) + 1)(%rdi, %rdx), %rdi + jmp L(StrncpyFillTailWithZero) +# else + add $(VEC_SIZE * 2), %rsi + add $(VEC_SIZE * 2), %rdi + jmp L(CopyVecSizeExit) +# endif + +# ifdef USE_AS_STRNCPY +# ifndef USE_AS_STRCAT + .p2align 4 +L(CopyVecSizeUnalignedVec6): + VMOVU %YMM6, (%rdi, %rcx) + jmp L(CopyVecSizeVecExit) + + .p2align 4 +L(CopyVecSizeUnalignedVec5): + VMOVU %YMM5, (%rdi, %rcx) + jmp L(CopyVecSizeVecExit) + + .p2align 4 +L(CopyVecSizeUnalignedVec4): + VMOVU %YMM4, (%rdi, %rcx) + jmp L(CopyVecSizeVecExit) + + .p2align 4 +L(CopyVecSizeUnalignedVec3): + VMOVU %YMM3, (%rdi, %rcx) + jmp L(CopyVecSizeVecExit) +# endif + +/* Case2 */ + + .p2align 4 +L(CopyVecSizeCase2): + add $VEC_SIZE, %r8 + add %rcx, %rdi + add %rcx, %rsi + bsf %edx, %edx + cmp %r8d, %edx + jb L(CopyVecSizeExit) + jmp L(StrncpyExit) + + .p2align 4 +L(CopyTwoVecSizeCase2): + add %rcx, %rsi + bsf %edx, %edx + add $VEC_SIZE, %edx + sub %ecx, %edx + cmp %r8d, %edx + jb L(CopyVecSizeExit) + jmp L(StrncpyExit) + +L(CopyVecSizeTailCase2): + add %rcx, %rsi + bsf %edx, %edx + cmp %r8d, %edx + jb L(CopyVecSizeExit) + jmp L(StrncpyExit) + +L(CopyVecSizeTail1Case2): + bsf %edx, %edx + cmp %r8d, %edx + jb L(CopyVecSizeExit) + jmp L(StrncpyExit) + +/* Case2 or Case3, Case3 */ + + .p2align 4 +L(CopyVecSizeCase2OrCase3): + test %rdx, %rdx + jnz L(CopyVecSizeCase2) +L(CopyVecSizeCase3): + add $VEC_SIZE, %r8 + add %rcx, %rdi + add %rcx, %rsi + jmp L(StrncpyExit) + + .p2align 4 +L(CopyTwoVecSizeCase2OrCase3): + test %rdx, %rdx + jnz L(CopyTwoVecSizeCase2) + add %rcx, %rsi + jmp L(StrncpyExit) + + .p2align 4 +L(CopyVecSizeTailCase2OrCase3): + test %rdx, %rdx + jnz L(CopyVecSizeTailCase2) + add %rcx, %rsi + jmp L(StrncpyExit) + + .p2align 4 +L(CopyTwoVecSize1Case2OrCase3): + add $VEC_SIZE, %rdi + add $VEC_SIZE, %rsi + sub $VEC_SIZE, %r8 +L(CopyVecSizeTail1Case2OrCase3): + test %rdx, %rdx + jnz L(CopyVecSizeTail1Case2) + jmp L(StrncpyExit) +# endif + +/*------------End labels regarding with copying 1-VEC_SIZE bytes--and 1-(VEC_SIZE*2) bytes----*/ + + .p2align 4 +L(Exit1): + movzwl (%rsi), %edx + mov %dx, (%rdi) +# ifdef USE_AS_STPCPY + lea 1(%rdi), %rax +# endif +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + sub $2, %r8 + lea 2(%rdi), %rdi + jnz L(StrncpyFillTailWithZero) +# endif + ret + + .p2align 4 +L(Exit2): + movzwl (%rsi), %ecx + mov %cx, (%rdi) + movb $0, 2(%rdi) +# ifdef USE_AS_STPCPY + lea 2(%rdi), %rax +# endif +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + sub $3, %r8 + lea 3(%rdi), %rdi + jnz L(StrncpyFillTailWithZero) +# endif + ret + + .p2align 4 +L(Exit3): + mov (%rsi), %edx + mov %edx, (%rdi) +# ifdef USE_AS_STPCPY + lea 3(%rdi), %rax +# endif +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + sub $4, %r8 + lea 4(%rdi), %rdi + jnz L(StrncpyFillTailWithZero) +# endif + ret + + .p2align 4 +L(Exit4_7): + mov (%rsi), %ecx + mov %ecx, (%rdi) + mov -3(%rsi, %rdx), %ecx + mov %ecx, -3(%rdi, %rdx) +# ifdef USE_AS_STPCPY + lea (%rdi, %rdx), %rax +# endif +# if defined 
USE_AS_STRNCPY && !defined USE_AS_STRCAT + sub %rdx, %r8 + sub $1, %r8 + lea 1(%rdi, %rdx), %rdi + jnz L(StrncpyFillTailWithZero) +# endif + ret + + .p2align 4 +L(Exit8_15): + mov (%rsi), %rcx + mov -7(%rsi, %rdx), %r9 + mov %rcx, (%rdi) + mov %r9, -7(%rdi, %rdx) +# ifdef USE_AS_STPCPY + lea (%rdi, %rdx), %rax +# endif +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + sub %rdx, %r8 + sub $1, %r8 + lea 1(%rdi, %rdx), %rdi + jnz L(StrncpyFillTailWithZero) +# endif + ret + + .p2align 4 +L(Exit16_31): + VMOVU (%rsi), %XMM2 + VMOVU -15(%rsi, %rdx), %XMM3 + VMOVU %XMM2, (%rdi) + VMOVU %XMM3, -15(%rdi, %rdx) +# ifdef USE_AS_STPCPY + lea (%rdi, %rdx), %rax +# endif +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + sub %rdx, %r8 + sub $1, %r8 + lea 1(%rdi, %rdx), %rdi + jnz L(StrncpyFillTailWithZero) +# endif + ret + + .p2align 4 +L(Exit32_63): + VMOVU (%rsi), %YMM2 + VMOVU -31(%rsi, %rdx), %YMM3 + VMOVU %YMM2, (%rdi) + VMOVU %YMM3, -31(%rdi, %rdx) +# ifdef USE_AS_STPCPY + lea (%rdi, %rdx), %rax +# endif +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + sub %rdx, %r8 + sub $1, %r8 + lea 1(%rdi, %rdx), %rdi + jnz L(StrncpyFillTailWithZero) +# endif + ret + +# ifdef USE_AS_STRNCPY + + .p2align 4 +L(StrncpyExit1): + movzbl (%rsi), %edx + mov %dl, (%rdi) +# ifdef USE_AS_STPCPY + lea 1(%rdi), %rax +# endif +# ifdef USE_AS_STRCAT + movb $0, 1(%rdi) +# endif + ret + + .p2align 4 +L(StrncpyExit2): + movzwl (%rsi), %edx + mov %dx, (%rdi) +# ifdef USE_AS_STPCPY + lea 2(%rdi), %rax +# endif +# ifdef USE_AS_STRCAT + movb $0, 2(%rdi) +# endif + ret + + .p2align 4 +L(StrncpyExit3_4): + movzwl (%rsi), %ecx + movzwl -2(%rsi, %r8), %edx + mov %cx, (%rdi) + mov %dx, -2(%rdi, %r8) +# ifdef USE_AS_STPCPY + lea (%rdi, %r8), %rax +# endif +# ifdef USE_AS_STRCAT + movb $0, (%rdi, %r8) +# endif + ret + + .p2align 4 +L(StrncpyExit5_8): + mov (%rsi), %ecx + mov -4(%rsi, %r8), %edx + mov %ecx, (%rdi) + mov %edx, -4(%rdi, %r8) +# ifdef USE_AS_STPCPY + lea (%rdi, %r8), %rax +# endif +# ifdef USE_AS_STRCAT + movb $0, (%rdi, %r8) +# endif + ret + + .p2align 4 +L(StrncpyExit9_16): + mov (%rsi), %rcx + mov -8(%rsi, %r8), %rdx + mov %rcx, (%rdi) + mov %rdx, -8(%rdi, %r8) +# ifdef USE_AS_STPCPY + lea (%rdi, %r8), %rax +# endif +# ifdef USE_AS_STRCAT + movb $0, (%rdi, %r8) +# endif + ret + + .p2align 4 +L(StrncpyExit17_32): + VMOVU (%rsi), %XMM2 + VMOVU -16(%rsi, %r8), %XMM3 + VMOVU %XMM2, (%rdi) + VMOVU %XMM3, -16(%rdi, %r8) +# ifdef USE_AS_STPCPY + lea (%rdi, %r8), %rax +# endif +# ifdef USE_AS_STRCAT + movb $0, (%rdi, %r8) +# endif + ret + + .p2align 4 +L(StrncpyExit33_64): + /* 0/32, 31/16 */ + VMOVU (%rsi), %YMM2 + VMOVU -VEC_SIZE(%rsi, %r8), %YMM3 + VMOVU %YMM2, (%rdi) + VMOVU %YMM3, -VEC_SIZE(%rdi, %r8) +# ifdef USE_AS_STPCPY + lea (%rdi, %r8), %rax +# endif +# ifdef USE_AS_STRCAT + movb $0, (%rdi, %r8) +# endif + ret + + .p2align 4 +L(StrncpyExit65): + /* 0/32, 32/32, 64/1 */ + VMOVU (%rsi), %YMM2 + VMOVU 32(%rsi), %YMM3 + mov 64(%rsi), %cl + VMOVU %YMM2, (%rdi) + VMOVU %YMM3, 32(%rdi) + mov %cl, 64(%rdi) +# ifdef USE_AS_STPCPY + lea 65(%rdi), %rax +# endif +# ifdef USE_AS_STRCAT + movb $0, 65(%rdi) +# endif + ret + +# ifndef USE_AS_STRCAT + + .p2align 4 +L(Fill1): + mov %dl, (%rdi) + ret + + .p2align 4 +L(Fill2): + mov %dx, (%rdi) + ret + + .p2align 4 +L(Fill3_4): + mov %dx, (%rdi) + mov %dx, -2(%rdi, %r8) + ret + + .p2align 4 +L(Fill5_8): + mov %edx, (%rdi) + mov %edx, -4(%rdi, %r8) + ret + + .p2align 4 +L(Fill9_16): + mov %rdx, (%rdi) + mov %rdx, -8(%rdi, %r8) + ret + + .p2align 4 +L(Fill17_32): + 
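/* Zero 17..32 bytes with two possibly overlapping 16-byte stores. */
+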
VMOVU %XMMZERO, (%rdi) + VMOVU %XMMZERO, -16(%rdi, %r8) + ret + + .p2align 4 +L(CopyVecSizeUnalignedVec2): + VMOVU %YMM2, (%rdi, %rcx) + + .p2align 4 +L(CopyVecSizeVecExit): + bsf %edx, %edx + add $(VEC_SIZE - 1), %r8 + add %rcx, %rdi +# ifdef USE_AS_STPCPY + lea (%rdi, %rdx), %rax +# endif + sub %rdx, %r8 + lea 1(%rdi, %rdx), %rdi + + .p2align 4 +L(StrncpyFillTailWithZero): + xor %edx, %edx + sub $VEC_SIZE, %r8 + jbe L(StrncpyFillExit) + + VMOVU %YMMZERO, (%rdi) + add $VEC_SIZE, %rdi + + mov %rdi, %rsi + and $(VEC_SIZE - 1), %esi + sub %rsi, %rdi + add %rsi, %r8 + sub $(VEC_SIZE * 4), %r8 + jb L(StrncpyFillLessFourVecSize) + +L(StrncpyFillLoopVmovdqa): + VMOVA %YMMZERO, (%rdi) + VMOVA %YMMZERO, VEC_SIZE(%rdi) + VMOVA %YMMZERO, (VEC_SIZE * 2)(%rdi) + VMOVA %YMMZERO, (VEC_SIZE * 3)(%rdi) + add $(VEC_SIZE * 4), %rdi + sub $(VEC_SIZE * 4), %r8 + jae L(StrncpyFillLoopVmovdqa) + +L(StrncpyFillLessFourVecSize): + add $(VEC_SIZE * 2), %r8 + jl L(StrncpyFillLessTwoVecSize) + VMOVA %YMMZERO, (%rdi) + VMOVA %YMMZERO, VEC_SIZE(%rdi) + add $(VEC_SIZE * 2), %rdi + sub $VEC_SIZE, %r8 + jl L(StrncpyFillExit) + VMOVA %YMMZERO, (%rdi) + add $VEC_SIZE, %rdi + jmp L(Fill) + + .p2align 4 +L(StrncpyFillLessTwoVecSize): + add $VEC_SIZE, %r8 + jl L(StrncpyFillExit) + VMOVA %YMMZERO, (%rdi) + add $VEC_SIZE, %rdi + jmp L(Fill) + + .p2align 4 +L(StrncpyFillExit): + add $VEC_SIZE, %r8 +L(Fill): + cmp $17, %r8d + jae L(Fill17_32) + cmp $9, %r8d + jae L(Fill9_16) + cmp $5, %r8d + jae L(Fill5_8) + cmp $3, %r8d + jae L(Fill3_4) + cmp $1, %r8d + ja L(Fill2) + je L(Fill1) + ret + +/* end of ifndef USE_AS_STRCAT */ +# endif + + .p2align 4 +L(UnalignedLeaveCase2OrCase3): + test %rdx, %rdx + jnz L(UnalignedFourVecSizeLeaveCase2) +L(UnalignedFourVecSizeLeaveCase3): + lea (VEC_SIZE * 4)(%r8), %rcx + and $-VEC_SIZE, %rcx + add $(VEC_SIZE * 3), %r8 + jl L(CopyVecSizeCase3) + VMOVU %YMM4, (%rdi) + sub $VEC_SIZE, %r8 + jb L(CopyVecSizeCase3) + VMOVU %YMM5, VEC_SIZE(%rdi) + sub $VEC_SIZE, %r8 + jb L(CopyVecSizeCase3) + VMOVU %YMM6, (VEC_SIZE * 2)(%rdi) + sub $VEC_SIZE, %r8 + jb L(CopyVecSizeCase3) + VMOVU %YMM7, (VEC_SIZE * 3)(%rdi) +# ifdef USE_AS_STPCPY + lea (VEC_SIZE * 4)(%rdi), %rax +# endif +# ifdef USE_AS_STRCAT + movb $0, (VEC_SIZE * 4)(%rdi) +# endif + ret + + .p2align 4 +L(UnalignedFourVecSizeLeaveCase2): + xor %ecx, %ecx + vpcmpb $0, %YMM4, %YMMZERO, %k1 + kmovd %k1, %edx + add $(VEC_SIZE * 3), %r8 + jle L(CopyVecSizeCase2OrCase3) + test %edx, %edx +# ifndef USE_AS_STRCAT + jnz L(CopyVecSizeUnalignedVec4) +# else + jnz L(CopyVecSize) +# endif + vpcmpb $0, %YMM5, %YMMZERO, %k2 + kmovd %k2, %edx + VMOVU %YMM4, (%rdi) + add $VEC_SIZE, %rcx + sub $VEC_SIZE, %r8 + jbe L(CopyVecSizeCase2OrCase3) + test %edx, %edx +# ifndef USE_AS_STRCAT + jnz L(CopyVecSizeUnalignedVec5) +# else + jnz L(CopyVecSize) +# endif + + vpcmpb $0, %YMM6, %YMMZERO, %k3 + kmovd %k3, %edx + VMOVU %YMM5, VEC_SIZE(%rdi) + add $VEC_SIZE, %rcx + sub $VEC_SIZE, %r8 + jbe L(CopyVecSizeCase2OrCase3) + test %edx, %edx +# ifndef USE_AS_STRCAT + jnz L(CopyVecSizeUnalignedVec6) +# else + jnz L(CopyVecSize) +# endif + + vpcmpb $0, %YMM7, %YMMZERO, %k4 + kmovd %k4, %edx + VMOVU %YMM6, (VEC_SIZE * 2)(%rdi) + lea VEC_SIZE(%rdi, %rcx), %rdi + lea VEC_SIZE(%rsi, %rcx), %rsi + bsf %edx, %edx + cmp %r8d, %edx + jb L(CopyVecSizeExit) +L(StrncpyExit): + cmp $65, %r8d + je L(StrncpyExit65) + cmp $33, %r8d + jae L(StrncpyExit33_64) + cmp $17, %r8d + jae L(StrncpyExit17_32) + cmp $9, %r8d + jae L(StrncpyExit9_16) + cmp $5, %r8d + jae L(StrncpyExit5_8) + cmp $3, %r8d + jae 
L(StrncpyExit3_4) + cmp $1, %r8d + ja L(StrncpyExit2) + je L(StrncpyExit1) +# ifdef USE_AS_STPCPY + mov %rdi, %rax +# endif +# ifdef USE_AS_STRCAT + movb $0, (%rdi) +# endif + ret + + .p2align 4 +L(ExitZero): +# ifndef USE_AS_STRCAT + mov %rdi, %rax +# endif + ret + +# endif + +# ifndef USE_AS_STRCAT +END (STRCPY) +# else +END (STRCAT) +# endif +#endif diff --git a/sysdeps/x86_64/multiarch/strlen-avx2-rtm.S b/sysdeps/x86_64/multiarch/strlen-avx2-rtm.S new file mode 100644 index 000000000..75b4b7612 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strlen-avx2-rtm.S @@ -0,0 +1,12 @@ +#ifndef STRLEN +# define STRLEN __strlen_avx2_rtm +#endif + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + +#define SECTION(p) p##.avx.rtm + +#include "strlen-avx2.S" diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S index 73421ec1b..8cfb7391b 100644 --- a/sysdeps/x86_64/multiarch/strlen-avx2.S +++ b/sysdeps/x86_64/multiarch/strlen-avx2.S @@ -27,370 +27,528 @@ # ifdef USE_AS_WCSLEN # define VPCMPEQ vpcmpeqd # define VPMINU vpminud +# define CHAR_SIZE 4 # else # define VPCMPEQ vpcmpeqb # define VPMINU vpminub +# define CHAR_SIZE 1 # endif # ifndef VZEROUPPER # define VZEROUPPER vzeroupper # endif +# ifndef SECTION +# define SECTION(p) p##.avx +# endif + # define VEC_SIZE 32 +# define PAGE_SIZE 4096 +# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) - .section .text.avx,"ax",@progbits + .section SECTION(.text),"ax",@progbits ENTRY (STRLEN) # ifdef USE_AS_STRNLEN - /* Check for zero length. */ + /* Check zero length. */ +# ifdef __ILP32__ + /* Clear upper bits. */ + and %RSI_LP, %RSI_LP +# else test %RSI_LP, %RSI_LP - jz L(zero) -# ifdef USE_AS_WCSLEN - shl $2, %RSI_LP -# elif defined __ILP32__ - /* Clear the upper 32 bits. */ - movl %esi, %esi # endif + jz L(zero) + /* Store max len in R8_LP before adjusting if using WCSLEN. */ mov %RSI_LP, %R8_LP # endif - movl %edi, %ecx + movl %edi, %eax movq %rdi, %rdx vpxor %xmm0, %xmm0, %xmm0 - + /* Clear high bits from edi. Only keeping bits relevant to page + cross check. */ + andl $(PAGE_SIZE - 1), %eax /* Check if we may cross page boundary with one vector load. */ - andl $(2 * VEC_SIZE - 1), %ecx - cmpl $VEC_SIZE, %ecx - ja L(cros_page_boundary) + cmpl $(PAGE_SIZE - VEC_SIZE), %eax + ja L(cross_page_boundary) /* Check the first VEC_SIZE bytes. */ - VPCMPEQ (%rdi), %ymm0, %ymm1 + VPCMPEQ (%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax - testl %eax, %eax - # ifdef USE_AS_STRNLEN - jnz L(first_vec_x0_check) - /* Adjust length and check the end of data. */ - subq $VEC_SIZE, %rsi - jbe L(max) -# else - jnz L(first_vec_x0) + /* If length < VEC_SIZE handle special. */ + cmpq $CHAR_PER_VEC, %rsi + jbe L(first_vec_x0) # endif - - /* Align data for aligned loads in the loop. */ - addq $VEC_SIZE, %rdi - andl $(VEC_SIZE - 1), %ecx - andq $-VEC_SIZE, %rdi + /* If empty continue to aligned_more. Otherwise return bit + position of first match. */ + testl %eax, %eax + jz L(aligned_more) + tzcntl %eax, %eax +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ + shrl $2, %eax +# endif + VZEROUPPER_RETURN # ifdef USE_AS_STRNLEN - /* Adjust length. */ - addq %rcx, %rsi +L(zero): + xorl %eax, %eax + ret - subq $(VEC_SIZE * 4), %rsi - jbe L(last_4x_vec_or_less) + .p2align 4 +L(first_vec_x0): + /* Set bit for max len so that tzcnt will return min of max len + and position of first match. 
*/ +# ifdef USE_AS_WCSLEN + /* NB: Multiply length by 4 to get byte count. */ + sall $2, %esi +# endif + btsq %rsi, %rax + tzcntl %eax, %eax +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ + shrl $2, %eax +# endif + VZEROUPPER_RETURN # endif - jmp L(more_4x_vec) .p2align 4 -L(cros_page_boundary): - andl $(VEC_SIZE - 1), %ecx - andq $-VEC_SIZE, %rdi - VPCMPEQ (%rdi), %ymm0, %ymm1 - vpmovmskb %ymm1, %eax - /* Remove the leading bytes. */ - sarl %cl, %eax - testl %eax, %eax - jz L(aligned_more) +L(first_vec_x1): tzcntl %eax, %eax + /* Safe to use 32 bit instructions as these are only called for + size = [1, 159]. */ # ifdef USE_AS_STRNLEN - /* Check the end of data. */ - cmpq %rax, %rsi - jbe L(max) + /* Use ecx which was computed earlier to compute correct value. + */ +# ifdef USE_AS_WCSLEN + leal -(VEC_SIZE * 4 + 1)(%rax, %rcx, 4), %eax +# else + subl $(VEC_SIZE * 4 + 1), %ecx + addl %ecx, %eax +# endif +# else + subl %edx, %edi + incl %edi + addl %edi, %eax # endif - addq %rdi, %rax - addq %rcx, %rax - subq %rdx, %rax # ifdef USE_AS_WCSLEN - shrq $2, %rax + /* NB: Divide bytes by 4 to get wchar_t count. */ + shrl $2, %eax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 -L(aligned_more): +L(first_vec_x2): + tzcntl %eax, %eax + /* Safe to use 32 bit instructions as these are only called for + size = [1, 159]. */ # ifdef USE_AS_STRNLEN - /* "rcx" is less than VEC_SIZE. Calculate "rdx + rcx - VEC_SIZE" - with "rdx - (VEC_SIZE - rcx)" instead of "(rdx + rcx) - VEC_SIZE" - to void possible addition overflow. */ - negq %rcx - addq $VEC_SIZE, %rcx - - /* Check the end of data. */ - subq %rcx, %rsi - jbe L(max) + /* Use ecx which was computed earlier to compute correct value. + */ +# ifdef USE_AS_WCSLEN + leal -(VEC_SIZE * 3 + 1)(%rax, %rcx, 4), %eax +# else + subl $(VEC_SIZE * 3 + 1), %ecx + addl %ecx, %eax +# endif +# else + subl %edx, %edi + addl $(VEC_SIZE + 1), %edi + addl %edi, %eax # endif +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ + shrl $2, %eax +# endif + VZEROUPPER_RETURN - addq $VEC_SIZE, %rdi + .p2align 4 +L(first_vec_x3): + tzcntl %eax, %eax + /* Safe to use 32 bit instructions as these are only called for + size = [1, 159]. */ +# ifdef USE_AS_STRNLEN + /* Use ecx which was computed earlier to compute correct value. + */ +# ifdef USE_AS_WCSLEN + leal -(VEC_SIZE * 2 + 1)(%rax, %rcx, 4), %eax +# else + subl $(VEC_SIZE * 2 + 1), %ecx + addl %ecx, %eax +# endif +# else + subl %edx, %edi + addl $(VEC_SIZE * 2 + 1), %edi + addl %edi, %eax +# endif +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ + shrl $2, %eax +# endif + VZEROUPPER_RETURN + .p2align 4 +L(first_vec_x4): + tzcntl %eax, %eax + /* Safe to use 32 bit instructions as these are only called for + size = [1, 159]. */ # ifdef USE_AS_STRNLEN - subq $(VEC_SIZE * 4), %rsi - jbe L(last_4x_vec_or_less) + /* Use ecx which was computed earlier to compute correct value. + */ +# ifdef USE_AS_WCSLEN + leal -(VEC_SIZE * 1 + 1)(%rax, %rcx, 4), %eax +# else + subl $(VEC_SIZE + 1), %ecx + addl %ecx, %eax +# endif +# else + subl %edx, %edi + addl $(VEC_SIZE * 3 + 1), %edi + addl %edi, %eax # endif +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ + shrl $2, %eax +# endif + VZEROUPPER_RETURN -L(more_4x_vec): + .p2align 5 +L(aligned_more): + /* Align data to VEC_SIZE - 1. This is the same number of + instructions as using andq with -VEC_SIZE but saves 4 bytes of + code on the x4 check. 
*/ + orq $(VEC_SIZE - 1), %rdi +L(cross_page_continue): /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time since data is only aligned to VEC_SIZE. */ - VPCMPEQ (%rdi), %ymm0, %ymm1 - vpmovmskb %ymm1, %eax - testl %eax, %eax - jnz L(first_vec_x0) - - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 +# ifdef USE_AS_STRNLEN + /* + 1 because rdi is aligned to VEC_SIZE - 1. + CHAR_SIZE + because it simplifies the logic in last_4x_vec_or_less. */ + leaq (VEC_SIZE * 4 + CHAR_SIZE + 1)(%rdi), %rcx + subq %rdx, %rcx +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %ecx +# endif +# endif + /* Load first VEC regardless. */ + VPCMPEQ 1(%rdi), %ymm0, %ymm1 +# ifdef USE_AS_STRNLEN + /* Adjust length. If near end handle specially. */ + subq %rcx, %rsi + jb L(last_4x_vec_or_less) +# endif vpmovmskb %ymm1, %eax testl %eax, %eax jnz L(first_vec_x1) - VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1 + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax testl %eax, %eax jnz L(first_vec_x2) - VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1 + VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax testl %eax, %eax jnz L(first_vec_x3) - addq $(VEC_SIZE * 4), %rdi - -# ifdef USE_AS_STRNLEN - subq $(VEC_SIZE * 4), %rsi - jbe L(last_4x_vec_or_less) -# endif - - /* Align data to 4 * VEC_SIZE. */ - movq %rdi, %rcx - andl $(4 * VEC_SIZE - 1), %ecx - andq $-(4 * VEC_SIZE), %rdi + VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + testl %eax, %eax + jnz L(first_vec_x4) + /* Align data to VEC_SIZE * 4 - 1. */ # ifdef USE_AS_STRNLEN - /* Adjust length. */ + /* Before adjusting length check if at last VEC_SIZE * 4. */ + cmpq $(CHAR_PER_VEC * 4 - 1), %rsi + jbe L(last_4x_vec_or_less_load) + incq %rdi + movl %edi, %ecx + orq $(VEC_SIZE * 4 - 1), %rdi + andl $(VEC_SIZE * 4 - 1), %ecx +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %ecx +# endif + /* Readjust length. */ addq %rcx, %rsi +# else + incq %rdi + orq $(VEC_SIZE * 4 - 1), %rdi # endif - + /* Compare 4 * VEC at a time forward. */ .p2align 4 L(loop_4x_vec): - /* Compare 4 * VEC at a time forward. */ - vmovdqa (%rdi), %ymm1 - vmovdqa VEC_SIZE(%rdi), %ymm2 - vmovdqa (VEC_SIZE * 2)(%rdi), %ymm3 - vmovdqa (VEC_SIZE * 3)(%rdi), %ymm4 - VPMINU %ymm1, %ymm2, %ymm5 - VPMINU %ymm3, %ymm4, %ymm6 - VPMINU %ymm5, %ymm6, %ymm5 - +# ifdef USE_AS_STRNLEN + /* Break if at end of length. */ + subq $(CHAR_PER_VEC * 4), %rsi + jb L(last_4x_vec_or_less_cmpeq) +# endif + /* Save some code size by microfusing VPMINU with the load. + Since the matches in ymm2/ymm4 can only be returned if there + were no matches in ymm1/ymm3 respectively, there is no issue + with overlap. */ + vmovdqa 1(%rdi), %ymm1 + VPMINU (VEC_SIZE + 1)(%rdi), %ymm1, %ymm2 + vmovdqa (VEC_SIZE * 2 + 1)(%rdi), %ymm3 + VPMINU (VEC_SIZE * 3 + 1)(%rdi), %ymm3, %ymm4 + + VPMINU %ymm2, %ymm4, %ymm5 VPCMPEQ %ymm5, %ymm0, %ymm5 - vpmovmskb %ymm5, %eax - testl %eax, %eax - jnz L(4x_vec_end) + vpmovmskb %ymm5, %ecx - addq $(VEC_SIZE * 4), %rdi + subq $-(VEC_SIZE * 4), %rdi + testl %ecx, %ecx + jz L(loop_4x_vec) -# ifndef USE_AS_STRNLEN - jmp L(loop_4x_vec) -# else - subq $(VEC_SIZE * 4), %rsi - ja L(loop_4x_vec) -L(last_4x_vec_or_less): - /* Less than 4 * VEC and aligned to VEC_SIZE.
*/ - addl $(VEC_SIZE * 2), %esi - jle L(last_2x_vec) - - VPCMPEQ (%rdi), %ymm0, %ymm1 - vpmovmskb %ymm1, %eax - testl %eax, %eax - jnz L(first_vec_x0) - - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 + VPCMPEQ %ymm1, %ymm0, %ymm1 vpmovmskb %ymm1, %eax + subq %rdx, %rdi testl %eax, %eax - jnz L(first_vec_x1) + jnz L(last_vec_return_x0) - VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1 - vpmovmskb %ymm1, %eax + VPCMPEQ %ymm2, %ymm0, %ymm2 + vpmovmskb %ymm2, %eax testl %eax, %eax + jnz L(last_vec_return_x1) - jnz L(first_vec_x2_check) - subl $VEC_SIZE, %esi - jle L(max) + /* Combine last 2 VEC. */ + VPCMPEQ %ymm3, %ymm0, %ymm3 + vpmovmskb %ymm3, %eax + /* rcx has combined result from all 4 VEC. It will only be used + if the first 3 other VEC all did not contain a match. */ + salq $32, %rcx + orq %rcx, %rax + tzcntq %rax, %rax + subq $(VEC_SIZE * 2 - 1), %rdi + addq %rdi, %rax +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ + shrq $2, %rax +# endif + VZEROUPPER_RETURN - VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1 - vpmovmskb %ymm1, %eax - testl %eax, %eax - jnz L(first_vec_x3_check) - movq %r8, %rax +# ifdef USE_AS_STRNLEN + .p2align 4 +L(last_4x_vec_or_less_load): + /* Depending on entry adjust rdi / prepare first VEC in ymm1. + */ + subq $-(VEC_SIZE * 4), %rdi +L(last_4x_vec_or_less_cmpeq): + VPCMPEQ 1(%rdi), %ymm0, %ymm1 +L(last_4x_vec_or_less): # ifdef USE_AS_WCSLEN - shrq $2, %rax + /* NB: Multiply length by 4 to get byte count. */ + sall $2, %esi # endif - VZEROUPPER - ret - - .p2align 4 -L(last_2x_vec): - addl $(VEC_SIZE * 2), %esi - VPCMPEQ (%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax + /* If remaining length > VEC_SIZE * 2. This works if esi is off + by VEC_SIZE * 4. */ + testl $(VEC_SIZE * 2), %esi + jnz L(last_4x_vec) + + /* length may have been negative or positive by an offset of + VEC_SIZE * 4 depending on where this was called from. This fixes + that. */ + andl $(VEC_SIZE * 4 - 1), %esi testl %eax, %eax + jnz L(last_vec_x1_check) - jnz L(first_vec_x0_check) subl $VEC_SIZE, %esi - jle L(max) + jb L(max) - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax - testl %eax, %eax - jnz L(first_vec_x1_check) - movq %r8, %rax -# ifdef USE_AS_WCSLEN - shrq $2, %rax -# endif - VZEROUPPER - ret - - .p2align 4 -L(first_vec_x0_check): tzcntl %eax, %eax /* Check the end of data. */ - cmpq %rax, %rsi - jbe L(max) + cmpl %eax, %esi + jb L(max) + subq %rdx, %rdi + addl $(VEC_SIZE + 1), %eax addq %rdi, %rax - subq %rdx, %rax # ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ shrq $2, %rax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN +# endif .p2align 4 -L(first_vec_x1_check): +L(last_vec_return_x0): tzcntl %eax, %eax - /* Check the end of data. */ - cmpq %rax, %rsi - jbe L(max) - addq $VEC_SIZE, %rax + subq $(VEC_SIZE * 4 - 1), %rdi addq %rdi, %rax - subq %rdx, %rax -# ifdef USE_AS_WCSLEN +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ shrq $2, %rax -# endif - VZEROUPPER - ret +# endif + VZEROUPPER_RETURN .p2align 4 -L(first_vec_x2_check): +L(last_vec_return_x1): tzcntl %eax, %eax - /* Check the end of data. */ - cmpq %rax, %rsi - jbe L(max) - addq $(VEC_SIZE * 2), %rax + subq $(VEC_SIZE * 3 - 1), %rdi addq %rdi, %rax - subq %rdx, %rax -# ifdef USE_AS_WCSLEN +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. 
*/ shrq $2, %rax -# endif - VZEROUPPER - ret +# endif + VZEROUPPER_RETURN +# ifdef USE_AS_STRNLEN .p2align 4 -L(first_vec_x3_check): +L(last_vec_x1_check): + tzcntl %eax, %eax /* Check the end of data. */ - cmpq %rax, %rsi - jbe L(max) - addq $(VEC_SIZE * 3), %rax + cmpl %eax, %esi + jb L(max) + subq %rdx, %rdi + incl %eax addq %rdi, %rax - subq %rdx, %rax # ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ shrq $2, %rax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN - .p2align 4 L(max): movq %r8, %rax + VZEROUPPER_RETURN + + .p2align 4 +L(last_4x_vec): + /* Test first 2x VEC normally. */ + testl %eax, %eax + jnz L(last_vec_x1) + + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + testl %eax, %eax + jnz L(last_vec_x2) + + /* Normalize length. */ + andl $(VEC_SIZE * 4 - 1), %esi + VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + testl %eax, %eax + jnz L(last_vec_x3) + + subl $(VEC_SIZE * 3), %esi + jb L(max) + + VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + tzcntl %eax, %eax + /* Check the end of data. */ + cmpl %eax, %esi + jb L(max) + subq %rdx, %rdi + addl $(VEC_SIZE * 3 + 1), %eax + addq %rdi, %rax # ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ shrq $2, %rax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN - .p2align 4 -L(zero): - xorl %eax, %eax - ret -# endif .p2align 4 -L(first_vec_x0): +L(last_vec_x1): + /* Essentially a duplicate of first_vec_x1, but using 64 bit + instructions. */ tzcntl %eax, %eax + subq %rdx, %rdi + incl %eax addq %rdi, %rax - subq %rdx, %rax -# ifdef USE_AS_WCSLEN +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ shrq $2, %rax -# endif - VZEROUPPER - ret +# endif + VZEROUPPER_RETURN .p2align 4 -L(first_vec_x1): +L(last_vec_x2): + /* Essentially a duplicate of first_vec_x2, but using 64 bit + instructions. */ tzcntl %eax, %eax - addq $VEC_SIZE, %rax + subq %rdx, %rdi + addl $(VEC_SIZE + 1), %eax addq %rdi, %rax - subq %rdx, %rax -# ifdef USE_AS_WCSLEN +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ shrq $2, %rax -# endif - VZEROUPPER - ret +# endif + VZEROUPPER_RETURN .p2align 4 -L(first_vec_x2): +L(last_vec_x3): tzcntl %eax, %eax - addq $(VEC_SIZE * 2), %rax + subl $(VEC_SIZE * 2), %esi + /* Check the end of data. */ + cmpl %eax, %esi + jb L(max_end) + subq %rdx, %rdi + addl $(VEC_SIZE * 2 + 1), %eax addq %rdi, %rax - subq %rdx, %rax -# ifdef USE_AS_WCSLEN +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ shrq $2, %rax +# endif + VZEROUPPER_RETURN +L(max_end): + movq %r8, %rax + VZEROUPPER_RETURN # endif - VZEROUPPER - ret + /* Cold case for crossing page with first load. */ .p2align 4 -L(4x_vec_end): - VPCMPEQ %ymm1, %ymm0, %ymm1 +L(cross_page_boundary): + /* Align data to VEC_SIZE - 1. */ + orq $(VEC_SIZE - 1), %rdi + VPCMPEQ -(VEC_SIZE - 1)(%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax + /* Remove the leading bytes. sarxl only uses bits [5:0] of COUNT + so no need to manually mod rdx. */ + sarxl %edx, %eax, %eax +# ifdef USE_AS_STRNLEN testl %eax, %eax - jnz L(first_vec_x0) - VPCMPEQ %ymm2, %ymm0, %ymm2 - vpmovmskb %ymm2, %eax - testl %eax, %eax - jnz L(first_vec_x1) - VPCMPEQ %ymm3, %ymm0, %ymm3 - vpmovmskb %ymm3, %eax + jnz L(cross_page_less_vec) + leaq 1(%rdi), %rcx + subq %rdx, %rcx +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ + shrl $2, %ecx +# endif + /* Check length.
*/ + cmpq %rsi, %rcx + jb L(cross_page_continue) + movq %r8, %rax +# else testl %eax, %eax - jnz L(first_vec_x2) - VPCMPEQ %ymm4, %ymm0, %ymm4 - vpmovmskb %ymm4, %eax -L(first_vec_x3): + jz L(cross_page_continue) tzcntl %eax, %eax - addq $(VEC_SIZE * 3), %rax - addq %rdi, %rax - subq %rdx, %rax -# ifdef USE_AS_WCSLEN - shrq $2, %rax +# ifdef USE_AS_WCSLEN + /* NB: Divide length by 4 to get wchar_t count. */ + shrl $2, %eax +# endif +# endif +L(return_vzeroupper): + ZERO_UPPER_VEC_REGISTERS_RETURN + +# ifdef USE_AS_STRNLEN + .p2align 4 +L(cross_page_less_vec): + tzcntl %eax, %eax +# ifdef USE_AS_WCSLEN + /* NB: Divide by 4 to convert from byte-count to length. */ + shrl $2, %eax +# endif + cmpq %rax, %rsi + cmovb %esi, %eax + VZEROUPPER_RETURN # endif - VZEROUPPER - ret END (STRLEN) #endif diff --git a/sysdeps/x86_64/multiarch/strlen-evex.S b/sysdeps/x86_64/multiarch/strlen-evex.S new file mode 100644 index 000000000..4bf6874b8 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strlen-evex.S @@ -0,0 +1,489 @@ +/* strlen/strnlen/wcslen/wcsnlen optimized with 256-bit EVEX instructions. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +# include + +# ifndef STRLEN +# define STRLEN __strlen_evex +# endif + +# define VMOVA vmovdqa64 + +# ifdef USE_AS_WCSLEN +# define VPCMP vpcmpd +# define VPMINU vpminud +# define SHIFT_REG ecx +# define CHAR_SIZE 4 +# else +# define VPCMP vpcmpb +# define VPMINU vpminub +# define SHIFT_REG edx +# define CHAR_SIZE 1 +# endif + +# define XMMZERO xmm16 +# define YMMZERO ymm16 +# define YMM1 ymm17 +# define YMM2 ymm18 +# define YMM3 ymm19 +# define YMM4 ymm20 +# define YMM5 ymm21 +# define YMM6 ymm22 + +# define VEC_SIZE 32 +# define PAGE_SIZE 4096 +# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) + + .section .text.evex,"ax",@progbits +ENTRY (STRLEN) +# ifdef USE_AS_STRNLEN + /* Check zero length. */ + test %RSI_LP, %RSI_LP + jz L(zero) +# ifdef __ILP32__ + /* Clear the upper 32 bits. */ + movl %esi, %esi +# endif + mov %RSI_LP, %R8_LP +# endif + movl %edi, %eax + vpxorq %XMMZERO, %XMMZERO, %XMMZERO + /* Clear high bits from edi. Only keeping bits relevant to page + cross check. */ + andl $(PAGE_SIZE - 1), %eax + /* Check if we may cross page boundary with one vector load. */ + cmpl $(PAGE_SIZE - VEC_SIZE), %eax + ja L(cross_page_boundary) + + /* Check the first VEC_SIZE bytes. Each bit in K0 represents a + null byte. */ + VPCMP $0, (%rdi), %YMMZERO, %k0 + kmovd %k0, %eax +# ifdef USE_AS_STRNLEN + /* If length < CHAR_PER_VEC handle special. */ + cmpq $CHAR_PER_VEC, %rsi + jbe L(first_vec_x0) +# endif + testl %eax, %eax + jz L(aligned_more) + tzcntl %eax, %eax + ret +# ifdef USE_AS_STRNLEN +L(zero): + xorl %eax, %eax + ret + + .p2align 4 +L(first_vec_x0): + /* Set bit for max len so that tzcnt will return min of max len + and position of first match. 
*/ + btsq %rsi, %rax + tzcntl %eax, %eax + ret +# endif + + .p2align 4 +L(first_vec_x1): + tzcntl %eax, %eax + /* Safe to use 32 bit instructions as these are only called for + size = [1, 159]. */ +# ifdef USE_AS_STRNLEN + /* Use ecx which was computed earlier to compute correct value. + */ + leal -(CHAR_PER_VEC * 4 + 1)(%rcx, %rax), %eax +# else + subl %edx, %edi +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %edi +# endif + leal CHAR_PER_VEC(%rdi, %rax), %eax +# endif + ret + + .p2align 4 +L(first_vec_x2): + tzcntl %eax, %eax + /* Safe to use 32 bit instructions as these are only called for + size = [1, 159]. */ +# ifdef USE_AS_STRNLEN + /* Use ecx which was computed earlier to compute correct value. + */ + leal -(CHAR_PER_VEC * 3 + 1)(%rcx, %rax), %eax +# else + subl %edx, %edi +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %edi +# endif + leal (CHAR_PER_VEC * 2)(%rdi, %rax), %eax +# endif + ret + + .p2align 4 +L(first_vec_x3): + tzcntl %eax, %eax + /* Safe to use 32 bit instructions as these are only called for + size = [1, 159]. */ +# ifdef USE_AS_STRNLEN + /* Use ecx which was computed earlier to compute correct value. + */ + leal -(CHAR_PER_VEC * 2 + 1)(%rcx, %rax), %eax +# else + subl %edx, %edi +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %edi +# endif + leal (CHAR_PER_VEC * 3)(%rdi, %rax), %eax +# endif + ret + + .p2align 4 +L(first_vec_x4): + tzcntl %eax, %eax + /* Safe to use 32 bit instructions as these are only called for + size = [1, 159]. */ +# ifdef USE_AS_STRNLEN + /* Use ecx which was computed earlier to compute correct value. + */ + leal -(CHAR_PER_VEC + 1)(%rcx, %rax), %eax +# else + subl %edx, %edi +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %edi +# endif + leal (CHAR_PER_VEC * 4)(%rdi, %rax), %eax +# endif + ret + + .p2align 5 +L(aligned_more): + movq %rdi, %rdx + /* Align data to VEC_SIZE. */ + andq $-(VEC_SIZE), %rdi +L(cross_page_continue): + /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time + since data is only aligned to VEC_SIZE. */ +# ifdef USE_AS_STRNLEN + /* + CHAR_SIZE because it simplifies the logic in + last_4x_vec_or_less. */ + leaq (VEC_SIZE * 5 + CHAR_SIZE)(%rdi), %rcx + subq %rdx, %rcx +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %ecx +# endif +# endif + /* Load first VEC regardless. */ + VPCMP $0, VEC_SIZE(%rdi), %YMMZERO, %k0 +# ifdef USE_AS_STRNLEN + /* Adjust length. If near end handle specially. */ + subq %rcx, %rsi + jb L(last_4x_vec_or_less) +# endif + kmovd %k0, %eax + testl %eax, %eax + jnz L(first_vec_x1) + + VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0 + kmovd %k0, %eax + test %eax, %eax + jnz L(first_vec_x2) + + VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMZERO, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(first_vec_x3) + + VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMZERO, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(first_vec_x4) + + addq $VEC_SIZE, %rdi +# ifdef USE_AS_STRNLEN + /* Check if at last VEC_SIZE * 4 length. */ + cmpq $(CHAR_PER_VEC * 4 - 1), %rsi + jbe L(last_4x_vec_or_less_load) + movl %edi, %ecx + andl $(VEC_SIZE * 4 - 1), %ecx +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %ecx +# endif + /* Readjust length. */ + addq %rcx, %rsi +# endif + /* Align data to VEC_SIZE * 4. */ + andq $-(VEC_SIZE * 4), %rdi + + /* Compare 4 * VEC at a time forward.
*/ + .p2align 4 +L(loop_4x_vec): + /* Load first VEC regardless. */ + VMOVA (VEC_SIZE * 4)(%rdi), %YMM1 +# ifdef USE_AS_STRNLEN + /* Break if at end of length. */ + subq $(CHAR_PER_VEC * 4), %rsi + jb L(last_4x_vec_or_less_cmpeq) +# endif + /* Save some code size by microfusing VPMINU with the load. Since + the matches in ymm2/ymm4 can only be returned if there were no + matches in ymm1/ymm3 respectively, there is no issue with overlap. + */ + VPMINU (VEC_SIZE * 5)(%rdi), %YMM1, %YMM2 + VMOVA (VEC_SIZE * 6)(%rdi), %YMM3 + VPMINU (VEC_SIZE * 7)(%rdi), %YMM3, %YMM4 + + VPCMP $0, %YMM2, %YMMZERO, %k0 + VPCMP $0, %YMM4, %YMMZERO, %k1 + subq $-(VEC_SIZE * 4), %rdi + kortestd %k0, %k1 + jz L(loop_4x_vec) + + /* Check if end was in first half. */ + kmovd %k0, %eax + subq %rdx, %rdi +# ifdef USE_AS_WCSLEN + shrq $2, %rdi +# endif + testl %eax, %eax + jz L(second_vec_return) + + VPCMP $0, %YMM1, %YMMZERO, %k2 + kmovd %k2, %edx + /* Combine VEC1 matches (edx) with VEC2 matches (eax). */ +# ifdef USE_AS_WCSLEN + sall $CHAR_PER_VEC, %eax + orl %edx, %eax + tzcntl %eax, %eax +# else + salq $CHAR_PER_VEC, %rax + orq %rdx, %rax + tzcntq %rax, %rax +# endif + addq %rdi, %rax + ret + + +# ifdef USE_AS_STRNLEN + +L(last_4x_vec_or_less_load): + /* Depending on entry adjust rdi / prepare first VEC in YMM1. */ + VMOVA (VEC_SIZE * 4)(%rdi), %YMM1 +L(last_4x_vec_or_less_cmpeq): + VPCMP $0, %YMM1, %YMMZERO, %k0 + addq $(VEC_SIZE * 3), %rdi +L(last_4x_vec_or_less): + kmovd %k0, %eax + /* If remaining length > VEC_SIZE * 2. This works if esi is off by + VEC_SIZE * 4. */ + testl $(CHAR_PER_VEC * 2), %esi + jnz L(last_4x_vec) + + /* length may have been negative or positive by an offset of + CHAR_PER_VEC * 4 depending on where this was called from. This + fixes that. */ + andl $(CHAR_PER_VEC * 4 - 1), %esi + testl %eax, %eax + jnz L(last_vec_x1_check) + + /* Check the end of data. */ + subl $CHAR_PER_VEC, %esi + jb L(max) + + VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0 + kmovd %k0, %eax + tzcntl %eax, %eax + /* Check the end of data. */ + cmpl %eax, %esi + jb L(max) + + subq %rdx, %rdi +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarq $2, %rdi +# endif + leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax + ret +L(max): + movq %r8, %rax + ret +# endif + + /* Placed here in strnlen so that the jcc L(last_4x_vec_or_less) + in the 4x VEC loop can use 2 byte encoding. */ + .p2align 4 +L(second_vec_return): + VPCMP $0, %YMM3, %YMMZERO, %k0 + /* Combine YMM3 matches (k0) with YMM4 matches (k1). */ +# ifdef USE_AS_WCSLEN + kunpckbw %k0, %k1, %k0 + kmovd %k0, %eax + tzcntl %eax, %eax +# else + kunpckdq %k0, %k1, %k0 + kmovq %k0, %rax + tzcntq %rax, %rax +# endif + leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax + ret + + +# ifdef USE_AS_STRNLEN +L(last_vec_x1_check): + tzcntl %eax, %eax + /* Check the end of data. */ + cmpl %eax, %esi + jb L(max) + subq %rdx, %rdi +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarq $2, %rdi +# endif + leaq (CHAR_PER_VEC)(%rdi, %rax), %rax + ret + + .p2align 4 +L(last_4x_vec): + /* Test first 2x VEC normally. */ + testl %eax, %eax + jnz L(last_vec_x1) + + VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(last_vec_x2) + + /* Normalize length. */ + andl $(CHAR_PER_VEC * 4 - 1), %esi + VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMZERO, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(last_vec_x3) + + /* Check the end of data.
*/ + subl $(CHAR_PER_VEC * 3), %esi + jb L(max) + + VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMZERO, %k0 + kmovd %k0, %eax + tzcntl %eax, %eax + /* Check the end of data. */ + cmpl %eax, %esi + jb L(max_end) + + subq %rdx, %rdi +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarq $2, %rdi +# endif + leaq (CHAR_PER_VEC * 4)(%rdi, %rax), %rax + ret + + .p2align 4 +L(last_vec_x1): + tzcntl %eax, %eax + subq %rdx, %rdi +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarq $2, %rdi +# endif + leaq (CHAR_PER_VEC)(%rdi, %rax), %rax + ret + + .p2align 4 +L(last_vec_x2): + tzcntl %eax, %eax + subq %rdx, %rdi +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarq $2, %rdi +# endif + leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax + ret + + .p2align 4 +L(last_vec_x3): + tzcntl %eax, %eax + subl $(CHAR_PER_VEC * 2), %esi + /* Check the end of data. */ + cmpl %eax, %esi + jb L(max_end) + subq %rdx, %rdi +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarq $2, %rdi +# endif + leaq (CHAR_PER_VEC * 3)(%rdi, %rax), %rax + ret +L(max_end): + movq %r8, %rax + ret +# endif + + /* Cold case for crossing page with first load. */ + .p2align 4 +L(cross_page_boundary): + movq %rdi, %rdx + /* Align data to VEC_SIZE. */ + andq $-VEC_SIZE, %rdi + VPCMP $0, (%rdi), %YMMZERO, %k0 + kmovd %k0, %eax + /* Remove the leading bytes. */ +# ifdef USE_AS_WCSLEN + /* NB: Divide shift count by 4 since each bit in K0 represent 4 + bytes. */ + movl %edx, %ecx + shrl $2, %ecx + andl $(CHAR_PER_VEC - 1), %ecx +# endif + /* SHIFT_REG is ecx for USE_AS_WCSLEN and edx otherwise. */ + sarxl %SHIFT_REG, %eax, %eax + testl %eax, %eax +# ifndef USE_AS_STRNLEN + jz L(cross_page_continue) + tzcntl %eax, %eax + ret +# else + jnz L(cross_page_less_vec) +# ifndef USE_AS_WCSLEN + movl %edx, %ecx + andl $(CHAR_PER_VEC - 1), %ecx +# endif + movl $CHAR_PER_VEC, %eax + subl %ecx, %eax + /* Check the end of data. */ + cmpq %rax, %rsi + ja L(cross_page_continue) + movl %esi, %eax + ret +L(cross_page_less_vec): + tzcntl %eax, %eax + /* Select min of length and position of first null. */ + cmpq %rax, %rsi + cmovb %esi, %eax + ret +# endif + +END (STRLEN) +#endif diff --git a/sysdeps/x86_64/multiarch/strlen-sse2.S b/sysdeps/x86_64/multiarch/strlen-sse2.S index 055fbbc69..812af73c1 100644 --- a/sysdeps/x86_64/multiarch/strlen-sse2.S +++ b/sysdeps/x86_64/multiarch/strlen-sse2.S @@ -20,4 +20,4 @@ # define strlen __strlen_sse2 #endif -#include "../strlen.S" +#include "strlen-vec.S" diff --git a/sysdeps/x86_64/multiarch/strlen-vec.S b/sysdeps/x86_64/multiarch/strlen-vec.S new file mode 100644 index 000000000..439e486a4 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strlen-vec.S @@ -0,0 +1,270 @@ +/* SSE2 version of strlen and SSE4.1 version of wcslen. + Copyright (C) 2012-2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#ifdef AS_WCSLEN +# define PMINU pminud +# define PCMPEQ pcmpeqd +# define SHIFT_RETURN shrq $2, %rax +#else +# define PMINU pminub +# define PCMPEQ pcmpeqb +# define SHIFT_RETURN +#endif + +/* Long lived register in strlen(s), strnlen(s, n) are: + + %xmm3 - zero + %rdi - s + %r10 (s+n) & (~(64-1)) + %r11 s+n +*/ + + +.text +ENTRY(strlen) + +/* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx. */ +#define FIND_ZERO \ + PCMPEQ (%rax), %xmm0; \ + PCMPEQ 16(%rax), %xmm1; \ + PCMPEQ 32(%rax), %xmm2; \ + PCMPEQ 48(%rax), %xmm3; \ + pmovmskb %xmm0, %esi; \ + pmovmskb %xmm1, %edx; \ + pmovmskb %xmm2, %r8d; \ + pmovmskb %xmm3, %ecx; \ + salq $16, %rdx; \ + salq $16, %rcx; \ + orq %rsi, %rdx; \ + orq %r8, %rcx; \ + salq $32, %rcx; \ + orq %rcx, %rdx; + +#ifdef AS_STRNLEN +/* Do not read anything when n==0. */ + test %RSI_LP, %RSI_LP + jne L(n_nonzero) + xor %rax, %rax + ret +L(n_nonzero): +# ifdef AS_WCSLEN +/* Check for overflow from maxlen * sizeof(wchar_t). If it would + overflow the only way this program doesn't have undefined behavior + is if there is a null terminator in valid memory so wcslen will + suffice. */ + mov %RSI_LP, %R10_LP + sar $62, %R10_LP + test %R10_LP, %R10_LP + jnz __wcslen_sse4_1 + sal $2, %RSI_LP +# endif + + +/* Initialize long lived registers. */ + + add %RDI_LP, %RSI_LP +# ifdef AS_WCSLEN +/* Check for overflow again from s + maxlen * sizeof(wchar_t). */ + jbe __wcslen_sse4_1 +# endif + mov %RSI_LP, %R10_LP + and $-64, %R10_LP + mov %RSI_LP, %R11_LP +#endif + + pxor %xmm0, %xmm0 + pxor %xmm1, %xmm1 + pxor %xmm2, %xmm2 + pxor %xmm3, %xmm3 + movq %rdi, %rax + movq %rdi, %rcx + andq $4095, %rcx +/* Offsets 4032-4047 will be aligned into 4032 thus fit into page. */ + cmpq $4047, %rcx +/* We cannot unify this branching as it would be ~6 cycles slower. */ + ja L(cross_page) + +#ifdef AS_STRNLEN +/* Test if end is among first 64 bytes. */ +# define STRNLEN_PROLOG \ + mov %r11, %rsi; \ + subq %rax, %rsi; \ + andq $-64, %rax; \ + testq $-64, %rsi; \ + je L(strnlen_ret) +#else +# define STRNLEN_PROLOG andq $-64, %rax; +#endif + +/* Ignore bits in mask that come before start of string. */ +#define PROLOG(lab) \ + movq %rdi, %rcx; \ + xorq %rax, %rcx; \ + STRNLEN_PROLOG; \ + sarq %cl, %rdx; \ + test %rdx, %rdx; \ + je L(lab); \ + bsfq %rdx, %rax; \ + SHIFT_RETURN; \ + ret + +#ifdef AS_STRNLEN + andq $-16, %rax + FIND_ZERO +#else + /* Test first 16 bytes unaligned. */ + movdqu (%rax), %xmm4 + PCMPEQ %xmm0, %xmm4 + pmovmskb %xmm4, %edx + test %edx, %edx + je L(next48_bytes) + bsf %edx, %eax /* If eax is zeroed 16bit bsf can be used. */ + SHIFT_RETURN + ret + +L(next48_bytes): +/* Same as FIND_ZERO except we do not check first 16 bytes. */ + andq $-16, %rax + PCMPEQ 16(%rax), %xmm1 + PCMPEQ 32(%rax), %xmm2 + PCMPEQ 48(%rax), %xmm3 + pmovmskb %xmm1, %edx + pmovmskb %xmm2, %r8d + pmovmskb %xmm3, %ecx + salq $16, %rdx + salq $16, %rcx + orq %r8, %rcx + salq $32, %rcx + orq %rcx, %rdx +#endif + + /* When no zero byte is found xmm1-3 are zero so we do not have to + zero them. */ + PROLOG(loop) + + .p2align 4 +L(cross_page): + andq $-64, %rax + FIND_ZERO + PROLOG(loop_init) + +#ifdef AS_STRNLEN +/* We must do this check to correctly handle strnlen (s, -1). 
*/ +L(strnlen_ret): + bts %rsi, %rdx + sarq %cl, %rdx + test %rdx, %rdx + je L(loop_init) + bsfq %rdx, %rax + SHIFT_RETURN + ret +#endif + .p2align 4 +L(loop_init): + pxor %xmm1, %xmm1 + pxor %xmm2, %xmm2 + pxor %xmm3, %xmm3 +#ifdef AS_STRNLEN + .p2align 4 +L(loop): + + addq $64, %rax + cmpq %rax, %r10 + je L(exit_end) + + movdqa (%rax), %xmm0 + PMINU 16(%rax), %xmm0 + PMINU 32(%rax), %xmm0 + PMINU 48(%rax), %xmm0 + PCMPEQ %xmm3, %xmm0 + pmovmskb %xmm0, %edx + testl %edx, %edx + jne L(exit) + jmp L(loop) + + .p2align 4 +L(exit_end): + cmp %rax, %r11 + je L(first) /* Do not read when end is at page boundary. */ + pxor %xmm0, %xmm0 + FIND_ZERO + +L(first): + bts %r11, %rdx + bsfq %rdx, %rdx + addq %rdx, %rax + subq %rdi, %rax + SHIFT_RETURN + ret + + .p2align 4 +L(exit): + pxor %xmm0, %xmm0 + FIND_ZERO + + bsfq %rdx, %rdx + addq %rdx, %rax + subq %rdi, %rax + SHIFT_RETURN + ret + +#else + + /* Main loop. Unrolled twice to improve L2 cache performance on core2. */ + .p2align 4 +L(loop): + + movdqa 64(%rax), %xmm0 + PMINU 80(%rax), %xmm0 + PMINU 96(%rax), %xmm0 + PMINU 112(%rax), %xmm0 + PCMPEQ %xmm3, %xmm0 + pmovmskb %xmm0, %edx + testl %edx, %edx + jne L(exit64) + + subq $-128, %rax + + movdqa (%rax), %xmm0 + PMINU 16(%rax), %xmm0 + PMINU 32(%rax), %xmm0 + PMINU 48(%rax), %xmm0 + PCMPEQ %xmm3, %xmm0 + pmovmskb %xmm0, %edx + testl %edx, %edx + jne L(exit0) + jmp L(loop) + + .p2align 4 +L(exit64): + addq $64, %rax +L(exit0): + pxor %xmm0, %xmm0 + FIND_ZERO + + bsfq %rdx, %rdx + addq %rdx, %rax + subq %rdi, %rax + SHIFT_RETURN + ret + +#endif + +END(strlen) diff --git a/sysdeps/x86_64/multiarch/strncat-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncat-avx2-rtm.S new file mode 100644 index 000000000..0dcea18db --- /dev/null +++ b/sysdeps/x86_64/multiarch/strncat-avx2-rtm.S @@ -0,0 +1,3 @@ +#define USE_AS_STRNCAT +#define STRCAT __strncat_avx2_rtm +#include "strcat-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/strncat-evex.S b/sysdeps/x86_64/multiarch/strncat-evex.S new file mode 100644 index 000000000..8884f0237 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strncat-evex.S @@ -0,0 +1,3 @@ +#define USE_AS_STRNCAT +#define STRCAT __strncat_evex +#include "strcat-evex.S" diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S new file mode 100644 index 000000000..68bad365b --- /dev/null +++ b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S @@ -0,0 +1,4 @@ +#define STRCMP __strncmp_avx2_rtm +#define USE_AS_STRNCMP 1 +#define OVERFLOW_STRCMP __strcmp_avx2_rtm +#include "strcmp-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2.S b/sysdeps/x86_64/multiarch/strncmp-avx2.S index 1678bcc23..f138e9f1f 100644 --- a/sysdeps/x86_64/multiarch/strncmp-avx2.S +++ b/sysdeps/x86_64/multiarch/strncmp-avx2.S @@ -1,3 +1,4 @@ #define STRCMP __strncmp_avx2 #define USE_AS_STRNCMP 1 +#define OVERFLOW_STRCMP __strcmp_avx2 #include "strcmp-avx2.S" diff --git a/sysdeps/x86_64/multiarch/strncmp-evex.S b/sysdeps/x86_64/multiarch/strncmp-evex.S new file mode 100644 index 000000000..a1d53e8c9 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strncmp-evex.S @@ -0,0 +1,3 @@ +#define STRCMP __strncmp_evex +#define USE_AS_STRNCMP 1 +#include "strcmp-evex.S" diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c index 6b63b0ac2..dee2a41b0 100644 --- a/sysdeps/x86_64/multiarch/strncmp.c +++ b/sysdeps/x86_64/multiarch/strncmp.c @@ -30,16 +30,29 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE 
(ssse3) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; static inline void * IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) - return OPTIMIZE (avx2); + { + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable) + && CPU_FEATURES_CPU_P (cpu_features, BMI2) + && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP)) + return OPTIMIZE (evex); + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + return OPTIMIZE (avx2_rtm); + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE (avx2); + } if (CPU_FEATURES_CPU_P (cpu_features, SSE4_2) && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2)) diff --git a/sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S new file mode 100644 index 000000000..79e708329 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S @@ -0,0 +1,3 @@ +#define USE_AS_STRNCPY +#define STRCPY __strncpy_avx2_rtm +#include "strcpy-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/strncpy-evex.S b/sysdeps/x86_64/multiarch/strncpy-evex.S new file mode 100644 index 000000000..40e391f0d --- /dev/null +++ b/sysdeps/x86_64/multiarch/strncpy-evex.S @@ -0,0 +1,3 @@ +#define USE_AS_STRNCPY +#define STRCPY __strncpy_evex +#include "strcpy-evex.S" diff --git a/sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S b/sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S new file mode 100644 index 000000000..04f1626a5 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S @@ -0,0 +1,4 @@ +#define STRLEN __strnlen_avx2_rtm +#define USE_AS_STRNLEN 1 + +#include "strlen-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/strnlen-evex.S b/sysdeps/x86_64/multiarch/strnlen-evex.S new file mode 100644 index 000000000..722022f30 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strnlen-evex.S @@ -0,0 +1,4 @@ +#define STRLEN __strnlen_evex +#define USE_AS_STRNLEN 1 + +#include "strlen-evex.S" diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S new file mode 100644 index 000000000..5def14ec1 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S @@ -0,0 +1,12 @@ +#ifndef STRRCHR +# define STRRCHR __strrchr_avx2_rtm +#endif + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + +#define SECTION(p) p##.avx.rtm + +#include "strrchr-avx2.S" diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2.S b/sysdeps/x86_64/multiarch/strrchr-avx2.S index 23077b4c4..bfb860ebb 100644 --- a/sysdeps/x86_64/multiarch/strrchr-avx2.S +++ b/sysdeps/x86_64/multiarch/strrchr-avx2.S @@ -36,9 +36,13 @@ # define VZEROUPPER vzeroupper # endif +# ifndef SECTION +# define SECTION(p) p##.avx +# endif + # define VEC_SIZE 32 - .section .text.avx,"ax",@progbits + .section SECTION(.text),"ax",@progbits ENTRY (STRRCHR) movd %esi, %xmm4 movl %edi, %ecx @@ -166,8 +170,8 @@ L(return_value): # endif bsrl %eax, %eax leaq -VEC_SIZE(%rdi, %rax), %rax - VZEROUPPER - ret 
+L(return_vzeroupper): + ZERO_UPPER_VEC_REGISTERS_RETURN .p2align 4 L(match): @@ -198,8 +202,7 @@ L(find_nul): jz L(return_value) bsrl %eax, %eax leaq -VEC_SIZE(%rdi, %rax), %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(char_and_nul): @@ -222,14 +225,12 @@ L(char_and_nul_in_first_vec): jz L(return_null) bsrl %eax, %eax leaq -VEC_SIZE(%rdi, %rax), %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(return_null): xorl %eax, %eax - VZEROUPPER - ret + VZEROUPPER_RETURN END (STRRCHR) #endif diff --git a/sysdeps/x86_64/multiarch/strrchr-evex.S b/sysdeps/x86_64/multiarch/strrchr-evex.S new file mode 100644 index 000000000..f920b5a58 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strrchr-evex.S @@ -0,0 +1,265 @@ +/* strrchr/wcsrchr optimized with 256-bit EVEX instructions. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +# include + +# ifndef STRRCHR +# define STRRCHR __strrchr_evex +# endif + +# define VMOVU vmovdqu64 +# define VMOVA vmovdqa64 + +# ifdef USE_AS_WCSRCHR +# define VPBROADCAST vpbroadcastd +# define VPCMP vpcmpd +# define SHIFT_REG r8d +# else +# define VPBROADCAST vpbroadcastb +# define VPCMP vpcmpb +# define SHIFT_REG ecx +# endif + +# define XMMZERO xmm16 +# define YMMZERO ymm16 +# define YMMMATCH ymm17 +# define YMM1 ymm18 + +# define VEC_SIZE 32 + + .section .text.evex,"ax",@progbits +ENTRY (STRRCHR) + movl %edi, %ecx + /* Broadcast CHAR to YMMMATCH. */ + VPBROADCAST %esi, %YMMMATCH + + vpxorq %XMMZERO, %XMMZERO, %XMMZERO + + /* Check if we may cross page boundary with one vector load. */ + andl $(2 * VEC_SIZE - 1), %ecx + cmpl $VEC_SIZE, %ecx + ja L(cross_page_boundary) + + VMOVU (%rdi), %YMM1 + + /* Each bit in K0 represents a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM1, %k0 + /* Each bit in K1 represents a CHAR in YMM1. */ + VPCMP $0, %YMMMATCH, %YMM1, %k1 + kmovd %k0, %ecx + kmovd %k1, %eax + + addq $VEC_SIZE, %rdi + + testl %eax, %eax + jnz L(first_vec) + + testl %ecx, %ecx + jnz L(return_null) + + andq $-VEC_SIZE, %rdi + xorl %edx, %edx + jmp L(aligned_loop) + + .p2align 4 +L(first_vec): + /* Check if there is a null byte. */ + testl %ecx, %ecx + jnz L(char_and_nul_in_first_vec) + + /* Remember the match and keep searching. */ + movl %eax, %edx + movq %rdi, %rsi + andq $-VEC_SIZE, %rdi + jmp L(aligned_loop) + + .p2align 4 +L(cross_page_boundary): + andl $(VEC_SIZE - 1), %ecx + andq $-VEC_SIZE, %rdi + +# ifdef USE_AS_WCSRCHR + /* NB: Divide shift count by 4 since each bit in K1 represents 4 + bytes. */ + movl %ecx, %SHIFT_REG + sarl $2, %SHIFT_REG +# endif + + VMOVA (%rdi), %YMM1 + + /* Each bit in K0 represents a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM1, %k0 + /* Each bit in K1 represents a CHAR in YMM1.
*/ + VPCMP $0, %YMMMATCH, %YMM1, %k1 + kmovd %k0, %edx + kmovd %k1, %eax + + shrxl %SHIFT_REG, %edx, %edx + shrxl %SHIFT_REG, %eax, %eax + addq $VEC_SIZE, %rdi + + /* Check if there is a CHAR. */ + testl %eax, %eax + jnz L(found_char) + + testl %edx, %edx + jnz L(return_null) + + jmp L(aligned_loop) + + .p2align 4 +L(found_char): + testl %edx, %edx + jnz L(char_and_nul) + + /* Remember the match and keep searching. */ + movl %eax, %edx + leaq (%rdi, %rcx), %rsi + + .p2align 4 +L(aligned_loop): + VMOVA (%rdi), %YMM1 + addq $VEC_SIZE, %rdi + + /* Each bit in K0 represents a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM1, %k0 + /* Each bit in K1 represents a CHAR in YMM1. */ + VPCMP $0, %YMMMATCH, %YMM1, %k1 + kmovd %k0, %ecx + kmovd %k1, %eax + orl %eax, %ecx + jnz L(char_nor_null) + + VMOVA (%rdi), %YMM1 + add $VEC_SIZE, %rdi + + /* Each bit in K0 represents a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM1, %k0 + /* Each bit in K1 represents a CHAR in YMM1. */ + VPCMP $0, %YMMMATCH, %YMM1, %k1 + kmovd %k0, %ecx + kmovd %k1, %eax + orl %eax, %ecx + jnz L(char_nor_null) + + VMOVA (%rdi), %YMM1 + addq $VEC_SIZE, %rdi + + /* Each bit in K0 represents a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM1, %k0 + /* Each bit in K1 represents a CHAR in YMM1. */ + VPCMP $0, %YMMMATCH, %YMM1, %k1 + kmovd %k0, %ecx + kmovd %k1, %eax + orl %eax, %ecx + jnz L(char_nor_null) + + VMOVA (%rdi), %YMM1 + addq $VEC_SIZE, %rdi + + /* Each bit in K0 represents a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM1, %k0 + /* Each bit in K1 represents a CHAR in YMM1. */ + VPCMP $0, %YMMMATCH, %YMM1, %k1 + kmovd %k0, %ecx + kmovd %k1, %eax + orl %eax, %ecx + jz L(aligned_loop) + + .p2align 4 +L(char_nor_null): + /* Find a CHAR or a null byte in a loop. */ + testl %eax, %eax + jnz L(match) +L(return_value): + testl %edx, %edx + jz L(return_null) + movl %edx, %eax + movq %rsi, %rdi + bsrl %eax, %eax +# ifdef USE_AS_WCSRCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + leaq -VEC_SIZE(%rdi, %rax, 4), %rax +# else + leaq -VEC_SIZE(%rdi, %rax), %rax +# endif + ret + + .p2align 4 +L(match): + /* Find a CHAR. Check if there is a null byte. */ + kmovd %k0, %ecx + testl %ecx, %ecx + jnz L(find_nul) + + /* Remember the match and keep searching. */ + movl %eax, %edx + movq %rdi, %rsi + jmp L(aligned_loop) + + .p2align 4 +L(find_nul): + /* Mask out any matching bits after the null byte. */ + movl %ecx, %r8d + subl $1, %r8d + xorl %ecx, %r8d + andl %r8d, %eax + testl %eax, %eax + /* If there is no CHAR here, return the remembered one. */ + jz L(return_value) + bsrl %eax, %eax +# ifdef USE_AS_WCSRCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + leaq -VEC_SIZE(%rdi, %rax, 4), %rax +# else + leaq -VEC_SIZE(%rdi, %rax), %rax +# endif + ret + + .p2align 4 +L(char_and_nul): + /* Find both a CHAR and a null byte. */ + addq %rcx, %rdi + movl %edx, %ecx +L(char_and_nul_in_first_vec): + /* Mask out any matching bits after the null byte. */ + movl %ecx, %r8d + subl $1, %r8d + xorl %ecx, %r8d + andl %r8d, %eax + testl %eax, %eax + /* Return null pointer if the null byte comes first. */ + jz L(return_null) + bsrl %eax, %eax +# ifdef USE_AS_WCSRCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. 
*/ + leaq -VEC_SIZE(%rdi, %rax, 4), %rax +# else + leaq -VEC_SIZE(%rdi, %rax), %rax +# endif + ret + + .p2align 4 +L(return_null): + xorl %eax, %eax + ret + +END (STRRCHR) +#endif diff --git a/sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S new file mode 100644 index 000000000..d49dbbf0b --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S @@ -0,0 +1,3 @@ +#define STRCHR __wcschr_avx2_rtm +#define USE_AS_WCSCHR 1 +#include "strchr-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/wcschr-evex.S b/sysdeps/x86_64/multiarch/wcschr-evex.S new file mode 100644 index 000000000..7cb8f1e41 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcschr-evex.S @@ -0,0 +1,3 @@ +#define STRCHR __wcschr_evex +#define USE_AS_WCSCHR 1 +#include "strchr-evex.S" diff --git a/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S new file mode 100644 index 000000000..d6ca2b806 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S @@ -0,0 +1,4 @@ +#define STRCMP __wcscmp_avx2_rtm +#define USE_AS_WCSCMP 1 + +#include "strcmp-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/wcscmp-evex.S b/sysdeps/x86_64/multiarch/wcscmp-evex.S new file mode 100644 index 000000000..42e73e51e --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcscmp-evex.S @@ -0,0 +1,4 @@ +#define STRCMP __wcscmp_evex +#define USE_AS_WCSCMP 1 + +#include "strcmp-evex.S" diff --git a/sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S new file mode 100644 index 000000000..35658d736 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S @@ -0,0 +1,4 @@ +#define STRLEN __wcslen_avx2_rtm +#define USE_AS_WCSLEN 1 + +#include "strlen-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/wcslen-evex.S b/sysdeps/x86_64/multiarch/wcslen-evex.S new file mode 100644 index 000000000..bdafa83bd --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcslen-evex.S @@ -0,0 +1,4 @@ +#define STRLEN __wcslen_evex +#define USE_AS_WCSLEN 1 + +#include "strlen-evex.S" diff --git a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S new file mode 100644 index 000000000..7e62621af --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S @@ -0,0 +1,4 @@ +#define AS_WCSLEN +#define strlen __wcslen_sse4_1 + +#include "strlen-vec.S" diff --git a/sysdeps/x86_64/multiarch/wcslen.c b/sysdeps/x86_64/multiarch/wcslen.c index bb97438c7..26b5fdffd 100644 --- a/sysdeps/x86_64/multiarch/wcslen.c +++ b/sysdeps/x86_64/multiarch/wcslen.c @@ -24,7 +24,7 @@ # undef __wcslen # define SYMBOL_NAME wcslen -# include "ifunc-avx2.h" +# include "ifunc-wcslen.h" libc_ifunc_redirected (__redirect_wcslen, __wcslen, IFUNC_SELECTOR ()); weak_alias (__wcslen, wcslen); diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S new file mode 100644 index 000000000..f467582cb --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S @@ -0,0 +1,5 @@ +#define STRCMP __wcsncmp_avx2_rtm +#define USE_AS_STRNCMP 1 +#define USE_AS_WCSCMP 1 +#define OVERFLOW_STRCMP __wcscmp_avx2_rtm +#include "strcmp-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S index 4fa1de4d3..e9ede522b 100644 --- a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S +++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S @@ -1,5 +1,5 @@ #define STRCMP __wcsncmp_avx2 #define USE_AS_STRNCMP 1 #define USE_AS_WCSCMP 1 - +#define OVERFLOW_STRCMP __wcscmp_avx2 #include "strcmp-avx2.S" diff --git 
a/sysdeps/x86_64/multiarch/wcsncmp-evex.S b/sysdeps/x86_64/multiarch/wcsncmp-evex.S new file mode 100644 index 000000000..8a8e31071 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcsncmp-evex.S @@ -0,0 +1,5 @@ +#define STRCMP __wcsncmp_evex +#define USE_AS_STRNCMP 1 +#define USE_AS_WCSCMP 1 + +#include "strcmp-evex.S" diff --git a/sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S new file mode 100644 index 000000000..7437ebee2 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S @@ -0,0 +1,5 @@ +#define STRLEN __wcsnlen_avx2_rtm +#define USE_AS_WCSLEN 1 +#define USE_AS_STRNLEN 1 + +#include "strlen-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/wcsnlen-evex.S b/sysdeps/x86_64/multiarch/wcsnlen-evex.S new file mode 100644 index 000000000..24773bb4e --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcsnlen-evex.S @@ -0,0 +1,5 @@ +#define STRLEN __wcsnlen_evex +#define USE_AS_WCSLEN 1 +#define USE_AS_STRNLEN 1 + +#include "strlen-evex.S" diff --git a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S index a8cab0cb0..5fa51fe07 100644 --- a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S +++ b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S @@ -2,4 +2,4 @@ #define AS_STRNLEN #define strlen __wcsnlen_sse4_1 -#include "../strlen.S" +#include "strlen-vec.S" diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c index 8c1fc1a57..f15c1b328 100644 --- a/sysdeps/x86_64/multiarch/wcsnlen.c +++ b/sysdeps/x86_64/multiarch/wcsnlen.c @@ -24,27 +24,7 @@ # undef __wcsnlen # define SYMBOL_NAME wcsnlen -# include - -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; -extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; - -static inline void * -IFUNC_SELECTOR (void) -{ - const struct cpu_features* cpu_features = __get_cpu_features (); - - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) - return OPTIMIZE (avx2); - - if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1)) - return OPTIMIZE (sse4_1); - - return OPTIMIZE (sse2); -} +# include "ifunc-wcslen.h" libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ()); weak_alias (__wcsnlen, wcsnlen); diff --git a/sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S new file mode 100644 index 000000000..9bf760833 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S @@ -0,0 +1,3 @@ +#define STRRCHR __wcsrchr_avx2_rtm +#define USE_AS_WCSRCHR 1 +#include "strrchr-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/wcsrchr-evex.S b/sysdeps/x86_64/multiarch/wcsrchr-evex.S new file mode 100644 index 000000000..c64602f7d --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcsrchr-evex.S @@ -0,0 +1,3 @@ +#define STRRCHR __wcsrchr_evex +#define USE_AS_WCSRCHR 1 +#include "strrchr-evex.S" diff --git a/sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S new file mode 100644 index 000000000..58ed21db0 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S @@ -0,0 +1,4 @@ +#define MEMCHR __wmemchr_avx2_rtm +#define USE_AS_WMEMCHR 1 + +#include "memchr-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/wmemchr-evex.S b/sysdeps/x86_64/multiarch/wmemchr-evex.S new file mode 100644 index 000000000..06cd0f9f5 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wmemchr-evex.S @@ 
-0,0 +1,4 @@ +#define MEMCHR __wmemchr_evex +#define USE_AS_WMEMCHR 1 + +#include "memchr-evex.S" diff --git a/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S b/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S new file mode 100644 index 000000000..31104d121 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S @@ -0,0 +1,4 @@ +#define MEMCMP __wmemcmp_avx2_movbe_rtm +#define USE_AS_WMEMCMP 1 + +#include "memcmp-avx2-movbe-rtm.S" diff --git a/sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S new file mode 100644 index 000000000..4726d74aa --- /dev/null +++ b/sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S @@ -0,0 +1,4 @@ +#define MEMCMP __wmemcmp_evex_movbe +#define USE_AS_WMEMCMP 1 + +#include "memcmp-evex-movbe.S" diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S index 2e226d0d5..8422c15cc 100644 --- a/sysdeps/x86_64/strlen.S +++ b/sysdeps/x86_64/strlen.S @@ -1,5 +1,5 @@ -/* SSE2 version of strlen/wcslen. - Copyright (C) 2012-2020 Free Software Foundation, Inc. +/* SSE2 version of strlen. + Copyright (C) 2021 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,243 +16,6 @@ License along with the GNU C Library; if not, see . */ -#include +#include "multiarch/strlen-vec.S" -#ifdef AS_WCSLEN -# define PMINU pminud -# define PCMPEQ pcmpeqd -# define SHIFT_RETURN shrq $2, %rax -#else -# define PMINU pminub -# define PCMPEQ pcmpeqb -# define SHIFT_RETURN -#endif - -/* Long lived register in strlen(s), strnlen(s, n) are: - - %xmm3 - zero - %rdi - s - %r10 (s+n) & (~(64-1)) - %r11 s+n -*/ - - -.text -ENTRY(strlen) - -/* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx. */ -#define FIND_ZERO \ - PCMPEQ (%rax), %xmm0; \ - PCMPEQ 16(%rax), %xmm1; \ - PCMPEQ 32(%rax), %xmm2; \ - PCMPEQ 48(%rax), %xmm3; \ - pmovmskb %xmm0, %esi; \ - pmovmskb %xmm1, %edx; \ - pmovmskb %xmm2, %r8d; \ - pmovmskb %xmm3, %ecx; \ - salq $16, %rdx; \ - salq $16, %rcx; \ - orq %rsi, %rdx; \ - orq %r8, %rcx; \ - salq $32, %rcx; \ - orq %rcx, %rdx; - -#ifdef AS_STRNLEN -/* Do not read anything when n==0. */ - test %RSI_LP, %RSI_LP - jne L(n_nonzero) - xor %rax, %rax - ret -L(n_nonzero): -# ifdef AS_WCSLEN - shl $2, %RSI_LP -# endif - -/* Initialize long lived registers. */ - - add %RDI_LP, %RSI_LP - mov %RSI_LP, %R10_LP - and $-64, %R10_LP - mov %RSI_LP, %R11_LP -#endif - - pxor %xmm0, %xmm0 - pxor %xmm1, %xmm1 - pxor %xmm2, %xmm2 - pxor %xmm3, %xmm3 - movq %rdi, %rax - movq %rdi, %rcx - andq $4095, %rcx -/* Offsets 4032-4047 will be aligned into 4032 thus fit into page. */ - cmpq $4047, %rcx -/* We cannot unify this branching as it would be ~6 cycles slower. */ - ja L(cross_page) - -#ifdef AS_STRNLEN -/* Test if end is among first 64 bytes. */ -# define STRNLEN_PROLOG \ - mov %r11, %rsi; \ - subq %rax, %rsi; \ - andq $-64, %rax; \ - testq $-64, %rsi; \ - je L(strnlen_ret) -#else -# define STRNLEN_PROLOG andq $-64, %rax; -#endif - -/* Ignore bits in mask that come before start of string. */ -#define PROLOG(lab) \ - movq %rdi, %rcx; \ - xorq %rax, %rcx; \ - STRNLEN_PROLOG; \ - sarq %cl, %rdx; \ - test %rdx, %rdx; \ - je L(lab); \ - bsfq %rdx, %rax; \ - SHIFT_RETURN; \ - ret - -#ifdef AS_STRNLEN - andq $-16, %rax - FIND_ZERO -#else - /* Test first 16 bytes unaligned. */ - movdqu (%rax), %xmm4 - PCMPEQ %xmm0, %xmm4 - pmovmskb %xmm4, %edx - test %edx, %edx - je L(next48_bytes) - bsf %edx, %eax /* If eax is zeroed 16bit bsf can be used. 
diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S
index 2e226d0d5..8422c15cc 100644
--- a/sysdeps/x86_64/strlen.S
+++ b/sysdeps/x86_64/strlen.S
@@ -1,5 +1,5 @@
-/* SSE2 version of strlen/wcslen.
-   Copyright (C) 2012-2020 Free Software Foundation, Inc.
+/* SSE2 version of strlen.
+   Copyright (C) 2021 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,243 +16,6 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <sysdep.h>
+#include "multiarch/strlen-vec.S"
 
-#ifdef AS_WCSLEN
-# define PMINU		pminud
-# define PCMPEQ		pcmpeqd
-# define SHIFT_RETURN	shrq $2, %rax
-#else
-# define PMINU		pminub
-# define PCMPEQ		pcmpeqb
-# define SHIFT_RETURN
-#endif
-
-/* Long lived register in strlen(s), strnlen(s, n) are:
-
-	%xmm3 - zero
-	%rdi   - s
-	%r10  (s+n) & (~(64-1))
-	%r11   s+n
-*/
-
-
-.text
-ENTRY(strlen)
-
-/* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx.  */
-#define FIND_ZERO	\
-	PCMPEQ	(%rax), %xmm0;	\
-	PCMPEQ	16(%rax), %xmm1;	\
-	PCMPEQ	32(%rax), %xmm2;	\
-	PCMPEQ	48(%rax), %xmm3;	\
-	pmovmskb	%xmm0, %esi;	\
-	pmovmskb	%xmm1, %edx;	\
-	pmovmskb	%xmm2, %r8d;	\
-	pmovmskb	%xmm3, %ecx;	\
-	salq	$16, %rdx;	\
-	salq	$16, %rcx;	\
-	orq	%rsi, %rdx;	\
-	orq	%r8, %rcx;	\
-	salq	$32, %rcx;	\
-	orq	%rcx, %rdx;
-
-#ifdef AS_STRNLEN
-/* Do not read anything when n==0.  */
-	test	%RSI_LP, %RSI_LP
-	jne	L(n_nonzero)
-	xor	%rax, %rax
-	ret
-L(n_nonzero):
-# ifdef AS_WCSLEN
-	shl	$2, %RSI_LP
-# endif
-
-/* Initialize long lived registers.  */
-
-	add	%RDI_LP, %RSI_LP
-	mov	%RSI_LP, %R10_LP
-	and	$-64, %R10_LP
-	mov	%RSI_LP, %R11_LP
-#endif
-
-	pxor	%xmm0, %xmm0
-	pxor	%xmm1, %xmm1
-	pxor	%xmm2, %xmm2
-	pxor	%xmm3, %xmm3
-	movq	%rdi, %rax
-	movq	%rdi, %rcx
-	andq	$4095, %rcx
-/* Offsets 4032-4047 will be aligned into 4032 thus fit into page.  */
-	cmpq	$4047, %rcx
-/* We cannot unify this branching as it would be ~6 cycles slower.  */
-	ja	L(cross_page)
-
-#ifdef AS_STRNLEN
-/* Test if end is among first 64 bytes.  */
-# define STRNLEN_PROLOG	\
-	mov	%r11, %rsi;	\
-	subq	%rax, %rsi;	\
-	andq	$-64, %rax;	\
-	testq	$-64, %rsi;	\
-	je	L(strnlen_ret)
-#else
-# define STRNLEN_PROLOG  andq $-64, %rax;
-#endif
-
-/* Ignore bits in mask that come before start of string.  */
-#define PROLOG(lab)	\
-	movq	%rdi, %rcx;	\
-	xorq	%rax, %rcx;	\
-	STRNLEN_PROLOG;	\
-	sarq	%cl, %rdx;	\
-	test	%rdx, %rdx;	\
-	je	L(lab);	\
-	bsfq	%rdx, %rax;	\
-	SHIFT_RETURN;	\
-	ret
-
-#ifdef AS_STRNLEN
-	andq	$-16, %rax
-	FIND_ZERO
-#else
-	/* Test first 16 bytes unaligned.  */
-	movdqu	(%rax), %xmm4
-	PCMPEQ	%xmm0, %xmm4
-	pmovmskb	%xmm4, %edx
-	test	%edx, %edx
-	je	L(next48_bytes)
-	bsf	%edx, %eax /* If eax is zeroed 16bit bsf can be used.  */
-	SHIFT_RETURN
-	ret
-
-L(next48_bytes):
-/* Same as FIND_ZERO except we do not check first 16 bytes.  */
-	andq	$-16, %rax
-	PCMPEQ 16(%rax), %xmm1
-	PCMPEQ 32(%rax), %xmm2
-	PCMPEQ 48(%rax), %xmm3
-	pmovmskb	%xmm1, %edx
-	pmovmskb	%xmm2, %r8d
-	pmovmskb	%xmm3, %ecx
-	salq	$16, %rdx
-	salq	$16, %rcx
-	orq	%r8, %rcx
-	salq	$32, %rcx
-	orq	%rcx, %rdx
-#endif
-
-	/* When no zero byte is found xmm1-3 are zero so we do not have to
-	   zero them.  */
-	PROLOG(loop)
-
-	.p2align 4
-L(cross_page):
-	andq	$-64, %rax
-	FIND_ZERO
-	PROLOG(loop_init)
-
-#ifdef AS_STRNLEN
-/* We must do this check to correctly handle strnlen (s, -1).  */
-L(strnlen_ret):
-	bts	%rsi, %rdx
-	sarq	%cl, %rdx
-	test	%rdx, %rdx
-	je	L(loop_init)
-	bsfq	%rdx, %rax
-	SHIFT_RETURN
-	ret
-#endif
-	.p2align 4
-L(loop_init):
-	pxor	%xmm1, %xmm1
-	pxor	%xmm2, %xmm2
-	pxor	%xmm3, %xmm3
-#ifdef AS_STRNLEN
-	.p2align 4
-L(loop):
-
-	addq	$64, %rax
-	cmpq	%rax, %r10
-	je	L(exit_end)
-
-	movdqa	(%rax), %xmm0
-	PMINU	16(%rax), %xmm0
-	PMINU	32(%rax), %xmm0
-	PMINU	48(%rax), %xmm0
-	PCMPEQ	%xmm3, %xmm0
-	pmovmskb	%xmm0, %edx
-	testl	%edx, %edx
-	jne	L(exit)
-	jmp	L(loop)
-
-	.p2align 4
-L(exit_end):
-	cmp	%rax, %r11
-	je	L(first) /* Do not read when end is at page boundary.  */
-	pxor	%xmm0, %xmm0
-	FIND_ZERO
-
-L(first):
-	bts	%r11, %rdx
-	bsfq	%rdx, %rdx
-	addq	%rdx, %rax
-	subq	%rdi, %rax
-	SHIFT_RETURN
-	ret
-
-	.p2align 4
-L(exit):
-	pxor	%xmm0, %xmm0
-	FIND_ZERO
-
-	bsfq	%rdx, %rdx
-	addq	%rdx, %rax
-	subq	%rdi, %rax
-	SHIFT_RETURN
-	ret
-
-#else
-
-	/* Main loop.  Unrolled twice to improve L2 cache performance on core2.  */
-	.p2align 4
-L(loop):
-
-	movdqa	64(%rax), %xmm0
-	PMINU	80(%rax), %xmm0
-	PMINU	96(%rax), %xmm0
-	PMINU	112(%rax), %xmm0
-	PCMPEQ	%xmm3, %xmm0
-	pmovmskb	%xmm0, %edx
-	testl	%edx, %edx
-	jne	L(exit64)
-
-	subq	$-128, %rax
-
-	movdqa	(%rax), %xmm0
-	PMINU	16(%rax), %xmm0
-	PMINU	32(%rax), %xmm0
-	PMINU	48(%rax), %xmm0
-	PCMPEQ	%xmm3, %xmm0
-	pmovmskb	%xmm0, %edx
-	testl	%edx, %edx
-	jne	L(exit0)
-	jmp	L(loop)
-
-	.p2align 4
-L(exit64):
-	addq	$64, %rax
-L(exit0):
-	pxor	%xmm0, %xmm0
-	FIND_ZERO
-
-	bsfq	%rdx, %rdx
-	addq	%rdx, %rax
-	subq	%rdi, %rax
-	SHIFT_RETURN
-	ret
-
-#endif
-
-END(strlen)
 libc_hidden_builtin_def (strlen)
diff --git a/sysdeps/x86_64/sysdep.h b/sysdeps/x86_64/sysdep.h
index 0b73674f6..c8ad778fe 100644
--- a/sysdeps/x86_64/sysdep.h
+++ b/sysdeps/x86_64/sysdep.h
@@ -95,6 +95,28 @@ lose:							      \
 #define R14_LP	r14
 #define R15_LP	r15
 
+/* Zero upper vector registers and return with xtest.  NB: Use VZEROALL
+   to avoid RTM abort triggered by VZEROUPPER inside transactionally.  */
+#define ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST \
+	xtest;							\
+	jz	1f;						\
+	vzeroall;						\
+	ret;							\
+1:								\
+	vzeroupper;						\
+	ret
+
+/* Zero upper vector registers and return.  */
+#ifndef ZERO_UPPER_VEC_REGISTERS_RETURN
+# define ZERO_UPPER_VEC_REGISTERS_RETURN \
+	VZEROUPPER;						\
+	ret
+#endif
+
+#ifndef VZEROUPPER_RETURN
+# define VZEROUPPER_RETURN	VZEROUPPER; ret
+#endif
+
 #else	/* __ASSEMBLER__ */
 
 /* Long and pointer size in bytes.  */
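[Editor's note -- illustration, not part of the patch. The sysdep.h hunk above adds the return macros used by the new *-rtm string variants: VZEROUPPER executed inside an RTM transaction aborts the transaction, while VZEROALL does not, so the macro probes with XTEST and picks the safe instruction at run time. A C rendering of the same dispatch, using the corresponding intrinsics rather than the assembly macro itself, might look like this (assumes GCC or Clang with -mavx -mrtm on x86-64):

#include <immintrin.h>

static inline void
zero_upper_vec_registers (void)
{
  /* _xtest () reports whether we are currently executing inside an
     RTM/HLE transaction; outside one it returns 0.  */
  if (_xtest ())
    /* In a transaction: VZEROUPPER would abort it; VZEROALL is
       tolerated, at the cost of clearing ymm0-ymm15 entirely, which
       is why the assembly macro reserves it for the RTM path.  */
    _mm256_zeroall ();
  else
    /* Outside a transaction: clearing only the upper 128 bits is
       cheaper and still avoids the AVX-to-SSE transition penalty.  */
    _mm256_zeroupper ();
}

int
main (void)
{
  zero_upper_vec_registers ();  /* outside a transaction: VZEROUPPER path */
  return 0;
}
]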
diff --git a/sysdeps/x86_64/tst-rsi-strlen.c b/sysdeps/x86_64/tst-rsi-strlen.c
new file mode 100644
index 000000000..a80c4f85c
--- /dev/null
+++ b/sysdeps/x86_64/tst-rsi-strlen.c
@@ -0,0 +1,81 @@
+/* Test strlen with 0 in the RSI register.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifdef WIDE
+# define TEST_NAME "wcslen"
+#else
+# define TEST_NAME "strlen"
+#endif /* WIDE */
+
+#define TEST_MAIN
+#include <string/test-string.h>
+
+#ifdef WIDE
+# include <wchar.h>
+# define STRLEN wcslen
+# define CHAR wchar_t
+#else
+# define STRLEN strlen
+# define CHAR char
+#endif /* WIDE */
+
+IMPL (STRLEN, 1)
+
+typedef size_t (*proto_t) (const CHAR *);
+
+typedef struct
+{
+  void (*fn) (void);
+} parameter_t;
+
+size_t
+__attribute__ ((weak, noinline, noclone))
+do_strlen (parameter_t *a, int zero, const CHAR *str)
+{
+  return CALL (a, str);
+}
+
+static int
+test_main (void)
+{
+  test_init ();
+
+  size_t size = page_size / sizeof (CHAR) - 1;
+  CHAR *buf = (CHAR *) buf2;
+  buf[size] = 0;
+
+  parameter_t a;
+
+  int ret = 0;
+  FOR_EACH_IMPL (impl, 0)
+    {
+      a.fn = impl->fn;
+      /* NB: Pass 0 in RSI.  */
+      size_t res = do_strlen (&a, 0, buf);
+      if (res != size)
+	{
+	  error (0, 0, "Wrong result in function %s: %zu != %zu",
+		 impl->name, res, size);
+	  ret = 1;
+	}
+    }
+
+  return ret ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/x86_64/tst-rsi-wcslen.c b/sysdeps/x86_64/tst-rsi-wcslen.c
new file mode 100644
index 000000000..f45a7dfb5
--- /dev/null
+++ b/sysdeps/x86_64/tst-rsi-wcslen.c
@@ -0,0 +1,20 @@
+/* Test wcslen with 0 in the RSI register.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define WIDE 1
+#include "tst-rsi-strlen.c"
-- 
2.30.2
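[Editor's note -- illustration, not part of the patch. tst-rsi-strlen.c above guards against strlen/wcslen implementations that share code with strnlen and wrongly consult RSI (the second argument register) as a length. Outside the glibc test harness the same trick can be reproduced as in the sketch below; like the original, it relies on the x86-64 SysV ABI and on the compiler having no reason to touch RSI between the argument setup and the call, so it is a heuristic check, not a guarantee. It builds with GCC on x86-64 GNU/Linux.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* A volatile function pointer keeps the compiler from inlining or
   constant-folding the call, so it goes through the ifunc-resolved
   strlen, much as the harness's CALL macro does.  */
static size_t (*volatile strlen_ptr) (const char *) = strlen;

/* Per the SysV ABI, 'zero' arrives in ESI and 'str' in RDX; moving
   'str' into RDI for the call normally leaves RSI still holding 0,
   which is exactly the entry state the glibc test exercises.  */
size_t
__attribute__ ((weak, noinline, noclone))
do_strlen (void *unused, int zero, const char *str)
{
  (void) unused;
  (void) zero;
  return strlen_ptr (str);
}

int
main (void)
{
  static const char buf[] = "RSI should not matter";
  size_t expect = sizeof buf - 1;
  size_t res = do_strlen (NULL, 0, buf);
  if (res != expect)
    {
      fprintf (stderr, "wrong result: %zu != %zu\n", res, expect);
      return EXIT_FAILURE;
    }
  puts ("ok");
  return EXIT_SUCCESS;
}

A buggy implementation that treats RSI as a strnlen-style bound would return 0 here instead of the string length.]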