From 4c1bed32a1fd98776d3aab46356e0be4f0dccfc9 Mon Sep 17 00:00:00 2001 From: GNU Libc Maintainers Date: Tue, 27 May 2025 11:40:04 +0100 Subject: [PATCH] git-updates GIT update of https://sourceware.org/git/glibc.git/release/2.31/master from glibc-2.31 GIT update of https://sourceware.org/git/glibc.git/release/2.31/master from glibc-2.31 Gbp-Pq: Name git-updates.diff --- INSTALL | 11 +- Makeconfig | 25 +- Makerules | 4 +- NEWS | 100 ++ Rules | 19 +- configure | 2 +- configure.ac | 2 +- debug/Makefile | 2 + debug/backtrace.c | 5 + elf/Makefile | 4 +- elf/dl-tunables.c | 56 +- elf/ifuncmain1.c | 13 + elf/ifuncmain5.c | 9 + elf/ifuncmain6pie.c | 14 +- elf/ifuncmod6.c | 8 +- elf/rtld-Rules | 2 +- elf/tst-env-setuid-tunables.c | 118 +- elf/tst-env-setuid.c | 197 +--- iconv/Makefile | 17 +- iconv/Versions | 3 + iconv/gconv_charset.c | 228 ++++ iconv/gconv_charset.h | 34 +- iconv/gconv_int.h | 40 +- iconv/gconv_open.c | 64 +- iconv/gconv_simple.c | 16 +- iconv/iconv_open.c | 46 +- iconv/iconv_prog.c | 63 +- iconv/tst-iconv-opt.c | 347 ++++++ iconv/tst-iconv8.c | 50 + iconv/tst-iconv_prog.sh | 284 +++++ iconvdata/Makefile | 11 +- iconvdata/bug-iconv13.c | 53 + iconvdata/bug-iconv14.c | 127 ++ iconvdata/bug-iconv15.c | 60 + iconvdata/euc-kr.c | 6 +- iconvdata/ibm1364.c | 14 +- iconvdata/iso-2022-cn-ext.c | 12 + iconvdata/iso-2022-jp-3.c | 77 +- iconvdata/ksc5601.h | 6 +- iconvdata/tst-iconv-iso-2022-cn-ext.c | 128 ++ include/libc-symbols.h | 26 +- include/sys/prctl.h | 1 + include/sys/un.h | 12 + intl/dcigettext.c | 16 +- intl/tst-codeset.c | 34 +- localedata/locales/oc_FR | 6 +- malloc/Makefile | 20 + malloc/tst-mallocfork2.c | 39 +- math/Makefile | 122 ++ misc/Makefile | 2 +- misc/tst-syscalls.c | 167 +++ nptl/Makefile | 2 +- nptl/descr.h | 8 +- nptl/tst-setgroups.c | 79 ++ nscd/netgroupcache.c | 249 ++-- nscd/selinux.c | 15 + nss/makedb.c | 9 + nss/nss_compat/compat-grp.c | 15 +- nss/nss_compat/compat-initgroups.c | 13 +- nss/nss_compat/compat-pwd.c | 15 +- nss/nss_compat/compat-spwd.c | 14 +- posix/glob.c | 25 +- posix/wordexp-test.c | 1 + posix/wordexp.c | 2 +- socket/Makefile | 6 +- socket/sockaddr_un_set.c | 41 + socket/tst-sockaddr_un_set.c | 62 + stdio-common/Makefile | 13 +- stdio-common/bug22.c | 2 +- stdio-common/tst-grouping2.c | 39 + stdio-common/tst-printf-bz25691.c | 108 ++ stdio-common/tst-vfprintf-width-prec-alloc.c | 41 + stdio-common/vfprintf-internal.c | 447 +++---- stdlib/Makefile | 3 +- stdlib/tst-secure-getenv.c | 199 +--- stdlib/tst-system.c | 124 +- string/test-memchr.c | 39 +- string/test-strncat.c | 61 + string/test-strncmp.c | 13 + string/test-strnlen.c | 87 +- sunrpc/Makefile | 5 +- sunrpc/clnt_gen.c | 10 +- sunrpc/svc_unix.c | 11 +- sunrpc/tst-bug22542.c | 44 + sunrpc/tst-bug28768.c | 42 + support/Makefile | 2 + support/capture_subprocess.h | 6 + support/shell-container.c | 40 +- support/subprocess.h | 5 + support/support.h | 1 + support/support_capture_subprocess.c | 128 +- support/support_subprocess.c | 21 +- support/temp_file.c | 173 ++- support/temp_file.h | 16 + support/xchdir.c | 28 + support/xclone.c | 49 + support/xsched.h | 34 + support/xunistd.h | 1 + sysdeps/aarch64/dl-machine.h | 12 +- sysdeps/aarch64/memcpy.S | 332 +++--- sysdeps/aarch64/multiarch/memcpy.c | 2 +- sysdeps/aarch64/strcpy.S | 5 + sysdeps/aarch64/strnlen.S | 5 + sysdeps/aarch64/sysdep.h | 2 +- sysdeps/arm/armv7/multiarch/memcpy_impl.S | 22 +- sysdeps/arm/be/nofpu/Implies | 1 + sysdeps/arm/le/nofpu/Implies | 1 + sysdeps/arm/memcpy.S | 24 +- sysdeps/arm/memmove.S | 24 +- sysdeps/generic/unwind-arch.h | 30 + sysdeps/gnu/Makefile | 3 +- sysdeps/hppa/dl-fptr.c | 26 +- sysdeps/hppa/dl-machine.h | 36 +- sysdeps/hppa/dl-runtime.c | 58 + sysdeps/hppa/dl-trampoline.S | 74 +- sysdeps/i386/dl-machine.h | 16 +- sysdeps/i386/sysdep.h | 5 +- sysdeps/ieee754/ldbl-96/Makefile | 5 +- sysdeps/ieee754/ldbl-96/e_rem_pio2l.c | 12 + sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c | 41 + sysdeps/posix/getcwd.c | 8 + sysdeps/posix/system.c | 18 +- sysdeps/powerpc/powerpc32/sysdep.h | 4 +- sysdeps/powerpc/powerpc64/backtrace.c | 13 +- sysdeps/s390/configure | 8 +- sysdeps/s390/configure.ac | 8 +- sysdeps/s390/memmove.c | 2 +- sysdeps/s390/multiarch/ifunc-impl-list.c | 3 +- sysdeps/sh/be/sh4/fpu/Implies | 1 + sysdeps/sh/le/sh4/fpu/Implies | 1 + sysdeps/unix/make-syscalls.sh | 24 + sysdeps/unix/syscall-template.S | 49 +- sysdeps/unix/syscalls.list | 22 +- sysdeps/unix/sysv/linux/Makefile | 6 +- .../unix/sysv/linux/aarch64/arch-syscall.h | 1 + .../unix/sysv/linux/aarch64/cpu-features.h | 8 +- sysdeps/unix/sysv/linux/aarch64/localplt.data | 3 + sysdeps/unix/sysv/linux/getpt.c | 67 +- sysdeps/unix/sysv/linux/grantpt.c | 73 +- sysdeps/unix/sysv/linux/hppa/atomic-machine.h | 28 + sysdeps/unix/sysv/linux/microblaze/sysdep.h | 91 +- .../sysv/linux/mips/mips32/mips-syscall5.S | 6 +- .../sysv/linux/mips/mips32/mips-syscall6.S | 6 +- .../sysv/linux/mips/mips32/mips-syscall7.S | 7 +- .../linux/mips/mips32/mips16/mips16-syscall.h | 64 +- .../mips/mips32/mips16/mips16-syscall0.c | 4 +- .../mips/mips32/mips16/mips16-syscall1.c | 6 +- .../mips/mips32/mips16/mips16-syscall2.c | 6 +- .../mips/mips32/mips16/mips16-syscall3.c | 6 +- .../mips/mips32/mips16/mips16-syscall4.c | 6 +- sysdeps/unix/sysv/linux/mips/mips32/sysdep.h | 145 +-- .../unix/sysv/linux/mips/mips64/n32/sysdep.h | 121 +- .../unix/sysv/linux/mips/mips64/n64/sysdep.h | 119 +- sysdeps/unix/sysv/linux/mips/mips64/syscall.S | 2 +- sysdeps/unix/sysv/linux/mips/sysdep.h | 4 +- sysdeps/unix/sysv/linux/mips/unwind-arch.h | 67 ++ sysdeps/unix/sysv/linux/msgctl.c | 10 +- .../sysv/linux/powerpc/powerpc32/sysdep.h | 24 +- .../sysv/linux/powerpc/powerpc64/sysdep.h | 24 +- sysdeps/unix/sysv/linux/prctl.c | 42 + sysdeps/unix/sysv/linux/process_vm_readv.c | 32 + sysdeps/unix/sysv/linux/process_vm_writev.c | 32 + sysdeps/unix/sysv/linux/ptsname.c | 95 +- sysdeps/unix/sysv/linux/riscv/sysdep.h | 84 +- sysdeps/unix/sysv/linux/semctl.c | 10 +- sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies | 1 + sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies | 1 + sysdeps/unix/sysv/linux/shmctl.c | 10 +- sysdeps/unix/sysv/linux/sparc/Makefile | 8 +- .../unix/sysv/linux/sparc/sparc32/sigaction.c | 26 +- .../sysv/linux/sparc/sparc32/sigreturn_stub.S | 34 + .../unix/sysv/linux/sparc/sparc64/sigaction.c | 14 +- .../sysv/linux/sparc/sparc64/sigreturn_stub.S | 29 + sysdeps/unix/sysv/linux/syscall-names.list | 4 +- sysdeps/unix/sysv/linux/syscalls.list | 41 +- .../unix/sysv/linux/tst-getcwd-smallbuff.c | 259 ++++ sysdeps/unix/sysv/linux/x86_64/sysdep.h | 86 +- sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h | 35 + sysdeps/x86/Makefile | 36 + sysdeps/x86/cacheinfo.c | 92 +- sysdeps/x86/cpu-features.c | 60 +- sysdeps/x86/cpu-features.h | 5 + sysdeps/x86/cpu-tunables.c | 3 + sysdeps/x86/dl-cet.c | 6 +- sysdeps/x86/tst-get-cpu-features.c | 1 + sysdeps/x86/tst-memchr-rtm.c | 54 + sysdeps/x86/tst-memcmp-rtm.c | 52 + sysdeps/x86/tst-memmove-rtm.c | 53 + sysdeps/x86/tst-memrchr-rtm.c | 54 + sysdeps/x86/tst-memset-rtm.c | 45 + sysdeps/x86/tst-setjmp-cet.c | 1 + sysdeps/x86/tst-strchr-rtm.c | 54 + sysdeps/x86/tst-strcpy-rtm.c | 53 + sysdeps/x86/tst-string-rtm.h | 72 ++ sysdeps/x86/tst-strlen-rtm.c | 53 + sysdeps/x86/tst-strncmp-rtm.c | 81 ++ sysdeps/x86/tst-strrchr-rtm.c | 53 + .../tst-wcsncmp-rtm.c} | 11 +- sysdeps/x86_64/Makefile | 7 + sysdeps/x86_64/configure | 33 - sysdeps/x86_64/configure.ac | 25 - sysdeps/x86_64/dl-machine.h | 16 +- sysdeps/x86_64/ffsll.c | 10 +- sysdeps/x86_64/memchr.S | 77 +- sysdeps/x86_64/multiarch/Makefile | 60 +- sysdeps/x86_64/multiarch/ifunc-avx2.h | 18 +- sysdeps/x86_64/multiarch/ifunc-impl-list.c | 448 ++++++- sysdeps/x86_64/multiarch/ifunc-memcmp.h | 17 +- sysdeps/x86_64/multiarch/ifunc-memmove.h | 45 +- sysdeps/x86_64/multiarch/ifunc-memset.h | 49 +- sysdeps/x86_64/multiarch/ifunc-strcpy.h | 17 +- sysdeps/x86_64/multiarch/ifunc-wcslen.h | 52 + sysdeps/x86_64/multiarch/ifunc-wmemset.h | 22 +- sysdeps/x86_64/multiarch/memchr-avx2-rtm.S | 12 + sysdeps/x86_64/multiarch/memchr-avx2.S | 494 ++++---- sysdeps/x86_64/multiarch/memchr-evex.S | 478 ++++++++ .../x86_64/multiarch/memcmp-avx2-movbe-rtm.S | 12 + sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S | 28 +- sysdeps/x86_64/multiarch/memcmp-evex-movbe.S | 440 +++++++ .../memmove-avx-unaligned-erms-rtm.S | 17 + .../multiarch/memmove-avx512-unaligned-erms.S | 18 +- .../multiarch/memmove-evex-unaligned-erms.S | 26 + .../multiarch/memmove-vec-unaligned-erms.S | 84 +- sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S | 12 + sysdeps/x86_64/multiarch/memrchr-avx2.S | 53 +- sysdeps/x86_64/multiarch/memrchr-evex.S | 337 ++++++ .../memset-avx2-unaligned-erms-rtm.S | 10 + .../multiarch/memset-avx2-unaligned-erms.S | 12 +- .../multiarch/memset-avx512-unaligned-erms.S | 16 +- .../multiarch/memset-evex-unaligned-erms.S | 24 + .../multiarch/memset-vec-unaligned-erms.S | 61 +- sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S | 4 + sysdeps/x86_64/multiarch/rawmemchr-evex.S | 4 + sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S | 3 + sysdeps/x86_64/multiarch/stpcpy-evex.S | 3 + sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S | 4 + sysdeps/x86_64/multiarch/stpncpy-evex.S | 4 + sysdeps/x86_64/multiarch/strcat-avx2-rtm.S | 12 + sysdeps/x86_64/multiarch/strcat-avx2.S | 6 +- sysdeps/x86_64/multiarch/strcat-evex.S | 283 +++++ sysdeps/x86_64/multiarch/strchr-avx2-rtm.S | 12 + sysdeps/x86_64/multiarch/strchr-avx2.S | 22 +- sysdeps/x86_64/multiarch/strchr-evex.S | 335 ++++++ sysdeps/x86_64/multiarch/strchr.c | 18 +- sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S | 3 + sysdeps/x86_64/multiarch/strchrnul-evex.S | 3 + sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S | 12 + sysdeps/x86_64/multiarch/strcmp-avx2.S | 80 +- sysdeps/x86_64/multiarch/strcmp-evex.S | 1043 +++++++++++++++++ sysdeps/x86_64/multiarch/strcmp.c | 19 +- sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S | 12 + sysdeps/x86_64/multiarch/strcpy-avx2.S | 85 +- sysdeps/x86_64/multiarch/strcpy-evex.S | 1003 ++++++++++++++++ sysdeps/x86_64/multiarch/strlen-avx2-rtm.S | 12 + sysdeps/x86_64/multiarch/strlen-avx2.S | 610 ++++++---- sysdeps/x86_64/multiarch/strlen-evex.S | 489 ++++++++ sysdeps/x86_64/multiarch/strlen-sse2.S | 2 +- sysdeps/x86_64/multiarch/strlen-vec.S | 270 +++++ sysdeps/x86_64/multiarch/strncat-avx2-rtm.S | 3 + sysdeps/x86_64/multiarch/strncat-evex.S | 3 + sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S | 4 + sysdeps/x86_64/multiarch/strncmp-avx2.S | 1 + sysdeps/x86_64/multiarch/strncmp-evex.S | 3 + sysdeps/x86_64/multiarch/strncmp.c | 19 +- sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S | 3 + sysdeps/x86_64/multiarch/strncpy-evex.S | 3 + sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S | 4 + sysdeps/x86_64/multiarch/strnlen-evex.S | 4 + sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S | 12 + sysdeps/x86_64/multiarch/strrchr-avx2.S | 19 +- sysdeps/x86_64/multiarch/strrchr-evex.S | 265 +++++ sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S | 3 + sysdeps/x86_64/multiarch/wcschr-evex.S | 3 + sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S | 4 + sysdeps/x86_64/multiarch/wcscmp-evex.S | 4 + sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S | 4 + sysdeps/x86_64/multiarch/wcslen-evex.S | 4 + sysdeps/x86_64/multiarch/wcslen-sse4_1.S | 4 + sysdeps/x86_64/multiarch/wcslen.c | 2 +- sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S | 5 + sysdeps/x86_64/multiarch/wcsncmp-avx2.S | 2 +- sysdeps/x86_64/multiarch/wcsncmp-evex.S | 5 + sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S | 5 + sysdeps/x86_64/multiarch/wcsnlen-evex.S | 5 + sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S | 2 +- sysdeps/x86_64/multiarch/wcsnlen.c | 22 +- sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S | 3 + sysdeps/x86_64/multiarch/wcsrchr-evex.S | 3 + sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S | 4 + sysdeps/x86_64/multiarch/wmemchr-evex.S | 4 + .../x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S | 4 + sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S | 4 + sysdeps/x86_64/strlen.S | 243 +--- sysdeps/x86_64/sysdep.h | 22 + sysdeps/x86_64/tst-rsi-strlen.c | 81 ++ sysdeps/x86_64/tst-rsi-wcslen.c | 20 + 296 files changed, 13608 insertions(+), 3133 deletions(-) create mode 100644 iconv/gconv_charset.c create mode 100644 iconv/tst-iconv-opt.c create mode 100644 iconv/tst-iconv8.c create mode 100644 iconv/tst-iconv_prog.sh create mode 100644 iconvdata/bug-iconv13.c create mode 100644 iconvdata/bug-iconv14.c create mode 100644 iconvdata/bug-iconv15.c create mode 100644 iconvdata/tst-iconv-iso-2022-cn-ext.c create mode 100644 misc/tst-syscalls.c create mode 100644 nptl/tst-setgroups.c create mode 100644 socket/sockaddr_un_set.c create mode 100644 socket/tst-sockaddr_un_set.c create mode 100644 stdio-common/tst-grouping2.c create mode 100644 stdio-common/tst-printf-bz25691.c create mode 100644 stdio-common/tst-vfprintf-width-prec-alloc.c create mode 100644 sunrpc/tst-bug22542.c create mode 100644 sunrpc/tst-bug28768.c create mode 100644 support/xchdir.c create mode 100644 support/xclone.c create mode 100644 support/xsched.h create mode 100644 sysdeps/arm/be/nofpu/Implies create mode 100644 sysdeps/arm/le/nofpu/Implies create mode 100644 sysdeps/generic/unwind-arch.h create mode 100644 sysdeps/hppa/dl-runtime.c create mode 100644 sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c create mode 100644 sysdeps/sh/be/sh4/fpu/Implies create mode 100644 sysdeps/sh/le/sh4/fpu/Implies create mode 100644 sysdeps/unix/sysv/linux/mips/unwind-arch.h create mode 100644 sysdeps/unix/sysv/linux/prctl.c create mode 100644 sysdeps/unix/sysv/linux/process_vm_readv.c create mode 100644 sysdeps/unix/sysv/linux/process_vm_writev.c create mode 100644 sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies create mode 100644 sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies create mode 100644 sysdeps/unix/sysv/linux/sparc/sparc32/sigreturn_stub.S create mode 100644 sysdeps/unix/sysv/linux/sparc/sparc64/sigreturn_stub.S create mode 100644 sysdeps/unix/sysv/linux/tst-getcwd-smallbuff.c create mode 100644 sysdeps/x86/tst-memchr-rtm.c create mode 100644 sysdeps/x86/tst-memcmp-rtm.c create mode 100644 sysdeps/x86/tst-memmove-rtm.c create mode 100644 sysdeps/x86/tst-memrchr-rtm.c create mode 100644 sysdeps/x86/tst-memset-rtm.c create mode 100644 sysdeps/x86/tst-setjmp-cet.c create mode 100644 sysdeps/x86/tst-strchr-rtm.c create mode 100644 sysdeps/x86/tst-strcpy-rtm.c create mode 100644 sysdeps/x86/tst-string-rtm.h create mode 100644 sysdeps/x86/tst-strlen-rtm.c create mode 100644 sysdeps/x86/tst-strncmp-rtm.c create mode 100644 sysdeps/x86/tst-strrchr-rtm.c rename sysdeps/{unix/sysv/linux/nios2/kernel-features.h => x86/tst-wcsncmp-rtm.c} (74%) mode change 100644 => 100755 sysdeps/x86_64/configure create mode 100644 sysdeps/x86_64/multiarch/ifunc-wcslen.h create mode 100644 sysdeps/x86_64/multiarch/memchr-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/memchr-evex.S create mode 100644 sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S create mode 100644 sysdeps/x86_64/multiarch/memcmp-evex-movbe.S create mode 100644 sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S create mode 100644 sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S create mode 100644 sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/memrchr-evex.S create mode 100644 sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S create mode 100644 sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S create mode 100644 sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/rawmemchr-evex.S create mode 100644 sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/stpcpy-evex.S create mode 100644 sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/stpncpy-evex.S create mode 100644 sysdeps/x86_64/multiarch/strcat-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/strcat-evex.S create mode 100644 sysdeps/x86_64/multiarch/strchr-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/strchr-evex.S create mode 100644 sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/strchrnul-evex.S create mode 100644 sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/strcmp-evex.S create mode 100644 sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/strcpy-evex.S create mode 100644 sysdeps/x86_64/multiarch/strlen-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/strlen-evex.S create mode 100644 sysdeps/x86_64/multiarch/strlen-vec.S create mode 100644 sysdeps/x86_64/multiarch/strncat-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/strncat-evex.S create mode 100644 sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/strncmp-evex.S create mode 100644 sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/strncpy-evex.S create mode 100644 sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/strnlen-evex.S create mode 100644 sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/strrchr-evex.S create mode 100644 sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/wcschr-evex.S create mode 100644 sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/wcscmp-evex.S create mode 100644 sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/wcslen-evex.S create mode 100644 sysdeps/x86_64/multiarch/wcslen-sse4_1.S create mode 100644 sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/wcsncmp-evex.S create mode 100644 sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/wcsnlen-evex.S create mode 100644 sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/wcsrchr-evex.S create mode 100644 sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S create mode 100644 sysdeps/x86_64/multiarch/wmemchr-evex.S create mode 100644 sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S create mode 100644 sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S create mode 100644 sysdeps/x86_64/tst-rsi-strlen.c create mode 100644 sysdeps/x86_64/tst-rsi-wcslen.c diff --git a/INSTALL b/INSTALL index 242cb06f9..b487e1863 100644 --- a/INSTALL +++ b/INSTALL @@ -184,14 +184,9 @@ if 'CFLAGS' is specified it must enable optimization. For example: '--enable-pt_chown' The file 'pt_chown' is a helper binary for 'grantpt' (*note Pseudo-Terminals: Allocation.) that is installed setuid root to fix - up pseudo-terminal ownership. It is not built by default because - systems using the Linux kernel are commonly built with the 'devpts' - filesystem enabled and mounted at '/dev/pts', which manages - pseudo-terminal ownership automatically. By using - '--enable-pt_chown', you may build 'pt_chown' and install it setuid - and owned by 'root'. The use of 'pt_chown' introduces additional - security risks to the system and you should enable it only if you - understand and accept those risks. + up pseudo-terminal ownership on GNU/Hurd. It is not required on + GNU/Linux, and the GNU C Library will not use the installed + 'pt_chown' program when configured with '--enable-pt_chown'. '--disable-werror' By default, the GNU C Library is built with '-Werror'. If you wish diff --git a/Makeconfig b/Makeconfig index f25284297..3ce38059f 100644 --- a/Makeconfig +++ b/Makeconfig @@ -42,6 +42,22 @@ else objdir must be defined by the build-directory Makefile. endif +# Did we request 'make -s' run? "yes" or "no". +# Starting from make-4.4 MAKEFLAGS now contains long +# options like '--shuffle'. To detect presence of 's' +# we pick first word with short options. Long options +# are guaranteed to come after whitespace. We use '-' +# prefix to always have a word before long options +# even if no short options were passed. +# Typical MAKEFLAGS values to watch for: +# "rs --shuffle=42" (silent) +# " --shuffle" (not silent) +ifeq ($(findstring s, $(firstword -$(MAKEFLAGS))),) +silent-make := no +else +silent-make := yes +endif + # Root of the sysdeps tree. sysdep_dir := $(..)sysdeps export sysdep_dir := $(sysdep_dir) @@ -557,9 +573,12 @@ link-libc-rpath-link = -Wl,-rpath-link=$(rpath-link) # before the expansion of LDLIBS-* variables). # Tests use -Wl,-rpath instead of -Wl,-rpath-link for -# build-hardcoded-path-in-tests. +# build-hardcoded-path-in-tests. Add -Wl,--disable-new-dtags to force +# DT_RPATH instead of DT_RUNPATH which only applies to DT_NEEDED entries +# in the executable and doesn't applies to DT_NEEDED entries in shared +# libraries which are loaded via DT_NEEDED entries in the executable. ifeq (yes,$(build-hardcoded-path-in-tests)) -link-libc-tests-rpath-link = $(link-libc-rpath) +link-libc-tests-rpath-link = $(link-libc-rpath) -Wl,--disable-new-dtags else link-libc-tests-rpath-link = $(link-libc-rpath-link) endif # build-hardcoded-path-in-tests @@ -892,7 +911,7 @@ endif # umpteen zillion filenames along with it (we use `...' instead) # but we don't want this echoing done when the user has said # he doesn't want to see commands echoed by using -s. -ifneq "$(findstring s,$(MAKEFLAGS))" "" # if -s +ifeq ($(silent-make),yes) # if -s +cmdecho := echo >/dev/null else # not -s +cmdecho := echo diff --git a/Makerules b/Makerules index 1e9c18f0d..e07a42e20 100644 --- a/Makerules +++ b/Makerules @@ -805,7 +805,7 @@ endif # Maximize efficiency by minimizing the number of rules. .SUFFIXES: # Clear the suffix list. We don't use suffix rules. # Don't define any builtin rules. -MAKEFLAGS := $(MAKEFLAGS)r +MAKEFLAGS := $(MAKEFLAGS) -r # Generic rule for making directories. %/: @@ -822,7 +822,7 @@ MAKEFLAGS := $(MAKEFLAGS)r .PRECIOUS: $(foreach l,$(libtypes),$(patsubst %,$(common-objpfx)$l,c)) # Use the verbose option of ar and tar when not running silently. -ifeq "$(findstring s,$(MAKEFLAGS))" "" # if not -s +ifeq ($(silent-make),no) # if not -s verbose := v else # -s verbose := diff --git a/NEWS b/NEWS index 292fbc595..8a20d3c4e 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,94 @@ See the end for copying conditions. Please send GNU C library bug reports via using `glibc' in the "product" field. +Version 2.31.1 + +The following bugs are resolved with this release: + [14231] stdio-common tests memory requirements + [19519] iconv(1) with -c option hangs on illegal multi-byte sequences + (CVE-2016-10228) + [20019] NULL pointer dereference in libc.so.6 IFUNC due to uninitialized GOT + [20543] Please move from .gnu.linkonce to comdat + [22542] CVE-2022-23219: Buffer overflow in sunrpc clnt_create for "unix" + [23296] Data race in setting function descriptor during lazy binding + [24973] iconv encounters segmentation fault when converting 0x00 0xfe in + EUC-KR to UTF-8 (CVE-2019-25013) + [25487] sinl() stack corruption from crafted input (CVE-2020-10029) + [25523] MIPS/Linux inline syscall template is miscompiled + [25623] test-sysvmsg, test-sysvsem, test-sysvshm fail with 2.31 on 32 bit and + old kernel + [25635] arm: Wrong sysdep order selection for soft-fp + [25639] localedata: Some names of days and months wrongly spelt in + Occitan + [25691] stdio: Remove memory leak from multibyte convertion + [25715] system() returns wrong errors when posix_spawn fails + [25810] x32: Incorrect syscall entries with pointer, off_t and size_t + [25896] Incorrect prctl + [25902] Bad LOADARGS_N + [25933] Off by one error in __strncmp_avx2 + [25966] Incorrect access of __x86_shared_non_temporal_threshold for x32 + [25976] nss_compat: internal_end*ent may clobber errno, hiding ERANGE + [26211] printf integer overflow calculating allocation size + [26224] iconv hangs when converting some invalid inputs from several IBM + character sets (CVE-2020-27618) + [26248] Incorrect argument types for INLINE_SETXID_SYSCALL + [26332] Incorrect cache line size load causes memory corruption in memset + [26383] bind_textdomain_codeset doesn't accept //TRANSLIT anymore + [26923] Assertion failure in iconv when converting invalid UCS4 (CVE-2020-29562) + [26932] libc: sh: Multiple floating point functions defined as stubs only + [27130] "rep movsb" performance issue + [27177] GLIBC_TUNABLES=glibc.cpu.x86_ibt=on:glibc.cpu.x86_shstk=on doesn't work + [27457] vzeroupper use in AVX2 multiarch string functions cause HTM aborts + [27974] Overflow bug in some implementation of wcsnlen, wmemchr, and wcsncat + [28524] Conversion from ISO-2022-JP-3 with iconv may emit spurious NULs + [28755] overflow bug in wcsncmp_avx2 and wcsncmp_evex + [28768] CVE-2022-23218: Buffer overflow in sunrpc svcunix_create + [28769] CVE-2021-3999: Off-by-one buffer overflow/underflow in getcwd() + [28896] strncmp-avx2-rtm and wcsncmp-avx2-rtm fallback on non-rtm + variants when avoiding overflow + [29530] segfault in printf handling thousands separator + +Security related changes: + + CVE-2016-10228: An infinite loop has been fixed in the iconv program when + invoked with the -c option and when processing invalid multi-byte input + sequences. Reported by Jan Engelhardt. + + CVE-2020-10029: Trigonometric functions on x86 targets suffered from stack + corruption when they were passed a pseudo-zero argument. Reported by Guido + Vranken / ForAllSecure Mayhem. + + CVE-2020-1751: A defect in the PowerPC backtrace function could cause an + out-of-bounds write when executed in a signal frame context. + + CVE-2020-1752: A use-after-free vulnerability in the glob function when + expanding ~user has been fixed. + + CVE-2020-6096: A signed comparison vulnerability in the ARMv7 memcpy and + memmove functions has been fixed. Discovered by Jason Royes and Samual + Dytrych of the Cisco Security Assessment and Penetration Team (See + TALOS-2020-1019). + + CVE-2020-27618: An infinite loop has been fixed in the iconv program when + invoked with input containing redundant shift sequences in the IBM1364, + IBM1371, IBM1388, IBM1390, or IBM1399 character sets. + + CVE-2020-29562: An assertion failure has been fixed in the iconv function + when invoked with UCS4 input containing an invalid character. + + CVE-2021-3999: Passing a buffer of size exactly 1 byte to the getcwd + function may result in an off-by-one buffer underflow and overflow + when the current working directory is longer than PATH_MAX and also + corresponds to the / directory through an unprivileged mount + namespace. Reported by Qualys. + + CVE-2022-23219: Passing an overlong file name to the clnt_create + legacy function could result in a stack-based buffer overflow when + using the "unix" protocol. Reported by Martin Sebor. + + CVE-2022-23218: Passing an overlong file name to the svcunix_create + legacy function could result in a stack-based buffer overflow. + Version 2.31 Major new features: @@ -141,6 +229,18 @@ Changes to build and runtime requirements: source tree. ChangeLog files are located in the ChangeLog.old directory as ChangeLog.N where the highest N has the latest entries. +* On Linux, the system administrator needs to configure /dev/pts with + the intended access modes for pseudo-terminals. glibc no longer + attemps to adjust permissions of terminal devices. The previous glibc + defaults ("tty" group, user read/write and group write) already + corresponded to what most systems used, so that grantpt did not + perform any adjustments. + +* On Linux, the posix_openpt and getpt functions no longer attempt to + use legacy (BSD) pseudo-terminals and assume that if /dev/ptmx exists + (and pseudo-terminals are supported), a devpts file system is mounted + on /dev/pts. Current systems already meet these requirements. + Security related changes: CVE-2019-19126: ld.so failed to ignore the LD_PREFER_MAP_32BIT_EXEC diff --git a/Rules b/Rules index 8b771f609..beab969fd 100644 --- a/Rules +++ b/Rules @@ -155,6 +155,7 @@ xtests: tests $(xtests-special) else tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \ $(tests-container:%=$(objpfx)%.out) \ + $(tests-mcheck:%=$(objpfx)%-mcheck.out) \ $(tests-special) $(tests-printers-out) xtests: tests $(xtests:%=$(objpfx)%.out) $(xtests-special) endif @@ -165,7 +166,7 @@ ifeq ($(run-built-tests),no) tests-expected = else tests-expected = $(tests) $(tests-internal) $(tests-printers) \ - $(tests-container) + $(tests-container) $(tests-mcheck:%=%-mcheck) endif tests: $(..)scripts/merge-test-results.sh -s $(objpfx) $(subdir) \ @@ -191,6 +192,7 @@ else binaries-pie-tests = binaries-pie-notests = endif +binaries-mcheck-tests = $(tests-mcheck:%=%-mcheck) else binaries-all-notests = binaries-all-tests = $(tests) $(tests-internal) $(xtests) $(test-srcs) @@ -200,6 +202,7 @@ binaries-static-tests = binaries-static = binaries-pie-tests = binaries-pie-notests = +binaries-mcheck-tests = endif binaries-pie = $(binaries-pie-tests) $(binaries-pie-notests) @@ -223,6 +226,14 @@ $(addprefix $(objpfx),$(binaries-shared-tests)): %: %.o \ $(+link-tests) endif +ifneq "$(strip $(binaries-mcheck-tests))" "" +$(addprefix $(objpfx),$(binaries-mcheck-tests)): %-mcheck: %.o \ + $(link-extra-libs-tests) \ + $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \ + $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit) + $(+link-tests) +endif + ifneq "$(strip $(binaries-pie-tests))" "" $(addprefix $(objpfx),$(binaries-pie-tests)): %: %.o \ $(link-extra-libs-tests) \ @@ -253,6 +264,12 @@ $(addprefix $(objpfx),$(binaries-static-tests)): %: %.o \ $(+link-static-tests) endif +# All mcheck tests will be run with MALLOC_CHECK_=3 +define mcheck-ENVS +$(1)-mcheck-ENV = MALLOC_CHECK_=3 +endef +$(foreach t,$(tests-mcheck),$(eval $(call mcheck-ENVS,$(t)))) + ifneq "$(strip $(tests) $(tests-internal) $(xtests) $(test-srcs))" "" # These are the implicit rules for making test outputs # from the test programs and whatever input files are present. diff --git a/configure b/configure index b959d2d98..3b98ec312 100755 --- a/configure +++ b/configure @@ -4035,7 +4035,7 @@ if ${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS \ -o conftest conftest.S 1>&5 2>&5; then # Do a link to see if the backend supports IFUNC relocs. $READELF -r conftest 1>&5 - LC_ALL=C $READELF -r conftest | grep 'no relocations' >/dev/null || { + LC_ALL=C $READELF -Wr conftest | grep -q 'IRELATIVE\|R_SPARC_JMP_IREL' && { libc_cv_ld_gnu_indirect_function=yes } fi diff --git a/configure.ac b/configure.ac index 49b900c1e..e20034f30 100644 --- a/configure.ac +++ b/configure.ac @@ -649,7 +649,7 @@ if ${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS \ -o conftest conftest.S 1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD; then # Do a link to see if the backend supports IFUNC relocs. $READELF -r conftest 1>&AS_MESSAGE_LOG_FD - LC_ALL=C $READELF -r conftest | grep 'no relocations' >/dev/null || { + LC_ALL=C $READELF -Wr conftest | grep -q 'IRELATIVE\|R_SPARC_JMP_IREL' && { libc_cv_ld_gnu_indirect_function=yes } fi diff --git a/debug/Makefile b/debug/Makefile index c62b2154b..3a6b44223 100644 --- a/debug/Makefile +++ b/debug/Makefile @@ -168,6 +168,8 @@ extra-libs-others = $(extra-libs) libSegFault-routines = segfault libSegFault-inhibit-o = $(filter-out .os,$(object-suffixes)) +# libSegFault.so installs a signal handler in its ELF constructor. +LDFLAGS-SegFault.so = -Wl,--enable-new-dtags,-z,nodelete libpcprofile-routines = pcprofile libpcprofile-inhibit-o = $(filter-out .os,$(object-suffixes)) diff --git a/debug/backtrace.c b/debug/backtrace.c index cc4b9a5c9..69cf4c23c 100644 --- a/debug/backtrace.c +++ b/debug/backtrace.c @@ -23,6 +23,7 @@ #include #include #include +#include struct trace_arg { @@ -78,6 +79,10 @@ backtrace_helper (struct _Unwind_Context *ctx, void *a) if (arg->cnt != -1) { arg->array[arg->cnt] = (void *) unwind_getip (ctx); + if (arg->cnt > 0) + arg->array[arg->cnt] + = unwind_arch_adjustment (arg->array[arg->cnt - 1], + arg->array[arg->cnt]); /* Check whether we make any progress. */ _Unwind_Word cfa = unwind_getcfa (ctx); diff --git a/elf/Makefile b/elf/Makefile index 632a4d8b0..f9646f9c8 100644 --- a/elf/Makefile +++ b/elf/Makefile @@ -1348,6 +1348,8 @@ CFLAGS-ifuncmain7pie.c += $(pie-ccflag) CFLAGS-ifuncmain9pie.c += $(pie-ccflag) CFLAGS-tst-ifunc-textrel.c += $(pic-ccflag) +LDFLAGS-ifuncmain6pie = -Wl,-z,lazy + $(objpfx)ifuncmain1pie: $(objpfx)ifuncmod1.so $(objpfx)ifuncmain1staticpie: $(objpfx)ifuncdep1pic.o $(objpfx)ifuncmain1vispie: $(objpfx)ifuncmod1.so @@ -1581,8 +1583,6 @@ $(objpfx)tst-nodelete-dlclose.out: $(objpfx)tst-nodelete-dlclose-dso.so \ tst-env-setuid-ENV = MALLOC_CHECK_=2 MALLOC_MMAP_THRESHOLD_=4096 \ LD_HWCAP_MASK=0x1 -tst-env-setuid-tunables-ENV = \ - GLIBC_TUNABLES=glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096 $(objpfx)tst-debug1: $(libdl) $(objpfx)tst-debug1.out: $(objpfx)tst-debug1mod1.so diff --git a/elf/dl-tunables.c b/elf/dl-tunables.c index 44d06665b..2296ad387 100644 --- a/elf/dl-tunables.c +++ b/elf/dl-tunables.c @@ -177,6 +177,7 @@ parse_tunables (char *tunestr, char *valstring) return; char *p = tunestr; + size_t off = 0; while (true) { @@ -190,7 +191,11 @@ parse_tunables (char *tunestr, char *valstring) /* If we reach the end of the string before getting a valid name-value pair, bail out. */ if (p[len] == '\0') - return; + { + if (__libc_enable_secure) + tunestr[off] = '\0'; + return; + } /* We did not find a valid name-value pair before encountering the colon. */ @@ -216,35 +221,28 @@ parse_tunables (char *tunestr, char *valstring) if (tunable_is_name (cur->name, name)) { - /* If we are in a secure context (AT_SECURE) then ignore the tunable - unless it is explicitly marked as secure. Tunable values take - precendence over their envvar aliases. */ + /* If we are in a secure context (AT_SECURE) then ignore the + tunable unless it is explicitly marked as secure. Tunable + values take precedence over their envvar aliases. We write + the tunables that are not SXID_ERASE back to TUNESTR, thus + dropping all SXID_ERASE tunables and any invalid or + unrecognized tunables. */ if (__libc_enable_secure) { - if (cur->security_level == TUNABLE_SECLEVEL_SXID_ERASE) + if (cur->security_level != TUNABLE_SECLEVEL_SXID_ERASE) { - if (p[len] == '\0') - { - /* Last tunable in the valstring. Null-terminate and - return. */ - *name = '\0'; - return; - } - else - { - /* Remove the current tunable from the string. We do - this by overwriting the string starting from NAME - (which is where the current tunable begins) with - the remainder of the string. We then have P point - to NAME so that we continue in the correct - position in the valstring. */ - char *q = &p[len + 1]; - p = name; - while (*q != '\0') - *name++ = *q++; - name[0] = '\0'; - len = 0; - } + if (off > 0) + tunestr[off++] = ':'; + + const char *n = cur->name; + + while (*n != '\0') + tunestr[off++] = *n++; + + tunestr[off++] = '='; + + for (size_t j = 0; j < len; j++) + tunestr[off++] = value[j]; } if (cur->security_level != TUNABLE_SECLEVEL_NONE) @@ -257,9 +255,7 @@ parse_tunables (char *tunestr, char *valstring) } } - if (p[len] == '\0') - return; - else + if (p[len] != '\0') p += len + 1; } } diff --git a/elf/ifuncmain1.c b/elf/ifuncmain1.c index 747fc0264..6effce3d7 100644 --- a/elf/ifuncmain1.c +++ b/elf/ifuncmain1.c @@ -19,7 +19,14 @@ typedef int (*foo_p) (void); #endif foo_p foo_ptr = foo; + +/* Address-significant access to protected symbols is not supported in + position-dependent mode on several architectures because GCC + generates relocations that assume that the address is local to the + main program. */ +#ifdef __PIE__ foo_p foo_procted_ptr = foo_protected; +#endif extern foo_p get_foo_p (void); extern foo_p get_foo_hidden_p (void); @@ -37,12 +44,16 @@ main (void) if ((*foo_ptr) () != -1) abort (); +#ifdef __PIE__ if (foo_procted_ptr != foo_protected) abort (); +#endif if (foo_protected () != 0) abort (); +#ifdef __PIE__ if ((*foo_procted_ptr) () != 0) abort (); +#endif p = get_foo_p (); if (p != foo) @@ -55,8 +66,10 @@ main (void) abort (); p = get_foo_protected_p (); +#ifdef __PIE__ if (p != foo_protected) abort (); +#endif if (ret_foo_protected != 0 || (*p) () != ret_foo_protected) abort (); diff --git a/elf/ifuncmain5.c b/elf/ifuncmain5.c index f398085cb..6fda768fb 100644 --- a/elf/ifuncmain5.c +++ b/elf/ifuncmain5.c @@ -14,12 +14,19 @@ get_foo (void) return foo; } + +/* Address-significant access to protected symbols is not supported in + position-dependent mode on several architectures because GCC + generates relocations that assume that the address is local to the + main program. */ +#ifdef __PIE__ foo_p __attribute__ ((noinline)) get_foo_protected (void) { return foo_protected; } +#endif int main (void) @@ -30,9 +37,11 @@ main (void) if ((*p) () != -1) abort (); +#ifdef __PIE__ p = get_foo_protected (); if ((*p) () != 0) abort (); +#endif return 0; } diff --git a/elf/ifuncmain6pie.c b/elf/ifuncmain6pie.c index 04faeb86e..4a0190683 100644 --- a/elf/ifuncmain6pie.c +++ b/elf/ifuncmain6pie.c @@ -9,7 +9,6 @@ #include "ifunc-sel.h" typedef int (*foo_p) (void); -extern foo_p foo_ptr; static int one (void) @@ -28,20 +27,17 @@ foo_ifunc (void) } extern int foo (void); -extern foo_p get_foo (void); +extern int call_foo (void); extern foo_p get_foo_p (void); -foo_p my_foo_ptr = foo; +foo_p foo_ptr = foo; int main (void) { foo_p p; - p = get_foo (); - if (p != foo) - abort (); - if ((*p) () != -30) + if (call_foo () != -30) abort (); p = get_foo_p (); @@ -52,12 +48,8 @@ main (void) if (foo_ptr != foo) abort (); - if (my_foo_ptr != foo) - abort (); if ((*foo_ptr) () != -30) abort (); - if ((*my_foo_ptr) () != -30) - abort (); if (foo () != -30) abort (); diff --git a/elf/ifuncmod6.c b/elf/ifuncmod6.c index 2e16c1d06..2f6d0715e 100644 --- a/elf/ifuncmod6.c +++ b/elf/ifuncmod6.c @@ -4,7 +4,7 @@ extern int foo (void); typedef int (*foo_p) (void); -foo_p foo_ptr = foo; +extern foo_p foo_ptr; foo_p get_foo_p (void) @@ -12,8 +12,8 @@ get_foo_p (void) return foo_ptr; } -foo_p -get_foo (void) +int +call_foo (void) { - return foo; + return foo (); } diff --git a/elf/rtld-Rules b/elf/rtld-Rules index 7e0254cc4..3b73937d4 100644 --- a/elf/rtld-Rules +++ b/elf/rtld-Rules @@ -52,7 +52,7 @@ $(objpfx)rtld-libc.a: $(foreach dir,$(rtld-subdirs),\ mv -f $@T $@ # Use the verbose option of ar and tar when not running silently. -ifeq "$(findstring s,$(MAKEFLAGS))" "" # if not -s +ifeq ($(silent-make),no) # if not -s verbose := v else # -s verbose := diff --git a/elf/tst-env-setuid-tunables.c b/elf/tst-env-setuid-tunables.c index 971d5892b..ca0c8c245 100644 --- a/elf/tst-env-setuid-tunables.c +++ b/elf/tst-env-setuid-tunables.c @@ -25,35 +25,76 @@ #include "config.h" #undef _LIBC -#define test_parent test_parent_tunables -#define test_child test_child_tunables - -static int test_child_tunables (void); -static int test_parent_tunables (void); - -#include "tst-env-setuid.c" - -#define CHILD_VALSTRING_VALUE "glibc.malloc.mmap_threshold=4096" -#define PARENT_VALSTRING_VALUE \ - "glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +const char *teststrings[] = +{ + "glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096", + "glibc.malloc.check=2:glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096", + "glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096:glibc.malloc.check=2", + "glibc.malloc.perturb=0x800", + "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096", + "glibc.malloc.perturb=0x800:not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096", + "glibc.not_valid.check=2:glibc.malloc.mmap_threshold=4096", + "not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096", + "glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096:glibc.malloc.check=2", + "glibc.malloc.check=4:glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096", + ":glibc.malloc.garbage=2:glibc.malloc.check=1", + "glibc.malloc.check=1:glibc.malloc.check=2", + "not_valid.malloc.check=2", + "glibc.not_valid.check=2", +}; + +const char *resultstrings[] = +{ + "glibc.malloc.mmap_threshold=4096", + "glibc.malloc.mmap_threshold=4096", + "glibc.malloc.mmap_threshold=4096", + "glibc.malloc.perturb=0x800", + "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096", + "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096", + "glibc.malloc.mmap_threshold=4096", + "glibc.malloc.mmap_threshold=4096", + "", + "", + "", + "", + "", + "", +}; static int -test_child_tunables (void) +test_child (int off) { const char *val = getenv ("GLIBC_TUNABLES"); #if HAVE_TUNABLES - if (val != NULL && strcmp (val, CHILD_VALSTRING_VALUE) == 0) + if (val != NULL && strcmp (val, resultstrings[off]) == 0) return 0; if (val != NULL) - printf ("Unexpected GLIBC_TUNABLES VALUE %s\n", val); + printf ("[%d] Unexpected GLIBC_TUNABLES VALUE %s\n", off, val); return 1; #else if (val != NULL) { - printf ("GLIBC_TUNABLES not cleared\n"); + printf ("[%d] GLIBC_TUNABLES not cleared\n", off); return 1; } return 0; @@ -61,15 +102,48 @@ test_child_tunables (void) } static int -test_parent_tunables (void) +do_test (int argc, char **argv) { - const char *val = getenv ("GLIBC_TUNABLES"); + /* Setgid child process. */ + if (argc == 2) + { + if (getgid () == getegid ()) + /* This can happen if the file system is mounted nosuid. */ + FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n", + (intmax_t) getgid ()); - if (val != NULL && strcmp (val, PARENT_VALSTRING_VALUE) == 0) - return 0; + int ret = test_child (atoi (argv[1])); - if (val != NULL) - printf ("Unexpected GLIBC_TUNABLES VALUE %s\n", val); + if (ret != 0) + exit (1); - return 1; + exit (EXIT_SUCCESS); + } + else + { + int ret = 0; + + /* Spawn tests. */ + for (int i = 0; i < array_length (teststrings); i++) + { + char buf[INT_BUFSIZE_BOUND (int)]; + + printf ("Spawned test for %s (%d)\n", teststrings[i], i); + snprintf (buf, sizeof (buf), "%d\n", i); + if (setenv ("GLIBC_TUNABLES", teststrings[i], 1) != 0) + exit (1); + + int status = support_capture_subprogram_self_sgid (buf); + + /* Bail out early if unsupported. */ + if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) + return EXIT_UNSUPPORTED; + + ret |= status; + } + return ret; + } } + +#define TEST_FUNCTION_ARGV do_test +#include diff --git a/elf/tst-env-setuid.c b/elf/tst-env-setuid.c index 41dc79e83..2dbccdb69 100644 --- a/elf/tst-env-setuid.c +++ b/elf/tst-env-setuid.c @@ -29,173 +29,12 @@ #include #include +#include #include #include +#include static char SETGID_CHILD[] = "setgid-child"; -#define CHILD_STATUS 42 - -/* Return a GID which is not our current GID, but is present in the - supplementary group list. */ -static gid_t -choose_gid (void) -{ - const int count = 64; - gid_t groups[count]; - int ret = getgroups (count, groups); - if (ret < 0) - { - printf ("getgroups: %m\n"); - exit (1); - } - gid_t current = getgid (); - for (int i = 0; i < ret; ++i) - { - if (groups[i] != current) - return groups[i]; - } - return 0; -} - -/* Spawn and execute a program and verify that it returns the CHILD_STATUS. */ -static pid_t -do_execve (char **args) -{ - pid_t kid = vfork (); - - if (kid < 0) - { - printf ("vfork: %m\n"); - return -1; - } - - if (kid == 0) - { - /* Child process. */ - execve (args[0], args, environ); - _exit (-errno); - } - - if (kid < 0) - return 1; - - int status; - - if (waitpid (kid, &status, 0) < 0) - { - printf ("waitpid: %m\n"); - return 1; - } - - if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) - return EXIT_UNSUPPORTED; - - if (!WIFEXITED (status) || WEXITSTATUS (status) != CHILD_STATUS) - { - printf ("Unexpected exit status %d from child process\n", - WEXITSTATUS (status)); - return 1; - } - return 0; -} - -/* Copies the executable into a restricted directory, so that we can - safely make it SGID with the TARGET group ID. Then runs the - executable. */ -static int -run_executable_sgid (gid_t target) -{ - char *dirname = xasprintf ("%s/tst-tunables-setuid.%jd", - test_dir, (intmax_t) getpid ()); - char *execname = xasprintf ("%s/bin", dirname); - int infd = -1; - int outfd = -1; - int ret = 0; - if (mkdir (dirname, 0700) < 0) - { - printf ("mkdir: %m\n"); - goto err; - } - infd = open ("/proc/self/exe", O_RDONLY); - if (infd < 0) - { - printf ("open (/proc/self/exe): %m\n"); - goto err; - } - outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700); - if (outfd < 0) - { - printf ("open (%s): %m\n", execname); - goto err; - } - char buf[4096]; - for (;;) - { - ssize_t rdcount = read (infd, buf, sizeof (buf)); - if (rdcount < 0) - { - printf ("read: %m\n"); - goto err; - } - if (rdcount == 0) - break; - char *p = buf; - char *end = buf + rdcount; - while (p != end) - { - ssize_t wrcount = write (outfd, buf, end - p); - if (wrcount == 0) - errno = ENOSPC; - if (wrcount <= 0) - { - printf ("write: %m\n"); - goto err; - } - p += wrcount; - } - } - if (fchown (outfd, getuid (), target) < 0) - { - printf ("fchown (%s): %m\n", execname); - goto err; - } - if (fchmod (outfd, 02750) < 0) - { - printf ("fchmod (%s): %m\n", execname); - goto err; - } - if (close (outfd) < 0) - { - printf ("close (outfd): %m\n"); - goto err; - } - if (close (infd) < 0) - { - printf ("close (infd): %m\n"); - goto err; - } - - char *args[] = {execname, SETGID_CHILD, NULL}; - - ret = do_execve (args); - -err: - if (outfd >= 0) - close (outfd); - if (infd >= 0) - close (infd); - if (execname) - { - unlink (execname); - free (execname); - } - if (dirname) - { - rmdir (dirname); - free (dirname); - } - return ret; -} #ifndef test_child static int @@ -256,40 +95,32 @@ do_test (int argc, char **argv) if (argc == 2 && strcmp (argv[1], SETGID_CHILD) == 0) { if (getgid () == getegid ()) - { - /* This can happen if the file system is mounted nosuid. */ - fprintf (stderr, "SGID failed: GID and EGID match (%jd)\n", - (intmax_t) getgid ()); - exit (EXIT_UNSUPPORTED); - } + /* This can happen if the file system is mounted nosuid. */ + FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n", + (intmax_t) getgid ()); int ret = test_child (); if (ret != 0) exit (1); - exit (CHILD_STATUS); + exit (EXIT_SUCCESS); } else { if (test_parent () != 0) exit (1); - /* Try running a setgid program. */ - gid_t target = choose_gid (); - if (target == 0) - { - fprintf (stderr, - "Could not find a suitable GID for user %jd, skipping test\n", - (intmax_t) getuid ()); - exit (0); - } + int status = support_capture_subprogram_self_sgid (SETGID_CHILD); - return run_executable_sgid (target); - } + if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) + return EXIT_UNSUPPORTED; + + if (!WIFEXITED (status)) + FAIL_EXIT1 ("Unexpected exit status %d from child process\n", status); - /* Something went wrong and our argv was corrupted. */ - _exit (1); + return 0; + } } #define TEST_FUNCTION_ARGV do_test diff --git a/iconv/Makefile b/iconv/Makefile index b8fe8c47d..f9b51e23e 100644 --- a/iconv/Makefile +++ b/iconv/Makefile @@ -26,7 +26,7 @@ headers = iconv.h gconv.h routines = iconv_open iconv iconv_close \ gconv_open gconv gconv_close gconv_db gconv_conf \ gconv_builtin gconv_simple gconv_trans gconv_cache -routines += gconv_dl +routines += gconv_dl gconv_charset vpath %.c ../locale/programs ../intl @@ -44,7 +44,7 @@ CFLAGS-linereader.c += -DNO_TRANSLITERATION CFLAGS-simple-hash.c += -I../locale tests = tst-iconv1 tst-iconv2 tst-iconv3 tst-iconv4 tst-iconv5 tst-iconv6 \ - tst-iconv7 tst-iconv-mt + tst-iconv7 tst-iconv8 tst-iconv-mt tst-iconv-opt others = iconv_prog iconvconfig install-others-programs = $(inst_bindir)/iconv @@ -61,6 +61,7 @@ include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left)) ifeq ($(run-built-tests),yes) xtests-special += $(objpfx)test-iconvconfig.out +tests-special += $(objpfx)tst-iconv_prog.out endif # Make a copy of the file because gconv module names are constructed @@ -81,6 +82,13 @@ endif include ../Rules +ifeq ($(run-built-tests),yes) +LOCALES := en_US.UTF-8 +include ../gen-locales.mk + +$(objpfx)tst-iconv-opt.out: $(gen-locales) +endif + $(inst_bindir)/iconv: $(objpfx)iconv_prog $(+force) $(do-install-program) @@ -95,3 +103,8 @@ $(objpfx)test-iconvconfig.out: /dev/null $(objpfx)iconvconfig cmp $$tmp $(inst_gconvdir)/gconv-modules.cache; \ rm -f $$tmp) > $@; \ $(evaluate-test) + +$(objpfx)tst-iconv_prog.out: tst-iconv_prog.sh $(objpfx)iconv_prog + $(BASH) $< $(common-objdir) '$(test-wrapper-env)' \ + '$(run-program-env)' > $@; \ + $(evaluate-test) diff --git a/iconv/Versions b/iconv/Versions index 60ab10a27..d51af52fa 100644 --- a/iconv/Versions +++ b/iconv/Versions @@ -7,6 +7,9 @@ libc { # functions shared with iconv program __gconv_get_alias_db; __gconv_get_cache; __gconv_get_modules_db; + # functions used elsewhere in glibc + __gconv_open; __gconv_create_spec; __gconv_destroy_spec; + # function used by the gconv modules __gconv_transliterate; } diff --git a/iconv/gconv_charset.c b/iconv/gconv_charset.c new file mode 100644 index 000000000..4ba0aa99f --- /dev/null +++ b/iconv/gconv_charset.c @@ -0,0 +1,228 @@ +/* Charset name normalization. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + + +#include +#include +#include +#include +#include +#include +#include "gconv_int.h" +#include "gconv_charset.h" + + +/* This function returns a pointer to the last suffix in a conversion code + string. Valid suffixes matched by this function are of the form: '/' or ',' + followed by arbitrary text that doesn't contain '/' or ','. It does not + edit the string in any way. The caller is expected to parse the suffix and + remove it (by e.g. truncating the string) before the next call. */ +static char * +find_suffix (char *s) +{ + /* The conversion code is in the form of a triplet, separated by '/' chars. + The third component of the triplet contains suffixes. If we don't have two + slashes, we don't have a suffix. */ + + int slash_count = 0; + char *suffix_term = NULL; + + for (int i = 0; s[i] != '\0'; i++) + switch (s[i]) + { + case '/': + slash_count++; + /* Fallthrough */ + case ',': + suffix_term = &s[i]; + } + + if (slash_count >= 2) + return suffix_term; + + return NULL; +} + + +struct gconv_parsed_code +{ + char *code; + bool translit; + bool ignore; +}; + + +/* This function parses an iconv_open encoding PC.CODE, strips any suffixes + (such as TRANSLIT or IGNORE) from it and sets corresponding flags in it. */ +static void +gconv_parse_code (struct gconv_parsed_code *pc) +{ + pc->translit = false; + pc->ignore = false; + + while (1) + { + /* First drop any trailing whitespaces and separators. */ + size_t len = strlen (pc->code); + while ((len > 0) + && (isspace (pc->code[len - 1]) + || pc->code[len - 1] == ',' + || pc->code[len - 1] == '/')) + len--; + + pc->code[len] = '\0'; + + if (len == 0) + return; + + char * suffix = find_suffix (pc->code); + if (suffix == NULL) + { + /* At this point, we have processed and removed all suffixes from the + code and what remains of the code is suffix free. */ + return; + } + else + { + /* A suffix is processed from the end of the code array going + backwards, one suffix at a time. The suffix is an index into the + code character array and points to: one past the end of the code + and any unprocessed suffixes, and to the beginning of the suffix + currently being processed during this iteration. We must process + this suffix and then drop it from the code by terminating the + preceding text with NULL. + + We want to allow and recognize suffixes such as: + + "/TRANSLIT" i.e. single suffix + "//TRANSLIT" i.e. single suffix and multiple separators + "//TRANSLIT/IGNORE" i.e. suffixes separated by "/" + "/TRANSLIT//IGNORE" i.e. suffixes separated by "//" + "//IGNORE,TRANSLIT" i.e. suffixes separated by "," + "//IGNORE," i.e. trailing "," + "//TRANSLIT/" i.e. trailing "/" + "//TRANSLIT//" i.e. trailing "//" + "/" i.e. empty suffix. + + Unknown suffixes are silently discarded and ignored. */ + + if ((__strcasecmp_l (suffix, + GCONV_TRIPLE_SEPARATOR + GCONV_TRANSLIT_SUFFIX, + _nl_C_locobj_ptr) == 0) + || (__strcasecmp_l (suffix, + GCONV_SUFFIX_SEPARATOR + GCONV_TRANSLIT_SUFFIX, + _nl_C_locobj_ptr) == 0)) + pc->translit = true; + + if ((__strcasecmp_l (suffix, + GCONV_TRIPLE_SEPARATOR + GCONV_IGNORE_ERRORS_SUFFIX, + _nl_C_locobj_ptr) == 0) + || (__strcasecmp_l (suffix, + GCONV_SUFFIX_SEPARATOR + GCONV_IGNORE_ERRORS_SUFFIX, + _nl_C_locobj_ptr) == 0)) + pc->ignore = true; + + /* We just processed this suffix. We can now drop it from the + code string by truncating it at the suffix's position. */ + suffix[0] = '\0'; + } + } +} + + +/* This function accepts the charset names of the source and destination of the + conversion and populates *conv_spec with an equivalent conversion + specification that may later be used by __gconv_open. The charset names + might contain options in the form of suffixes that alter the conversion, + e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring + and truncating any suffix options in fromcode, and processing and truncating + any suffix options in tocode. Supported suffix options ("TRANSLIT" or + "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec + to be set to true. Unrecognized suffix options are silently discarded. If + the function succeeds, it returns conv_spec back to the caller. It returns + NULL upon failure. conv_spec must be allocated and freed by the caller. */ +struct gconv_spec * +__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode, + const char *tocode) +{ + struct gconv_parsed_code pfc, ptc; + struct gconv_spec *ret = NULL; + + pfc.code = __strdup (fromcode); + ptc.code = __strdup (tocode); + + if ((pfc.code == NULL) + || (ptc.code == NULL)) + goto out; + + gconv_parse_code (&pfc); + gconv_parse_code (&ptc); + + /* We ignore suffixes in the fromcode because that is how the current + implementation has always handled them. Only suffixes in the tocode are + processed and handled. The reality is that invalid input in the input + character set should only be ignored if the fromcode specifies IGNORE. + The current implementation ignores invalid intput in the input character + set if the tocode contains IGNORE. We preserve this behavior for + backwards compatibility. In the future we may split the handling of + IGNORE to allow a finer grained specification of ignorning invalid input + and/or ignoring invalid output. */ + conv_spec->translit = ptc.translit; + conv_spec->ignore = ptc.ignore; + + /* 3 extra bytes because 1 extra for '\0', and 2 extra so strip might + be able to add one or two trailing '/' characters if necessary. */ + conv_spec->fromcode = malloc (strlen (fromcode) + 3); + if (conv_spec->fromcode == NULL) + goto out; + + conv_spec->tocode = malloc (strlen (tocode) + 3); + if (conv_spec->tocode == NULL) + { + free (conv_spec->fromcode); + conv_spec->fromcode = NULL; + goto out; + } + + /* Strip unrecognized characters and ensure that the code has two '/' + characters as per conversion code triplet specification. */ + strip (conv_spec->fromcode, pfc.code); + strip (conv_spec->tocode, ptc.code); + ret = conv_spec; + +out: + free (pfc.code); + free (ptc.code); + + return ret; +} +libc_hidden_def (__gconv_create_spec) + + +void +__gconv_destroy_spec (struct gconv_spec *conv_spec) +{ + free (conv_spec->fromcode); + free (conv_spec->tocode); + return; +} +libc_hidden_def (__gconv_destroy_spec) diff --git a/iconv/gconv_charset.h b/iconv/gconv_charset.h index 348acc089..e9c122cf7 100644 --- a/iconv/gconv_charset.h +++ b/iconv/gconv_charset.h @@ -19,9 +19,41 @@ #include #include +#include +#include +#include +#include +#include "gconv_int.h" -static void +/* An iconv encoding is in the form of a triplet, with parts separated by + a '/' character. The first part is the standard name, the second part is + the character set, and the third part is the error handler. If the first + part is sufficient to identify both the standard and the character set + then the second part can be empty e.g. UTF-8//. If the first part is not + sufficient to identify both the standard and the character set then the + second part is required e.g. ISO-10646/UTF8/. If neither the first or + second parts are provided e.g. //, then the current locale is used. + The actual values used in the first and second parts are not entirely + relevant to the implementation. The values themselves are used in a hash + table to lookup modules and so the naming convention of the first two parts + is somewhat arbitrary and only helps locate the entries in the cache. + The third part is the error handler and is comprised of a ',' or '/' + separated list of suffixes. Currently, we support "TRANSLIT" for + transliteration and "IGNORE" for ignoring conversion errors due to + unrecognized input characters. */ +#define GCONV_TRIPLE_SEPARATOR "/" +#define GCONV_SUFFIX_SEPARATOR "," +#define GCONV_TRANSLIT_SUFFIX "TRANSLIT" +#define GCONV_IGNORE_ERRORS_SUFFIX "IGNORE" + + +/* This function copies in-order, characters from the source 's' that are + either alpha-numeric or one in one of these: "_-.,:/" - into the destination + 'wp' while dropping all other characters. In the process, it converts all + alphabetical characters to upper case. It then appends up to two '/' + characters so that the total number of '/'es in the destination is 2. */ +static inline void __attribute__ ((unused, always_inline)) strip (char *wp, const char *s) { int slash_count = 0; diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h index fbaf12cee..f721ce30f 100644 --- a/iconv/gconv_int.h +++ b/iconv/gconv_int.h @@ -75,6 +75,15 @@ struct gconv_module }; +/* The specification of the conversion that needs to be performed. */ +struct gconv_spec +{ + char *fromcode; + char *tocode; + bool translit; + bool ignore; +}; + /* Flags for `gconv_open'. */ enum { @@ -136,10 +145,33 @@ __libc_lock_define (extern, __gconv_lock attribute_hidden) }) -/* Return in *HANDLE decriptor for transformation from FROMSET to TOSET. */ -extern int __gconv_open (const char *toset, const char *fromset, - __gconv_t *handle, int flags) - attribute_hidden; +/* Return in *HANDLE, a decriptor for the transformation. The function expects + the specification of the transformation in the structure pointed to by + CONV_SPEC. It only reads *CONV_SPEC and does not take ownership of it. */ +extern int __gconv_open (struct gconv_spec *conv_spec, + __gconv_t *handle, int flags); +libc_hidden_proto (__gconv_open) + +/* This function accepts the charset names of the source and destination of the + conversion and populates *conv_spec with an equivalent conversion + specification that may later be used by __gconv_open. The charset names + might contain options in the form of suffixes that alter the conversion, + e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring + and truncating any suffix options in fromcode, and processing and truncating + any suffix options in tocode. Supported suffix options ("TRANSLIT" or + "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec + to be set to true. Unrecognized suffix options are silently discarded. If + the function succeeds, it returns conv_spec back to the caller. It returns + NULL upon failure. */ +extern struct gconv_spec * +__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode, + const char *tocode); +libc_hidden_proto (__gconv_create_spec) + +/* This function frees all heap memory allocated by __gconv_create_spec. */ +extern void +__gconv_destroy_spec (struct gconv_spec *conv_spec); +libc_hidden_proto (__gconv_destroy_spec) /* Free resources associated with transformation descriptor CD. */ extern int __gconv_close (__gconv_t cd) diff --git a/iconv/gconv_open.c b/iconv/gconv_open.c index b39626d25..287862095 100644 --- a/iconv/gconv_open.c +++ b/iconv/gconv_open.c @@ -31,7 +31,7 @@ int -__gconv_open (const char *toset, const char *fromset, __gconv_t *handle, +__gconv_open (struct gconv_spec *conv_spec, __gconv_t *handle, int flags) { struct __gconv_step *steps; @@ -40,77 +40,38 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle, size_t cnt = 0; int res; int conv_flags = 0; - const char *errhand; - const char *ignore; bool translit = false; + char *tocode, *fromcode; /* Find out whether any error handling method is specified. */ - errhand = strchr (toset, '/'); - if (errhand != NULL) - errhand = strchr (errhand + 1, '/'); - if (__glibc_likely (errhand != NULL)) - { - if (*++errhand == '\0') - errhand = NULL; - else - { - /* Make copy without the error handling description. */ - char *newtoset = (char *) alloca (errhand - toset + 1); - char *tok; - char *ptr = NULL /* Work around a bogus warning */; - - newtoset[errhand - toset] = '\0'; - toset = memcpy (newtoset, toset, errhand - toset); + translit = conv_spec->translit; - /* Find the appropriate transliteration handlers. */ - tok = strdupa (errhand); + if (conv_spec->ignore) + conv_flags |= __GCONV_IGNORE_ERRORS; - tok = __strtok_r (tok, ",", &ptr); - while (tok != NULL) - { - if (__strcasecmp_l (tok, "TRANSLIT", _nl_C_locobj_ptr) == 0) - translit = true; - else if (__strcasecmp_l (tok, "IGNORE", _nl_C_locobj_ptr) == 0) - /* Set the flag to ignore all errors. */ - conv_flags |= __GCONV_IGNORE_ERRORS; - - tok = __strtok_r (NULL, ",", &ptr); - } - } - } - - /* For the source character set we ignore the error handler specification. - XXX Is this really always the best? */ - ignore = strchr (fromset, '/'); - if (ignore != NULL && (ignore = strchr (ignore + 1, '/')) != NULL - && *++ignore != '\0') - { - char *newfromset = (char *) alloca (ignore - fromset + 1); - - newfromset[ignore - fromset] = '\0'; - fromset = memcpy (newfromset, fromset, ignore - fromset); - } + tocode = conv_spec->tocode; + fromcode = conv_spec->fromcode; /* If the string is empty define this to mean the charset of the currently selected locale. */ - if (strcmp (toset, "//") == 0) + if (strcmp (tocode, "//") == 0) { const char *codeset = _NL_CURRENT (LC_CTYPE, CODESET); size_t len = strlen (codeset); char *dest; - toset = dest = (char *) alloca (len + 3); + tocode = dest = (char *) alloca (len + 3); memcpy (__mempcpy (dest, codeset, len), "//", 3); } - if (strcmp (fromset, "//") == 0) + if (strcmp (fromcode, "//") == 0) { const char *codeset = _NL_CURRENT (LC_CTYPE, CODESET); size_t len = strlen (codeset); char *dest; - fromset = dest = (char *) alloca (len + 3); + fromcode = dest = (char *) alloca (len + 3); memcpy (__mempcpy (dest, codeset, len), "//", 3); } - res = __gconv_find_transform (toset, fromset, &steps, &nsteps, flags); + res = __gconv_find_transform (tocode, fromcode, &steps, &nsteps, flags); if (res == __GCONV_OK) { /* Allocate room for handle. */ @@ -209,3 +170,4 @@ __gconv_open (const char *toset, const char *fromset, __gconv_t *handle, *handle = result; return res; } +libc_hidden_def (__gconv_open) diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c index d4797fba1..963b29f24 100644 --- a/iconv/gconv_simple.c +++ b/iconv/gconv_simple.c @@ -239,11 +239,9 @@ ucs4_internal_loop (struct __gconv_step *step, int flags = step_data->__flags; const unsigned char *inptr = *inptrp; unsigned char *outptr = *outptrp; - size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; int result; - size_t cnt; - for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + for (; inptr + 4 <= inend && outptr + 4 <= outend; inptr += 4) { uint32_t inval; @@ -307,11 +305,9 @@ ucs4_internal_loop_unaligned (struct __gconv_step *step, int flags = step_data->__flags; const unsigned char *inptr = *inptrp; unsigned char *outptr = *outptrp; - size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; int result; - size_t cnt; - for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + for (; inptr + 4 <= inend && outptr + 4 <= outend; inptr += 4) { if (__glibc_unlikely (inptr[0] > 0x80)) { @@ -613,11 +609,9 @@ ucs4le_internal_loop (struct __gconv_step *step, int flags = step_data->__flags; const unsigned char *inptr = *inptrp; unsigned char *outptr = *outptrp; - size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; int result; - size_t cnt; - for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + for (; inptr + 4 <= inend && outptr + 4 <= outend; inptr += 4) { uint32_t inval; @@ -684,11 +678,9 @@ ucs4le_internal_loop_unaligned (struct __gconv_step *step, int flags = step_data->__flags; const unsigned char *inptr = *inptrp; unsigned char *outptr = *outptrp; - size_t n_convert = MIN (inend - inptr, outend - outptr) / 4; int result; - size_t cnt; - for (cnt = 0; cnt < n_convert; ++cnt, inptr += 4) + for (; inptr + 4 <= inend && outptr + 4 <= outend; inptr += 4) { if (__glibc_unlikely (inptr[3] > 0x80)) { diff --git a/iconv/iconv_open.c b/iconv/iconv_open.c index 687067070..5b30055c0 100644 --- a/iconv/iconv_open.c +++ b/iconv/iconv_open.c @@ -31,49 +31,15 @@ iconv_t iconv_open (const char *tocode, const char *fromcode) { - /* Normalize the name. We remove all characters beside alpha-numeric, - '_', '-', '/', '.', and ':'. */ - size_t tocode_len = strlen (tocode) + 3; - char *tocode_conv; - bool tocode_usealloca = __libc_use_alloca (tocode_len); - if (tocode_usealloca) - tocode_conv = (char *) alloca (tocode_len); - else - { - tocode_conv = (char *) malloc (tocode_len); - if (tocode_conv == NULL) - return (iconv_t) -1; - } - strip (tocode_conv, tocode); - tocode = (tocode_conv[2] == '\0' && tocode[0] != '\0' - ? upstr (tocode_conv, tocode) : tocode_conv); + __gconv_t cd; + struct gconv_spec conv_spec; - size_t fromcode_len = strlen (fromcode) + 3; - char *fromcode_conv; - bool fromcode_usealloca = __libc_use_alloca (fromcode_len); - if (fromcode_usealloca) - fromcode_conv = (char *) alloca (fromcode_len); - else - { - fromcode_conv = (char *) malloc (fromcode_len); - if (fromcode_conv == NULL) - { - if (! tocode_usealloca) - free (tocode_conv); - return (iconv_t) -1; - } - } - strip (fromcode_conv, fromcode); - fromcode = (fromcode_conv[2] == '\0' && fromcode[0] != '\0' - ? upstr (fromcode_conv, fromcode) : fromcode_conv); + if (__gconv_create_spec (&conv_spec, fromcode, tocode) == NULL) + return (iconv_t) -1; - __gconv_t cd; - int res = __gconv_open (tocode, fromcode, &cd, 0); + int res = __gconv_open (&conv_spec, &cd, 0); - if (! fromcode_usealloca) - free (fromcode_conv); - if (! tocode_usealloca) - free (tocode_conv); + __gconv_destroy_spec (&conv_spec); if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK) { diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c index 9709e4a70..d59979759 100644 --- a/iconv/iconv_prog.c +++ b/iconv/iconv_prog.c @@ -39,6 +39,7 @@ #include #include "iconv_prog.h" #include "iconvconfig.h" +#include "gconv_charset.h" /* Get libc version number. */ #include "../version.h" @@ -118,8 +119,7 @@ main (int argc, char *argv[]) { int status = EXIT_SUCCESS; int remaining; - iconv_t cd; - const char *orig_to_code; + __gconv_t cd; struct charmap_t *from_charmap = NULL; struct charmap_t *to_charmap = NULL; @@ -139,39 +139,6 @@ main (int argc, char *argv[]) exit (EXIT_SUCCESS); } - /* If we have to ignore errors make sure we use the appropriate name for - the to-character-set. */ - orig_to_code = to_code; - if (omit_invalid) - { - const char *errhand = strchrnul (to_code, '/'); - int nslash = 2; - char *newp; - char *cp; - - if (*errhand == '/') - { - --nslash; - errhand = strchrnul (errhand + 1, '/'); - - if (*errhand == '/') - { - --nslash; - errhand = strchr (errhand, '\0'); - } - } - - newp = (char *) alloca (errhand - to_code + nslash + 7 + 1); - cp = mempcpy (newp, to_code, errhand - to_code); - while (nslash-- > 0) - *cp++ = '/'; - if (cp[-1] != '/') - *cp++ = ','; - memcpy (cp, "IGNORE", sizeof ("IGNORE")); - - to_code = newp; - } - /* POSIX 1003.2b introduces a silly thing: the arguments to -t anf -f can be file names of charmaps. In this case iconv will have to read those charmaps and use them to do the conversion. But there are @@ -184,10 +151,10 @@ main (int argc, char *argv[]) file. */ from_charmap = charmap_read (from_code, /*0, 1*/1, 0, 0, 0); - if (strchr (orig_to_code, '/') != NULL) + if (strchr (to_code, '/') != NULL) /* The to-name might be a charmap file name. Try reading the file. */ - to_charmap = charmap_read (orig_to_code, /*0, 1,*/1, 0, 0, 0); + to_charmap = charmap_read (to_code, /*0, 1,*/1, 0, 0, 0); /* At this point we have to handle two cases. The first one is @@ -201,9 +168,25 @@ main (int argc, char *argv[]) argc, remaining, argv, output_file); else { + struct gconv_spec conv_spec; + int res; + + if (__gconv_create_spec (&conv_spec, from_code, to_code) == NULL) + { + error (EXIT_FAILURE, errno, + _("failed to start conversion processing")); + exit (1); + } + + if (omit_invalid) + conv_spec.ignore = true; + /* Let's see whether we have these coded character sets. */ - cd = iconv_open (to_code, from_code); - if (cd == (iconv_t) -1) + res = __gconv_open (&conv_spec, &cd, 0); + + __gconv_destroy_spec (&conv_spec); + + if (res != __GCONV_OK) { if (errno == EINVAL) { @@ -221,7 +204,7 @@ main (int argc, char *argv[]) const char *from_pretty = (from_code[0] ? from_code : nl_langinfo (CODESET)); const char *to_pretty = - (orig_to_code[0] ? orig_to_code : nl_langinfo (CODESET)); + (to_code[0] ? to_code : nl_langinfo (CODESET)); if (from_wrong) { diff --git a/iconv/tst-iconv-opt.c b/iconv/tst-iconv-opt.c new file mode 100644 index 000000000..669d812a6 --- /dev/null +++ b/iconv/tst-iconv-opt.c @@ -0,0 +1,347 @@ +/* Test iconv's TRANSLIT and IGNORE option handling + + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + + +#include +#include +#include +#include +#include +#include + + +/* Run one iconv test. Arguments: + to: destination character set and options + from: source character set + input: input string to be converted + exp_in: expected number of bytes consumed + exp_ret: expected return value (error or number of irreversible conversions) + exp_out: expected output string + exp_err: expected value of `errno' after iconv returns. */ +static void +test_iconv (const char *to, const char *from, char *input, size_t exp_in, + size_t exp_ret, const char *exp_out, int exp_err) +{ + iconv_t cd; + char outbuf[500]; + size_t inlen, outlen; + char *inptr, *outptr; + size_t n; + + cd = iconv_open (to, from); + TEST_VERIFY (cd != (iconv_t) -1); + + inlen = strlen (input); + outlen = sizeof (outbuf); + inptr = input; + outptr = outbuf; + + errno = 0; + n = iconv (cd, &inptr, &inlen, &outptr, &outlen); + + TEST_COMPARE (n, exp_ret); + TEST_VERIFY (inptr == input + exp_in); + TEST_COMPARE (errno, exp_err); + TEST_COMPARE_BLOB (outbuf, outptr - outbuf, exp_out, strlen (exp_out)); + TEST_VERIFY (iconv_close (cd) == 0); +} + + +/* We test option parsing by converting UTF-8 inputs to ASCII under various + option combinations. The UTF-8 inputs fall into three categories: + - ASCII-only, + - non-ASCII, + - non-ASCII with invalid UTF-8 characters. */ + +/* 1. */ +char ascii[] = "Just some ASCII text"; + +/* 2. Valid UTF-8 input and some corresponding expected outputs with various + options. The two non-ASCII characters below are accented alphabets: + an `a' then an `o'. */ +char utf8[] = "UTF-8 text with \u00E1 couple \u00F3f non-ASCII characters"; +char u2a[] = "UTF-8 text with "; +char u2a_translit[] = "UTF-8 text with a couple of non-ASCII characters"; +char u2a_ignore[] = "UTF-8 text with couple f non-ASCII characters"; + +/* 3. Invalid UTF-8 input and some corresponding expected outputs. \xff is + invalid UTF-8. It's followed by some valid but non-ASCII UTF-8. */ +char iutf8[] = "Invalid UTF-8 \xff\u27E6text\u27E7"; +char iu2a[] = "Invalid UTF-8 "; +char iu2a_ignore[] = "Invalid UTF-8 text"; +char iu2a_both[] = "Invalid UTF-8 [|text|]"; + +/* 4. Another invalid UTF-8 input and corresponding expected outputs. This time + the valid non-ASCII UTF-8 characters appear before the invalid \xff. */ +char jutf8[] = "Invalid \u27E6UTF-8\u27E7 \xfftext"; +char ju2a[] = "Invalid "; +char ju2a_translit[] = "Invalid [|UTF-8|] "; +char ju2a_ignore[] = "Invalid UTF-8 text"; +char ju2a_both[] = "Invalid [|UTF-8|] text"; + +/* We also test option handling for character set names that have the form + "A/B". In this test, we test conversions "ISO-10646/UTF-8", and either + ISO-8859-1 or ASCII. */ + +/* 5. Accented 'A' and 'a' characters in ISO-8859-1 and UTF-8, and an + equivalent ASCII transliteration. */ +char iso8859_1_a[] = {0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, /* Accented A's. */ + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, /* Accented a's. */ + 0x00}; +char utf8_a[] = "\u00C0\u00C1\u00C2\u00C3\u00C4\u00C5" + "\u00E0\u00E1\u00E2\u00E3\u00E4\u00E5"; +char ascii_a[] = "AAAAAAaaaaaa"; + +/* 6. An invalid ASCII string where [0] is invalid and [1] is '~'. */ +char iascii [] = {0x80, '~', '\0'}; +char empty[] = ""; +char ia2u_ignore[] = "~"; + +static int +do_test (void) +{ + xsetlocale (LC_ALL, "en_US.UTF-8"); + + + /* 0. iconv_open should gracefully fail for invalid character sets. */ + + TEST_VERIFY (iconv_open ("INVALID", "UTF-8") == (iconv_t) -1); + TEST_VERIFY (iconv_open ("UTF-8", "INVALID") == (iconv_t) -1); + TEST_VERIFY (iconv_open ("INVALID", "INVALID") == (iconv_t) -1); + + + /* 1. ASCII-only UTF-8 input should convert to ASCII with no changes: */ + + test_iconv ("ASCII", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); + test_iconv ("ASCII//", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); + test_iconv ("ASCII//TRANSLIT", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); + test_iconv ("ASCII//TRANSLIT//", "UTF-8", ascii, strlen (ascii), 0, ascii, + 0); + test_iconv ("ASCII//IGNORE", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); + test_iconv ("ASCII//IGNORE//", "UTF-8", ascii, strlen (ascii), 0, ascii, 0); + + + /* 2. Valid UTF-8 input with non-ASCII characters: */ + + /* EILSEQ when converted to ASCII. */ + test_iconv ("ASCII", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, EILSEQ); + + /* Converted without error with TRANSLIT enabled. */ + test_iconv ("ASCII//TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, u2a_translit, + 0); + + /* EILSEQ with IGNORE enabled. Non-ASCII chars dropped from output. */ + test_iconv ("ASCII//IGNORE", "UTF-8", utf8, strlen (utf8), (size_t) -1, + u2a_ignore, EILSEQ); + + /* With TRANSLIT and IGNORE enabled, transliterated without error. We test + four combinations. */ + + test_iconv ("ASCII//TRANSLIT,IGNORE", "UTF-8", utf8, strlen (utf8), 2, + u2a_translit, 0); + test_iconv ("ASCII//TRANSLIT//IGNORE", "UTF-8", utf8, strlen (utf8), 2, + u2a_translit, 0); + test_iconv ("ASCII//IGNORE,TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, + u2a_translit, 0); + /* Due to bug 19519, iconv was ignoring TRANSLIT for the following input. */ + test_iconv ("ASCII//IGNORE//TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, + u2a_translit, 0); + + /* Misspellings of TRANSLIT and IGNORE are ignored, but conversion still + works while respecting any other correctly spelled options. */ + + test_iconv ("ASCII//T", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, + EILSEQ); + test_iconv ("ASCII//TRANSLITERATE", "UTF-8", utf8, strlen (u2a), (size_t) -1, + u2a, EILSEQ); + test_iconv ("ASCII//I", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, + EILSEQ); + test_iconv ("ASCII//IGNORED", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, + EILSEQ); + test_iconv ("ASCII//TRANSLITERATE//IGNORED", "UTF-8", utf8, strlen (u2a), + (size_t) -1, u2a, EILSEQ); + test_iconv ("ASCII//IGNORED,TRANSLITERATE", "UTF-8", utf8, strlen (u2a), + (size_t) -1, u2a, EILSEQ); + test_iconv ("ASCII//T//I", "UTF-8", utf8, strlen (u2a), (size_t) -1, u2a, + EILSEQ); + + test_iconv ("ASCII//TRANSLIT//I", "UTF-8", utf8, strlen (utf8), 2, + u2a_translit, 0); + /* Due to bug 19519, iconv was ignoring TRANSLIT for the following input. */ + test_iconv ("ASCII//I//TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, + u2a_translit, 0); + test_iconv ("ASCII//IGNORED,TRANSLIT", "UTF-8", utf8, strlen (utf8), 2, + u2a_translit, 0); + test_iconv ("ASCII//TRANSLIT,IGNORED", "UTF-8", utf8, strlen (utf8), 2, + u2a_translit, 0); + + test_iconv ("ASCII//IGNORE,T", "UTF-8", utf8, strlen (utf8), (size_t) -1, + u2a_ignore, EILSEQ); + test_iconv ("ASCII//T,IGNORE", "UTF-8", utf8, strlen (utf8), (size_t) -1, + u2a_ignore, EILSEQ); + /* Due to bug 19519, iconv was ignoring IGNORE for the following input. */ + test_iconv ("ASCII//TRANSLITERATE//IGNORE", "UTF-8", utf8, strlen (utf8), + (size_t) -1, u2a_ignore, EILSEQ); + test_iconv ("ASCII//IGNORE//TRANSLITERATE", "UTF-8", utf8, strlen (utf8), + (size_t) -1, u2a_ignore, EILSEQ); + + + /* 3. Invalid UTF-8 followed by some valid non-ASCII UTF-8 characters: */ + + /* EILSEQ; output is truncated at the first invalid UTF-8 character. */ + test_iconv ("ASCII", "UTF-8", iutf8, strlen (iu2a), (size_t) -1, iu2a, + EILSEQ); + + /* With TRANSLIT enabled: EILSEQ; output still truncated at the first invalid + UTF-8 character. */ + test_iconv ("ASCII//TRANSLIT", "UTF-8", iutf8, strlen (iu2a), (size_t) -1, + iu2a, EILSEQ); + + /* With IGNORE enabled: EILSEQ; output omits invalid UTF-8 characters and + valid UTF-8 non-ASCII characters. */ + test_iconv ("ASCII//IGNORE", "UTF-8", iutf8, strlen (iutf8), (size_t) -1, + iu2a_ignore, EILSEQ); + + /* With TRANSLIT and IGNORE enabled, output omits only invalid UTF-8 + characters and transliterates valid non-ASCII UTF-8 characters. We test + four combinations. */ + + test_iconv ("ASCII//TRANSLIT,IGNORE", "UTF-8", iutf8, strlen (iutf8), 2, + iu2a_both, 0); + /* Due to bug 19519, iconv was ignoring IGNORE for the following input. */ + test_iconv ("ASCII//TRANSLIT//IGNORE", "UTF-8", iutf8, strlen (iutf8), 2, + iu2a_both, 0); + test_iconv ("ASCII//IGNORE,TRANSLIT", "UTF-8", iutf8, strlen (iutf8), 2, + iu2a_both, 0); + /* Due to bug 19519, iconv was ignoring TRANSLIT for the following input. */ + test_iconv ("ASCII//IGNORE//TRANSLIT", "UTF-8", iutf8, strlen (iutf8), 2, + iu2a_both, 0); + + + /* 4. Invalid UTF-8 with valid non-ASCII UTF-8 chars appearing first: */ + + /* EILSEQ; output is truncated at the first non-ASCII character. */ + test_iconv ("ASCII", "UTF-8", jutf8, strlen (ju2a), (size_t) -1, ju2a, + EILSEQ); + + /* With TRANSLIT enabled: EILSEQ; output now truncated at the first invalid + UTF-8 character. */ + test_iconv ("ASCII//TRANSLIT", "UTF-8", jutf8, strlen (jutf8) - 5, + (size_t) -1, ju2a_translit, EILSEQ); + test_iconv ("ASCII//translit", "UTF-8", jutf8, strlen (jutf8) - 5, + (size_t) -1, ju2a_translit, EILSEQ); + + /* With IGNORE enabled: EILSEQ; output omits invalid UTF-8 characters and + valid UTF-8 non-ASCII characters. */ + test_iconv ("ASCII//IGNORE", "UTF-8", jutf8, strlen (jutf8), (size_t) -1, + ju2a_ignore, EILSEQ); + test_iconv ("ASCII//ignore", "UTF-8", jutf8, strlen (jutf8), (size_t) -1, + ju2a_ignore, EILSEQ); + + /* With TRANSLIT and IGNORE enabled, output omits only invalid UTF-8 + characters and transliterates valid non-ASCII UTF-8 characters. We test + several combinations. */ + + test_iconv ("ASCII//TRANSLIT,IGNORE", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + /* Due to bug 19519, iconv was ignoring IGNORE for the following input. */ + test_iconv ("ASCII//TRANSLIT//IGNORE", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + test_iconv ("ASCII//IGNORE,TRANSLIT", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + /* Due to bug 19519, iconv was ignoring TRANSLIT for the following input. */ + test_iconv ("ASCII//IGNORE//TRANSLIT", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + test_iconv ("ASCII//translit,ignore", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + /* Trailing whitespace and separators should be ignored. */ + test_iconv ("ASCII//IGNORE,TRANSLIT ", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + test_iconv ("ASCII//IGNORE,TRANSLIT/", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + test_iconv ("ASCII//IGNORE,TRANSLIT//", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + test_iconv ("ASCII//IGNORE,TRANSLIT,", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + test_iconv ("ASCII//IGNORE,TRANSLIT,,", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + test_iconv ("ASCII//IGNORE,TRANSLIT /,", "UTF-8", jutf8, strlen (jutf8), 2, + ju2a_both, 0); + + /* TRANSLIT or IGNORE suffixes in fromcode should be ignored. */ + test_iconv ("ASCII", "UTF-8//TRANSLIT", jutf8, strlen (ju2a), (size_t) -1, + ju2a, EILSEQ); + test_iconv ("ASCII", "UTF-8//IGNORE", jutf8, strlen (ju2a), (size_t) -1, + ju2a, EILSEQ); + test_iconv ("ASCII", "UTF-8//TRANSLIT,IGNORE", jutf8, strlen (ju2a), + (size_t) -1, ju2a, EILSEQ); + + + /* 5. Charset names of the form "A/B/": */ + + /* ISO-8859-1 is converted to UTF-8 without needing transliteration. */ + test_iconv ("ISO-10646/UTF-8", "ISO-8859-1", iso8859_1_a, + strlen (iso8859_1_a), 0, utf8_a, 0); + test_iconv ("ISO-10646/UTF-8/", "ISO-8859-1", iso8859_1_a, + strlen (iso8859_1_a), 0, utf8_a, 0); + test_iconv ("ISO-10646/UTF-8/IGNORE", "ISO-8859-1", iso8859_1_a, + strlen (iso8859_1_a), 0, utf8_a, 0); + test_iconv ("ISO-10646/UTF-8//IGNORE", "ISO-8859-1", iso8859_1_a, + strlen (iso8859_1_a), 0, utf8_a, 0); + test_iconv ("ISO-10646/UTF-8/TRANSLIT", "ISO-8859-1", iso8859_1_a, + strlen (iso8859_1_a), 0, utf8_a, 0); + test_iconv ("ISO-10646/UTF-8//TRANSLIT", "ISO-8859-1", iso8859_1_a, + strlen (iso8859_1_a), 0, utf8_a, 0); + test_iconv ("ISO-10646/UTF-8//TRANSLIT/IGNORE", "ISO-8859-1", iso8859_1_a, + strlen (iso8859_1_a), 0, utf8_a, 0); + test_iconv ("ISO-10646/UTF-8//TRANSLIT//IGNORE", "ISO-8859-1", iso8859_1_a, + strlen (iso8859_1_a), 0, utf8_a, 0); + test_iconv ("ISO-10646/UTF-8/TRANSLIT,IGNORE", "ISO-8859-1", iso8859_1_a, + strlen (iso8859_1_a), 0, utf8_a, 0); + + /* UTF-8 with accented A's is converted to ASCII with transliteration. */ + test_iconv ("ASCII", "ISO-10646/UTF-8", utf8_a, + 0, (size_t) -1, empty, EILSEQ); + test_iconv ("ASCII//IGNORE", "ISO-10646/UTF-8", utf8_a, + strlen (utf8_a), (size_t) -1, empty, EILSEQ); + test_iconv ("ASCII//TRANSLIT", "ISO-10646/UTF-8", utf8_a, + strlen (utf8_a), 12, ascii_a, 0); + + /* Invalid ASCII is converted to UTF-8 only with IGNORE. */ + test_iconv ("ISO-10646/UTF-8", "ASCII", iascii, strlen (empty), (size_t) -1, + empty, EILSEQ); + test_iconv ("ISO-10646/UTF-8/TRANSLIT", "ASCII", iascii, strlen (empty), + (size_t) -1, empty, EILSEQ); + test_iconv ("ISO-10646/UTF-8/IGNORE", "ASCII", iascii, strlen (iascii), + (size_t) -1, ia2u_ignore, EILSEQ); + test_iconv ("ISO-10646/UTF-8/TRANSLIT,IGNORE", "ASCII", iascii, + strlen (iascii), (size_t) -1, ia2u_ignore, EILSEQ); + /* Due to bug 19519, iconv was ignoring IGNORE for the following three + inputs: */ + test_iconv ("ISO-10646/UTF-8/TRANSLIT/IGNORE", "ASCII", iascii, + strlen (iascii), (size_t) -1, ia2u_ignore, EILSEQ); + test_iconv ("ISO-10646/UTF-8//TRANSLIT,IGNORE", "ASCII", iascii, + strlen (iascii), (size_t) -1, ia2u_ignore, EILSEQ); + test_iconv ("ISO-10646/UTF-8//TRANSLIT//IGNORE", "ASCII", iascii, + strlen (iascii), (size_t) -1, ia2u_ignore, EILSEQ); + + return 0; +} + +#include diff --git a/iconv/tst-iconv8.c b/iconv/tst-iconv8.c new file mode 100644 index 000000000..0b92b19f6 --- /dev/null +++ b/iconv/tst-iconv8.c @@ -0,0 +1,50 @@ +/* Test iconv behavior on UCS4 conversions with //IGNORE. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Derived from BZ #26923 */ +#include +#include +#include +#include + +static int +do_test (void) +{ + iconv_t cd = iconv_open ("UTF-8//IGNORE", "ISO-10646/UCS4/"); + TEST_VERIFY_EXIT (cd != (iconv_t) -1); + + /* + * Convert sequence beginning with an irreversible character into buffer that + * is too small. + */ + char input[12] = "\xe1\x80\xa1" "AAAAAAAAA"; + char *inptr = input; + size_t insize = sizeof (input); + char output[6]; + char *outptr = output; + size_t outsize = sizeof (output); + + TEST_VERIFY (iconv (cd, &inptr, &insize, &outptr, &outsize) == -1); + TEST_VERIFY (errno == E2BIG); + + TEST_VERIFY_EXIT (iconv_close (cd) != -1); + + return 0; +} + +#include diff --git a/iconv/tst-iconv_prog.sh b/iconv/tst-iconv_prog.sh new file mode 100644 index 000000000..d8db7b335 --- /dev/null +++ b/iconv/tst-iconv_prog.sh @@ -0,0 +1,284 @@ +#!/bin/bash +# Test for some known iconv(1) hangs from bug 19519, and miscellaneous +# iconv(1) program error conditions. +# Copyright (C) 2020 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# . + +codir=$1 +test_wrapper_env="$2" +run_program_env="$3" + +# We have to have some directories in the library path. +LIBPATH=$codir:$codir/iconvdata + +# How the start the iconv(1) program. $from is not defined/expanded yet. +ICONV=' +$codir/elf/ld.so --library-path $LIBPATH --inhibit-rpath ${from}.so +$codir/iconv/iconv_prog +' +ICONV="$test_wrapper_env $run_program_env $ICONV" + +# List of known hangs; +# Gathered by running an exhaustive 2 byte input search against glibc-2.28 +hangarray=( +"\x00\x23;-c;ANSI_X3.110;UTF-8//TRANSLIT//IGNORE" +"\x00\xa1;-c;ARMSCII-8;UTF-8//TRANSLIT//IGNORE" +"\x00\xa1;-c;ASMO_449;UTF-8//TRANSLIT//IGNORE" +"\x00\x81;-c;BIG5;UTF-8//TRANSLIT//IGNORE" +"\x00\xff;-c;BIG5HKSCS;UTF-8//TRANSLIT//IGNORE" +"\x00\xff;-c;BRF;UTF-8//TRANSLIT//IGNORE" +"\x00\xff;-c;BS_4730;UTF-8//TRANSLIT//IGNORE" +"\x00\x81;-c;CP1250;UTF-8//TRANSLIT//IGNORE" +"\x00\x98;-c;CP1251;UTF-8//TRANSLIT//IGNORE" +"\x00\x81;-c;CP1252;UTF-8//TRANSLIT//IGNORE" +"\x00\x81;-c;CP1253;UTF-8//TRANSLIT//IGNORE" +"\x00\x81;-c;CP1254;UTF-8//TRANSLIT//IGNORE" +"\x00\x81;-c;CP1255;UTF-8//TRANSLIT//IGNORE" +"\x00\x81;-c;CP1257;UTF-8//TRANSLIT//IGNORE" +"\x00\x81;-c;CP1258;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;CP932;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;CSA_Z243.4-1985-1;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;CSA_Z243.4-1985-2;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;DEC-MCS;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;DIN_66003;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;DS_2089;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-AT-DE;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-AT-DE-A;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-CA-FR;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-DK-NO;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-DK-NO-A;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-ES;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-ES-A;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-ES-S;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-FI-SE;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-FI-SE-A;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-FR;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-IS-FRISS;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-IT;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-PT;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-UK;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;EBCDIC-US;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ES;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ES2;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;EUC-CN;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;EUC-JISX0213;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;EUC-JP;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;EUC-JP-MS;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;EUC-KR;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;EUC-TW;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;GB18030;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;GB_1988-80;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;GBK;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;GOST_19768-74;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;GREEK7;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;GREEK7-OLD;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;GREEK-CCITT;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;HP-GREEK8;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;HP-ROMAN8;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;HP-ROMAN9;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;HP-THAI8;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;HP-TURKISH8;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM038;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;IBM1004;UTF-8//TRANSLIT//IGNORE" +"\x00\xff;-c;IBM1008;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;IBM1046;UTF-8//TRANSLIT//IGNORE" +"\x00\x51;-c;IBM1132;UTF-8//TRANSLIT//IGNORE" +"\x00\xa0;-c;IBM1133;UTF-8//TRANSLIT//IGNORE" +"\x00\xce;-c;IBM1137;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;IBM1161;UTF-8//TRANSLIT//IGNORE" +"\x00\xdb;-c;IBM1162;UTF-8//TRANSLIT//IGNORE" +"\x00\x70;-c;IBM12712;UTF-8//TRANSLIT//IGNORE" +"\x00\x0f;-c;IBM1364;UTF-8" +"\x0e\x0e;-c;IBM1364;UTF-8" +"\x00\x0f;-c;IBM1371;UTF-8" +"\x0e\x0e;-c;IBM1371;UTF-8" +"\x00\x0f;-c;IBM1388;UTF-8" +"\x0e\x0e;-c;IBM1388;UTF-8" +"\x00\x0f;-c;IBM1390;UTF-8" +"\x0e\x0e;-c;IBM1390;UTF-8" +"\x00\x0f;-c;IBM1399;UTF-8" +"\x0e\x0e;-c;IBM1399;UTF-8" +"\x00\x53;-c;IBM16804;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM274;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM275;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM281;UTF-8//TRANSLIT//IGNORE" +"\x00\x57;-c;IBM290;UTF-8//TRANSLIT//IGNORE" +"\x00\x45;-c;IBM420;UTF-8//TRANSLIT//IGNORE" +"\x00\x68;-c;IBM423;UTF-8//TRANSLIT//IGNORE" +"\x00\x70;-c;IBM424;UTF-8//TRANSLIT//IGNORE" +"\x00\x53;-c;IBM4517;UTF-8//TRANSLIT//IGNORE" +"\x00\x53;-c;IBM4899;UTF-8//TRANSLIT//IGNORE" +"\x00\xa5;-c;IBM4909;UTF-8//TRANSLIT//IGNORE" +"\x00\xdc;-c;IBM4971;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM803;UTF-8//TRANSLIT//IGNORE" +"\x00\x91;-c;IBM851;UTF-8//TRANSLIT//IGNORE" +"\x00\x9b;-c;IBM856;UTF-8//TRANSLIT//IGNORE" +"\x00\xd5;-c;IBM857;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;IBM864;UTF-8//TRANSLIT//IGNORE" +"\x00\x94;-c;IBM868;UTF-8//TRANSLIT//IGNORE" +"\x00\x94;-c;IBM869;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;IBM874;UTF-8//TRANSLIT//IGNORE" +"\x00\x6a;-c;IBM875;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM880;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;IBM891;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;IBM903;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;IBM904;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM905;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;IBM9066;UTF-8//TRANSLIT//IGNORE" +"\x00\x48;-c;IBM918;UTF-8//TRANSLIT//IGNORE" +"\x00\x57;-c;IBM930;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;IBM932;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM933;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM935;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM937;UTF-8//TRANSLIT//IGNORE" +"\x00\x41;-c;IBM939;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;IBM943;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;INIS;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;INIS-8;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;INIS-CYRILLIC;UTF-8//TRANSLIT//IGNORE" +"\x00\xec;-c;ISIRI-3342;UTF-8//TRANSLIT//IGNORE" +"\x00\xec;-c;ISO_10367-BOX;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO-2022-CN;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO-2022-CN-EXT;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO-2022-JP;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO-2022-JP-2;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO-2022-JP-3;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO-2022-KR;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO_2033;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO_5427;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO_5427-EXT;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO_5428;UTF-8//TRANSLIT//IGNORE" +"\x00\xa4;-c;ISO_6937;UTF-8//TRANSLIT//IGNORE" +"\x00\xa0;-c;ISO_6937-2;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO-8859-11;UTF-8//TRANSLIT//IGNORE" +"\x00\xa5;-c;ISO-8859-3;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO-8859-6;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO-8859-7;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;ISO-8859-8;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;ISO-IR-197;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;ISO-IR-209;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;IT;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;JIS_C6220-1969-RO;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;JIS_C6229-1984-B;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;JOHAB;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;JUS_I.B1.002;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;KOI-8;UTF-8//TRANSLIT//IGNORE" +"\x00\x88;-c;KOI8-T;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;KSC5636;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;LATIN-GREEK;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;LATIN-GREEK-1;UTF-8//TRANSLIT//IGNORE" +"\x00\xf6;-c;MAC-IS;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;MSZ_7795.3;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;NATS-DANO;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;NATS-SEFI;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;NC_NC00-10;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;NF_Z_62-010;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;NF_Z_62-010_1973;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;NS_4551-1;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;NS_4551-2;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;PT;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;PT2;UTF-8//TRANSLIT//IGNORE" +"\x00\x98;-c;RK1048;UTF-8//TRANSLIT//IGNORE" +"\x00\x98;-c;SEN_850200_B;UTF-8//TRANSLIT//IGNORE" +"\x00\x98;-c;SEN_850200_C;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;Shift_JISX0213;UTF-8//TRANSLIT//IGNORE" +"\x00\x80;-c;SJIS;UTF-8//TRANSLIT//IGNORE" +"\x00\x23;-c;T.61-8BIT;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;TIS-620;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;TSCII;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;UHC;UTF-8//TRANSLIT//IGNORE" +"\x00\xd8;-c;UNICODE;UTF-8//TRANSLIT//IGNORE" +"\x00\xdc;-c;UTF-16;UTF-8//TRANSLIT//IGNORE" +"\xdc\x00;-c;UTF-16BE;UTF-8//TRANSLIT//IGNORE" +"\x00\xdc;-c;UTF-16LE;UTF-8//TRANSLIT//IGNORE" +"\xff\xff;-c;UTF-7;UTF-8//TRANSLIT//IGNORE" +"\x00\x81;-c;WIN-SAMI-2;UTF-8//TRANSLIT//IGNORE" +) + +# List of option combinations that *should* lead to an error +errorarray=( +# Converting from/to invalid character sets should cause error +"\x00\x00;;INVALID;INVALID" +"\x00\x00;;INVALID;UTF-8" +"\x00\x00;;UTF-8;INVALID" +) + +# Requires $twobyte input, $c flag, $from, and $to to be set; sets $ret +execute_test () +{ + eval PROG=\"$ICONV\" + echo -en "$twobyte" \ + | timeout -k 4 3 $PROG $c -f $from -t "$to" &>/dev/null + ret=$? +} + +check_hangtest_result () +{ + if [ "$ret" -eq "124" ] || [ "$ret" -eq "137" ]; then # timeout/hang + result="HANG" + else + if [ "$ret" -eq "139" ]; then # segfault + result="SEGFAULT" + else + if [ "$ret" -gt "127" ]; then # unexpected error + result="UNEXPECTED" + else + result="OK" + fi + fi + fi + + echo -n "$result: from: \"$from\", to: \"$to\"," + echo " input \"$twobyte\", flags \"$c\"" + + if [ "$result" != "OK" ]; then + exit 1 + fi +} + +for hangcommand in "${hangarray[@]}"; do + twobyte="$(echo "$hangcommand" | cut -d";" -f 1)" + c="$(echo "$hangcommand" | cut -d";" -f 2)" + from="$(echo "$hangcommand" | cut -d";" -f 3)" + to="$(echo "$hangcommand" | cut -d";" -f 4)" + execute_test + check_hangtest_result +done + +check_errtest_result () +{ + if [ "$ret" -eq "1" ]; then # we errored out as expected + result="PASS" + else + result="FAIL" + fi + echo -n "$result: from: \"$from\", to: \"$to\"," + echo " input \"$twobyte\", flags \"$c\", return code $ret" + + if [ "$result" != "PASS" ]; then + exit 1 + fi +} + +for errorcommand in "${errorarray[@]}"; do + twobyte="$(echo "$errorcommand" | cut -d";" -f 1)" + c="$(echo "$errorcommand" | cut -d";" -f 2)" + from="$(echo "$errorcommand" | cut -d";" -f 3)" + to="$(echo "$errorcommand" | cut -d";" -f 4)" + execute_test + check_errtest_result +done diff --git a/iconvdata/Makefile b/iconvdata/Makefile index c83962f35..31b1cf8a9 100644 --- a/iconvdata/Makefile +++ b/iconvdata/Makefile @@ -1,4 +1,5 @@ # Copyright (C) 1997-2020 Free Software Foundation, Inc. +# Copyright (C) The GNU Toolchain Authors. # This file is part of the GNU C Library. # The GNU C Library is free software; you can redistribute it and/or @@ -73,7 +74,9 @@ modules.so := $(addsuffix .so, $(modules)) ifeq (yes,$(build-shared)) tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \ tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \ - bug-iconv10 bug-iconv11 bug-iconv12 + bug-iconv10 bug-iconv11 bug-iconv12 bug-iconv13 bug-iconv14 \ + bug-iconv15 \ + tst-iconv-iso-2022-cn-ext ifeq ($(have-thread-library),yes) tests += bug-iconv3 endif @@ -316,6 +319,12 @@ $(objpfx)bug-iconv10.out: $(objpfx)gconv-modules \ $(addprefix $(objpfx),$(modules.so)) $(objpfx)bug-iconv12.out: $(objpfx)gconv-modules \ $(addprefix $(objpfx),$(modules.so)) +$(objpfx)bug-iconv14.out: $(objpfx)gconv-modules \ + $(addprefix $(objpfx),$(modules.so)) +$(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \ + $(addprefix $(objpfx),$(modules.so)) +$(objpfx)tst-iconv-iso-2022-cn-ext.out: $(addprefix $(objpfx), $(gconv-modules)) \ + $(addprefix $(objpfx),$(modules.so)) $(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \ $(addprefix $(objpfx),$(modules.so)) \ diff --git a/iconvdata/bug-iconv13.c b/iconvdata/bug-iconv13.c new file mode 100644 index 000000000..87aaff398 --- /dev/null +++ b/iconvdata/bug-iconv13.c @@ -0,0 +1,53 @@ +/* bug 24973: Test EUC-KR module + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +static int +do_test (void) +{ + iconv_t cd = iconv_open ("UTF-8//IGNORE", "EUC-KR"); + TEST_VERIFY_EXIT (cd != (iconv_t) -1); + + /* 0xfe (->0x7e : row 94) and 0xc9 (->0x49 : row 41) are user-defined + areas, which are not allowed and should be skipped over due to + //IGNORE. The trailing 0xfe also is an incomplete sequence, which + should be checked first. */ + char input[4] = { '\xc9', '\xa1', '\0', '\xfe' }; + char *inptr = input; + size_t insize = sizeof (input); + char output[4]; + char *outptr = output; + size_t outsize = sizeof (output); + + /* This used to crash due to buffer overrun. */ + TEST_VERIFY (iconv (cd, &inptr, &insize, &outptr, &outsize) == (size_t) -1); + TEST_VERIFY (errno == EINVAL); + /* The conversion should produce one character, the converted null + character. */ + TEST_VERIFY (sizeof (output) - outsize == 1); + + TEST_VERIFY_EXIT (iconv_close (cd) != -1); + + return 0; +} + +#include diff --git a/iconvdata/bug-iconv14.c b/iconvdata/bug-iconv14.c new file mode 100644 index 000000000..902f140fa --- /dev/null +++ b/iconvdata/bug-iconv14.c @@ -0,0 +1,127 @@ +/* Assertion in ISO-2022-JP-3 due to two-character sequence (bug 27256). + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +/* Use an escape sequence to return to the initial state. */ +static void +with_escape_sequence (void) +{ + iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3"); + TEST_VERIFY_EXIT (c != (iconv_t) -1); + + char in[] = "\e$(O+D\e(B"; + char *inbuf = in; + size_t inleft = strlen (in); + char out[3]; /* Space for one output character. */ + char *outbuf; + size_t outleft; + + outbuf = out; + outleft = sizeof (out); + TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1); + TEST_COMPARE (errno, E2BIG); + TEST_COMPARE (inleft, 3); + TEST_COMPARE (inbuf - in, strlen (in) - 3); + TEST_COMPARE (outleft, sizeof (out) - 2); + TEST_COMPARE (outbuf - out, 2); + TEST_COMPARE (out[0] & 0xff, 0xc3); + TEST_COMPARE (out[1] & 0xff, 0xa6); + + /* Return to the initial shift state, producing the pending + character. */ + outbuf = out; + outleft = sizeof (out); + TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), 0); + TEST_COMPARE (inleft, 0); + TEST_COMPARE (inbuf - in, strlen (in)); + TEST_COMPARE (outleft, sizeof (out) - 2); + TEST_COMPARE (outbuf - out, 2); + TEST_COMPARE (out[0] & 0xff, 0xcc); + TEST_COMPARE (out[1] & 0xff, 0x80); + + /* Nothing should be flushed the second time. */ + outbuf = out; + outleft = sizeof (out); + TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0); + TEST_COMPARE (outleft, sizeof (out)); + TEST_COMPARE (outbuf - out, 0); + TEST_COMPARE (out[0] & 0xff, 0xcc); + TEST_COMPARE (out[1] & 0xff, 0x80); + + TEST_COMPARE (iconv_close (c), 0); +} + +/* Use an explicit flush to return to the initial state. */ +static void +with_flush (void) +{ + iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3"); + TEST_VERIFY_EXIT (c != (iconv_t) -1); + + char in[] = "\e$(O+D"; + char *inbuf = in; + size_t inleft = strlen (in); + char out[3]; /* Space for one output character. */ + char *outbuf; + size_t outleft; + + outbuf = out; + outleft = sizeof (out); + TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1); + TEST_COMPARE (errno, E2BIG); + TEST_COMPARE (inleft, 0); + TEST_COMPARE (inbuf - in, strlen (in)); + TEST_COMPARE (outleft, sizeof (out) - 2); + TEST_COMPARE (outbuf - out, 2); + TEST_COMPARE (out[0] & 0xff, 0xc3); + TEST_COMPARE (out[1] & 0xff, 0xa6); + + /* Flush the pending character. */ + outbuf = out; + outleft = sizeof (out); + TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0); + TEST_COMPARE (outleft, sizeof (out) - 2); + TEST_COMPARE (outbuf - out, 2); + TEST_COMPARE (out[0] & 0xff, 0xcc); + TEST_COMPARE (out[1] & 0xff, 0x80); + + /* Nothing should be flushed the second time. */ + outbuf = out; + outleft = sizeof (out); + TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0); + TEST_COMPARE (outleft, sizeof (out)); + TEST_COMPARE (outbuf - out, 0); + TEST_COMPARE (out[0] & 0xff, 0xcc); + TEST_COMPARE (out[1] & 0xff, 0x80); + + TEST_COMPARE (iconv_close (c), 0); +} + +static int +do_test (void) +{ + with_escape_sequence (); + with_flush (); + return 0; +} + +#include diff --git a/iconvdata/bug-iconv15.c b/iconvdata/bug-iconv15.c new file mode 100644 index 000000000..cc04bd031 --- /dev/null +++ b/iconvdata/bug-iconv15.c @@ -0,0 +1,60 @@ +/* Bug 28524: Conversion from ISO-2022-JP-3 with iconv + may emit spurious NUL character on state reset. + Copyright (C) The GNU Toolchain Authors. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include + +static int +do_test (void) +{ + char in[] = "\x1b(I"; + char *inbuf = in; + size_t inleft = sizeof (in) - 1; + char out[1]; + char *outbuf = out; + size_t outleft = sizeof (out); + iconv_t cd; + + cd = iconv_open ("UTF8", "ISO-2022-JP-3"); + TEST_VERIFY_EXIT (cd != (iconv_t) -1); + + /* First call to iconv should alter internal state. + Now, JISX0201_Kana_set is selected and + state value != ASCII_set. */ + TEST_VERIFY (iconv (cd, &inbuf, &inleft, &outbuf, &outleft) != (size_t) -1); + + /* No bytes should have been added to + the output buffer at this point. */ + TEST_VERIFY (outbuf == out); + TEST_VERIFY (outleft == sizeof (out)); + + /* Second call shall emit spurious NUL character in unpatched glibc. */ + TEST_VERIFY (iconv (cd, NULL, NULL, &outbuf, &outleft) != (size_t) -1); + + /* No characters are expected to be produced. */ + TEST_VERIFY (outbuf == out); + TEST_VERIFY (outleft == sizeof (out)); + + TEST_VERIFY_EXIT (iconv_close (cd) != -1); + + return 0; +} + +#include diff --git a/iconvdata/euc-kr.c b/iconvdata/euc-kr.c index b0d56cf3e..1045bae92 100644 --- a/iconvdata/euc-kr.c +++ b/iconvdata/euc-kr.c @@ -80,11 +80,7 @@ euckr_from_ucs4 (uint32_t ch, unsigned char *cp) \ if (ch <= 0x9f) \ ++inptr; \ - /* 0xfe(->0x7e : row 94) and 0xc9(->0x59 : row 41) are \ - user-defined areas. */ \ - else if (__builtin_expect (ch == 0xa0, 0) \ - || __builtin_expect (ch > 0xfe, 0) \ - || __builtin_expect (ch == 0xc9, 0)) \ + else if (__glibc_unlikely (ch == 0xa0)) \ { \ /* This is illegal. */ \ STANDARD_FROM_LOOP_ERR_HANDLER (1); \ diff --git a/iconvdata/ibm1364.c b/iconvdata/ibm1364.c index 49e7267ab..521f0825b 100644 --- a/iconvdata/ibm1364.c +++ b/iconvdata/ibm1364.c @@ -158,24 +158,14 @@ enum \ if (__builtin_expect (ch, 0) == SO) \ { \ - /* Shift OUT, change to DBCS converter. */ \ - if (curcs == db) \ - { \ - result = __GCONV_ILLEGAL_INPUT; \ - break; \ - } \ + /* Shift OUT, change to DBCS converter (redundant escape okay). */ \ curcs = db; \ ++inptr; \ continue; \ } \ if (__builtin_expect (ch, 0) == SI) \ { \ - /* Shift IN, change to SBCS converter. */ \ - if (curcs == sb) \ - { \ - result = __GCONV_ILLEGAL_INPUT; \ - break; \ - } \ + /* Shift IN, change to SBCS converter (redundant escape okay). */ \ curcs = sb; \ ++inptr; \ continue; \ diff --git a/iconvdata/iso-2022-cn-ext.c b/iconvdata/iso-2022-cn-ext.c index 947b80742..34e1010be 100644 --- a/iconvdata/iso-2022-cn-ext.c +++ b/iconvdata/iso-2022-cn-ext.c @@ -575,6 +575,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized"); { \ const char *escseq; \ \ + if (outptr + 4 > outend) \ + { \ + result = __GCONV_FULL_OUTPUT; \ + break; \ + } \ + \ assert (used == CNS11643_2_set); /* XXX */ \ escseq = "*H"; \ *outptr++ = ESC; \ @@ -588,6 +594,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized"); { \ const char *escseq; \ \ + if (outptr + 4 > outend) \ + { \ + result = __GCONV_FULL_OUTPUT; \ + break; \ + } \ + \ assert ((used >> 5) >= 3 && (used >> 5) <= 7); \ escseq = "+I+J+K+L+M" + ((used >> 5) - 3) * 2; \ *outptr++ = ESC; \ diff --git a/iconvdata/iso-2022-jp-3.c b/iconvdata/iso-2022-jp-3.c index 8c3b7e627..c7b470db6 100644 --- a/iconvdata/iso-2022-jp-3.c +++ b/iconvdata/iso-2022-jp-3.c @@ -1,5 +1,6 @@ /* Conversion module for ISO-2022-JP-3. Copyright (C) 1998-2020 Free Software Foundation, Inc. + Copyright (C) The GNU Toolchain Authors. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1998, and Bruno Haible , 2002. @@ -67,10 +68,15 @@ enum CURRENT_SEL_MASK = 7 << 3 }; -/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the state - also contains the last two bytes to be output, shifted by 6 bits, and a - one-bit indicator whether they must be preceded by the shift sequence, - in bit 22. */ +/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the + state also contains the last two bytes to be output, shifted by 6 + bits, and a one-bit indicator whether they must be preceded by the + shift sequence, in bit 22. During ISO-2022-JP-3 to UCS-4 + conversion, COUNT may also contain a non-zero pending wide + character, shifted by six bits. This happens for certain inputs in + JISX0213_1_2004_set and JISX0213_2_set if the second wide character + in a combining sequence cannot be written because the buffer is + full. */ /* Since this is a stateful encoding we have to provide code which resets the output state to the initial state. This has to be done during the @@ -80,10 +86,27 @@ enum { \ if (FROM_DIRECTION) \ { \ - /* It's easy, we don't have to emit anything, we just reset the \ - state for the input. */ \ - data->__statep->__count &= 7; \ - data->__statep->__count |= ASCII_set; \ + uint32_t ch = data->__statep->__count >> 6; \ + \ + if (__glibc_unlikely (ch != 0)) \ + { \ + if (__glibc_likely (outbuf + 4 <= outend)) \ + { \ + /* Write out the last character. */ \ + put32u (outbuf, ch); \ + outbuf += 4; \ + data->__statep->__count &= 7; \ + data->__statep->__count |= ASCII_set; \ + } \ + else \ + /* We don't have enough room in the output buffer. */ \ + status = __GCONV_FULL_OUTPUT; \ + } \ + else \ + { \ + data->__statep->__count &= 7; \ + data->__statep->__count |= ASCII_set; \ + } \ } \ else \ { \ @@ -151,7 +174,21 @@ enum #define LOOPFCT FROM_LOOP #define BODY \ { \ - uint32_t ch = *inptr; \ + uint32_t ch; \ + \ + /* Output any pending character. */ \ + ch = set >> 6; \ + if (__glibc_unlikely (ch != 0)) \ + { \ + put32 (outptr, ch); \ + outptr += 4; \ + /* Remove the pending character, but preserve state bits. */ \ + set &= (1 << 6) - 1; \ + continue; \ + } \ + \ + /* Otherwise read the next input byte. */ \ + ch = *inptr; \ \ /* Recognize escape sequences. */ \ if (__glibc_unlikely (ch == ESC)) \ @@ -297,21 +334,25 @@ enum uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \ uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \ \ + inptr += 2; \ + \ + put32 (outptr, u1); \ + outptr += 4; \ + \ /* See whether we have room for two characters. */ \ - if (outptr + 8 <= outend) \ + if (outptr + 4 <= outend) \ { \ - inptr += 2; \ - put32 (outptr, u1); \ - outptr += 4; \ put32 (outptr, u2); \ outptr += 4; \ continue; \ } \ - else \ - { \ - result = __GCONV_FULL_OUTPUT; \ - break; \ - } \ + \ + /* Otherwise store only the first character now, and \ + put the second one into the queue. */ \ + set |= u2 << 6; \ + /* Tell the caller why we terminate the loop. */ \ + result = __GCONV_FULL_OUTPUT; \ + break; \ } \ \ inptr += 2; \ diff --git a/iconvdata/ksc5601.h b/iconvdata/ksc5601.h index d3eb3a4ff..f5cdc7279 100644 --- a/iconvdata/ksc5601.h +++ b/iconvdata/ksc5601.h @@ -50,15 +50,15 @@ ksc5601_to_ucs4 (const unsigned char **s, size_t avail, unsigned char offset) unsigned char ch2; int idx; + if (avail < 2) + return 0; + /* row 94(0x7e) and row 41(0x49) are user-defined area in KS C 5601 */ if (ch < offset || (ch - offset) <= 0x20 || (ch - offset) >= 0x7e || (ch - offset) == 0x49) return __UNKNOWN_10646_CHAR; - if (avail < 2) - return 0; - ch2 = (*s)[1]; if (ch2 < offset || (ch2 - offset) <= 0x20 || (ch2 - offset) >= 0x7f) return __UNKNOWN_10646_CHAR; diff --git a/iconvdata/tst-iconv-iso-2022-cn-ext.c b/iconvdata/tst-iconv-iso-2022-cn-ext.c new file mode 100644 index 000000000..96a8765fd --- /dev/null +++ b/iconvdata/tst-iconv-iso-2022-cn-ext.c @@ -0,0 +1,128 @@ +/* Verify ISO-2022-CN-EXT does not write out of the bounds. + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include + +#include +#include +#include + +#include +#include +#include + +/* The test sets up a two memory page buffer with the second page marked + PROT_NONE to trigger a fault if the conversion writes beyond the exact + expected amount. Then we carry out various conversions and precisely + place the start of the output buffer in order to trigger a SIGSEGV if the + process writes anywhere between 1 and page sized bytes more (only one + PROT_NONE page is setup as a canary) than expected. These tests exercise + all three of the cases in ISO-2022-CN-EXT where the converter must switch + character sets and may run out of buffer space while doing the + operation. */ + +static int +do_test (void) +{ + iconv_t cd = iconv_open ("ISO-2022-CN-EXT", "UTF-8"); + TEST_VERIFY_EXIT (cd != (iconv_t) -1); + + char *ntf; + size_t ntfsize; + char *outbufbase; + { + int pgz = getpagesize (); + TEST_VERIFY_EXIT (pgz > 0); + ntfsize = 2 * pgz; + + ntf = xmmap (NULL, ntfsize, PROT_READ | PROT_WRITE, MAP_PRIVATE + | MAP_ANONYMOUS, -1); + xmprotect (ntf + pgz, pgz, PROT_NONE); + + outbufbase = ntf + pgz; + } + + /* Check if SOdesignation escape sequence does not trigger an OOB write. */ + { + char inbuf[] = "\xe4\xba\xa4\xe6\x8d\xa2"; + + for (int i = 0; i < 9; i++) + { + char *inp = inbuf; + size_t inleft = sizeof (inbuf) - 1; + + char *outp = outbufbase - i; + size_t outleft = i; + + TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft) + == (size_t) -1); + TEST_COMPARE (errno, E2BIG); + + TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0); + } + } + + /* Same as before for SS2designation. */ + { + char inbuf[] = "ã´½ \xe3\xb4\xbd"; + + for (int i = 0; i < 14; i++) + { + char *inp = inbuf; + size_t inleft = sizeof (inbuf) - 1; + + char *outp = outbufbase - i; + size_t outleft = i; + + TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft) + == (size_t) -1); + TEST_COMPARE (errno, E2BIG); + + TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0); + } + } + + /* Same as before for SS3designation. */ + { + char inbuf[] = "劄 \xe5\x8a\x84"; + + for (int i = 0; i < 14; i++) + { + char *inp = inbuf; + size_t inleft = sizeof (inbuf) - 1; + + char *outp = outbufbase - i; + size_t outleft = i; + + TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft) + == (size_t) -1); + TEST_COMPARE (errno, E2BIG); + + TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0); + } + } + + TEST_VERIFY_EXIT (iconv_close (cd) != -1); + + xmunmap (ntf, ntfsize); + + return 0; +} + +#include diff --git a/include/libc-symbols.h b/include/libc-symbols.h index 685e20fdc..68fc79805 100644 --- a/include/libc-symbols.h +++ b/include/libc-symbols.h @@ -59,6 +59,19 @@ # define IN_MODULE (-1) #endif +/* Use symbol_version_reference to specify the version a symbol + reference should link to. Use symbol_version or + default_symbol_version for the definition of a versioned symbol. + The difference is that the latter is a no-op in non-shared + builds. */ +#ifdef __ASSEMBLER__ +# define symbol_version_reference(real, name, version) \ + .symver real, name##@##version +#else /* !__ASSEMBLER__ */ +# define symbol_version_reference(real, name, version) \ + __asm__ (".symver " #real "," #name "@" #version) +#endif + #ifndef _ISOMAC /* This is defined for the compilation of all C library code. features.h @@ -396,19 +409,6 @@ for linking") past the last element in SET. */ #define symbol_set_end_p(set, ptr) ((ptr) >= (void *const *) &__stop_##set) -/* Use symbol_version_reference to specify the version a symbol - reference should link to. Use symbol_version or - default_symbol_version for the definition of a versioned symbol. - The difference is that the latter is a no-op in non-shared - builds. */ -#ifdef __ASSEMBLER__ -# define symbol_version_reference(real, name, version) \ - .symver real, name##@##version -#else /* !__ASSEMBLER__ */ -# define symbol_version_reference(real, name, version) \ - __asm__ (".symver " #real "," #name "@" #version) -#endif - #ifdef SHARED # define symbol_version(real, name, version) \ symbol_version_reference(real, name, version) diff --git a/include/sys/prctl.h b/include/sys/prctl.h index 0920ed642..d33f3a290 100644 --- a/include/sys/prctl.h +++ b/include/sys/prctl.h @@ -4,6 +4,7 @@ # ifndef _ISOMAC extern int __prctl (int __option, ...); +libc_hidden_proto (__prctl) # endif /* !_ISOMAC */ #endif diff --git a/include/sys/un.h b/include/sys/un.h index bdbee9998..152afd9fc 100644 --- a/include/sys/un.h +++ b/include/sys/un.h @@ -1 +1,13 @@ #include + +#ifndef _ISOMAC + +/* Set ADDR->sun_family to AF_UNIX and ADDR->sun_path to PATHNAME. + Return 0 on success or -1 on failure (due to overlong PATHNAME). + The caller should always use sizeof (struct sockaddr_un) as the + socket address length, disregaring the length of PATHNAME. + Only concrete (non-abstract) pathnames are supported. */ +int __sockaddr_un_set (struct sockaddr_un *addr, const char *pathname) + attribute_hidden; + +#endif /* _ISOMAC */ diff --git a/intl/dcigettext.c b/intl/dcigettext.c index 465c8df34..bd332e71d 100644 --- a/intl/dcigettext.c +++ b/intl/dcigettext.c @@ -1119,11 +1119,19 @@ _nl_find_msg (struct loaded_l10nfile *domain_file, outcharset = encoding; # ifdef _LIBC + + struct gconv_spec conv_spec; + + __gconv_create_spec (&conv_spec, charset, outcharset); + /* We always want to use transliteration. */ - outcharset = norm_add_slashes (outcharset, "TRANSLIT"); - charset = norm_add_slashes (charset, ""); - int r = __gconv_open (outcharset, charset, &convd->conv, - GCONV_AVOID_NOCONV); + conv_spec.translit = true; + + int r = __gconv_open (&conv_spec, &convd->conv, + GCONV_AVOID_NOCONV); + + __gconv_destroy_spec (&conv_spec); + if (__builtin_expect (r != __GCONV_OK, 0)) { /* If the output encoding is the same there is diff --git a/intl/tst-codeset.c b/intl/tst-codeset.c index fd70432ec..e9f6e5e09 100644 --- a/intl/tst-codeset.c +++ b/intl/tst-codeset.c @@ -22,13 +22,11 @@ #include #include #include +#include static int do_test (void) { - char *s; - int result = 0; - unsetenv ("LANGUAGE"); unsetenv ("OUTPUT_CHARSET"); setlocale (LC_ALL, "de_DE.ISO-8859-1"); @@ -36,25 +34,21 @@ do_test (void) bindtextdomain ("codeset", OBJPFX "domaindir"); /* Here we expect output in ISO-8859-1. */ - s = gettext ("cheese"); - if (strcmp (s, "K\344se")) - { - printf ("call 1 returned: %s\n", s); - result = 1; - } + TEST_COMPARE_STRING (gettext ("cheese"), "K\344se"); + /* Here we expect output in UTF-8. */ bind_textdomain_codeset ("codeset", "UTF-8"); + TEST_COMPARE_STRING (gettext ("cheese"), "K\303\244se"); - /* Here we expect output in UTF-8. */ - s = gettext ("cheese"); - if (strcmp (s, "K\303\244se")) - { - printf ("call 2 returned: %s\n", s); - result = 1; - } - - return result; + /* `a with umlaut' is transliterated to `ae'. */ + bind_textdomain_codeset ("codeset", "ASCII//TRANSLIT"); + TEST_COMPARE_STRING (gettext ("cheese"), "Kaese"); + + /* Transliteration also works by default even if not set. */ + bind_textdomain_codeset ("codeset", "ASCII"); + TEST_COMPARE_STRING (gettext ("cheese"), "Kaese"); + + return 0; } -#define TEST_FUNCTION do_test () -#include "../test-skeleton.c" +#include diff --git a/localedata/locales/oc_FR b/localedata/locales/oc_FR index d0d89a149..ba9377ed4 100644 --- a/localedata/locales/oc_FR +++ b/localedata/locales/oc_FR @@ -92,7 +92,7 @@ day "dimenge";/ "diluns";/ "dimars";/ "dimcres";/ - "dijus";/ + "dijus";/ "divendres";/ "dissabte" abmon "gen.";/ @@ -110,7 +110,7 @@ abmon "gen.";/ alt_mon "genir";/ "febrir";/ "mar";/ - "abrial";/ + "abril";/ "mai";/ "junh";/ "julhet";/ @@ -122,7 +122,7 @@ alt_mon "genir";/ mon "de genir";/ "de febrir";/ "de mar";/ - "dabrial";/ + "dabril";/ "de mai";/ "de junh";/ "de julhet";/ diff --git a/malloc/Makefile b/malloc/Makefile index 984045b5b..8edeb3d62 100644 --- a/malloc/Makefile +++ b/malloc/Makefile @@ -61,6 +61,16 @@ endif tests += $(tests-static) test-srcs = tst-mtrace +# These tests either are run with MALLOC_CHECK_=3 by default or do not work +# with MALLOC_CHECK_=3 because they expect a specific failure. +tests-exclude-mcheck = tst-mcheck tst-malloc-usable \ + tst-interpose-nothread tst-interpose-static-nothread \ + tst-interpose-static-thread tst-malloc-too-large \ + tst-mxfast tst-safe-linking + +# Run all tests with MALLOC_CHECK_=3 +tests-mcheck = $(filter-out $(tests-exclude-mcheck),$(tests)) + routines = malloc morecore mcheck mtrace obstack reallocarray \ scratch_buffer_grow scratch_buffer_grow_preserve \ scratch_buffer_set_array_size \ @@ -99,6 +109,11 @@ $(objpfx)tst-malloc-thread-exit: $(shared-thread-library) $(objpfx)tst-malloc-thread-fail: $(shared-thread-library) $(objpfx)tst-malloc-fork-deadlock: $(shared-thread-library) $(objpfx)tst-malloc-stats-cancellation: $(shared-thread-library) +$(objpfx)tst-malloc-backtrace-mcheck: $(shared-thread-library) +$(objpfx)tst-malloc-thread-exit-mcheck: $(shared-thread-library) +$(objpfx)tst-malloc-thread-fail-mcheck: $(shared-thread-library) +$(objpfx)tst-malloc-fork-deadlock-mcheck: $(shared-thread-library) +$(objpfx)tst-malloc-stats-cancellation-mcheck: $(shared-thread-library) # Export the __malloc_initialize_hook variable to libc.so. LDFLAGS-tst-mallocstate = -rdynamic @@ -238,6 +253,8 @@ $(tests:%=$(objpfx)%.o): CPPFLAGS += -DTEST_NO_MALLOPT $(objpfx)tst-interpose-nothread: $(objpfx)tst-interpose-aux-nothread.o $(objpfx)tst-interpose-thread: \ $(objpfx)tst-interpose-aux-thread.o $(shared-thread-library) +$(objpfx)tst-interpose-thread-mcheck: \ + $(objpfx)tst-interpose-aux-thread.o $(shared-thread-library) $(objpfx)tst-interpose-static-nothread: $(objpfx)tst-interpose-aux-nothread.o $(objpfx)tst-interpose-static-thread: \ $(objpfx)tst-interpose-aux-thread.o $(static-thread-library) @@ -255,3 +272,6 @@ $(objpfx)tst-dynarray-fail-mem.out: $(objpfx)tst-dynarray-fail.out $(objpfx)tst-malloc-tcache-leak: $(shared-thread-library) $(objpfx)tst-malloc_info: $(shared-thread-library) $(objpfx)tst-mallocfork2: $(shared-thread-library) +$(objpfx)tst-malloc-tcache-leak-mcheck: $(shared-thread-library) +$(objpfx)tst-malloc_info-mcheck: $(shared-thread-library) +$(objpfx)tst-mallocfork2-mcheck: $(shared-thread-library) diff --git a/malloc/tst-mallocfork2.c b/malloc/tst-mallocfork2.c index 0602a9489..fc1fd64b2 100644 --- a/malloc/tst-mallocfork2.c +++ b/malloc/tst-mallocfork2.c @@ -62,6 +62,9 @@ static volatile sig_atomic_t sigusr1_received; progress. Checked by liveness_signal_handler. */ static volatile sig_atomic_t progress_indicator = 1; +/* Set to 1 if an error occurs in the signal handler. */ +static volatile sig_atomic_t error_indicator = 0; + static void sigusr1_handler (int signo) { @@ -72,7 +75,8 @@ sigusr1_handler (int signo) if (pid == -1) { write_message ("error: fork\n"); - abort (); + error_indicator = 1; + return; } if (pid == 0) _exit (0); @@ -81,12 +85,14 @@ sigusr1_handler (int signo) if (ret < 0) { write_message ("error: waitpid\n"); - abort (); + error_indicator = 1; + return; } if (status != 0) { write_message ("error: unexpected exit status from subprocess\n"); - abort (); + error_indicator = 1; + return; } } @@ -122,9 +128,25 @@ signal_sender (int signo, bool sleep) } } +/* Children processes. */ +static pid_t sigusr1_sender_pids[5] = { 0 }; +static pid_t sigusr2_sender_pid = 0; + +static void +kill_children (void) +{ + for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i) + if (sigusr1_sender_pids[i] > 0) + kill (sigusr1_sender_pids[i], SIGKILL); + if (sigusr2_sender_pid > 0) + kill (sigusr2_sender_pid, SIGKILL); +} + static int do_test (void) { + atexit (kill_children); + /* shared->barrier is intialized along with sigusr1_sender_pids below. */ shared = support_shared_allocate (sizeof (*shared)); @@ -148,14 +170,13 @@ do_test (void) return 1; } - pid_t sigusr2_sender_pid = xfork (); + sigusr2_sender_pid = xfork (); if (sigusr2_sender_pid == 0) signal_sender (SIGUSR2, true); /* Send SIGUSR1 signals from several processes. Hopefully, one signal will hit one of the ciritical functions. Use a barrier to avoid sending signals while not running fork/free/malloc. */ - pid_t sigusr1_sender_pids[5]; { pthread_barrierattr_t attr; xpthread_barrierattr_init (&attr); @@ -166,7 +187,7 @@ do_test (void) } for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i) { - sigusr1_sender_pids[i] = fork (); + sigusr1_sender_pids[i] = xfork (); if (sigusr1_sender_pids[i] == 0) signal_sender (SIGUSR1, false); } @@ -211,7 +232,7 @@ do_test (void) ++malloc_signals; xpthread_barrier_wait (&shared->barrier); - if (objects[slot] == NULL) + if (objects[slot] == NULL || error_indicator != 0) { printf ("error: malloc: %m\n"); for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i) @@ -225,10 +246,6 @@ do_test (void) for (int slot = 0; slot < malloc_objects; ++slot) free (objects[slot]); - for (size_t i = 0; i < array_length (sigusr1_sender_pids); ++i) - kill (sigusr1_sender_pids[i], SIGKILL); - kill (sigusr2_sender_pid, SIGKILL); - printf ("info: signals received during fork: %u\n", fork_signals); printf ("info: signals received during free: %u\n", free_signals); printf ("info: signals received during malloc: %u\n", malloc_signals); diff --git a/math/Makefile b/math/Makefile index 5985b6744..3496af404 100644 --- a/math/Makefile +++ b/math/Makefile @@ -644,6 +644,128 @@ ifneq ($(long-double-fcts),yes) # We won't compile the `long double' code at all. Tell the `double' code # to define aliases for the `FUNCl' names. math-CPPFLAGS += -DNO_LONG_DOUBLE +# GCC 10 diagnoses aliases with types conflicting with built-in +# functions. +CFLAGS-w_acos.c += -fno-builtin-acosl +CFLAGS-w_acosh.c += -fno-builtin-acoshl +CFLAGS-w_asin.c += -fno-builtin-asinl +CFLAGS-s_asinh.c += -fno-builtin-asinhl +CFLAGS-s_atan.c += -fno-builtin-atanl +CFLAGS-w_atan2.c += -fno-builtin-atan2l +CFLAGS-w_atanh.c += -fno-builtin-atanhl +CFLAGS-s_cabs.c += -fno-builtin-cabsl +CFLAGS-s_cacos.c += -fno-builtin-cacosl +CFLAGS-s_cacosh.c += -fno-builtin-cacoshl +CFLAGS-s_canonicalize.c += -fno-builtin-canonicalizel +CFLAGS-s_carg.c += -fno-builtin-cargl +CFLAGS-s_casin.c += -fno-builtin-casinl +CFLAGS-s_casinh.c += -fno-builtin-casinhl +CFLAGS-s_catan.c += -fno-builtin-catanl +CFLAGS-s_catanh.c += -fno-builtin-catanhl +CFLAGS-s_cbrt.c += -fno-builtin-cbrtl +CFLAGS-s_ccos.c += -fno-builtin-ccosl +CFLAGS-s_ccosh.c += -fno-builtin-ccoshl +CFLAGS-s_ceil.c += -fno-builtin-ceill +CFLAGS-s_cexp.c += -fno-builtin-cexpl +CFLAGS-s_cimag.c += -fno-builtin-cimagl +CFLAGS-s_clog.c += -fno-builtin-clogl +CFLAGS-s_clog10.c += -fno-builtin-clog10l +CFLAGS-s_conj.c += -fno-builtin-conjl +CFLAGS-s_copysign.c += -fno-builtin-copysignl +CFLAGS-s_cos.c += -fno-builtin-cosl +CFLAGS-w_cosh.c += -fno-builtin-coshl +CFLAGS-s_cpow.c += -fno-builtin-cpowl +CFLAGS-s_cproj.c += -fno-builtin-cprojl +CFLAGS-s_creal.c += -fno-builtin-creall +CFLAGS-s_csin.c += -fno-builtin-csinl +CFLAGS-s_csinh.c += -fno-builtin-csinhl +CFLAGS-s_csqrt.c += -fno-builtin-csqrtl +CFLAGS-s_ctan.c += -fno-builtin-ctanl +CFLAGS-s_ctanh.c += -fno-builtin-ctanhl +CFLAGS-s_dadd.c += -fno-builtin-daddl +CFLAGS-s_ddiv.c += -fno-builtin-ddivl +CFLAGS-s_dmul.c += -fno-builtin-dmull +CFLAGS-s_dsub.c += -fno-builtin-dsubl +CFLAGS-s_erf.c += -fno-builtin-erfl +CFLAGS-s_erfc.c += -fno-builtin-erfcl +CFLAGS-e_exp.c += -fno-builtin-expl +CFLAGS-w_exp10.c += -fno-builtin-exp10l +CFLAGS-e_exp2.c += -fno-builtin-exp2l +CFLAGS-s_expm1.c += -fno-builtin-expm1l +CFLAGS-s_fabs.c += -fno-builtin-fabsl +CFLAGS-s_fadd.c += -fno-builtin-faddl +CFLAGS-s_fdim.c += -fno-builtin-fdiml +CFLAGS-s_fdiv.c += -fno-builtin-fdivl +CFLAGS-s_finite.c += -fno-builtin-finitel +CFLAGS-s_floor.c += -fno-builtin-floorl +CFLAGS-s_fma.c += -fno-builtin-fmal +CFLAGS-s_fmax.c += -fno-builtin-fmaxl +CFLAGS-s_fmaxmag.c += -fno-builtin-fmaxmagl +CFLAGS-s_fmin.c += -fno-builtin-fminl +CFLAGS-s_fminmag.c += -fno-builtin-fminmagl +CFLAGS-w_fmod.c += -fno-builtin-fmodl +CFLAGS-s_fmul.c += -fno-builtin-fmull +CFLAGS-s_frexp.c += -fno-builtin-frexpl +CFLAGS-s_fromfp.c += -fno-builtin-fromfpl +CFLAGS-s_fromfpx.c += -fno-builtin-fromfpxl +CFLAGS-s_fsub.c += -fno-builtin-fsubl +CFLAGS-s_gamma.c += -fno-builtin-gammal +CFLAGS-s_getpayload.c += -fno-builtin-getpayloadl +CFLAGS-w_hypot.c += -fno-builtin-hypotl +CFLAGS-w_ilogb.c += -fno-builtin-ilogbl +CFLAGS-s_isinf.c += -fno-builtin-isinfl +CFLAGS-s_isnan.c += -fno-builtin-isnanl +CFLAGS-w_j0.c += -fno-builtin-j0l +CFLAGS-w_j1.c += -fno-builtin-j1l +CFLAGS-w_jn.c += -fno-builtin-jnl +CFLAGS-s_ldexp.c += -fno-builtin-ldexpl +CFLAGS-w_lgamma.c += -fno-builtin-lgammal +CFLAGS-w_lgamma_r.c += -fno-builtin-lgammal_r +CFLAGS-w_llogb.c += -fno-builtin-llogbl +CFLAGS-s_llrint.c += -fno-builtin-llrintl +CFLAGS-s_llround.c += -fno-builtin-llroundl +CFLAGS-e_log.c += -fno-builtin-logl +CFLAGS-w_log10.c += -fno-builtin-log10l +CFLAGS-w_log1p.c += -fno-builtin-log1pl +CFLAGS-e_log2.c += -fno-builtin-log2l +CFLAGS-s_logb.c += -fno-builtin-logbl +CFLAGS-s_lrint.c += -fno-builtin-lrintl +CFLAGS-s_lround.c += -fno-builtin-lroundl +CFLAGS-s_modf.c += -fno-builtin-modfl +CFLAGS-s_nan.c += -fno-builtin-nanl +CFLAGS-s_nearbyint.c += -fno-builtin-nearbyintl +CFLAGS-s_nextafter.c += -fno-builtin-nextafterl +CFLAGS-s_nextdown.c += -fno-builtin-nextdownl +CFLAGS-s_nexttoward.c += -fno-builtin-nexttoward -fno-builtin-nexttowardl +CFLAGS-s_nexttowardf.c += -fno-builtin-nexttowardf +CFLAGS-s_nextup.c += -fno-builtin-nextupl +CFLAGS-e_pow.c += -fno-builtin-powl +CFLAGS-w_remainder.c += -fno-builtin-remainderl -fno-builtin-dreml +CFLAGS-s_remquo.c += -fno-builtin-remquol +CFLAGS-s_rint.c += -fno-builtin-rintl +CFLAGS-s_round.c += -fno-builtin-roundl +CFLAGS-s_roundeven.c += -fno-builtin-roundevenl +CFLAGS-w_scalb.c += -fno-builtin-scalbl +CFLAGS-w_scalbln.c += -fno-builtin-scalblnl +CFLAGS-s_scalbn.c += -fno-builtin-scalbnl +CFLAGS-s_setpayload.c += -fno-builtin-setpayloadl +CFLAGS-s_setpayloadsig.c += -fno-builtin-setpayloadsigl +CFLAGS-s_significand.c += -fno-builtin-significandl +CFLAGS-s_sin.c += -fno-builtin-sinl +CFLAGS-s_sincos.c += -fno-builtin-sincosl +CFLAGS-w_sinh.c += -fno-builtin-sinhl +CFLAGS-w_sqrt.c += -fno-builtin-sqrtl +CFLAGS-s_tan.c += -fno-builtin-tanl +CFLAGS-s_tanh.c += -fno-builtin-tanhl +CFLAGS-w_tgamma.c += -fno-builtin-tgammal +CFLAGS-s_totalorder.c += -fno-builtin-totalorderl +CFLAGS-s_totalordermag.c += -fno-builtin-totalordermagl +CFLAGS-s_trunc.c += -fno-builtin-truncl +CFLAGS-s_ufromfp.c += -fno-builtin-ufromfpl +CFLAGS-s_ufromfpx.c += -fno-builtin-ufromfpxl +CFLAGS-s_y0.c += -fno-builtin-y0l +CFLAGS-s_y1.c += -fno-builtin-y1l +CFLAGS-s_yn.c += -fno-builtin-ynl endif # These files quiet sNaNs in a way that is optimized away without diff --git a/misc/Makefile b/misc/Makefile index e0465980c..e167e199e 100644 --- a/misc/Makefile +++ b/misc/Makefile @@ -87,7 +87,7 @@ tests := tst-dirname tst-tsearch tst-fdset tst-mntent tst-hsearch \ tst-preadvwritev tst-preadvwritev64 tst-makedev tst-empty \ tst-preadvwritev2 tst-preadvwritev64v2 tst-warn-wide \ tst-ldbl-warn tst-ldbl-error tst-dbl-efgcvt tst-ldbl-efgcvt \ - tst-mntent-autofs + tst-mntent-autofs tst-syscalls # Tests which need libdl. ifeq (yes,$(build-shared)) diff --git a/misc/tst-syscalls.c b/misc/tst-syscalls.c new file mode 100644 index 000000000..cfcd38232 --- /dev/null +++ b/misc/tst-syscalls.c @@ -0,0 +1,167 @@ +/* Test for syscall interfaces. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* This test verifies that the x32 system call handling zero-extends + unsigned 32-bit arguments to the 64-bit argument registers for + system calls (bug 25810). The bug is specific to x32, but the test + should pass on all architectures. */ + +#include +#include +#include +#include +#include +#include + +/* On x32, this can be passed in a single 64-bit integer register. */ +struct Array +{ + size_t length; + void *ptr; +}; + +static int error_count; + +__attribute__ ((noclone, noinline)) +struct Array +allocate (size_t bytes) +{ + if (!bytes) + return __extension__ (struct Array) {0, 0}; + + void *p = mmap (0x0, bytes, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (p == MAP_FAILED) + return __extension__ (struct Array) {0, 0}; + + return __extension__ (struct Array) {bytes, p}; +} + +__attribute__ ((noclone, noinline)) +void +deallocate (struct Array b) +{ + /* On x32, the 64-bit integer register containing `b' may be copied + to another 64-bit integer register to pass the second argument to + munmap. */ + if (b.length && munmap (b.ptr, b.length)) + { + printf ("munmap error: %m\n"); + error_count++; + } +} + +__attribute__ ((noclone, noinline)) +void * +do_mmap (void *addr, size_t length) +{ + return mmap (addr, length, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); +} + +__attribute__ ((noclone, noinline)) +void * +reallocate (struct Array b) +{ + /* On x32, the 64-bit integer register containing `b' may be copied + to another 64-bit integer register to pass the second argument to + do_mmap. */ + if (b.length) + return do_mmap (b.ptr, b.length); + return NULL; +} + +__attribute__ ((noclone, noinline)) +void +protect (struct Array b) +{ + if (b.length) + { + /* On x32, the 64-bit integer register containing `b' may be copied + to another 64-bit integer register to pass the second argument + to mprotect. */ + if (mprotect (b.ptr, b.length, + PROT_READ | PROT_WRITE | PROT_EXEC)) + { + printf ("mprotect error: %m\n"); + error_count++; + } + } +} + +__attribute__ ((noclone, noinline)) +ssize_t +do_read (int fd, void *ptr, struct Array b) +{ + /* On x32, the 64-bit integer register containing `b' may be copied + to another 64-bit integer register to pass the second argument to + read. */ + if (b.length) + return read (fd, ptr, b.length); + return 0; +} + +__attribute__ ((noclone, noinline)) +ssize_t +do_write (int fd, void *ptr, struct Array b) +{ + /* On x32, the 64-bit integer register containing `b' may be copied + to another 64-bit integer register to pass the second argument to + write. */ + if (b.length) + return write (fd, ptr, b.length); + return 0; +} + +static int +do_test (void) +{ + struct Array array; + + array = allocate (1); + protect (array); + deallocate (array); + void *p = reallocate (array); + if (p == MAP_FAILED) + { + printf ("mmap error: %m\n"); + error_count++; + } + array.ptr = p; + protect (array); + deallocate (array); + + int fd = xopen ("/dev/null", O_RDWR, 0); + char buf[2]; + array.ptr = buf; + if (do_read (fd, array.ptr, array) == -1) + { + printf ("read error: %m\n"); + error_count++; + } + if (do_write (fd, array.ptr, array) == -1) + { + printf ("write error: %m\n"); + error_count++; + } + xclose (fd); + + return error_count ? EXIT_FAILURE : EXIT_SUCCESS; +} + +#include diff --git a/nptl/Makefile b/nptl/Makefile index 584e0ffd9..2df4c9098 100644 --- a/nptl/Makefile +++ b/nptl/Makefile @@ -332,7 +332,7 @@ tests-internal := tst-rwlock19 tst-rwlock20 \ tst-mutexpi8 tst-mutexpi8-static tst-cancel25 xtests = tst-setuid1 tst-setuid1-static tst-setuid2 \ - tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 + tst-mutexpp1 tst-mutexpp6 tst-mutexpp10 tst-setgroups # This test can run into task limits because of a linux kernel bug # and then cause the make process to fail too, see bug 24537. diff --git a/nptl/descr.h b/nptl/descr.h index 9dcf480bd..5f1f35e9a 100644 --- a/nptl/descr.h +++ b/nptl/descr.h @@ -94,7 +94,13 @@ struct pthread_unwind_buf struct xid_command { int syscall_no; - long int id[3]; + /* Enforce zero-extension for the pointer argument in + + int setgroups (size_t size, const gid_t *list); + + The kernel XID arguments are unsigned and do not require sign + extension. */ + unsigned long int id[3]; volatile int cntr; volatile int error; /* -1: no call yet, 0: success seen, >0: error seen. */ }; diff --git a/nptl/tst-setgroups.c b/nptl/tst-setgroups.c new file mode 100644 index 000000000..ae3c1b1ba --- /dev/null +++ b/nptl/tst-setgroups.c @@ -0,0 +1,79 @@ +/* Test setgroups as root and in the presence of threads (Bug 26248) + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* The purpose of this test is to test the setgroups API as root and in + the presence of threads. Once we create a thread the setgroups + implementation must ensure that all threads are set to the same + group and this operation should not fail. Lastly we test setgroups + with a zero sized group and a bad address and verify we get EPERM. */ + +static void * +start_routine (void *args) +{ + return NULL; +} + +static int +do_test (void) +{ + int size; + /* NB: Stack address can be at 0xfffXXXXX on 32-bit OSes. */ + gid_t list[NGROUPS_MAX]; + int status = EXIT_SUCCESS; + + pthread_t thread = xpthread_create (NULL, start_routine, NULL); + + size = getgroups (sizeof (list) / sizeof (list[0]), list); + if (size < 0) + { + status = EXIT_FAILURE; + error (0, errno, "getgroups failed"); + } + if (setgroups (size, list) < 0) + { + if (errno == EPERM) + status = EXIT_UNSUPPORTED; + else + { + status = EXIT_FAILURE; + error (0, errno, "setgroups failed"); + } + } + + if (status == EXIT_SUCCESS && setgroups (0, list) < 0) + { + status = EXIT_FAILURE; + error (0, errno, "setgroups failed"); + } + + xpthread_join (thread); + + return status; +} + +#include diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c index 88c69d1e9..799298ad8 100644 --- a/nscd/netgroupcache.c +++ b/nscd/netgroupcache.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "../inet/netgroup.h" #include "nscd.h" @@ -66,6 +67,16 @@ struct dataset char strdata[0]; }; +/* Send a notfound response to FD. Always returns -1 to indicate an + ephemeral error. */ +static time_t +send_notfound (int fd) +{ + if (fd != -1) + TEMP_FAILURE_RETRY (send (fd, ¬found, sizeof (notfound), MSG_NOSIGNAL)); + return -1; +} + /* Sends a notfound message and prepares a notfound dataset to write to the cache. Returns true if there was enough memory to allocate the dataset and returns the dataset in DATASETP, total bytes to write in TOTALP and the @@ -84,8 +95,7 @@ do_notfound (struct database_dyn *db, int fd, request_header *req, total = sizeof (notfound); timeout = time (NULL) + db->negtimeout; - if (fd != -1) - TEMP_FAILURE_RETRY (send (fd, ¬found, total, MSG_NOSIGNAL)); + send_notfound (fd); dataset = mempool_alloc (db, sizeof (struct dataset) + req->key_len, 1); /* If we cannot permanently store the result, so be it. */ @@ -110,11 +120,78 @@ do_notfound (struct database_dyn *db, int fd, request_header *req, return cacheable; } +struct addgetnetgrentX_scratch +{ + /* This is the result that the caller should use. It can be NULL, + point into buffer, or it can be in the cache. */ + struct dataset *dataset; + + struct scratch_buffer buffer; + + /* Used internally in addgetnetgrentX as a staging area. */ + struct scratch_buffer tmp; + + /* Number of bytes in buffer that are actually used. */ + size_t buffer_used; +}; + +static void +addgetnetgrentX_scratch_init (struct addgetnetgrentX_scratch *scratch) +{ + scratch->dataset = NULL; + scratch_buffer_init (&scratch->buffer); + scratch_buffer_init (&scratch->tmp); + + /* Reserve space for the header. */ + scratch->buffer_used = sizeof (struct dataset); + static_assert (sizeof (struct dataset) < sizeof (scratch->tmp.__space), + "initial buffer space"); + memset (scratch->tmp.data, 0, sizeof (struct dataset)); +} + +static void +addgetnetgrentX_scratch_free (struct addgetnetgrentX_scratch *scratch) +{ + scratch_buffer_free (&scratch->buffer); + scratch_buffer_free (&scratch->tmp); +} + +/* Copy LENGTH bytes from S into SCRATCH. Returns NULL if SCRATCH + could not be resized, otherwise a pointer to the copy. */ +static char * +addgetnetgrentX_append_n (struct addgetnetgrentX_scratch *scratch, + const char *s, size_t length) +{ + while (true) + { + size_t remaining = scratch->buffer.length - scratch->buffer_used; + if (remaining >= length) + break; + if (!scratch_buffer_grow_preserve (&scratch->buffer)) + return NULL; + } + char *copy = scratch->buffer.data + scratch->buffer_used; + memcpy (copy, s, length); + scratch->buffer_used += length; + return copy; +} + +/* Copy S into SCRATCH, including its null terminator. Returns false + if SCRATCH could not be resized. */ +static bool +addgetnetgrentX_append (struct addgetnetgrentX_scratch *scratch, const char *s) +{ + if (s == NULL) + s = ""; + return addgetnetgrentX_append_n (scratch, s, strlen (s) + 1) != NULL; +} + +/* Caller must initialize and free *SCRATCH. If the return value is + negative, this function has sent a notfound response. */ static time_t addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, const char *key, uid_t uid, struct hashentry *he, - struct datahead *dh, struct dataset **resultp, - void **tofreep) + struct datahead *dh, struct addgetnetgrentX_scratch *scratch) { if (__glibc_unlikely (debug_level > 0)) { @@ -133,14 +210,10 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, char *key_copy = NULL; struct __netgrent data; - size_t buflen = MAX (1024, sizeof (*dataset) + req->key_len); - size_t buffilled = sizeof (*dataset); - char *buffer = NULL; size_t nentries = 0; size_t group_len = strlen (key) + 1; struct name_list *first_needed = alloca (sizeof (struct name_list) + group_len); - *tofreep = NULL; if (netgroup_database == NULL && __nss_database_lookup2 ("netgroup", NULL, NULL, &netgroup_database)) @@ -148,12 +221,10 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, /* No such service. */ cacheable = do_notfound (db, fd, req, key, &dataset, &total, &timeout, &key_copy); - goto writeout; + goto maybe_cache_add; } memset (&data, '\0', sizeof (data)); - buffer = xmalloc (buflen); - *tofreep = buffer; first_needed->next = first_needed; memcpy (first_needed->name, key, group_len); data.needed_groups = first_needed; @@ -196,8 +267,8 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, while (1) { int e; - status = getfct.f (&data, buffer + buffilled, - buflen - buffilled - req->key_len, &e); + status = getfct.f (&data, scratch->tmp.data, + scratch->tmp.length, &e); if (status == NSS_STATUS_SUCCESS) { if (data.type == triple_val) @@ -205,68 +276,10 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, const char *nhost = data.val.triple.host; const char *nuser = data.val.triple.user; const char *ndomain = data.val.triple.domain; - - size_t hostlen = strlen (nhost ?: "") + 1; - size_t userlen = strlen (nuser ?: "") + 1; - size_t domainlen = strlen (ndomain ?: "") + 1; - - if (nhost == NULL || nuser == NULL || ndomain == NULL - || nhost > nuser || nuser > ndomain) - { - const char *last = nhost; - if (last == NULL - || (nuser != NULL && nuser > last)) - last = nuser; - if (last == NULL - || (ndomain != NULL && ndomain > last)) - last = ndomain; - - size_t bufused - = (last == NULL - ? buffilled - : last + strlen (last) + 1 - buffer); - - /* We have to make temporary copies. */ - size_t needed = hostlen + userlen + domainlen; - - if (buflen - req->key_len - bufused < needed) - { - buflen += MAX (buflen, 2 * needed); - /* Save offset in the old buffer. We don't - bother with the NULL check here since - we'll do that later anyway. */ - size_t nhostdiff = nhost - buffer; - size_t nuserdiff = nuser - buffer; - size_t ndomaindiff = ndomain - buffer; - - char *newbuf = xrealloc (buffer, buflen); - /* Fix up the triplet pointers into the new - buffer. */ - nhost = (nhost ? newbuf + nhostdiff - : NULL); - nuser = (nuser ? newbuf + nuserdiff - : NULL); - ndomain = (ndomain ? newbuf + ndomaindiff - : NULL); - buffer = newbuf; - } - - nhost = memcpy (buffer + bufused, - nhost ?: "", hostlen); - nuser = memcpy ((char *) nhost + hostlen, - nuser ?: "", userlen); - ndomain = memcpy ((char *) nuser + userlen, - ndomain ?: "", domainlen); - } - - char *wp = buffer + buffilled; - wp = memmove (wp, nhost ?: "", hostlen); - wp += hostlen; - wp = memmove (wp, nuser ?: "", userlen); - wp += userlen; - wp = memmove (wp, ndomain ?: "", domainlen); - wp += domainlen; - buffilled = wp - buffer; + if (!(addgetnetgrentX_append (scratch, nhost) + && addgetnetgrentX_append (scratch, nuser) + && addgetnetgrentX_append (scratch, ndomain))) + return send_notfound (fd); ++nentries; } else @@ -318,8 +331,8 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, } else if (status == NSS_STATUS_TRYAGAIN && e == ERANGE) { - buflen *= 2; - buffer = xrealloc (buffer, buflen); + if (!scratch_buffer_grow (&scratch->tmp)) + return send_notfound (fd); } else if (status == NSS_STATUS_RETURN || status == NSS_STATUS_NOTFOUND @@ -349,13 +362,20 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, { cacheable = do_notfound (db, fd, req, key, &dataset, &total, &timeout, &key_copy); - goto writeout; + goto maybe_cache_add; } - total = buffilled; + /* Capture the result size without the key appended. */ + total = scratch->buffer_used; + + /* Make a copy of the key. The scratch buffer must not move after + this point. */ + key_copy = addgetnetgrentX_append_n (scratch, key, req->key_len); + if (key_copy == NULL) + return send_notfound (fd); /* Fill in the dataset. */ - dataset = (struct dataset *) buffer; + dataset = scratch->buffer.data; timeout = datahead_init_pos (&dataset->head, total + req->key_len, total - offsetof (struct dataset, resp), he == NULL ? 0 : dh->nreloads + 1, @@ -364,11 +384,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, dataset->resp.version = NSCD_VERSION; dataset->resp.found = 1; dataset->resp.nresults = nentries; - dataset->resp.result_len = buffilled - sizeof (*dataset); - - assert (buflen - buffilled >= req->key_len); - key_copy = memcpy (buffer + buffilled, key, req->key_len); - buffilled += req->key_len; + dataset->resp.result_len = total - sizeof (*dataset); /* Now we can determine whether on refill we have to create a new record or not. */ @@ -399,7 +415,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, if (__glibc_likely (newp != NULL)) { /* Adjust pointer into the memory block. */ - key_copy = (char *) newp + (key_copy - buffer); + key_copy = (char *) newp + (key_copy - (char *) dataset); dataset = memcpy (newp, dataset, total + req->key_len); cacheable = true; @@ -411,14 +427,12 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, } if (he == NULL && fd != -1) - { - /* We write the dataset before inserting it to the database - since while inserting this thread might block and so would - unnecessarily let the receiver wait. */ - writeout: + /* We write the dataset before inserting it to the database since + while inserting this thread might block and so would + unnecessarily let the receiver wait. */ writeall (fd, &dataset->resp, dataset->head.recsize); - } + maybe_cache_add: if (cacheable) { /* If necessary, we also propagate the data to disk. */ @@ -442,7 +456,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, } out: - *resultp = dataset; + scratch->dataset = dataset; return timeout; } @@ -463,6 +477,9 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, if (user != NULL) key = (char *) rawmemchr (key, '\0') + 1; const char *domain = *key++ ? key : NULL; + struct addgetnetgrentX_scratch scratch; + + addgetnetgrentX_scratch_init (&scratch); if (__glibc_unlikely (debug_level > 0)) { @@ -478,12 +495,8 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, group, group_len, db, uid); time_t timeout; - void *tofree; if (result != NULL) - { - timeout = result->head.timeout; - tofree = NULL; - } + timeout = result->head.timeout; else { request_header req_get = @@ -492,7 +505,10 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, .key_len = group_len }; timeout = addgetnetgrentX (db, -1, &req_get, group, uid, NULL, NULL, - &result, &tofree); + &scratch); + result = scratch.dataset; + if (timeout < 0) + goto out; } struct indataset @@ -503,24 +519,26 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, = (struct indataset *) mempool_alloc (db, sizeof (*dataset) + req->key_len, 1); - struct indataset dataset_mem; bool cacheable = true; if (__glibc_unlikely (dataset == NULL)) { cacheable = false; - dataset = &dataset_mem; + /* The alloca is safe because nscd_run_worker verfies that + key_len is not larger than MAXKEYLEN. */ + dataset = alloca (sizeof (*dataset) + req->key_len); } datahead_init_pos (&dataset->head, sizeof (*dataset) + req->key_len, sizeof (innetgroup_response_header), - he == NULL ? 0 : dh->nreloads + 1, result->head.ttl); + he == NULL ? 0 : dh->nreloads + 1, + result == NULL ? db->negtimeout : result->head.ttl); /* Set the notfound status and timeout based on the result from getnetgrent. */ - dataset->head.notfound = result->head.notfound; + dataset->head.notfound = result == NULL || result->head.notfound; dataset->head.timeout = timeout; dataset->resp.version = NSCD_VERSION; - dataset->resp.found = result->resp.found; + dataset->resp.found = result != NULL && result->resp.found; /* Until we find a matching entry the result is 0. */ dataset->resp.result = 0; @@ -568,7 +586,9 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, goto out; } - if (he == NULL) + /* addgetnetgrentX may have already sent a notfound response. Do + not send another one. */ + if (he == NULL && dataset->resp.found) { /* We write the dataset before inserting it to the database since while inserting this thread might block and so would @@ -602,7 +622,7 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, } out: - free (tofree); + addgetnetgrentX_scratch_free (&scratch); return timeout; } @@ -612,11 +632,12 @@ addgetnetgrentX_ignore (struct database_dyn *db, int fd, request_header *req, const char *key, uid_t uid, struct hashentry *he, struct datahead *dh) { - struct dataset *ignore; - void *tofree; - time_t timeout = addgetnetgrentX (db, fd, req, key, uid, he, dh, - &ignore, &tofree); - free (tofree); + struct addgetnetgrentX_scratch scratch; + addgetnetgrentX_scratch_init (&scratch); + time_t timeout = addgetnetgrentX (db, fd, req, key, uid, he, dh, &scratch); + addgetnetgrentX_scratch_free (&scratch); + if (timeout < 0) + timeout = 0; return timeout; } @@ -660,5 +681,9 @@ readdinnetgr (struct database_dyn *db, struct hashentry *he, .key_len = he->len }; - return addinnetgrX (db, -1, &req, db->data + he->key, he->owner, he, dh); + time_t timeout = addinnetgrX (db, -1, &req, db->data + he->key, he->owner, + he, dh); + if (timeout < 0) + timeout = 0; + return timeout; } diff --git a/nscd/selinux.c b/nscd/selinux.c index a4ea8008e..1ebf92482 100644 --- a/nscd/selinux.c +++ b/nscd/selinux.c @@ -33,6 +33,7 @@ #ifdef HAVE_LIBAUDIT # include #endif +#include #include "dbg_log.h" #include "selinux.h" @@ -320,6 +321,12 @@ avc_free_lock (void *lock) } +/* avc_init (along with several other symbols) was marked as deprecated by the + SELinux API starting from version 3.1. We use it here, but should + eventually switch to the newer API. */ +DIAG_PUSH_NEEDS_COMMENT +DIAG_IGNORE_NEEDS_COMMENT (10, "-Wdeprecated-declarations"); + /* Initialize the user space access vector cache (AVC) for NSCD along with log/thread/lock callbacks. */ void @@ -335,7 +342,14 @@ nscd_avc_init (void) audit_init (); #endif } +DIAG_POP_NEEDS_COMMENT + +/* security_context_t and sidput (along with several other symbols) were marked + as deprecated by the SELinux API starting from version 3.1. We use them + here, but should eventually switch to the newer API. */ +DIAG_PUSH_NEEDS_COMMENT +DIAG_IGNORE_NEEDS_COMMENT (10, "-Wdeprecated-declarations"); /* Check the permission from the caller (via getpeercon) to nscd. Returns 0 if access is allowed, 1 if denied, and -1 on error. @@ -422,6 +436,7 @@ out: return rc; } +DIAG_POP_NEEDS_COMMENT /* Wrapper to get AVC statistics. */ diff --git a/nss/makedb.c b/nss/makedb.c index 8e389a168..8e1e8ec9a 100644 --- a/nss/makedb.c +++ b/nss/makedb.c @@ -38,6 +38,7 @@ #include #include #include "nss_db/nss_db.h" +#include /* Get libc version number. */ #include "../version.h" @@ -841,6 +842,13 @@ print_database (int fd) #ifdef HAVE_SELINUX + +/* security_context_t and matchpathcon (along with several other symbols) were + marked as deprecated by the SELinux API starting from version 3.1. We use + them here, but should eventually switch to the newer API. */ +DIAG_PUSH_NEEDS_COMMENT +DIAG_IGNORE_NEEDS_COMMENT (10, "-Wdeprecated-declarations"); + static void set_file_creation_context (const char *outname, mode_t mode) { @@ -870,6 +878,7 @@ set_file_creation_context (const char *outname, mode_t mode) freecon (ctx); } } +DIAG_POP_NEEDS_COMMENT static void reset_file_creation_context (void) diff --git a/nss/nss_compat/compat-grp.c b/nss/nss_compat/compat-grp.c index a8de1e03b..f8b19a5cf 100644 --- a/nss/nss_compat/compat-grp.c +++ b/nss/nss_compat/compat-grp.c @@ -142,7 +142,7 @@ _nss_compat_setgrent (int stayopen) } -static enum nss_status +static enum nss_status __attribute_warn_unused_result__ internal_endgrent (ent_t *ent) { if (ent->stream != NULL) @@ -163,6 +163,15 @@ internal_endgrent (ent_t *ent) return NSS_STATUS_SUCCESS; } +/* Like internal_endgrent, but preserve errno in all cases. */ +static void +internal_endgrent_noerror (ent_t *ent) +{ + int saved_errno = errno; + enum nss_status unused __attribute__ ((unused)) = internal_endgrent (ent); + __set_errno (saved_errno); +} + enum nss_status _nss_compat_endgrent (void) { @@ -483,7 +492,7 @@ _nss_compat_getgrnam_r (const char *name, struct group *grp, if (result == NSS_STATUS_SUCCESS) result = internal_getgrnam_r (name, grp, &ent, buffer, buflen, errnop); - internal_endgrent (&ent); + internal_endgrent_noerror (&ent); return result; } @@ -612,7 +621,7 @@ _nss_compat_getgrgid_r (gid_t gid, struct group *grp, if (result == NSS_STATUS_SUCCESS) result = internal_getgrgid_r (gid, grp, &ent, buffer, buflen, errnop); - internal_endgrent (&ent); + internal_endgrent_noerror (&ent); return result; } diff --git a/nss/nss_compat/compat-initgroups.c b/nss/nss_compat/compat-initgroups.c index 939b25b33..7591442ff 100644 --- a/nss/nss_compat/compat-initgroups.c +++ b/nss/nss_compat/compat-initgroups.c @@ -133,7 +133,7 @@ internal_setgrent (ent_t *ent) } -static enum nss_status +static enum nss_status __attribute_warn_unused_result__ internal_endgrent (ent_t *ent) { if (ent->stream != NULL) @@ -157,6 +157,15 @@ internal_endgrent (ent_t *ent) return NSS_STATUS_SUCCESS; } +/* Like internal_endgrent, but preserve errno in all cases. */ +static void +internal_endgrent_noerror (ent_t *ent) +{ + int saved_errno = errno; + enum nss_status unused __attribute__ ((unused)) = internal_endgrent (ent); + __set_errno (saved_errno); +} + /* Add new group record. */ static void add_group (long int *start, long int *size, gid_t **groupsp, long int limit, @@ -501,7 +510,7 @@ _nss_compat_initgroups_dyn (const char *user, gid_t group, long int *start, done: scratch_buffer_free (&tmpbuf); - internal_endgrent (&intern); + internal_endgrent_noerror (&intern); return status; } diff --git a/nss/nss_compat/compat-pwd.c b/nss/nss_compat/compat-pwd.c index ec3f35c59..bd5e707da 100644 --- a/nss/nss_compat/compat-pwd.c +++ b/nss/nss_compat/compat-pwd.c @@ -259,7 +259,7 @@ _nss_compat_setpwent (int stayopen) } -static enum nss_status +static enum nss_status __attribute_warn_unused_result__ internal_endpwent (ent_t *ent) { if (ent->stream != NULL) @@ -287,6 +287,15 @@ internal_endpwent (ent_t *ent) return NSS_STATUS_SUCCESS; } +/* Like internal_endpwent, but preserve errno in all cases. */ +static void +internal_endpwent_noerror (ent_t *ent) +{ + int saved_errno = errno; + enum nss_status unused __attribute__ ((unused)) = internal_endpwent (ent); + __set_errno (saved_errno); +} + enum nss_status _nss_compat_endpwent (void) { @@ -822,7 +831,7 @@ _nss_compat_getpwnam_r (const char *name, struct passwd *pwd, if (result == NSS_STATUS_SUCCESS) result = internal_getpwnam_r (name, pwd, &ent, buffer, buflen, errnop); - internal_endpwent (&ent); + internal_endpwent_noerror (&ent); return result; } @@ -1061,7 +1070,7 @@ _nss_compat_getpwuid_r (uid_t uid, struct passwd *pwd, if (result == NSS_STATUS_SUCCESS) result = internal_getpwuid_r (uid, pwd, &ent, buffer, buflen, errnop); - internal_endpwent (&ent); + internal_endpwent_noerror (&ent); return result; } diff --git a/nss/nss_compat/compat-spwd.c b/nss/nss_compat/compat-spwd.c index f6b7a1ef1..d0e3c51b4 100644 --- a/nss/nss_compat/compat-spwd.c +++ b/nss/nss_compat/compat-spwd.c @@ -215,7 +215,7 @@ _nss_compat_setspent (int stayopen) } -static enum nss_status +static enum nss_status __attribute_warn_unused_result__ internal_endspent (ent_t *ent) { if (ent->stream != NULL) @@ -244,6 +244,15 @@ internal_endspent (ent_t *ent) return NSS_STATUS_SUCCESS; } +/* Like internal_endspent, but preserve errno in all cases. */ +static void +internal_endspent_noerror (ent_t *ent) +{ + int saved_errno = errno; + enum nss_status unused __attribute__ ((unused)) = internal_endspent (ent); + __set_errno (saved_errno); +} + enum nss_status _nss_compat_endspent (void) { @@ -261,7 +270,6 @@ _nss_compat_endspent (void) return result; } - static enum nss_status getspent_next_nss_netgr (const char *name, struct spwd *result, ent_t *ent, char *group, char *buffer, size_t buflen, @@ -786,7 +794,7 @@ _nss_compat_getspnam_r (const char *name, struct spwd *pwd, if (result == NSS_STATUS_SUCCESS) result = internal_getspnam_r (name, pwd, &ent, buffer, buflen, errnop); - internal_endspent (&ent); + internal_endspent_noerror (&ent); return result; } diff --git a/posix/glob.c b/posix/glob.c index cba9cd181..4580cefb9 100644 --- a/posix/glob.c +++ b/posix/glob.c @@ -827,31 +827,32 @@ __glob (const char *pattern, int flags, int (*errfunc) (const char *, int), { size_t home_len = strlen (p->pw_dir); size_t rest_len = end_name == NULL ? 0 : strlen (end_name); - char *d; + char *d, *newp; + bool use_alloca = glob_use_alloca (alloca_used, + home_len + rest_len + 1); - if (__glibc_unlikely (malloc_dirname)) - free (dirname); - malloc_dirname = 0; - - if (glob_use_alloca (alloca_used, home_len + rest_len + 1)) - dirname = alloca_account (home_len + rest_len + 1, - alloca_used); + if (use_alloca) + newp = alloca_account (home_len + rest_len + 1, alloca_used); else { - dirname = malloc (home_len + rest_len + 1); - if (dirname == NULL) + newp = malloc (home_len + rest_len + 1); + if (newp == NULL) { scratch_buffer_free (&pwtmpbuf); retval = GLOB_NOSPACE; goto out; } - malloc_dirname = 1; } - d = mempcpy (dirname, p->pw_dir, home_len); + d = mempcpy (newp, p->pw_dir, home_len); if (end_name != NULL) d = mempcpy (d, end_name, rest_len); *d = '\0'; + if (__glibc_unlikely (malloc_dirname)) + free (dirname); + dirname = newp; + malloc_dirname = !use_alloca; + dirlen = home_len + rest_len; dirname_modified = 1; } diff --git a/posix/wordexp-test.c b/posix/wordexp-test.c index ed1b22308..cb3f989cb 100644 --- a/posix/wordexp-test.c +++ b/posix/wordexp-test.c @@ -183,6 +183,7 @@ struct test_case_struct { 0, NULL, "$var", 0, 0, { NULL, }, IFS }, { 0, NULL, "\"\\n\"", 0, 1, { "\\n", }, IFS }, { 0, NULL, "", 0, 0, { NULL, }, IFS }, + { 0, NULL, "${1234567890123456789012}", 0, 0, { NULL, }, IFS }, /* Flags not already covered (testit() has special handling for these) */ { 0, NULL, "one two", WRDE_DOOFFS, 2, { "one", "two", }, IFS }, diff --git a/posix/wordexp.c b/posix/wordexp.c index e082d9489..56289503a 100644 --- a/posix/wordexp.c +++ b/posix/wordexp.c @@ -1399,7 +1399,7 @@ envsubst: /* Is it a numeric parameter? */ else if (isdigit (env[0])) { - int n = atoi (env); + unsigned long n = strtoul (env, NULL, 10); if (n >= __libc_argc) /* Substitute NULL. */ diff --git a/socket/Makefile b/socket/Makefile index 125c042ca..19fc6ab41 100644 --- a/socket/Makefile +++ b/socket/Makefile @@ -29,10 +29,14 @@ headers := sys/socket.h sys/un.h bits/sockaddr.h bits/socket.h \ routines := accept bind connect getpeername getsockname getsockopt \ listen recv recvfrom recvmsg send sendmsg sendto \ setsockopt shutdown socket socketpair isfdtype opensock \ - sockatmark accept4 recvmmsg sendmmsg + sockatmark accept4 recvmmsg sendmmsg sockaddr_un_set tests := tst-accept4 +tests-internal := \ + tst-sockaddr_un_set \ + # tests-internal + aux := sa_len include ../Rules diff --git a/socket/sockaddr_un_set.c b/socket/sockaddr_un_set.c new file mode 100644 index 000000000..0bd40dc34 --- /dev/null +++ b/socket/sockaddr_un_set.c @@ -0,0 +1,41 @@ +/* Set the sun_path member of struct sockaddr_un. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +int +__sockaddr_un_set (struct sockaddr_un *addr, const char *pathname) +{ + size_t name_length = strlen (pathname); + + /* The kernel supports names of exactly sizeof (addr->sun_path) + bytes, without a null terminator, but userspace does not; see the + SUN_LEN macro. */ + if (name_length >= sizeof (addr->sun_path)) + { + __set_errno (EINVAL); /* Error code used by the kernel. */ + return -1; + } + + addr->sun_family = AF_UNIX; + memcpy (addr->sun_path, pathname, name_length + 1); + return 0; +} diff --git a/socket/tst-sockaddr_un_set.c b/socket/tst-sockaddr_un_set.c new file mode 100644 index 000000000..29c2a81af --- /dev/null +++ b/socket/tst-sockaddr_un_set.c @@ -0,0 +1,62 @@ +/* Test the __sockaddr_un_set function. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* Re-compile the function because the version in libc is not + exported. */ +#include "sockaddr_un_set.c" + +#include + +static int +do_test (void) +{ + struct sockaddr_un sun; + + memset (&sun, 0xcc, sizeof (sun)); + __sockaddr_un_set (&sun, ""); + TEST_COMPARE (sun.sun_family, AF_UNIX); + TEST_COMPARE (__sockaddr_un_set (&sun, ""), 0); + + memset (&sun, 0xcc, sizeof (sun)); + TEST_COMPARE (__sockaddr_un_set (&sun, "/example"), 0); + TEST_COMPARE_STRING (sun.sun_path, "/example"); + + { + char pathname[108]; /* Length of sun_path (ABI constant). */ + memset (pathname, 'x', sizeof (pathname)); + pathname[sizeof (pathname) - 1] = '\0'; + memset (&sun, 0xcc, sizeof (sun)); + TEST_COMPARE (__sockaddr_un_set (&sun, pathname), 0); + TEST_COMPARE (sun.sun_family, AF_UNIX); + TEST_COMPARE_STRING (sun.sun_path, pathname); + } + + { + char pathname[109]; + memset (pathname, 'x', sizeof (pathname)); + pathname[sizeof (pathname) - 1] = '\0'; + memset (&sun, 0xcc, sizeof (sun)); + errno = 0; + TEST_COMPARE (__sockaddr_un_set (&sun, pathname), -1); + TEST_COMPARE (errno, EINVAL); + } + + return 0; +} + +#include diff --git a/stdio-common/Makefile b/stdio-common/Makefile index 95af0c12d..5e92d6b9a 100644 --- a/stdio-common/Makefile +++ b/stdio-common/Makefile @@ -66,6 +66,10 @@ tests := tstscanf test_rdwr test-popen tstgetln test-fseek \ tst-scanf-round \ tst-renameat2 tst-bz11319 tst-bz11319-fortify2 \ scanf14a scanf16a \ + tst-printf-bz25691 \ + tst-vfprintf-width-prec-alloc \ + tst-grouping2 \ + # tests test-srcs = tst-unbputc tst-printf tst-printfsz-islongdouble @@ -75,10 +79,12 @@ tests-special += $(objpfx)tst-unbputc.out $(objpfx)tst-printf.out \ $(objpfx)tst-printf-bz18872-mem.out \ $(objpfx)tst-setvbuf1-cmp.out \ $(objpfx)tst-vfprintf-width-prec-mem.out \ - $(objpfx)tst-printfsz-islongdouble.out + $(objpfx)tst-printfsz-islongdouble.out \ + $(objpfx)tst-printf-bz25691-mem.out generated += tst-printf-bz18872.c tst-printf-bz18872.mtrace \ tst-printf-bz18872-mem.out \ - tst-vfprintf-width-prec.mtrace tst-vfprintf-width-prec-mem.out + tst-vfprintf-width-prec.mtrace tst-vfprintf-width-prec-mem.out \ + tst-printf-bz25691.mtrace tst-printf-bz25691-mem.out endif include ../Rules @@ -91,6 +97,7 @@ $(objpfx)bug14.out: $(gen-locales) $(objpfx)scanf13.out: $(gen-locales) $(objpfx)test-vfprintf.out: $(gen-locales) $(objpfx)tst-grouping.out: $(gen-locales) +$(objpfx)tst-grouping2.out: $(gen-locales) $(objpfx)tst-sprintf.out: $(gen-locales) $(objpfx)tst-sscanf.out: $(gen-locales) $(objpfx)tst-swprintf.out: $(gen-locales) @@ -100,6 +107,8 @@ endif tst-printf-bz18872-ENV = MALLOC_TRACE=$(objpfx)tst-printf-bz18872.mtrace tst-vfprintf-width-prec-ENV = \ MALLOC_TRACE=$(objpfx)tst-vfprintf-width-prec.mtrace +tst-printf-bz25691-ENV = \ + MALLOC_TRACE=$(objpfx)tst-printf-bz25691.mtrace $(objpfx)tst-unbputc.out: tst-unbputc.sh $(objpfx)tst-unbputc $(SHELL) $< $(common-objpfx) '$(test-program-prefix)' > $@; \ diff --git a/stdio-common/bug22.c b/stdio-common/bug22.c index b3d48eb8e..029b54994 100644 --- a/stdio-common/bug22.c +++ b/stdio-common/bug22.c @@ -57,7 +57,7 @@ do_test (void) ret = fprintf (fp, "%." SN3 "d", 1); printf ("ret = %d\n", ret); - if (ret != -1 || errno != EOVERFLOW) + if (ret != N3) return 1; /* GCC 9 warns about output of more than INT_MAX characters; this is diff --git a/stdio-common/tst-grouping2.c b/stdio-common/tst-grouping2.c new file mode 100644 index 000000000..3024c942a --- /dev/null +++ b/stdio-common/tst-grouping2.c @@ -0,0 +1,39 @@ +/* Test printf with grouping and large width (bug 29530) + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +static int +do_test (void) +{ + const int field_width = 1000; + char buf[field_width + 1]; + + xsetlocale (LC_NUMERIC, "de_DE.UTF-8"); + + /* This used to crash in group_number. */ + TEST_COMPARE (sprintf (buf, "%'*d", field_width, 1000), field_width); + TEST_COMPARE_STRING (buf + field_width - 6, " 1.000"); + + return 0; +} + +#include diff --git a/stdio-common/tst-printf-bz25691.c b/stdio-common/tst-printf-bz25691.c new file mode 100644 index 000000000..37b30a3a8 --- /dev/null +++ b/stdio-common/tst-printf-bz25691.c @@ -0,0 +1,108 @@ +/* Test for memory leak with large width (BZ#25691). + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static int +do_test (void) +{ + mtrace (); + + /* For 's' conversion specifier with 'l' modifier the array must be + converted to multibyte characters up to the precision specific + value. */ + { + /* The input size value is to force a heap allocation on temporary + buffer (in the old implementation). */ + const size_t winputsize = 64 * 1024 + 1; + wchar_t *winput = xmalloc (winputsize * sizeof (wchar_t)); + wmemset (winput, L'a', winputsize - 1); + winput[winputsize - 1] = L'\0'; + + char result[9]; + const char expected[] = "aaaaaaaa"; + int ret; + + ret = snprintf (result, sizeof (result), "%.65537ls", winput); + TEST_COMPARE (ret, winputsize - 1); + TEST_COMPARE_BLOB (result, sizeof (result), expected, sizeof (expected)); + + ret = snprintf (result, sizeof (result), "%ls", winput); + TEST_COMPARE (ret, winputsize - 1); + TEST_COMPARE_BLOB (result, sizeof (result), expected, sizeof (expected)); + + free (winput); + } + + /* For 's' converstion specifier the array is interpreted as a multibyte + character sequence and converted to wide characters up to the precision + specific value. */ + { + /* The input size value is to force a heap allocation on temporary + buffer (in the old implementation). */ + const size_t mbssize = 32 * 1024; + char *mbs = xmalloc (mbssize); + memset (mbs, 'a', mbssize - 1); + mbs[mbssize - 1] = '\0'; + + const size_t expectedsize = 32 * 1024; + wchar_t *expected = xmalloc (expectedsize * sizeof (wchar_t)); + wmemset (expected, L'a', expectedsize - 1); + expected[expectedsize-1] = L'\0'; + + const size_t resultsize = mbssize * sizeof (wchar_t); + wchar_t *result = xmalloc (resultsize); + int ret; + + ret = swprintf (result, resultsize, L"%.65537s", mbs); + TEST_COMPARE (ret, mbssize - 1); + TEST_COMPARE_BLOB (result, (ret + 1) * sizeof (wchar_t), + expected, expectedsize * sizeof (wchar_t)); + + ret = swprintf (result, resultsize, L"%1$.65537s", mbs); + TEST_COMPARE (ret, mbssize - 1); + TEST_COMPARE_BLOB (result, (ret + 1) * sizeof (wchar_t), + expected, expectedsize * sizeof (wchar_t)); + + /* Same test, but with an invalid multibyte sequence. */ + mbs[mbssize - 2] = 0xff; + + ret = swprintf (result, resultsize, L"%.65537s", mbs); + TEST_COMPARE (ret, -1); + + ret = swprintf (result, resultsize, L"%1$.65537s", mbs); + TEST_COMPARE (ret, -1); + + free (mbs); + free (result); + free (expected); + } + + return 0; +} + +#include diff --git a/stdio-common/tst-vfprintf-width-prec-alloc.c b/stdio-common/tst-vfprintf-width-prec-alloc.c new file mode 100644 index 000000000..0a74b53a3 --- /dev/null +++ b/stdio-common/tst-vfprintf-width-prec-alloc.c @@ -0,0 +1,41 @@ +/* Test large width or precision does not involve large allocation. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include + +char test_string[] = "test"; + +static int +do_test (void) +{ + struct rlimit limit; + TEST_VERIFY_EXIT (getrlimit (RLIMIT_AS, &limit) == 0); + limit.rlim_cur = 200 * 1024 * 1024; + TEST_VERIFY_EXIT (setrlimit (RLIMIT_AS, &limit) == 0); + FILE *fp = fopen ("/dev/null", "w"); + TEST_VERIFY_EXIT (fp != NULL); + TEST_COMPARE (fprintf (fp, "%1000000000d", 1), 1000000000); + TEST_COMPARE (fprintf (fp, "%.1000000000s", test_string), 4); + TEST_COMPARE (fprintf (fp, "%1000000000d %1000000000d", 1, 2), 2000000001); + TEST_COMPARE (fprintf (fp, "%2$.*1$s", 0x7fffffff, test_string), 4); + return 0; +} + +#include diff --git a/stdio-common/vfprintf-internal.c b/stdio-common/vfprintf-internal.c index 3be92d4b6..b1c8f5c43 100644 --- a/stdio-common/vfprintf-internal.c +++ b/stdio-common/vfprintf-internal.c @@ -31,6 +31,7 @@ #include #include #include +#include /* This code is shared between the standard stdio implementation found in GNU C library and the libio implementation originally found in @@ -45,10 +46,6 @@ #include #endif -/* In some cases we need extra space for all the output which is not - counted in the width of the string. We assume 32 characters is - enough. */ -#define EXTSIZ 32 #define ARGCHECK(S, Format) \ do \ { \ @@ -119,22 +116,38 @@ while (0) #endif -#define done_add(val) \ - do { \ - unsigned int _val = val; \ - assert ((unsigned int) done < (unsigned int) INT_MAX); \ - if (__glibc_unlikely (INT_MAX - done < _val)) \ - { \ - done = -1; \ - __set_errno (EOVERFLOW); \ - goto all_done; \ - } \ - done += _val; \ - } while (0) +/* Add LENGTH to DONE. Return the new value of DONE, or -1 on + overflow (and set errno accordingly). */ +static inline int +done_add_func (size_t length, int done) +{ + if (done < 0) + return done; + int ret; + if (INT_ADD_WRAPV (done, length, &ret)) + { + __set_errno (EOVERFLOW); + return -1; + } + return ret; +} + +#define done_add(val) \ + do \ + { \ + /* Ensure that VAL has a type similar to int. */ \ + _Static_assert (sizeof (val) == sizeof (int), "value int size"); \ + _Static_assert ((__typeof__ (val)) -1 < 0, "value signed"); \ + done = done_add_func ((val), done); \ + if (done < 0) \ + goto all_done; \ + } \ + while (0) #ifndef COMPILE_WPRINTF # define vfprintf __vfprintf_internal # define CHAR_T char +# define OTHER_CHAR_T wchar_t # define UCHAR_T unsigned char # define INT_T int typedef const char *THOUSANDS_SEP_T; @@ -143,25 +156,14 @@ typedef const char *THOUSANDS_SEP_T; # define STR_LEN(Str) strlen (Str) # define PUT(F, S, N) _IO_sputn ((F), (S), (N)) -# define PAD(Padchar) \ - do { \ - if (width > 0) \ - { \ - ssize_t written = _IO_padn (s, (Padchar), width); \ - if (__glibc_unlikely (written != width)) \ - { \ - done = -1; \ - goto all_done; \ - } \ - done_add (written); \ - } \ - } while (0) # define PUTC(C, F) _IO_putc_unlocked (C, F) # define ORIENT if (_IO_vtable_offset (s) == 0 && _IO_fwide (s, -1) != -1)\ return -1 +# define CONVERT_FROM_OTHER_STRING __wcsrtombs #else # define vfprintf __vfwprintf_internal # define CHAR_T wchar_t +# define OTHER_CHAR_T char /* This is a hack!!! There should be a type uwchar_t. */ # define UCHAR_T unsigned int /* uwchar_t */ # define INT_T wint_t @@ -173,21 +175,9 @@ typedef wchar_t THOUSANDS_SEP_T; # include <_itowa.h> # define PUT(F, S, N) _IO_sputn ((F), (S), (N)) -# define PAD(Padchar) \ - do { \ - if (width > 0) \ - { \ - ssize_t written = _IO_wpadn (s, (Padchar), width); \ - if (__glibc_unlikely (written != width)) \ - { \ - done = -1; \ - goto all_done; \ - } \ - done_add (written); \ - } \ - } while (0) # define PUTC(C, F) _IO_putwc_unlocked (C, F) # define ORIENT if (_IO_fwide (s, 1) != 1) return -1 +# define CONVERT_FROM_OTHER_STRING __mbsrtowcs # undef _itoa # define _itoa(Val, Buf, Base, Case) _itowa (Val, Buf, Base, Case) @@ -196,6 +186,33 @@ typedef wchar_t THOUSANDS_SEP_T; # define EOF WEOF #endif +static inline int +pad_func (FILE *s, CHAR_T padchar, int width, int done) +{ + if (width > 0) + { + ssize_t written; +#ifndef COMPILE_WPRINTF + written = _IO_padn (s, padchar, width); +#else + written = _IO_wpadn (s, padchar, width); +#endif + if (__glibc_unlikely (written != width)) + return -1; + return done_add_func (width, done); + } + return done; +} + +#define PAD(Padchar) \ + do \ + { \ + done = pad_func (s, (Padchar), width, done); \ + if (done < 0) \ + goto all_done; \ + } \ + while (0) + #include "_i18n_number.h" /* Include the shared code for parsing the format string. */ @@ -215,24 +232,115 @@ typedef wchar_t THOUSANDS_SEP_T; } \ while (0) -#define outstring(String, Len) \ - do \ - { \ - assert ((size_t) done <= (size_t) INT_MAX); \ - if ((size_t) PUT (s, (String), (Len)) != (size_t) (Len)) \ - { \ - done = -1; \ - goto all_done; \ - } \ - if (__glibc_unlikely (INT_MAX - done < (Len))) \ - { \ - done = -1; \ - __set_errno (EOVERFLOW); \ - goto all_done; \ - } \ - done += (Len); \ - } \ - while (0) +static inline int +outstring_func (FILE *s, const UCHAR_T *string, size_t length, int done) +{ + assert ((size_t) done <= (size_t) INT_MAX); + if ((size_t) PUT (s, string, length) != (size_t) (length)) + return -1; + return done_add_func (length, done); +} + +#define outstring(String, Len) \ + do \ + { \ + const void *string_ = (String); \ + done = outstring_func (s, string_, (Len), done); \ + if (done < 0) \ + goto all_done; \ + } \ + while (0) + +/* Write the string SRC to S. If PREC is non-negative, write at most + PREC bytes. If LEFT is true, perform left justification. */ +static int +outstring_converted_wide_string (FILE *s, const OTHER_CHAR_T *src, int prec, + int width, bool left, int done) +{ + /* Use a small buffer to combine processing of multiple characters. + CONVERT_FROM_OTHER_STRING expects the buffer size in (wide) + characters, and buf_length counts that. */ + enum { buf_length = 256 / sizeof (CHAR_T) }; + CHAR_T buf[buf_length]; + _Static_assert (sizeof (buf) > MB_LEN_MAX, + "buffer is large enough for a single multi-byte character"); + + /* Add the initial padding if needed. */ + if (width > 0 && !left) + { + /* Make a first pass to find the output width, so that we can + add the required padding. */ + mbstate_t mbstate = { 0 }; + const OTHER_CHAR_T *src_copy = src; + size_t total_written; + if (prec < 0) + total_written = CONVERT_FROM_OTHER_STRING + (NULL, &src_copy, 0, &mbstate); + else + { + /* The source might not be null-terminated. Enforce the + limit manually, based on the output length. */ + total_written = 0; + size_t limit = prec; + while (limit > 0 && src_copy != NULL) + { + size_t write_limit = buf_length; + if (write_limit > limit) + write_limit = limit; + size_t written = CONVERT_FROM_OTHER_STRING + (buf, &src_copy, write_limit, &mbstate); + if (written == (size_t) -1) + return -1; + if (written == 0) + break; + total_written += written; + limit -= written; + } + } + + /* Output initial padding. */ + if (total_written < width) + { + done = pad_func (s, L_(' '), width - total_written, done); + if (done < 0) + return done; + } + } + + /* Convert the input string, piece by piece. */ + size_t total_written = 0; + { + mbstate_t mbstate = { 0 }; + /* If prec is negative, remaining is not decremented, otherwise, + it serves as the write limit. */ + size_t remaining = -1; + if (prec >= 0) + remaining = prec; + while (remaining > 0 && src != NULL) + { + size_t write_limit = buf_length; + if (remaining < write_limit) + write_limit = remaining; + size_t written = CONVERT_FROM_OTHER_STRING + (buf, &src, write_limit, &mbstate); + if (written == (size_t) -1) + return -1; + if (written == 0) + break; + done = outstring_func (s, (const UCHAR_T *) buf, written, done); + if (done < 0) + return done; + total_written += written; + if (prec >= 0) + remaining -= written; + } + } + + /* Add final padding. */ + if (width > 0 && left && total_written < width) + return pad_func (s, L_(' '), width - total_written, done); + return done; +} /* For handling long_double and longlong we use the same flag. If `long' and `long long' are effectively the same type define it to @@ -1022,7 +1130,6 @@ static const uint8_t jump_table[] = LABEL (form_string): \ { \ size_t len; \ - int string_malloced; \ \ /* The string argument could in fact be `char *' or `wchar_t *'. \ But this should not make a difference here. */ \ @@ -1034,7 +1141,6 @@ static const uint8_t jump_table[] = /* Entry point for printing other strings. */ \ LABEL (print_string): \ \ - string_malloced = 0; \ if (string == NULL) \ { \ /* Write "(null)" if there's space. */ \ @@ -1051,41 +1157,12 @@ static const uint8_t jump_table[] = } \ else if (!is_long && spec != L_('S')) \ { \ - /* This is complicated. We have to transform the multibyte \ - string into a wide character string. */ \ - const char *mbs = (const char *) string; \ - mbstate_t mbstate; \ - \ - len = prec != -1 ? __strnlen (mbs, (size_t) prec) : strlen (mbs); \ - \ - /* Allocate dynamically an array which definitely is long \ - enough for the wide character version. Each byte in the \ - multi-byte string can produce at most one wide character. */ \ - if (__glibc_unlikely (len > SIZE_MAX / sizeof (wchar_t))) \ - { \ - __set_errno (EOVERFLOW); \ - done = -1; \ - goto all_done; \ - } \ - else if (__libc_use_alloca (len * sizeof (wchar_t))) \ - string = (CHAR_T *) alloca (len * sizeof (wchar_t)); \ - else if ((string = (CHAR_T *) malloc (len * sizeof (wchar_t))) \ - == NULL) \ - { \ - done = -1; \ - goto all_done; \ - } \ - else \ - string_malloced = 1; \ - \ - memset (&mbstate, '\0', sizeof (mbstate_t)); \ - len = __mbsrtowcs (string, &mbs, len, &mbstate); \ - if (len == (size_t) -1) \ - { \ - /* Illegal multibyte character. */ \ - done = -1; \ - goto all_done; \ - } \ + done = outstring_converted_wide_string \ + (s, (const char *) string, prec, width, left, done); \ + if (done < 0) \ + goto all_done; \ + /* The padding has already been written. */ \ + break; \ } \ else \ { \ @@ -1108,8 +1185,6 @@ static const uint8_t jump_table[] = outstring (string, len); \ if (left) \ PAD (L' '); \ - if (__glibc_unlikely (string_malloced)) \ - free (string); \ } \ break; #else @@ -1158,7 +1233,6 @@ static const uint8_t jump_table[] = LABEL (form_string): \ { \ size_t len; \ - int string_malloced; \ \ /* The string argument could in fact be `char *' or `wchar_t *'. \ But this should not make a difference here. */ \ @@ -1170,7 +1244,6 @@ static const uint8_t jump_table[] = /* Entry point for printing other strings. */ \ LABEL (print_string): \ \ - string_malloced = 0; \ if (string == NULL) \ { \ /* Write "(null)" if there's space. */ \ @@ -1196,51 +1269,12 @@ static const uint8_t jump_table[] = } \ else \ { \ - const wchar_t *s2 = (const wchar_t *) string; \ - mbstate_t mbstate; \ - \ - memset (&mbstate, '\0', sizeof (mbstate_t)); \ - \ - if (prec >= 0) \ - { \ - /* The string `s2' might not be NUL terminated. */ \ - if (__libc_use_alloca (prec)) \ - string = (char *) alloca (prec); \ - else if ((string = (char *) malloc (prec)) == NULL) \ - { \ - done = -1; \ - goto all_done; \ - } \ - else \ - string_malloced = 1; \ - len = __wcsrtombs (string, &s2, prec, &mbstate); \ - } \ - else \ - { \ - len = __wcsrtombs (NULL, &s2, 0, &mbstate); \ - if (len != (size_t) -1) \ - { \ - assert (__mbsinit (&mbstate)); \ - s2 = (const wchar_t *) string; \ - if (__libc_use_alloca (len + 1)) \ - string = (char *) alloca (len + 1); \ - else if ((string = (char *) malloc (len + 1)) == NULL) \ - { \ - done = -1; \ - goto all_done; \ - } \ - else \ - string_malloced = 1; \ - (void) __wcsrtombs (string, &s2, len + 1, &mbstate); \ - } \ - } \ - \ - if (len == (size_t) -1) \ - { \ - /* Illegal wide-character string. */ \ - done = -1; \ - goto all_done; \ - } \ + done = outstring_converted_wide_string \ + (s, (const wchar_t *) string, prec, width, left, done); \ + if (done < 0) \ + goto all_done; \ + /* The padding has already been written. */ \ + break; \ } \ \ if ((width -= len) < 0) \ @@ -1254,8 +1288,6 @@ static const uint8_t jump_table[] = outstring (string, len); \ if (left) \ PAD (' '); \ - if (__glibc_unlikely (string_malloced)) \ - free (string); \ } \ break; #endif @@ -1307,7 +1339,6 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags) /* Buffer intermediate results. */ CHAR_T work_buffer[WORK_BUFFER_SIZE]; - CHAR_T *workstart = NULL; CHAR_T *workend; /* We have to save the original argument pointer. */ @@ -1416,7 +1447,6 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags) UCHAR_T pad = L_(' ');/* Padding character. */ CHAR_T spec; - workstart = NULL; workend = work_buffer + WORK_BUFFER_SIZE; /* Get current character in format string. */ @@ -1508,31 +1538,6 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags) pad = L_(' '); left = 1; } - - if (__glibc_unlikely (width >= INT_MAX / sizeof (CHAR_T) - EXTSIZ)) - { - __set_errno (EOVERFLOW); - done = -1; - goto all_done; - } - - if (width >= WORK_BUFFER_SIZE - EXTSIZ) - { - /* We have to use a special buffer. */ - size_t needed = ((size_t) width + EXTSIZ) * sizeof (CHAR_T); - if (__libc_use_alloca (needed)) - workend = (CHAR_T *) alloca (needed) + width + EXTSIZ; - else - { - workstart = (CHAR_T *) malloc (needed); - if (workstart == NULL) - { - done = -1; - goto all_done; - } - workend = workstart + width + EXTSIZ; - } - } } JUMP (*f, step1_jumps); @@ -1540,31 +1545,13 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags) LABEL (width): width = read_int (&f); - if (__glibc_unlikely (width == -1 - || width >= INT_MAX / sizeof (CHAR_T) - EXTSIZ)) + if (__glibc_unlikely (width == -1)) { __set_errno (EOVERFLOW); done = -1; goto all_done; } - if (width >= WORK_BUFFER_SIZE - EXTSIZ) - { - /* We have to use a special buffer. */ - size_t needed = ((size_t) width + EXTSIZ) * sizeof (CHAR_T); - if (__libc_use_alloca (needed)) - workend = (CHAR_T *) alloca (needed) + width + EXTSIZ; - else - { - workstart = (CHAR_T *) malloc (needed); - if (workstart == NULL) - { - done = -1; - goto all_done; - } - workend = workstart + width + EXTSIZ; - } - } if (*f == L_('$')) /* Oh, oh. The argument comes from a positional parameter. */ goto do_positional; @@ -1613,34 +1600,6 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags) } else prec = 0; - if (prec > width && prec > WORK_BUFFER_SIZE - EXTSIZ) - { - /* Deallocate any previously allocated buffer because it is - too small. */ - if (__glibc_unlikely (workstart != NULL)) - free (workstart); - workstart = NULL; - if (__glibc_unlikely (prec >= INT_MAX / sizeof (CHAR_T) - EXTSIZ)) - { - __set_errno (EOVERFLOW); - done = -1; - goto all_done; - } - size_t needed = ((size_t) prec + EXTSIZ) * sizeof (CHAR_T); - - if (__libc_use_alloca (needed)) - workend = (CHAR_T *) alloca (needed) + prec + EXTSIZ; - else - { - workstart = (CHAR_T *) malloc (needed); - if (workstart == NULL) - { - done = -1; - goto all_done; - } - workend = workstart + prec + EXTSIZ; - } - } JUMP (*f, step2_jumps); /* Process 'h' modifier. There might another 'h' following. */ @@ -1704,10 +1663,6 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags) /* The format is correctly handled. */ ++nspecs_done; - if (__glibc_unlikely (workstart != NULL)) - free (workstart); - workstart = NULL; - /* Look for next format specifier. */ #ifdef COMPILE_WPRINTF f = __find_specwc ((end_of_spec = ++f)); @@ -1725,18 +1680,11 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap, unsigned int mode_flags) /* Hand off processing for positional parameters. */ do_positional: - if (__glibc_unlikely (workstart != NULL)) - { - free (workstart); - workstart = NULL; - } done = printf_positional (s, format, readonly_format, ap, &ap_save, done, nspecs_done, lead_str_end, work_buffer, save_errno, grouping, thousands_sep, mode_flags); all_done: - if (__glibc_unlikely (workstart != NULL)) - free (workstart); /* Unlock the stream. */ _IO_funlockfile (s); _IO_cleanup_region_end (0); @@ -1780,8 +1728,6 @@ printf_positional (FILE *s, const CHAR_T *format, int readonly_format, /* Just a counter. */ size_t cnt; - CHAR_T *workstart = NULL; - if (grouping == (const char *) -1) { #ifdef COMPILE_WPRINTF @@ -1974,7 +1920,6 @@ printf_positional (FILE *s, const CHAR_T *format, int readonly_format, char pad = specs[nspecs_done].info.pad; CHAR_T spec = specs[nspecs_done].info.spec; - workstart = NULL; CHAR_T *workend = work_buffer + WORK_BUFFER_SIZE; /* Fill in last information. */ @@ -2008,27 +1953,6 @@ printf_positional (FILE *s, const CHAR_T *format, int readonly_format, prec = specs[nspecs_done].info.prec; } - /* Maybe the buffer is too small. */ - if (MAX (prec, width) + EXTSIZ > WORK_BUFFER_SIZE) - { - if (__libc_use_alloca ((MAX (prec, width) + EXTSIZ) - * sizeof (CHAR_T))) - workend = ((CHAR_T *) alloca ((MAX (prec, width) + EXTSIZ) - * sizeof (CHAR_T)) - + (MAX (prec, width) + EXTSIZ)); - else - { - workstart = (CHAR_T *) malloc ((MAX (prec, width) + EXTSIZ) - * sizeof (CHAR_T)); - if (workstart == NULL) - { - done = -1; - goto all_done; - } - workend = workstart + (MAX (prec, width) + EXTSIZ); - } - } - /* Process format specifiers. */ while (1) { @@ -2102,18 +2026,12 @@ printf_positional (FILE *s, const CHAR_T *format, int readonly_format, break; } - if (__glibc_unlikely (workstart != NULL)) - free (workstart); - workstart = NULL; - /* Write the following constant string. */ outstring (specs[nspecs_done].end_of_fmt, specs[nspecs_done].next_fmt - specs[nspecs_done].end_of_fmt); } all_done: - if (__glibc_unlikely (workstart != NULL)) - free (workstart); scratch_buffer_free (&argsbuf); scratch_buffer_free (&specsbuf); return done; @@ -2236,7 +2154,8 @@ group_number (CHAR_T *front_ptr, CHAR_T *w, CHAR_T *rear_ptr, copy_rest: /* No further grouping to be done. Copy the rest of the number. */ - memmove (w, s, (front_ptr -s) * sizeof (CHAR_T)); + w -= s - front_ptr; + memmove (w, front_ptr, (s - front_ptr) * sizeof (CHAR_T)); break; } else if (*grouping != '\0') diff --git a/stdlib/Makefile b/stdlib/Makefile index 45214b59e..4615f6dfe 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -70,7 +70,7 @@ tests := tst-strtol tst-strtod testmb testrand testsort testdiv \ test-canon test-canon2 tst-strtoll tst-environ \ tst-xpg-basename tst-random tst-random2 tst-bsearch \ tst-limits tst-rand48 bug-strtod tst-setcontext \ - tst-setcontext2 test-a64l tst-qsort tst-system testmb2 \ + tst-setcontext2 test-a64l tst-qsort testmb2 \ bug-strtod2 tst-atof1 tst-atof2 tst-strtod2 \ tst-rand48-2 tst-makecontext tst-strtod5 \ tst-qsort2 tst-makecontext2 tst-strtod6 tst-unsetenv1 \ @@ -92,6 +92,7 @@ tests := tst-strtol tst-strtod testmb testrand testsort testdiv \ tests-internal := tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \ tst-tls-atexit tst-tls-atexit-nodelete tests-static := tst-secure-getenv +tests-container := tst-system ifeq ($(build-hardcoded-path-in-tests),yes) tests += tst-empty-env diff --git a/stdlib/tst-secure-getenv.c b/stdlib/tst-secure-getenv.c index 3cfe9a05c..d4b1139c5 100644 --- a/stdlib/tst-secure-getenv.c +++ b/stdlib/tst-secure-getenv.c @@ -30,167 +30,12 @@ #include #include +#include #include +#include #include static char MAGIC_ARGUMENT[] = "run-actual-test"; -#define MAGIC_STATUS 19 - -/* Return a GID which is not our current GID, but is present in the - supplementary group list. */ -static gid_t -choose_gid (void) -{ - int count = getgroups (0, NULL); - if (count < 0) - { - printf ("getgroups: %m\n"); - exit (1); - } - gid_t *groups; - groups = xcalloc (count, sizeof (*groups)); - int ret = getgroups (count, groups); - if (ret < 0) - { - printf ("getgroups: %m\n"); - exit (1); - } - gid_t current = getgid (); - gid_t not_current = 0; - for (int i = 0; i < ret; ++i) - { - if (groups[i] != current) - { - not_current = groups[i]; - break; - } - } - free (groups); - return not_current; -} - - -/* Copies the executable into a restricted directory, so that we can - safely make it SGID with the TARGET group ID. Then runs the - executable. */ -static int -run_executable_sgid (gid_t target) -{ - char *dirname = xasprintf ("%s/secure-getenv.%jd", - test_dir, (intmax_t) getpid ()); - char *execname = xasprintf ("%s/bin", dirname); - int infd = -1; - int outfd = -1; - int ret = -1; - if (mkdir (dirname, 0700) < 0) - { - printf ("mkdir: %m\n"); - goto err; - } - infd = open ("/proc/self/exe", O_RDONLY); - if (infd < 0) - { - printf ("open (/proc/self/exe): %m\n"); - goto err; - } - outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700); - if (outfd < 0) - { - printf ("open (%s): %m\n", execname); - goto err; - } - char buf[4096]; - for (;;) - { - ssize_t rdcount = read (infd, buf, sizeof (buf)); - if (rdcount < 0) - { - printf ("read: %m\n"); - goto err; - } - if (rdcount == 0) - break; - char *p = buf; - char *end = buf + rdcount; - while (p != end) - { - ssize_t wrcount = write (outfd, buf, end - p); - if (wrcount == 0) - errno = ENOSPC; - if (wrcount <= 0) - { - printf ("write: %m\n"); - goto err; - } - p += wrcount; - } - } - if (fchown (outfd, getuid (), target) < 0) - { - printf ("fchown (%s): %m\n", execname); - goto err; - } - if (fchmod (outfd, 02750) < 0) - { - printf ("fchmod (%s): %m\n", execname); - goto err; - } - if (close (outfd) < 0) - { - printf ("close (outfd): %m\n"); - goto err; - } - if (close (infd) < 0) - { - printf ("close (infd): %m\n"); - goto err; - } - - int kid = fork (); - if (kid < 0) - { - printf ("fork: %m\n"); - goto err; - } - if (kid == 0) - { - /* Child process. */ - char *args[] = { execname, MAGIC_ARGUMENT, NULL }; - execve (execname, args, environ); - printf ("execve (%s): %m\n", execname); - _exit (1); - } - int status; - if (waitpid (kid, &status, 0) < 0) - { - printf ("waitpid: %m\n"); - goto err; - } - if (!WIFEXITED (status) || WEXITSTATUS (status) != MAGIC_STATUS) - { - printf ("Unexpected exit status %d from child process\n", - status); - goto err; - } - ret = 0; - -err: - if (outfd >= 0) - close (outfd); - if (infd >= 0) - close (infd); - if (execname) - { - unlink (execname); - free (execname); - } - if (dirname) - { - rmdir (dirname); - free (dirname); - } - return ret; -} static int do_test (void) @@ -212,15 +57,15 @@ do_test (void) exit (1); } - gid_t target = choose_gid (); - if (target == 0) - { - fprintf (stderr, - "Could not find a suitable GID for user %jd, skipping test\n", - (intmax_t) getuid ()); - exit (0); - } - return run_executable_sgid (target); + int status = support_capture_subprogram_self_sgid (MAGIC_ARGUMENT); + + if (WEXITSTATUS (status) == EXIT_UNSUPPORTED) + return EXIT_UNSUPPORTED; + + if (!WIFEXITED (status)) + FAIL_EXIT1 ("Unexpected exit status %d from child process\n", status); + + return 0; } static void @@ -229,23 +74,15 @@ alternative_main (int argc, char **argv) if (argc == 2 && strcmp (argv[1], MAGIC_ARGUMENT) == 0) { if (getgid () == getegid ()) - { - /* This can happen if the file system is mounted nosuid. */ - fprintf (stderr, "SGID failed: GID and EGID match (%jd)\n", - (intmax_t) getgid ()); - exit (MAGIC_STATUS); - } + /* This can happen if the file system is mounted nosuid. */ + FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n", + (intmax_t) getgid ()); if (getenv ("PATH") == NULL) - { - printf ("PATH variable not present\n"); - exit (3); - } + FAIL_EXIT (3, "PATH variable not present\n"); if (secure_getenv ("PATH") != NULL) - { - printf ("PATH variable not filtered out\n"); - exit (4); - } - exit (MAGIC_STATUS); + FAIL_EXIT (4, "PATH variable not filtered out\n"); + + exit (EXIT_SUCCESS); } } diff --git a/stdlib/tst-system.c b/stdlib/tst-system.c index b6c5aea08..eddea33f4 100644 --- a/stdlib/tst-system.c +++ b/stdlib/tst-system.c @@ -17,14 +17,130 @@ . */ #include +#include +#include +#include +#include +#include +#include +#include +#include + +static char *tmpdir; +static long int namemax; + +static void +do_prepare (int argc, char *argv[]) +{ + tmpdir = support_create_temp_directory ("tst-system-"); + /* Include the last '/0'. */ + namemax = pathconf (tmpdir, _PC_NAME_MAX) + 1; + TEST_VERIFY_EXIT (namemax != -1); +} +#define PREPARE do_prepare + +struct args +{ + const char *command; + int exit_status; + int term_sig; + const char *path; +}; + +static void +call_system (void *closure) +{ + struct args *args = (struct args *) closure; + int ret; + + if (args->path != NULL) + TEST_COMPARE (setenv ("PATH", args->path, 1), 0); + ret = system (args->command); + if (args->term_sig == 0) + { + /* Expect regular termination. */ + TEST_VERIFY (WIFEXITED (ret) != 0); + TEST_COMPARE (WEXITSTATUS (ret), args->exit_status); + } + else + { + /* status_or_signal < 0. Expect termination by signal. */ + TEST_VERIFY (WIFSIGNALED (ret) != 0); + TEST_COMPARE (WTERMSIG (ret), args->term_sig); + } +} static int do_test (void) { - return system (":"); -} + TEST_VERIFY (system (NULL) != 0); + { + char cmd[namemax]; + memset (cmd, 'a', sizeof(cmd)); + cmd[sizeof(cmd) - 1] = '\0'; + + struct support_capture_subprocess result; + result = support_capture_subprocess (call_system, + &(struct args) { + cmd, 127, 0, tmpdir + }); + support_capture_subprocess_check (&result, "system", 0, sc_allow_stderr); + + char *returnerr = xasprintf ("%s: execing %s failed: " + "No such file or directory", + basename(_PATH_BSHELL), cmd); + TEST_COMPARE_STRING (result.err.buffer, returnerr); + free (returnerr); + } + + { + char cmd[namemax + 1]; + memset (cmd, 'a', sizeof(cmd)); + cmd[sizeof(cmd) - 1] = '\0'; + + struct support_capture_subprocess result; + result = support_capture_subprocess (call_system, + &(struct args) { + cmd, 127, 0, tmpdir + }); + support_capture_subprocess_check (&result, "system", 0, sc_allow_stderr); + + char *returnerr = xasprintf ("%s: execing %s failed: " + "File name too long", + basename(_PATH_BSHELL), cmd); + TEST_COMPARE_STRING (result.err.buffer, returnerr); + free (returnerr); + } + + { + struct support_capture_subprocess result; + result = support_capture_subprocess (call_system, + &(struct args) { + "kill $$", 0, SIGTERM + }); + support_capture_subprocess_check (&result, "system", 0, sc_allow_none); + } + + { + struct support_capture_subprocess result; + result = support_capture_subprocess (call_system, + &(struct args) { "echo ...", 0 }); + support_capture_subprocess_check (&result, "system", 0, sc_allow_stdout); + TEST_COMPARE_STRING (result.out.buffer, "...\n"); + } + + { + struct support_capture_subprocess result; + result = support_capture_subprocess (call_system, + &(struct args) { "exit 1", 1 }); + support_capture_subprocess_check (&result, "system", 0, sc_allow_none); + } + + TEST_COMPARE (system (""), 0); + + return 0; +} -#define TEST_FUNCTION do_test () -#include "../test-skeleton.c" +#include diff --git a/string/test-memchr.c b/string/test-memchr.c index 5dd0aa547..de70e794d 100644 --- a/string/test-memchr.c +++ b/string/test-memchr.c @@ -65,8 +65,8 @@ do_one_test (impl_t *impl, const CHAR *s, int c, size_t n, CHAR *exp_res) CHAR *res = CALL (impl, s, c, n); if (res != exp_res) { - error (0, 0, "Wrong result in function %s %p %p", impl->name, - res, exp_res); + error (0, 0, "Wrong result in function %s (%p, %d, %zu) -> %p != %p", + impl->name, s, c, n, res, exp_res); ret = 1; return; } @@ -91,7 +91,7 @@ do_test (size_t align, size_t pos, size_t len, size_t n, int seek_char) } buf[align + len] = 0; - if (pos < len) + if (pos < MIN(n, len)) { buf[align + pos] = seek_char; buf[align + len] = -seek_char; @@ -107,6 +107,38 @@ do_test (size_t align, size_t pos, size_t len, size_t n, int seek_char) do_one_test (impl, (CHAR *) (buf + align), seek_char, n, result); } +static void +do_overflow_tests (void) +{ + size_t i, j, len; + const size_t one = 1; + uintptr_t buf_addr = (uintptr_t) buf1; + + for (i = 0; i < 750; ++i) + { + do_test (0, i, 751, SIZE_MAX - i, BIG_CHAR); + do_test (0, i, 751, i - buf_addr, BIG_CHAR); + do_test (0, i, 751, -buf_addr - i, BIG_CHAR); + do_test (0, i, 751, SIZE_MAX - buf_addr - i, BIG_CHAR); + do_test (0, i, 751, SIZE_MAX - buf_addr + i, BIG_CHAR); + + len = 0; + for (j = 8 * sizeof(size_t) - 1; j ; --j) + { + len |= one << j; + do_test (0, i, 751, len - i, BIG_CHAR); + do_test (0, i, 751, len + i, BIG_CHAR); + do_test (0, i, 751, len - buf_addr - i, BIG_CHAR); + do_test (0, i, 751, len - buf_addr + i, BIG_CHAR); + + do_test (0, i, 751, ~len - i, BIG_CHAR); + do_test (0, i, 751, ~len + i, BIG_CHAR); + do_test (0, i, 751, ~len - buf_addr - i, BIG_CHAR); + do_test (0, i, 751, ~len - buf_addr + i, BIG_CHAR); + } + } +} + static void do_random_tests (void) { @@ -221,6 +253,7 @@ test_main (void) do_test (page_size / 2 - i, i, i, 1, 0x9B); do_random_tests (); + do_overflow_tests (); return ret; } diff --git a/string/test-strncat.c b/string/test-strncat.c index abbacb95c..0c7f68d08 100644 --- a/string/test-strncat.c +++ b/string/test-strncat.c @@ -134,6 +134,66 @@ do_test (size_t align1, size_t align2, size_t len1, size_t len2, } } +static void +do_overflow_tests (void) +{ + size_t i, j, len; + const size_t one = 1; + CHAR *s1, *s2; + uintptr_t s1_addr; + s1 = (CHAR *) buf1; + s2 = (CHAR *) buf2; + s1_addr = (uintptr_t)s1; + for (j = 0; j < 200; ++j) + s2[j] = 32 + 23 * j % (BIG_CHAR - 32); + s2[200] = 0; + for (i = 0; i < 750; ++i) { + for (j = 0; j < i; ++j) + s1[j] = 32 + 23 * j % (BIG_CHAR - 32); + s1[i] = '\0'; + + FOR_EACH_IMPL (impl, 0) + { + s2[200] = '\0'; + do_one_test (impl, s2, s1, SIZE_MAX - i); + s2[200] = '\0'; + do_one_test (impl, s2, s1, i - s1_addr); + s2[200] = '\0'; + do_one_test (impl, s2, s1, -s1_addr - i); + s2[200] = '\0'; + do_one_test (impl, s2, s1, SIZE_MAX - s1_addr - i); + s2[200] = '\0'; + do_one_test (impl, s2, s1, SIZE_MAX - s1_addr + i); + } + + len = 0; + for (j = 8 * sizeof(size_t) - 1; j ; --j) + { + len |= one << j; + FOR_EACH_IMPL (impl, 0) + { + s2[200] = '\0'; + do_one_test (impl, s2, s1, len - i); + s2[200] = '\0'; + do_one_test (impl, s2, s1, len + i); + s2[200] = '\0'; + do_one_test (impl, s2, s1, len - s1_addr - i); + s2[200] = '\0'; + do_one_test (impl, s2, s1, len - s1_addr + i); + + s2[200] = '\0'; + do_one_test (impl, s2, s1, ~len - i); + s2[200] = '\0'; + do_one_test (impl, s2, s1, ~len + i); + s2[200] = '\0'; + do_one_test (impl, s2, s1, ~len - s1_addr - i); + s2[200] = '\0'; + do_one_test (impl, s2, s1, ~len - s1_addr + i); + } + } + } +} + static void do_random_tests (void) { @@ -316,6 +376,7 @@ test_main (void) } do_random_tests (); + do_overflow_tests (); return ret; } diff --git a/string/test-strncmp.c b/string/test-strncmp.c index d961ac449..02806f4eb 100644 --- a/string/test-strncmp.c +++ b/string/test-strncmp.c @@ -403,6 +403,18 @@ check2 (void) free (s2); } +static void +check3 (void) +{ + const CHAR *s1 = L ("abc"); + CHAR *s2 = STRDUP (s1); + + FOR_EACH_IMPL (impl, 0) + check_result (impl, s1, s2, SIZE_MAX, 0); + + free (s2); +} + int test_main (void) { @@ -412,6 +424,7 @@ test_main (void) check1 (); check2 (); + check3 (); printf ("%23s", ""); FOR_EACH_IMPL (impl, 0) diff --git a/string/test-strnlen.c b/string/test-strnlen.c index 80ac9e860..ca34352b0 100644 --- a/string/test-strnlen.c +++ b/string/test-strnlen.c @@ -27,6 +27,7 @@ #ifndef WIDE # define STRNLEN strnlen +# define MEMSET memset # define CHAR char # define BIG_CHAR CHAR_MAX # define MIDDLE_CHAR 127 @@ -34,6 +35,7 @@ #else # include # define STRNLEN wcsnlen +# define MEMSET wmemset # define CHAR wchar_t # define BIG_CHAR WCHAR_MAX # define MIDDLE_CHAR 1121 @@ -73,7 +75,7 @@ do_test (size_t align, size_t len, size_t maxlen, int max_char) { size_t i; - align &= 63; + align &= (getpagesize () / sizeof (CHAR) - 1); if ((align + len) * sizeof (CHAR) >= page_size) return; @@ -87,6 +89,56 @@ do_test (size_t align, size_t len, size_t maxlen, int max_char) do_one_test (impl, (CHAR *) (buf + align), maxlen, MIN (len, maxlen)); } +static void +do_overflow_tests (void) +{ + size_t i, j, al_idx, repeats, len; + const size_t one = 1; + uintptr_t buf_addr = (uintptr_t) buf1; + const size_t alignments[] = { 0, 1, 7, 9, 31, 33, 63, 65, 95, 97, 127, 129 }; + + for (al_idx = 0; al_idx < sizeof (alignments) / sizeof (alignments[0]); + al_idx++) + { + for (repeats = 0; repeats < 2; ++repeats) + { + size_t align = repeats ? (getpagesize () - alignments[al_idx]) + : alignments[al_idx]; + align /= sizeof (CHAR); + for (i = 0; i < 750; ++i) + { + do_test (align, i, SIZE_MAX, BIG_CHAR); + + do_test (align, i, SIZE_MAX - i, BIG_CHAR); + do_test (align, i, i - buf_addr, BIG_CHAR); + do_test (align, i, -buf_addr - i, BIG_CHAR); + do_test (align, i, SIZE_MAX - buf_addr - i, BIG_CHAR); + do_test (align, i, SIZE_MAX - buf_addr + i, BIG_CHAR); + + len = 0; + for (j = 8 * sizeof (size_t) - 1; j; --j) + { + len |= one << j; + do_test (align, i, len, BIG_CHAR); + do_test (align, i, len - i, BIG_CHAR); + do_test (align, i, len + i, BIG_CHAR); + do_test (align, i, len - buf_addr - i, BIG_CHAR); + do_test (align, i, len - buf_addr + i, BIG_CHAR); + + do_test (align, i, ~len - i, BIG_CHAR); + do_test (align, i, ~len + i, BIG_CHAR); + do_test (align, i, ~len - buf_addr - i, BIG_CHAR); + do_test (align, i, ~len - buf_addr + i, BIG_CHAR); + + do_test (align, i, -buf_addr, BIG_CHAR); + do_test (align, i, j - buf_addr, BIG_CHAR); + do_test (align, i, -buf_addr - j, BIG_CHAR); + } + } + } + } +} + static void do_random_tests (void) { @@ -153,7 +205,7 @@ do_page_tests (void) size_t last_offset = (page_size / sizeof (CHAR)) - 1; CHAR *s = (CHAR *) buf2; - memset (s, 65, (last_offset - 1)); + MEMSET (s, 65, (last_offset - 1)); s[last_offset] = 0; /* Place short strings ending at page boundary. */ @@ -196,6 +248,35 @@ do_page_tests (void) } } +/* Tests meant to unveil fail on implementations that access bytes + beyond the maxium length. */ + +static void +do_page_2_tests (void) +{ + size_t i, exp_len, offset; + size_t last_offset = page_size / sizeof (CHAR); + + CHAR *s = (CHAR *) buf2; + MEMSET (s, 65, last_offset); + + /* Place short strings ending at page boundary without the null + byte. */ + offset = last_offset; + for (i = 0; i < 128; i++) + { + /* Decrease offset to stress several sizes and alignments. */ + offset--; + exp_len = last_offset - offset; + FOR_EACH_IMPL (impl, 0) + { + /* If an implementation goes beyond EXP_LEN, it will trigger + the segfault. */ + do_one_test (impl, (CHAR *) (s + offset), exp_len, exp_len); + } + } +} + int test_main (void) { @@ -242,6 +323,8 @@ test_main (void) do_random_tests (); do_page_tests (); + do_page_2_tests (); + do_overflow_tests (); return ret; } diff --git a/sunrpc/Makefile b/sunrpc/Makefile index d5840d077..162a5cef5 100644 --- a/sunrpc/Makefile +++ b/sunrpc/Makefile @@ -95,7 +95,8 @@ others += rpcgen endif tests = tst-xdrmem tst-xdrmem2 test-rpcent tst-udp-error tst-udp-timeout \ - tst-udp-nonblocking + tst-udp-nonblocking tst-bug22542 tst-bug28768 + xtests := tst-getmyaddr ifeq ($(have-thread-library),yes) @@ -246,3 +247,5 @@ $(objpfx)tst-udp-timeout: $(common-objpfx)linkobj/libc.so $(objpfx)tst-udp-nonblocking: $(common-objpfx)linkobj/libc.so $(objpfx)tst-udp-garbage: \ $(common-objpfx)linkobj/libc.so $(shared-thread-library) + +$(objpfx)tst-bug22542: $(common-objpfx)linkobj/libc.so diff --git a/sunrpc/clnt_gen.c b/sunrpc/clnt_gen.c index 13ced8994..b44357cd8 100644 --- a/sunrpc/clnt_gen.c +++ b/sunrpc/clnt_gen.c @@ -57,9 +57,13 @@ clnt_create (const char *hostname, u_long prog, u_long vers, if (strcmp (proto, "unix") == 0) { - memset ((char *)&sun, 0, sizeof (sun)); - sun.sun_family = AF_UNIX; - strcpy (sun.sun_path, hostname); + if (__sockaddr_un_set (&sun, hostname) < 0) + { + struct rpc_createerr *ce = &get_rpc_createerr (); + ce->cf_stat = RPC_SYSTEMERROR; + ce->cf_error.re_errno = errno; + return NULL; + } sock = RPC_ANYSOCK; client = clntunix_create (&sun, prog, vers, &sock, 0, 0); if (client == NULL) diff --git a/sunrpc/svc_unix.c b/sunrpc/svc_unix.c index e01afeabe..b065d6063 100644 --- a/sunrpc/svc_unix.c +++ b/sunrpc/svc_unix.c @@ -154,7 +154,10 @@ svcunix_create (int sock, u_int sendsize, u_int recvsize, char *path) SVCXPRT *xprt; struct unix_rendezvous *r; struct sockaddr_un addr; - socklen_t len = sizeof (struct sockaddr_in); + socklen_t len = sizeof (addr); + + if (__sockaddr_un_set (&addr, path) < 0) + return NULL; if (sock == RPC_ANYSOCK) { @@ -165,12 +168,6 @@ svcunix_create (int sock, u_int sendsize, u_int recvsize, char *path) } madesock = TRUE; } - memset (&addr, '\0', sizeof (addr)); - addr.sun_family = AF_UNIX; - len = strlen (path) + 1; - memcpy (addr.sun_path, path, len); - len += sizeof (addr.sun_family); - __bind (sock, (struct sockaddr *) &addr, len); if (__getsockname (sock, (struct sockaddr *) &addr, &len) != 0 diff --git a/sunrpc/tst-bug22542.c b/sunrpc/tst-bug22542.c new file mode 100644 index 000000000..d6cd79787 --- /dev/null +++ b/sunrpc/tst-bug22542.c @@ -0,0 +1,44 @@ +/* Test to verify that overlong hostname is rejected by clnt_create + and doesn't cause a buffer overflow (bug 22542). + + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include + +static int +do_test (void) +{ + /* Create an arbitrary hostname that's longer than fits in sun_path. */ + char name [sizeof ((struct sockaddr_un*)0)->sun_path * 2]; + memset (name, 'x', sizeof name - 1); + name [sizeof name - 1] = '\0'; + + errno = 0; + CLIENT *clnt = clnt_create (name, 0, 0, "unix"); + + TEST_VERIFY (clnt == NULL); + TEST_COMPARE (errno, EINVAL); + return 0; +} + +#include diff --git a/sunrpc/tst-bug28768.c b/sunrpc/tst-bug28768.c new file mode 100644 index 000000000..35a4b7b0b --- /dev/null +++ b/sunrpc/tst-bug28768.c @@ -0,0 +1,42 @@ +/* Test to verify that long path is rejected by svcunix_create (bug 28768). + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include + +/* svcunix_create does not have a default version in linkobj/libc.so. */ +compat_symbol_reference (libc, svcunix_create, svcunix_create, GLIBC_2_1); + +static int +do_test (void) +{ + char pathname[109]; + memset (pathname, 'x', sizeof (pathname)); + pathname[sizeof (pathname) - 1] = '\0'; + + errno = 0; + TEST_VERIFY (svcunix_create (RPC_ANYSOCK, 4096, 4096, pathname) == NULL); + TEST_COMPARE (errno, EINVAL); + + return 0; +} + +#include diff --git a/support/Makefile b/support/Makefile index 3325feb79..05e8c292b 100644 --- a/support/Makefile +++ b/support/Makefile @@ -83,8 +83,10 @@ libsupport-routines = \ xasprintf \ xbind \ xcalloc \ + xchdir \ xchroot \ xclock_gettime \ + xclone \ xclose \ xconnect \ xcopy_file_range \ diff --git a/support/capture_subprocess.h b/support/capture_subprocess.h index 9808750f8..421f65767 100644 --- a/support/capture_subprocess.h +++ b/support/capture_subprocess.h @@ -41,6 +41,12 @@ struct support_capture_subprocess support_capture_subprocess struct support_capture_subprocess support_capture_subprogram (const char *file, char *const argv[]); +/* Copy the running program into a setgid binary and run it with CHILD_ID + argument. If execution is successful, return the exit status of the child + program, otherwise return a non-zero failure exit code. */ +int support_capture_subprogram_self_sgid + (char *child_id); + /* Deallocate the subprocess data captured by support_capture_subprocess. */ void support_capture_subprocess_free (struct support_capture_subprocess *); diff --git a/support/shell-container.c b/support/shell-container.c index 509e0d69b..201536d24 100644 --- a/support/shell-container.c +++ b/support/shell-container.c @@ -135,6 +135,37 @@ copy_func (char **argv) } +/* Emulate the 'exit' builtin. The exit value is optional. */ +static int +exit_func (char **argv) +{ + int exit_val = 0; + + if (argv[0] != 0) + exit_val = atoi (argv[0]) & 0xff; + exit (exit_val); + return 0; +} + +/* Emulate the "/bin/kill" command. Options are ignored. */ +static int +kill_func (char **argv) +{ + int signum = SIGTERM; + int i; + + for (i = 0; argv[i]; i++) + { + pid_t pid; + if (strcmp (argv[i], "$$") == 0) + pid = getpid (); + else + pid = atoi (argv[i]); + kill (pid, signum); + } + return 0; +} + /* This is a list of all the built-in commands we understand. */ static struct { const char *name; @@ -143,6 +174,8 @@ static struct { { "true", true_func }, { "echo", echo_func }, { "cp", copy_func }, + { "exit", exit_func }, + { "kill", kill_func }, { NULL, NULL } }; @@ -238,7 +271,7 @@ run_command_array (char **argv) fprintf (stderr, "sh: execing %s failed: %s", argv[0], strerror (errno)); - exit (1); + exit (127); } waitpid (pid, &status, 0); @@ -251,6 +284,11 @@ run_command_array (char **argv) if (rv) exit (rv); } + else if (WIFSIGNALED (status)) + { + int sig = WTERMSIG (status); + raise (sig); + } else exit (1); } diff --git a/support/subprocess.h b/support/subprocess.h index 8b442fd5c..34ffd02e8 100644 --- a/support/subprocess.h +++ b/support/subprocess.h @@ -38,6 +38,11 @@ struct support_subprocess support_subprocess struct support_subprocess support_subprogram (const char *file, char *const argv[]); +/* Invoke program FILE with ARGV arguments by using posix_spawn and wait for it + to complete. Return program exit status. */ +int support_subprogram_wait + (const char *file, char *const argv[]); + /* Wait for the subprocess indicated by PROC::PID. Return the status indicate by waitpid call. */ int support_process_wait (struct support_subprocess *proc); diff --git a/support/support.h b/support/support.h index 77d68c2ab..0536474c4 100644 --- a/support/support.h +++ b/support/support.h @@ -23,6 +23,7 @@ #ifndef SUPPORT_H #define SUPPORT_H +#include #include #include /* For mode_t. */ diff --git a/support/support_capture_subprocess.c b/support/support_capture_subprocess.c index eeed676e3..28a37df67 100644 --- a/support/support_capture_subprocess.c +++ b/support/support_capture_subprocess.c @@ -20,11 +20,14 @@ #include #include +#include #include #include #include #include #include +#include +#include static void transfer (const char *what, struct pollfd *pfd, struct xmemstream *stream) @@ -36,7 +39,7 @@ transfer (const char *what, struct pollfd *pfd, struct xmemstream *stream) if (ret < 0) { support_record_failure (); - printf ("error: reading from subprocess %s: %m", what); + printf ("error: reading from subprocess %s: %m\n", what); pfd->events = 0; pfd->revents = 0; } @@ -102,6 +105,129 @@ support_capture_subprogram (const char *file, char *const argv[]) return result; } +/* Copies the executable into a restricted directory, so that we can + safely make it SGID with the TARGET group ID. Then runs the + executable. */ +static int +copy_and_spawn_sgid (char *child_id, gid_t gid) +{ + char *dirname = xasprintf ("%s/tst-tunables-setuid.%jd", + test_dir, (intmax_t) getpid ()); + char *execname = xasprintf ("%s/bin", dirname); + int infd = -1; + int outfd = -1; + int ret = 1, status = 1; + + TEST_VERIFY (mkdir (dirname, 0700) == 0); + if (support_record_failure_is_failed ()) + goto err; + + infd = open ("/proc/self/exe", O_RDONLY); + if (infd < 0) + FAIL_UNSUPPORTED ("unsupported: Cannot read binary from procfs\n"); + + outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700); + TEST_VERIFY (outfd >= 0); + if (support_record_failure_is_failed ()) + goto err; + + char buf[4096]; + for (;;) + { + ssize_t rdcount = read (infd, buf, sizeof (buf)); + TEST_VERIFY (rdcount >= 0); + if (support_record_failure_is_failed ()) + goto err; + if (rdcount == 0) + break; + char *p = buf; + char *end = buf + rdcount; + while (p != end) + { + ssize_t wrcount = write (outfd, buf, end - p); + if (wrcount == 0) + errno = ENOSPC; + TEST_VERIFY (wrcount > 0); + if (support_record_failure_is_failed ()) + goto err; + p += wrcount; + } + } + TEST_VERIFY (fchown (outfd, getuid (), gid) == 0); + if (support_record_failure_is_failed ()) + goto err; + TEST_VERIFY (fchmod (outfd, 02750) == 0); + if (support_record_failure_is_failed ()) + goto err; + TEST_VERIFY (close (outfd) == 0); + if (support_record_failure_is_failed ()) + goto err; + TEST_VERIFY (close (infd) == 0); + if (support_record_failure_is_failed ()) + goto err; + + /* We have the binary, now spawn the subprocess. Avoid using + support_subprogram because we only want the program exit status, not the + contents. */ + ret = 0; + + char * const args[] = {execname, child_id, NULL}; + + status = support_subprogram_wait (args[0], args); + +err: + if (outfd >= 0) + close (outfd); + if (infd >= 0) + close (infd); + if (execname != NULL) + { + unlink (execname); + free (execname); + } + if (dirname != NULL) + { + rmdir (dirname); + free (dirname); + } + + if (ret != 0) + FAIL_EXIT1("Failed to make sgid executable for test\n"); + + return status; +} + +int +support_capture_subprogram_self_sgid (char *child_id) +{ + gid_t target = 0; + const int count = 64; + gid_t groups[count]; + + /* Get a GID which is not our current GID, but is present in the + supplementary group list. */ + int ret = getgroups (count, groups); + if (ret < 0) + FAIL_UNSUPPORTED("Could not get group list for user %jd\n", + (intmax_t) getuid ()); + + gid_t current = getgid (); + for (int i = 0; i < ret; ++i) + { + if (groups[i] != current) + { + target = groups[i]; + break; + } + } + + if (target == 0) + FAIL_UNSUPPORTED("Could not find a suitable GID for user %jd\n", + (intmax_t) getuid ()); + + return copy_and_spawn_sgid (child_id, target); +} + void support_capture_subprocess_free (struct support_capture_subprocess *p) { diff --git a/support/support_subprocess.c b/support/support_subprocess.c index 36e3a77af..4a2582811 100644 --- a/support/support_subprocess.c +++ b/support/support_subprocess.c @@ -27,7 +27,7 @@ #include static struct support_subprocess -support_suprocess_init (void) +support_subprocess_init (void) { struct support_subprocess result; @@ -48,7 +48,7 @@ support_suprocess_init (void) struct support_subprocess support_subprocess (void (*callback) (void *), void *closure) { - struct support_subprocess result = support_suprocess_init (); + struct support_subprocess result = support_subprocess_init (); result.pid = xfork (); if (result.pid == 0) @@ -71,7 +71,7 @@ support_subprocess (void (*callback) (void *), void *closure) struct support_subprocess support_subprogram (const char *file, char *const argv[]) { - struct support_subprocess result = support_suprocess_init (); + struct support_subprocess result = support_subprocess_init (); posix_spawn_file_actions_t fa; /* posix_spawn_file_actions_init does not fail. */ @@ -84,7 +84,7 @@ support_subprogram (const char *file, char *const argv[]) xposix_spawn_file_actions_addclose (&fa, result.stdout_pipe[1]); xposix_spawn_file_actions_addclose (&fa, result.stderr_pipe[1]); - result.pid = xposix_spawn (file, &fa, NULL, argv, NULL); + result.pid = xposix_spawn (file, &fa, NULL, argv, environ); xclose (result.stdout_pipe[1]); xclose (result.stderr_pipe[1]); @@ -92,6 +92,19 @@ support_subprogram (const char *file, char *const argv[]) return result; } +int +support_subprogram_wait (const char *file, char *const argv[]) +{ + posix_spawn_file_actions_t fa; + + posix_spawn_file_actions_init (&fa); + struct support_subprocess res = support_subprocess_init (); + + res.pid = xposix_spawn (file, &fa, NULL, argv, environ); + + return support_process_wait (&res); +} + int support_process_wait (struct support_subprocess *proc) { diff --git a/support/temp_file.c b/support/temp_file.c index 277c5e0cf..e41128c2d 100644 --- a/support/temp_file.c +++ b/support/temp_file.c @@ -1,5 +1,6 @@ /* Temporary file handling for tests. - Copyright (C) 1998-2020 Free Software Foundation, Inc. + Copyright (C) 1998-2022 Free Software Foundation, Inc. + Copyright The GNU Tools Authors. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,15 +21,17 @@ some 32-bit platforms. */ #define _FILE_OFFSET_BITS 64 +#include #include #include #include +#include #include #include #include #include -#include +#include /* List of temporary files. */ static struct temp_name_list @@ -36,14 +39,20 @@ static struct temp_name_list struct temp_name_list *next; char *name; pid_t owner; + bool toolong; } *temp_name_list; /* Location of the temporary files. Set by the test skeleton via support_set_test_dir. The string is not be freed. */ static const char *test_dir = _PATH_TMP; -void -add_temp_file (const char *name) +/* Name of subdirectories in a too long temporary directory tree. */ +static char toolong_subdir[NAME_MAX + 1]; +static bool toolong_initialized; +static size_t toolong_path_max; + +static void +add_temp_file_internal (const char *name, bool toolong) { struct temp_name_list *newp = (struct temp_name_list *) xcalloc (sizeof (*newp), 1); @@ -53,21 +62,26 @@ add_temp_file (const char *name) newp->name = newname; newp->next = temp_name_list; newp->owner = getpid (); + newp->toolong = toolong; temp_name_list = newp; } else free (newp); } +void +add_temp_file (const char *name) +{ + add_temp_file_internal (name, false); +} + int -create_temp_file (const char *base, char **filename) +create_temp_file_in_dir (const char *base, const char *dir, char **filename) { char *fname; int fd; - fname = (char *) xmalloc (strlen (test_dir) + 1 + strlen (base) - + sizeof ("XXXXXX")); - strcpy (stpcpy (stpcpy (stpcpy (fname, test_dir), "/"), base), "XXXXXX"); + fname = xasprintf ("%s/%sXXXXXX", dir, base); fd = mkstemp (fname); if (fd == -1) @@ -86,8 +100,14 @@ create_temp_file (const char *base, char **filename) return fd; } -char * -support_create_temp_directory (const char *base) +int +create_temp_file (const char *base, char **filename) +{ + return create_temp_file_in_dir (base, test_dir, filename); +} + +static char * +create_temp_directory_internal (const char *base, bool toolong) { char *path = xasprintf ("%s/%sXXXXXX", test_dir, base); if (mkdtemp (path) == NULL) @@ -95,16 +115,132 @@ support_create_temp_directory (const char *base) printf ("error: mkdtemp (\"%s\"): %m", path); exit (1); } - add_temp_file (path); + add_temp_file_internal (path, toolong); return path; } -/* Helper functions called by the test skeleton follow. */ +char * +support_create_temp_directory (const char *base) +{ + return create_temp_directory_internal (base, false); +} + +static void +ensure_toolong_initialized (void) +{ + if (!toolong_initialized) + FAIL_EXIT1 ("uninitialized toolong directory tree\n"); +} + +static void +initialize_toolong (const char *base) +{ + long name_max = pathconf (base, _PC_NAME_MAX); + name_max = (name_max < 0 ? 64 + : (name_max < sizeof (toolong_subdir) ? name_max + : sizeof (toolong_subdir) - 1)); + + long path_max = pathconf (base, _PC_PATH_MAX); + path_max = (path_max < 0 ? 1024 + : path_max <= PTRDIFF_MAX ? path_max : PTRDIFF_MAX); + + /* Sanity check to ensure that the test does not create temporary directories + in different filesystems because this API doesn't support it. */ + if (toolong_initialized) + { + if (name_max != strlen (toolong_subdir)) + FAIL_UNSUPPORTED ("name_max: Temporary directories in different" + " filesystems not supported yet\n"); + if (path_max != toolong_path_max) + FAIL_UNSUPPORTED ("path_max: Temporary directories in different" + " filesystems not supported yet\n"); + return; + } + + toolong_path_max = path_max; + + size_t len = name_max; + memset (toolong_subdir, 'X', len); + toolong_initialized = true; +} + +char * +support_create_and_chdir_toolong_temp_directory (const char *basename) +{ + char *base = create_temp_directory_internal (basename, true); + xchdir (base); + + initialize_toolong (base); + + size_t sz = strlen (toolong_subdir); + + /* Create directories and descend into them so that the final path is larger + than PATH_MAX. */ + for (size_t i = 0; i <= toolong_path_max / sz; i++) + { + int ret = mkdir (toolong_subdir, S_IRWXU); + if (ret != 0 && errno == ENAMETOOLONG) + FAIL_UNSUPPORTED ("Filesystem does not support creating too long " + "directory trees\n"); + else if (ret != 0) + FAIL_EXIT1 ("Failed to create directory tree: %m\n"); + xchdir (toolong_subdir); + } + return base; +} void -support_set_test_dir (const char *path) +support_chdir_toolong_temp_directory (const char *base) { - test_dir = path; + ensure_toolong_initialized (); + + xchdir (base); + + size_t sz = strlen (toolong_subdir); + for (size_t i = 0; i <= toolong_path_max / sz; i++) + xchdir (toolong_subdir); +} + +/* Helper functions called by the test skeleton follow. */ + +static void +remove_toolong_subdirs (const char *base) +{ + ensure_toolong_initialized (); + + if (chdir (base) != 0) + { + printf ("warning: toolong cleanup base failed: chdir (\"%s\"): %m\n", + base); + return; + } + + /* Descend. */ + int levels = 0; + size_t sz = strlen (toolong_subdir); + for (levels = 0; levels <= toolong_path_max / sz; levels++) + if (chdir (toolong_subdir) != 0) + { + printf ("warning: toolong cleanup failed: chdir (\"%s\"): %m\n", + toolong_subdir); + break; + } + + /* Ascend and remove. */ + while (--levels >= 0) + { + if (chdir ("..") != 0) + { + printf ("warning: toolong cleanup failed: chdir (\"..\"): %m\n"); + return; + } + if (remove (toolong_subdir) != 0) + { + printf ("warning: could not remove subdirectory: %s: %m\n", + toolong_subdir); + return; + } + } } void @@ -119,6 +255,9 @@ support_delete_temp_files (void) around, to prevent PID reuse.) */ if (temp_name_list->owner == pid) { + if (temp_name_list->toolong) + remove_toolong_subdirs (temp_name_list->name); + if (remove (temp_name_list->name) != 0) printf ("warning: could not remove temporary file: %s: %m\n", temp_name_list->name); @@ -143,3 +282,9 @@ support_print_temp_files (FILE *f) fprintf (f, ")\n"); } } + +void +support_set_test_dir (const char *path) +{ + test_dir = path; +} diff --git a/support/temp_file.h b/support/temp_file.h index 8b6303a6e..2598f8213 100644 --- a/support/temp_file.h +++ b/support/temp_file.h @@ -32,11 +32,27 @@ void add_temp_file (const char *name); *FILENAME. */ int create_temp_file (const char *base, char **filename); +/* Create a temporary file in directory DIR. Return the opened file + descriptor on success, or -1 on failure. Write the file name to + *FILENAME if FILENAME is not NULL. In this case, the caller is + expected to free *FILENAME. */ +int create_temp_file_in_dir (const char *base, const char *dir, + char **filename); + /* Create a temporary directory and schedule it for deletion. BASE is used as a prefix for the unique directory name, which the function returns. The caller should free this string. */ char *support_create_temp_directory (const char *base); +/* Create a temporary directory tree that is longer than PATH_MAX and schedule + it for deletion. BASENAME is used as a prefix for the unique directory + name, which the function returns. The caller should free this string. */ +char *support_create_and_chdir_toolong_temp_directory (const char *basename); + +/* Change into the innermost directory of the directory tree BASE, which was + created using support_create_and_chdir_toolong_temp_directory. */ +void support_chdir_toolong_temp_directory (const char *base); + __END_DECLS #endif /* SUPPORT_TEMP_FILE_H */ diff --git a/support/xchdir.c b/support/xchdir.c new file mode 100644 index 000000000..beb4feff7 --- /dev/null +++ b/support/xchdir.c @@ -0,0 +1,28 @@ +/* chdir with error checking. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include + +void +xchdir (const char *path) +{ + if (chdir (path) != 0) + FAIL_EXIT1 ("chdir (\"%s\"): %m", path); +} diff --git a/support/xclone.c b/support/xclone.c new file mode 100644 index 000000000..243eee8b2 --- /dev/null +++ b/support/xclone.c @@ -0,0 +1,49 @@ +/* Auxiliary functions to issue the clone syscall. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifdef __linux__ +# include +# include /* For _STACK_GROWS_{UP,DOWN}. */ +# include + +pid_t +xclone (int (*fn) (void *arg), void *arg, void *stack, size_t stack_size, + int flags) +{ + pid_t r = -1; + +# ifdef __ia64__ + extern int __clone2 (int (*fn) (void *arg), void *stack, size_t stack_size, + int flags, void *arg, ...); + r = __clone2 (fn, stack, stack_size, flags, arg, /* ptid */ NULL, + /* tls */ NULL, /* ctid */ NULL); +# else +# if _STACK_GROWS_DOWN + r = clone (fn, stack + stack_size, flags, arg, /* ptid */ NULL, + /* tls */ NULL, /* ctid */ NULL); +# elif _STACK_GROWS_UP + r = clone (fn, stack, flags, arg, /* ptid */ NULL, /* tls */ NULL, NULL); +# endif +# endif + + if (r < 0) + FAIL_EXIT1 ("clone: %m"); + + return r; +} +#endif diff --git a/support/xsched.h b/support/xsched.h new file mode 100644 index 000000000..eefd73194 --- /dev/null +++ b/support/xsched.h @@ -0,0 +1,34 @@ +/* Wrapper for sched.h functions. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef SUPPORT_XSCHED_H +#define SUPPORT_XSCHED_H + +__BEGIN_DECLS + +#include +#include + +#ifdef __linux__ +pid_t xclone (int (*fn) (void *arg), void *arg, void *stack, + size_t stack_size, int flags); +#endif + +__END_DECLS + +#endif diff --git a/support/xunistd.h b/support/xunistd.h index 96f498f2e..43799d92c 100644 --- a/support/xunistd.h +++ b/support/xunistd.h @@ -44,6 +44,7 @@ long xsysconf (int name); long long xlseek (int fd, long long offset, int whence); void xftruncate (int fd, long long length); void xsymlink (const char *target, const char *linkpath); +void xchdir (const char *path); /* Equivalent of "mkdir -p". */ void xmkdirp (const char *, mode_t); diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h index db3335e5a..8ffa0d1c5 100644 --- a/sysdeps/aarch64/dl-machine.h +++ b/sysdeps/aarch64/dl-machine.h @@ -392,13 +392,6 @@ elf_machine_lazy_rel (struct link_map *map, /* Check for unexpected PLT reloc type. */ if (__builtin_expect (r_type == AARCH64_R(JUMP_SLOT), 1)) { - if (map->l_mach.plt == 0) - { - /* Prelinking. */ - *reloc_addr += l_addr; - return; - } - if (__glibc_unlikely (map->l_info[DT_AARCH64 (VARIANT_PCS)] != NULL)) { /* Check the symbol table for variant PCS symbols. */ @@ -422,7 +415,10 @@ elf_machine_lazy_rel (struct link_map *map, } } - *reloc_addr = map->l_mach.plt; + if (map->l_mach.plt == 0) + *reloc_addr += l_addr; + else + *reloc_addr = map->l_mach.plt; } else if (__builtin_expect (r_type == AARCH64_R(TLSDESC), 1)) { diff --git a/sysdeps/aarch64/memcpy.S b/sysdeps/aarch64/memcpy.S index d0d47e90b..d8eb5e758 100644 --- a/sysdeps/aarch64/memcpy.S +++ b/sysdeps/aarch64/memcpy.S @@ -1,4 +1,5 @@ -/* Copyright (C) 2012-2020 Free Software Foundation, Inc. +/* Generic optimized memcpy using SIMD. + Copyright (C) 2012-2022 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -20,7 +21,7 @@ /* Assumptions: * - * ARMv8-a, AArch64, unaligned accesses. + * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. * */ @@ -33,33 +34,20 @@ #define A_l x6 #define A_lw w6 #define A_h x7 -#define A_hw w7 #define B_l x8 #define B_lw w8 #define B_h x9 -#define C_l x10 -#define C_h x11 -#define D_l x12 -#define D_h x13 -#define E_l x14 -#define E_h x15 -#define F_l x16 -#define F_h x17 -#define G_l count -#define G_h dst -#define H_l src -#define H_h srcend +#define C_lw w10 #define tmp1 x14 -/* Copies are split into 3 main cases: small copies of up to 32 bytes, - medium copies of 33..128 bytes which are fully unrolled. Large copies - of more than 128 bytes align the destination and use an unrolled loop - processing 64 bytes per iteration. - In order to share code with memmove, small and medium copies read all - data before writing, allowing any kind of overlap. So small, medium - and large backwards memmoves are handled by falling through into memcpy. - Overlapping large forward memmoves use a loop that copies backwards. -*/ +#define A_q q0 +#define B_q q1 +#define C_q q2 +#define D_q q3 +#define E_q q4 +#define F_q q5 +#define G_q q6 +#define H_q q7 #ifndef MEMMOVE # define MEMMOVE memmove @@ -68,212 +56,198 @@ # define MEMCPY memcpy #endif -ENTRY_ALIGN (MEMMOVE, 6) +/* This implementation supports both memcpy and memmove and shares most code. + It uses unaligned accesses and branchless sequences to keep the code small, + simple and improve performance. - DELOUSE (0) - DELOUSE (1) - DELOUSE (2) + Copies are split into 3 main cases: small copies of up to 32 bytes, medium + copies of up to 128 bytes, and large copies. The overhead of the overlap + check in memmove is negligible since it is only required for large copies. - sub tmp1, dstin, src - cmp count, 128 - ccmp tmp1, count, 2, hi - b.lo L(move_long) - - /* Common case falls through into memcpy. */ -END (MEMMOVE) -libc_hidden_builtin_def (MEMMOVE) -ENTRY (MEMCPY) + Large copies use a software pipelined loop processing 64 bytes per + iteration. The destination pointer is 16-byte aligned to minimize + unaligned accesses. The loop tail is handled by always copying 64 bytes + from the end. */ +ENTRY_ALIGN (MEMCPY, 6) DELOUSE (0) DELOUSE (1) DELOUSE (2) - prfm PLDL1KEEP, [src] add srcend, src, count add dstend, dstin, count - cmp count, 32 - b.ls L(copy32) cmp count, 128 b.hi L(copy_long) + cmp count, 32 + b.hi L(copy32_128) - /* Medium copies: 33..128 bytes. */ - ldp A_l, A_h, [src] - ldp B_l, B_h, [src, 16] - ldp C_l, C_h, [srcend, -32] - ldp D_l, D_h, [srcend, -16] - cmp count, 64 - b.hi L(copy128) - stp A_l, A_h, [dstin] - stp B_l, B_h, [dstin, 16] - stp C_l, C_h, [dstend, -32] - stp D_l, D_h, [dstend, -16] - ret - - .p2align 4 /* Small copies: 0..32 bytes. */ -L(copy32): - /* 16-32 bytes. */ cmp count, 16 - b.lo 1f - ldp A_l, A_h, [src] - ldp B_l, B_h, [srcend, -16] - stp A_l, A_h, [dstin] - stp B_l, B_h, [dstend, -16] + b.lo L(copy16) + ldr A_q, [src] + ldr B_q, [srcend, -16] + str A_q, [dstin] + str B_q, [dstend, -16] ret - .p2align 4 -1: - /* 8-15 bytes. */ - tbz count, 3, 1f + + /* Copy 8-15 bytes. */ +L(copy16): + tbz count, 3, L(copy8) ldr A_l, [src] ldr A_h, [srcend, -8] str A_l, [dstin] str A_h, [dstend, -8] ret - .p2align 4 -1: - /* 4-7 bytes. */ - tbz count, 2, 1f + + /* Copy 4-7 bytes. */ +L(copy8): + tbz count, 2, L(copy4) ldr A_lw, [src] - ldr A_hw, [srcend, -4] + ldr B_lw, [srcend, -4] str A_lw, [dstin] - str A_hw, [dstend, -4] + str B_lw, [dstend, -4] ret - /* Copy 0..3 bytes. Use a branchless sequence that copies the same - byte 3 times if count==1, or the 2nd byte twice if count==2. */ -1: - cbz count, 2f + /* Copy 0..3 bytes using a branchless sequence. */ +L(copy4): + cbz count, L(copy0) lsr tmp1, count, 1 ldrb A_lw, [src] - ldrb A_hw, [srcend, -1] + ldrb C_lw, [srcend, -1] ldrb B_lw, [src, tmp1] strb A_lw, [dstin] strb B_lw, [dstin, tmp1] - strb A_hw, [dstend, -1] -2: ret + strb C_lw, [dstend, -1] +L(copy0): + ret + + .p2align 4 + /* Medium copies: 33..128 bytes. */ +L(copy32_128): + ldp A_q, B_q, [src] + ldp C_q, D_q, [srcend, -32] + cmp count, 64 + b.hi L(copy128) + stp A_q, B_q, [dstin] + stp C_q, D_q, [dstend, -32] + ret .p2align 4 - /* Copy 65..128 bytes. Copy 64 bytes from the start and - 64 bytes from the end. */ + /* Copy 65..128 bytes. */ L(copy128): - ldp E_l, E_h, [src, 32] - ldp F_l, F_h, [src, 48] - ldp G_l, G_h, [srcend, -64] - ldp H_l, H_h, [srcend, -48] - stp A_l, A_h, [dstin] - stp B_l, B_h, [dstin, 16] - stp E_l, E_h, [dstin, 32] - stp F_l, F_h, [dstin, 48] - stp G_l, G_h, [dstend, -64] - stp H_l, H_h, [dstend, -48] - stp C_l, C_h, [dstend, -32] - stp D_l, D_h, [dstend, -16] + ldp E_q, F_q, [src, 32] + cmp count, 96 + b.ls L(copy96) + ldp G_q, H_q, [srcend, -64] + stp G_q, H_q, [dstend, -64] +L(copy96): + stp A_q, B_q, [dstin] + stp E_q, F_q, [dstin, 32] + stp C_q, D_q, [dstend, -32] ret - /* Align DST to 16 byte alignment so that we don't cross cache line - boundaries on both loads and stores. There are at least 128 bytes - to copy, so copy 16 bytes unaligned and then align. The loop - copies 64 bytes per iteration and prefetches one iteration ahead. */ + /* Align loop64 below to 16 bytes. */ + nop - .p2align 4 + /* Copy more than 128 bytes. */ L(copy_long): - and tmp1, dstin, 15 - bic dst, dstin, 15 - ldp D_l, D_h, [src] - sub src, src, tmp1 + /* Copy 16 bytes and then align src to 16-byte alignment. */ + ldr D_q, [src] + and tmp1, src, 15 + bic src, src, 15 + sub dst, dstin, tmp1 add count, count, tmp1 /* Count is now 16 too large. */ - ldp A_l, A_h, [src, 16] - stp D_l, D_h, [dstin] - ldp B_l, B_h, [src, 32] - ldp C_l, C_h, [src, 48] - ldp D_l, D_h, [src, 64]! + ldp A_q, B_q, [src, 16] + str D_q, [dstin] + ldp C_q, D_q, [src, 48] subs count, count, 128 + 16 /* Test and readjust count. */ - b.ls L(last64) + b.ls L(copy64_from_end) L(loop64): - stp A_l, A_h, [dst, 16] - ldp A_l, A_h, [src, 16] - stp B_l, B_h, [dst, 32] - ldp B_l, B_h, [src, 32] - stp C_l, C_h, [dst, 48] - ldp C_l, C_h, [src, 48] - stp D_l, D_h, [dst, 64]! - ldp D_l, D_h, [src, 64]! + stp A_q, B_q, [dst, 16] + ldp A_q, B_q, [src, 80] + stp C_q, D_q, [dst, 48] + ldp C_q, D_q, [src, 112] + add src, src, 64 + add dst, dst, 64 subs count, count, 64 b.hi L(loop64) - /* Write the last full set of 64 bytes. The remainder is at most 64 - bytes, so it is safe to always copy 64 bytes from the end even if - there is just 1 byte left. */ -L(last64): - ldp E_l, E_h, [srcend, -64] - stp A_l, A_h, [dst, 16] - ldp A_l, A_h, [srcend, -48] - stp B_l, B_h, [dst, 32] - ldp B_l, B_h, [srcend, -32] - stp C_l, C_h, [dst, 48] - ldp C_l, C_h, [srcend, -16] - stp D_l, D_h, [dst, 64] - stp E_l, E_h, [dstend, -64] - stp A_l, A_h, [dstend, -48] - stp B_l, B_h, [dstend, -32] - stp C_l, C_h, [dstend, -16] + /* Write the last iteration and copy 64 bytes from the end. */ +L(copy64_from_end): + ldp E_q, F_q, [srcend, -64] + stp A_q, B_q, [dst, 16] + ldp A_q, B_q, [srcend, -32] + stp C_q, D_q, [dst, 48] + stp E_q, F_q, [dstend, -64] + stp A_q, B_q, [dstend, -32] ret - .p2align 4 -L(move_long): - cbz tmp1, 3f +END (MEMCPY) +libc_hidden_builtin_def (MEMCPY) + +ENTRY_ALIGN (MEMMOVE, 4) + DELOUSE (0) + DELOUSE (1) + DELOUSE (2) add srcend, src, count add dstend, dstin, count + cmp count, 128 + b.hi L(move_long) + cmp count, 32 + b.hi L(copy32_128) - /* Align dstend to 16 byte alignment so that we don't cross cache line - boundaries on both loads and stores. There are at least 128 bytes - to copy, so copy 16 bytes unaligned and then align. The loop - copies 64 bytes per iteration and prefetches one iteration ahead. */ + /* Small moves: 0..32 bytes. */ + cmp count, 16 + b.lo L(copy16) + ldr A_q, [src] + ldr B_q, [srcend, -16] + str A_q, [dstin] + str B_q, [dstend, -16] + ret - and tmp1, dstend, 15 - ldp D_l, D_h, [srcend, -16] - sub srcend, srcend, tmp1 +L(move_long): + /* Only use backward copy if there is an overlap. */ + sub tmp1, dstin, src + cbz tmp1, L(move0) + cmp tmp1, count + b.hs L(copy_long) + + /* Large backwards copy for overlapping copies. + Copy 16 bytes and then align srcend to 16-byte alignment. */ +L(copy_long_backwards): + ldr D_q, [srcend, -16] + and tmp1, srcend, 15 + bic srcend, srcend, 15 sub count, count, tmp1 - ldp A_l, A_h, [srcend, -16] - stp D_l, D_h, [dstend, -16] - ldp B_l, B_h, [srcend, -32] - ldp C_l, C_h, [srcend, -48] - ldp D_l, D_h, [srcend, -64]! + ldp A_q, B_q, [srcend, -32] + str D_q, [dstend, -16] + ldp C_q, D_q, [srcend, -64] sub dstend, dstend, tmp1 subs count, count, 128 - b.ls 2f - - nop -1: - stp A_l, A_h, [dstend, -16] - ldp A_l, A_h, [srcend, -16] - stp B_l, B_h, [dstend, -32] - ldp B_l, B_h, [srcend, -32] - stp C_l, C_h, [dstend, -48] - ldp C_l, C_h, [srcend, -48] - stp D_l, D_h, [dstend, -64]! - ldp D_l, D_h, [srcend, -64]! + b.ls L(copy64_from_start) + +L(loop64_backwards): + str B_q, [dstend, -16] + str A_q, [dstend, -32] + ldp A_q, B_q, [srcend, -96] + str D_q, [dstend, -48] + str C_q, [dstend, -64]! + ldp C_q, D_q, [srcend, -128] + sub srcend, srcend, 64 subs count, count, 64 - b.hi 1b - - /* Write the last full set of 64 bytes. The remainder is at most 64 - bytes, so it is safe to always copy 64 bytes from the start even if - there is just 1 byte left. */ -2: - ldp G_l, G_h, [src, 48] - stp A_l, A_h, [dstend, -16] - ldp A_l, A_h, [src, 32] - stp B_l, B_h, [dstend, -32] - ldp B_l, B_h, [src, 16] - stp C_l, C_h, [dstend, -48] - ldp C_l, C_h, [src] - stp D_l, D_h, [dstend, -64] - stp G_l, G_h, [dstin, 48] - stp A_l, A_h, [dstin, 32] - stp B_l, B_h, [dstin, 16] - stp C_l, C_h, [dstin] -3: ret + b.hi L(loop64_backwards) + + /* Write the last iteration and copy 64 bytes from the start. */ +L(copy64_from_start): + ldp E_q, F_q, [src, 32] + stp A_q, B_q, [dstend, -32] + ldp A_q, B_q, [src] + stp C_q, D_q, [dstend, -64] + stp E_q, F_q, [dstin, 32] + stp A_q, B_q, [dstin] +L(move0): + ret -END (MEMCPY) -libc_hidden_builtin_def (MEMCPY) +END (MEMMOVE) +libc_hidden_builtin_def (MEMMOVE) diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c index 2fafefd5d..eb6e94a00 100644 --- a/sysdeps/aarch64/multiarch/memcpy.c +++ b/sysdeps/aarch64/multiarch/memcpy.c @@ -36,7 +36,7 @@ extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden; libc_ifunc (__libc_memcpy, (IS_THUNDERX (midr) ? __memcpy_thunderx - : (IS_FALKOR (midr) || IS_PHECDA (midr) || IS_ARES (midr) || IS_KUNPENG920 (midr) + : (IS_FALKOR (midr) || IS_PHECDA (midr) || IS_KUNPENG920 (midr) ? __memcpy_falkor : (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr) ? __memcpy_thunderx2 diff --git a/sysdeps/aarch64/strcpy.S b/sysdeps/aarch64/strcpy.S index 548130e41..a8ff52c07 100644 --- a/sysdeps/aarch64/strcpy.S +++ b/sysdeps/aarch64/strcpy.S @@ -234,8 +234,13 @@ L(entry_no_page_cross): #endif /* calculate the loc value */ cmeq datav.16b, datav.16b, #0 +#ifdef __AARCH64EB__ + mov data1, datav.d[1] + mov data2, datav.d[0] +#else mov data1, datav.d[0] mov data2, datav.d[1] +#endif cmp data1, 0 csel data1, data1, data2, ne mov pos, 8 diff --git a/sysdeps/aarch64/strnlen.S b/sysdeps/aarch64/strnlen.S index 5981247dd..086a5c7e9 100644 --- a/sysdeps/aarch64/strnlen.S +++ b/sysdeps/aarch64/strnlen.S @@ -154,8 +154,13 @@ L(loop_end): byte. */ cmeq datav.16b, datav.16b, #0 +#ifdef __AARCH64EB__ + mov data1, datav.d[1] + mov data2, datav.d[0] +#else mov data1, datav.d[0] mov data2, datav.d[1] +#endif cmp data1, 0 csel data1, data1, data2, ne sub len, src, srcin diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h index 604c48917..f1feb19dc 100644 --- a/sysdeps/aarch64/sysdep.h +++ b/sysdeps/aarch64/sysdep.h @@ -45,7 +45,7 @@ #define ENTRY(name) \ .globl C_SYMBOL_NAME(name); \ .type C_SYMBOL_NAME(name),%function; \ - .align 4; \ + .p2align 6; \ C_LABEL(name) \ cfi_startproc; \ CALL_MCOUNT diff --git a/sysdeps/arm/armv7/multiarch/memcpy_impl.S b/sysdeps/arm/armv7/multiarch/memcpy_impl.S index bf4ac7077..379bb56fc 100644 --- a/sysdeps/arm/armv7/multiarch/memcpy_impl.S +++ b/sysdeps/arm/armv7/multiarch/memcpy_impl.S @@ -268,7 +268,7 @@ ENTRY(memcpy) mov dst, dstin /* Preserve dstin, we need to return it. */ cmp count, #64 - bge .Lcpy_not_short + bhs .Lcpy_not_short /* Deal with small copies quickly by dropping straight into the exit block. */ @@ -351,10 +351,10 @@ ENTRY(memcpy) 1: subs tmp2, count, #64 /* Use tmp2 for count. */ - blt .Ltail63aligned + blo .Ltail63aligned cmp tmp2, #512 - bge .Lcpy_body_long + bhs .Lcpy_body_long .Lcpy_body_medium: /* Count in tmp2. */ #ifdef USE_VFP @@ -378,7 +378,7 @@ ENTRY(memcpy) add src, src, #64 vstr d1, [dst, #56] add dst, dst, #64 - bge 1b + bhs 1b tst tmp2, #0x3f beq .Ldone @@ -412,7 +412,7 @@ ENTRY(memcpy) ldrd A_l, A_h, [src, #64]! strd A_l, A_h, [dst, #64]! subs tmp2, tmp2, #64 - bge 1b + bhs 1b tst tmp2, #0x3f bne 1f ldr tmp2,[sp], #FRAME_SIZE @@ -482,7 +482,7 @@ ENTRY(memcpy) add src, src, #32 subs tmp2, tmp2, #prefetch_lines * 64 * 2 - blt 2f + blo 2f 1: cpy_line_vfp d3, 0 cpy_line_vfp d4, 64 @@ -494,7 +494,7 @@ ENTRY(memcpy) add dst, dst, #2 * 64 add src, src, #2 * 64 subs tmp2, tmp2, #prefetch_lines * 64 - bge 1b + bhs 1b 2: cpy_tail_vfp d3, 0 @@ -615,8 +615,8 @@ ENTRY(memcpy) 1: pld [src, #(3 * 64)] subs count, count, #64 - ldrmi tmp2, [sp], #FRAME_SIZE - bmi .Ltail63unaligned + ldrlo tmp2, [sp], #FRAME_SIZE + blo .Ltail63unaligned pld [src, #(4 * 64)] #ifdef USE_NEON @@ -633,7 +633,7 @@ ENTRY(memcpy) neon_load_multi d0-d3, src neon_load_multi d4-d7, src subs count, count, #64 - bmi 2f + blo 2f 1: pld [src, #(4 * 64)] neon_store_multi d0-d3, dst @@ -641,7 +641,7 @@ ENTRY(memcpy) neon_store_multi d4-d7, dst neon_load_multi d4-d7, src subs count, count, #64 - bpl 1b + bhs 1b 2: neon_store_multi d0-d3, dst neon_store_multi d4-d7, dst diff --git a/sysdeps/arm/be/nofpu/Implies b/sysdeps/arm/be/nofpu/Implies new file mode 100644 index 000000000..c90dd7fd5 --- /dev/null +++ b/sysdeps/arm/be/nofpu/Implies @@ -0,0 +1 @@ +arm/nofpu diff --git a/sysdeps/arm/le/nofpu/Implies b/sysdeps/arm/le/nofpu/Implies new file mode 100644 index 000000000..c90dd7fd5 --- /dev/null +++ b/sysdeps/arm/le/nofpu/Implies @@ -0,0 +1 @@ +arm/nofpu diff --git a/sysdeps/arm/memcpy.S b/sysdeps/arm/memcpy.S index 510e8adaf..bcfbc51d9 100644 --- a/sysdeps/arm/memcpy.S +++ b/sysdeps/arm/memcpy.S @@ -68,7 +68,7 @@ ENTRY(memcpy) cfi_remember_state subs r2, r2, #4 - blt 8f + blo 8f ands ip, r0, #3 PLD( pld [r1, #0] ) bne 9f @@ -82,7 +82,7 @@ ENTRY(memcpy) cfi_rel_offset (r6, 4) cfi_rel_offset (r7, 8) cfi_rel_offset (r8, 12) - blt 5f + blo 5f CALGN( ands ip, r1, #31 ) CALGN( rsb r3, ip, #32 ) @@ -98,9 +98,9 @@ ENTRY(memcpy) #endif PLD( pld [r1, #0] ) -2: PLD( subs r2, r2, #96 ) +2: PLD( cmp r2, #96 ) PLD( pld [r1, #28] ) - PLD( blt 4f ) + PLD( blo 4f ) PLD( pld [r1, #60] ) PLD( pld [r1, #92] ) @@ -108,9 +108,7 @@ ENTRY(memcpy) 4: ldmia r1!, {r3, r4, r5, r6, r7, r8, ip, lr} subs r2, r2, #32 stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr} - bge 3b - PLD( cmn r2, #96 ) - PLD( bge 4b ) + bhs 3b 5: ands ip, r2, #28 rsb ip, ip, #32 @@ -222,7 +220,7 @@ ENTRY(memcpy) strbge r4, [r0], #1 subs r2, r2, ip strb lr, [r0], #1 - blt 8b + blo 8b ands ip, r1, #3 beq 1b @@ -236,7 +234,7 @@ ENTRY(memcpy) .macro forward_copy_shift pull push subs r2, r2, #28 - blt 14f + blo 14f CALGN( ands ip, r1, #31 ) CALGN( rsb ip, ip, #32 ) @@ -253,9 +251,9 @@ ENTRY(memcpy) cfi_rel_offset (r10, 16) PLD( pld [r1, #0] ) - PLD( subs r2, r2, #96 ) + PLD( cmp r2, #96 ) PLD( pld [r1, #28] ) - PLD( blt 13f ) + PLD( blo 13f ) PLD( pld [r1, #60] ) PLD( pld [r1, #92] ) @@ -280,9 +278,7 @@ ENTRY(memcpy) mov ip, ip, PULL #\pull orr ip, ip, lr, PUSH #\push stmia r0!, {r3, r4, r5, r6, r7, r8, r10, ip} - bge 12b - PLD( cmn r2, #96 ) - PLD( bge 13b ) + bhs 12b pop {r5 - r8, r10} cfi_adjust_cfa_offset (-20) diff --git a/sysdeps/arm/memmove.S b/sysdeps/arm/memmove.S index 954037ef3..0d07b76ee 100644 --- a/sysdeps/arm/memmove.S +++ b/sysdeps/arm/memmove.S @@ -85,7 +85,7 @@ ENTRY(memmove) add r1, r1, r2 add r0, r0, r2 subs r2, r2, #4 - blt 8f + blo 8f ands ip, r0, #3 PLD( pld [r1, #-4] ) bne 9f @@ -99,7 +99,7 @@ ENTRY(memmove) cfi_rel_offset (r6, 4) cfi_rel_offset (r7, 8) cfi_rel_offset (r8, 12) - blt 5f + blo 5f CALGN( ands ip, r1, #31 ) CALGN( sbcsne r4, ip, r2 ) @ C is always set here @@ -114,9 +114,9 @@ ENTRY(memmove) #endif PLD( pld [r1, #-4] ) -2: PLD( subs r2, r2, #96 ) +2: PLD( cmp r2, #96 ) PLD( pld [r1, #-32] ) - PLD( blt 4f ) + PLD( blo 4f ) PLD( pld [r1, #-64] ) PLD( pld [r1, #-96] ) @@ -124,9 +124,7 @@ ENTRY(memmove) 4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} subs r2, r2, #32 stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} - bge 3b - PLD( cmn r2, #96 ) - PLD( bge 4b ) + bhs 3b 5: ands ip, r2, #28 rsb ip, ip, #32 @@ -237,7 +235,7 @@ ENTRY(memmove) strbge r4, [r0, #-1]! subs r2, r2, ip strb lr, [r0, #-1]! - blt 8b + blo 8b ands ip, r1, #3 beq 1b @@ -251,7 +249,7 @@ ENTRY(memmove) .macro backward_copy_shift push pull subs r2, r2, #28 - blt 14f + blo 14f CALGN( ands ip, r1, #31 ) CALGN( rsb ip, ip, #32 ) @@ -268,9 +266,9 @@ ENTRY(memmove) cfi_rel_offset (r10, 16) PLD( pld [r1, #-4] ) - PLD( subs r2, r2, #96 ) + PLD( cmp r2, #96 ) PLD( pld [r1, #-32] ) - PLD( blt 13f ) + PLD( blo 13f ) PLD( pld [r1, #-64] ) PLD( pld [r1, #-96] ) @@ -295,9 +293,7 @@ ENTRY(memmove) mov r4, r4, PUSH #\push orr r4, r4, r3, PULL #\pull stmdb r0!, {r4 - r8, r10, ip, lr} - bge 12b - PLD( cmn r2, #96 ) - PLD( bge 13b ) + bhs 12b pop {r5 - r8, r10} cfi_adjust_cfa_offset (-20) diff --git a/sysdeps/generic/unwind-arch.h b/sysdeps/generic/unwind-arch.h new file mode 100644 index 000000000..d712e5e11 --- /dev/null +++ b/sysdeps/generic/unwind-arch.h @@ -0,0 +1,30 @@ +/* Return backtrace of current program state. Arch-specific bits. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _UNWIND_ARCH_H +#define _UNWIND_ARCH_H + +#include + +static inline void * +unwind_arch_adjustment (void *prev, void *addr) +{ + return addr; +} + +#endif diff --git a/sysdeps/gnu/Makefile b/sysdeps/gnu/Makefile index 97fcb6fb9..26dc91d90 100644 --- a/sysdeps/gnu/Makefile +++ b/sysdeps/gnu/Makefile @@ -54,8 +54,7 @@ $(objpfx)errlist-compat.h: $(objpfx)errlist-compat.c generated += errlist-compat.c errlist-compat.h # This will force the generation above to happy if need be. -$(foreach o,$(object-suffixes) $(object-suffixes:=.d),\ - $(objpfx)errlist$o): $(objpfx)errlist-compat.h +$(foreach o,$(object-suffixes),$(objpfx)errlist$o): $(objpfx)errlist-compat.h endif ifeq ($(subdir),login) diff --git a/sysdeps/hppa/dl-fptr.c b/sysdeps/hppa/dl-fptr.c index 0a3739728..25ca8f846 100644 --- a/sysdeps/hppa/dl-fptr.c +++ b/sysdeps/hppa/dl-fptr.c @@ -172,8 +172,8 @@ make_fdesc (ElfW(Addr) ip, ElfW(Addr) gp) } install: - fdesc->ip = ip; fdesc->gp = gp; + fdesc->ip = ip; return (ElfW(Addr)) fdesc; } @@ -350,7 +350,9 @@ ElfW(Addr) _dl_lookup_address (const void *address) { ElfW(Addr) addr = (ElfW(Addr)) address; - unsigned int *desc, *gptr; + ElfW(Word) reloc_arg; + volatile unsigned int *desc; + unsigned int *gptr; /* Return ADDR if the least-significant two bits of ADDR are not consistent with ADDR being a linker defined function pointer. The normal value for @@ -367,7 +369,11 @@ _dl_lookup_address (const void *address) if (!_dl_read_access_allowed (desc)) return addr; - /* Load first word of candidate descriptor. It should be a pointer + /* First load the relocation offset. */ + reloc_arg = (ElfW(Word)) desc[1]; + atomic_full_barrier(); + + /* Then load first word of candidate descriptor. It should be a pointer with word alignment and point to memory that can be read. */ gptr = (unsigned int *) desc[0]; if (((unsigned int) gptr & 3) != 0 @@ -377,8 +383,8 @@ _dl_lookup_address (const void *address) /* See if descriptor requires resolution. The following trampoline is used in each global offset table for function resolution: - ldw 0(r20),r22 - bv r0(r22) + ldw 0(r20),r21 + bv r0(r21) ldw 4(r20),r21 tramp: b,l .-12,r20 depwi 0,31,2,r20 @@ -389,7 +395,15 @@ _dl_lookup_address (const void *address) if (gptr[0] == 0xea9f1fdd /* b,l .-12,r20 */ && gptr[1] == 0xd6801c1e /* depwi 0,31,2,r20 */ && (ElfW(Addr)) gptr[2] == elf_machine_resolve ()) - _dl_fixup ((struct link_map *) gptr[5], (ElfW(Word)) desc[1]); + { + struct link_map *l = (struct link_map *) gptr[5]; + + /* If gp has been resolved, we need to hunt for relocation offset. */ + if (!(reloc_arg & PA_GP_RELOC)) + reloc_arg = _dl_fix_reloc_arg (addr, l); + + _dl_fixup (l, reloc_arg); + } return (ElfW(Addr)) desc[0]; } diff --git a/sysdeps/hppa/dl-machine.h b/sysdeps/hppa/dl-machine.h index 9e98366ea..8ecff9770 100644 --- a/sysdeps/hppa/dl-machine.h +++ b/sysdeps/hppa/dl-machine.h @@ -48,6 +48,14 @@ #define GOT_FROM_PLT_STUB (4*4) #define PLT_ENTRY_SIZE (2*4) +/* The gp slot in the function descriptor contains the relocation offset + before resolution. To distinguish between a resolved gp value and an + unresolved relocation offset we set an unused bit in the relocation + offset. This would allow us to do a synchronzied two word update + using this bit (interlocked update), but instead of waiting for the + update we simply recompute the gp value given that we know the ip. */ +#define PA_GP_RELOC 1 + /* Initialize the function descriptor table before relocations */ static inline void __hppa_init_bootstrap_fdesc_table (struct link_map *map) @@ -117,10 +125,28 @@ elf_machine_fixup_plt (struct link_map *map, lookup_t t, volatile Elf32_Addr *rfdesc = reloc_addr; /* map is the link_map for the caller, t is the link_map for the object being called */ - rfdesc[1] = value.gp; - /* Need to ensure that the gp is visible before the code - entry point is updated */ - rfdesc[0] = value.ip; + + /* We would like the function descriptor to be double word aligned. This + helps performance (ip and gp then reside on the same cache line) and + we can update the pair atomically with a single store. The linker + now ensures this alignment but we still have to handle old code. */ + if ((unsigned int)reloc_addr & 7) + { + /* Need to ensure that the gp is visible before the code + entry point is updated */ + rfdesc[1] = value.gp; + atomic_full_barrier(); + rfdesc[0] = value.ip; + } + else + { + /* Update pair atomically with floating point store. */ + union { ElfW(Word) v[2]; double d; } u; + + u.v[0] = value.ip; + u.v[1] = value.gp; + *(volatile double *)rfdesc = u.d; + } return value; } @@ -265,7 +291,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) here. The trampoline code will load the proper LTP and pass the reloc offset to the fixup function. */ - fptr->gp = iplt - jmprel; + fptr->gp = (iplt - jmprel) | PA_GP_RELOC; } /* r_sym != 0 */ else { diff --git a/sysdeps/hppa/dl-runtime.c b/sysdeps/hppa/dl-runtime.c new file mode 100644 index 000000000..885a3f183 --- /dev/null +++ b/sysdeps/hppa/dl-runtime.c @@ -0,0 +1,58 @@ +/* On-demand PLT fixup for shared objects. HPPA version. + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* Clear PA_GP_RELOC bit in relocation offset. */ +#define reloc_offset (reloc_arg & ~PA_GP_RELOC) +#define reloc_index (reloc_arg & ~PA_GP_RELOC) / sizeof (PLTREL) + +#include + +/* The caller has encountered a partially relocated function descriptor. + The gp of the descriptor has been updated, but not the ip. We find + the function descriptor again and compute the relocation offset and + return that to the caller. The caller will continue on to call + _dl_fixup with the relocation offset. */ + +ElfW(Word) +attribute_hidden __attribute ((noinline)) ARCH_FIXUP_ATTRIBUTE +_dl_fix_reloc_arg (struct fdesc *fptr, struct link_map *l) +{ + Elf32_Addr l_addr, iplt, jmprel, end_jmprel, r_type; + const Elf32_Rela *reloc; + + l_addr = l->l_addr; + jmprel = D_PTR(l, l_info[DT_JMPREL]); + end_jmprel = jmprel + l->l_info[DT_PLTRELSZ]->d_un.d_val; + + /* Look for the entry... */ + for (iplt = jmprel; iplt < end_jmprel; iplt += sizeof (Elf32_Rela)) + { + reloc = (const Elf32_Rela *) iplt; + r_type = ELF32_R_TYPE (reloc->r_info); + + if (__builtin_expect (r_type == R_PARISC_IPLT, 1) + && fptr == (struct fdesc *) (reloc->r_offset + l_addr)) + /* Found entry. Return the reloc offset. */ + return iplt - jmprel; + } + + /* Crash if we weren't passed a valid function pointer. */ + ABORT_INSTRUCTION; + return 0; +} diff --git a/sysdeps/hppa/dl-trampoline.S b/sysdeps/hppa/dl-trampoline.S index 0114ca8b1..d0804b30c 100644 --- a/sysdeps/hppa/dl-trampoline.S +++ b/sysdeps/hppa/dl-trampoline.S @@ -31,7 +31,7 @@ slow down __cffc when it attempts to call fixup to resolve function descriptor references. Please refer to gcc/gcc/config/pa/fptr.c - Enter with r19 = reloc offset, r20 = got-8, r21 = fixup ltp. */ + Enter with r19 = reloc offset, r20 = got-8, r21 = fixup ltp, r22 = fp. */ /* RELOCATION MARKER: bl to provide gcc's __cffc with fixup loc. */ .text @@ -61,17 +61,20 @@ _dl_runtime_resolve: copy %sp, %r1 /* Copy previous sp */ /* Save function result address (on entry) */ stwm %r28,128(%sp) - /* Fillin some frame info to follow ABI */ + /* Fill in some frame info to follow ABI */ stw %r1,-4(%sp) /* Previous sp */ stw %r21,-32(%sp) /* PIC register value */ /* Save input floating point registers. This must be done in the new frame since the previous frame doesn't have enough space */ - ldo -56(%sp),%r1 + ldo -64(%sp),%r1 fstd,ma %fr4,-8(%r1) fstd,ma %fr5,-8(%r1) fstd,ma %fr6,-8(%r1) + + /* Test PA_GP_RELOC bit. */ + bb,>= %r19,31,2f /* branch if not reloc offset */ fstd,ma %fr7,-8(%r1) /* Set up args to fixup func, needs only two arguments */ @@ -79,7 +82,7 @@ _dl_runtime_resolve: copy %r19,%r25 /* (2) reloc offset */ /* Call the real address resolver. */ - bl _dl_fixup,%rp +3: bl _dl_fixup,%rp copy %r21,%r19 /* set fixup func ltp */ /* While the linker will set a function pointer to NULL when it @@ -102,7 +105,7 @@ _dl_runtime_resolve: copy %r29, %r19 /* Reload arguments fp args */ - ldo -56(%sp),%r1 + ldo -64(%sp),%r1 fldd,ma -8(%r1),%fr4 fldd,ma -8(%r1),%fr5 fldd,ma -8(%r1),%fr6 @@ -129,6 +132,25 @@ _dl_runtime_resolve: bv %r0(%rp) ldo -128(%sp),%sp +2: + /* Set up args for _dl_fix_reloc_arg. */ + copy %r22,%r26 /* (1) function pointer */ + depi 0,31,2,%r26 /* clear least significant bits */ + ldw 8+4(%r20),%r25 /* (2) got[1] == struct link_map */ + + /* Save ltp and link map arg for _dl_fixup. */ + stw %r21,-56(%sp) /* ltp */ + stw %r25,-60(%sp) /* struct link map */ + + /* Find reloc offset. */ + bl _dl_fix_reloc_arg,%rp + copy %r21,%r19 /* set func ltp */ + + /* Set up args for _dl_fixup. */ + ldw -56(%sp),%r21 /* ltp */ + ldw -60(%sp),%r26 /* (1) struct link map */ + b 3b + copy %ret0,%r25 /* (2) reloc offset */ .EXIT .PROCEND cfi_endproc @@ -153,7 +175,7 @@ _dl_runtime_profile: copy %sp, %r1 /* Copy previous sp */ /* Save function result address (on entry) */ stwm %r28,192(%sp) - /* Fillin some frame info to follow ABI */ + /* Fill in some frame info to follow ABI */ stw %r1,-4(%sp) /* Previous sp */ stw %r21,-32(%sp) /* PIC register value */ @@ -181,10 +203,11 @@ _dl_runtime_profile: fstd,ma %fr5,8(%r1) fstd,ma %fr6,8(%r1) fstd,ma %fr7,8(%r1) - /* 32-bit stack pointer and return register */ - stw %sp,-56(%sp) - stw %r2,-52(%sp) + /* Test PA_GP_RELOC bit. */ + bb,>= %r19,31,2f /* branch if not reloc offset */ + /* 32-bit stack pointer */ + stw %sp,-56(%sp) /* Set up args to fixup func, needs five arguments */ ldw 8+4(%r20),%r26 /* (1) got[1] == struct link_map */ @@ -197,7 +220,7 @@ _dl_runtime_profile: stw %r1, -52(%sp) /* (5) long int *framesizep */ /* Call the real address resolver. */ - bl _dl_profile_fixup,%rp +3: bl _dl_profile_fixup,%rp copy %r21,%r19 /* set fixup func ltp */ /* Load up the returned function descriptor */ @@ -215,7 +238,9 @@ _dl_runtime_profile: fldd,ma 8(%r1),%fr5 fldd,ma 8(%r1),%fr6 fldd,ma 8(%r1),%fr7 - ldw -52(%sp),%rp + + /* Reload rp register -(192+20) without adjusting stack */ + ldw -212(%sp),%rp /* Reload static link register -(192+16) without adjusting stack */ ldw -208(%sp),%r29 @@ -303,6 +328,33 @@ L(cont): ldw -20(%sp),%rp /* Return */ bv,n 0(%r2) + +2: + /* Set up args for _dl_fix_reloc_arg. */ + copy %r22,%r26 /* (1) function pointer */ + depi 0,31,2,%r26 /* clear least significant bits */ + ldw 8+4(%r20),%r25 /* (2) got[1] == struct link_map */ + + /* Save ltp and link map arg for _dl_fixup. */ + stw %r21,-92(%sp) /* ltp */ + stw %r25,-116(%sp) /* struct link map */ + + /* Find reloc offset. */ + bl _dl_fix_reloc_arg,%rp + copy %r21,%r19 /* set func ltp */ + + /* Restore fixup ltp. */ + ldw -92(%sp),%r21 /* ltp */ + + /* Set up args to fixup func, needs five arguments */ + ldw -116(%sp),%r26 /* (1) struct link map */ + copy %ret0,%r25 /* (2) reloc offset */ + stw %r25,-120(%sp) /* Save reloc offset */ + ldw -212(%sp),%r24 /* (3) profile_fixup needs rp */ + ldo -56(%sp),%r23 /* (4) La_hppa_regs */ + ldo -112(%sp), %r1 + b 3b + stw %r1, -52(%sp) /* (5) long int *framesizep */ .EXIT .PROCEND cfi_endproc diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h index 8af0789a9..4334ade2a 100644 --- a/sysdeps/i386/dl-machine.h +++ b/sysdeps/i386/dl-machine.h @@ -338,16 +338,22 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc, { # ifndef RTLD_BOOTSTRAP if (sym_map != map - && sym_map->l_type != lt_executable && !sym_map->l_relocated) { const char *strtab = (const char *) D_PTR (map, l_info[DT_STRTAB]); - _dl_error_printf ("\ + if (sym_map->l_type == lt_executable) + _dl_fatal_printf ("\ +%s: IFUNC symbol '%s' referenced in '%s' is defined in the executable \ +and creates an unsatisfiable circular dependency.\n", + RTLD_PROGNAME, strtab + refsym->st_name, + map->l_name); + else + _dl_error_printf ("\ %s: Relink `%s' with `%s' for IFUNC symbol `%s'\n", - RTLD_PROGNAME, map->l_name, - sym_map->l_name, - strtab + refsym->st_name); + RTLD_PROGNAME, map->l_name, + sym_map->l_name, + strtab + refsym->st_name); } # endif value = ((Elf32_Addr (*) (void)) value) (); diff --git a/sysdeps/i386/sysdep.h b/sysdeps/i386/sysdep.h index b4bcd8fb6..6094af8fe 100644 --- a/sysdeps/i386/sysdep.h +++ b/sysdeps/i386/sysdep.h @@ -61,7 +61,7 @@ lose: SYSCALL_PIC_SETUP \ # define SETUP_PIC_REG(reg) \ .ifndef GET_PC_THUNK(reg); \ - .section .gnu.linkonce.t.GET_PC_THUNK(reg),"ax",@progbits; \ + .section .text.GET_PC_THUNK(reg),"axG",@progbits,GET_PC_THUNK(reg),comdat; \ .globl GET_PC_THUNK(reg); \ .hidden GET_PC_THUNK(reg); \ .p2align 4; \ @@ -97,7 +97,8 @@ GET_PC_THUNK(reg): \ # define SETUP_PIC_REG_STR(reg) \ ".ifndef " GET_PC_THUNK_STR (reg) "\n" \ - ".section .gnu.linkonce.t." GET_PC_THUNK_STR (reg) ",\"ax\",@progbits\n" \ + ".section .text." GET_PC_THUNK_STR (reg) ",\"axG\",@progbits," \ + GET_PC_THUNK_STR (reg) ",comdat\n" \ ".globl " GET_PC_THUNK_STR (reg) "\n" \ ".hidden " GET_PC_THUNK_STR (reg) "\n" \ ".p2align 4\n" \ diff --git a/sysdeps/ieee754/ldbl-96/Makefile b/sysdeps/ieee754/ldbl-96/Makefile index 995e90d6d..6030adf7e 100644 --- a/sysdeps/ieee754/ldbl-96/Makefile +++ b/sysdeps/ieee754/ldbl-96/Makefile @@ -17,5 +17,8 @@ # . ifeq ($(subdir),math) -tests += test-canonical-ldbl-96 test-totalorderl-ldbl-96 +tests += test-canonical-ldbl-96 test-totalorderl-ldbl-96 test-sinl-pseudo +ifeq ($(have-ssp),yes) +CFLAGS-test-sinl-pseudo.c += -fstack-protector-all endif +endif # $(subdir) == math diff --git a/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c b/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c index 5f742321a..bcdf20179 100644 --- a/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c +++ b/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c @@ -210,6 +210,18 @@ __ieee754_rem_pio2l (long double x, long double *y) return 0; } + if ((i0 & 0x80000000) == 0) + { + /* Pseudo-zero and unnormal representations are not valid + representations of long double. We need to avoid stack + corruption in __kernel_rem_pio2, which expects input in a + particular normal form, but those representations do not need + to be consistently handled like any particular floating-point + value. */ + y[1] = y[0] = __builtin_nanl (""); + return 0; + } + /* Split the 64 bits of the mantissa into three 24-bit integers stored in a double array. */ exp = j0 - 23; diff --git a/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c b/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c new file mode 100644 index 000000000..f59b97769 --- /dev/null +++ b/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c @@ -0,0 +1,41 @@ +/* Test sinl for pseudo-zeros and unnormals for ldbl-96 (bug 25487). + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include + +static int +do_test (void) +{ + for (int i = 0; i < 64; i++) + { + uint64_t sig = i == 63 ? 0 : 1ULL << i; + long double ld; + SET_LDOUBLE_WORDS (ld, 0x4141, + sig >> 32, sig & 0xffffffffULL); + /* The requirement is that no stack overflow occurs when the + pseudo-zero or unnormal goes through range reduction. */ + volatile long double ldr; + ldr = sinl (ld); + (void) ldr; + } + return 0; +} + +#include diff --git a/sysdeps/posix/getcwd.c b/sysdeps/posix/getcwd.c index f00b337a1..839d78d7b 100644 --- a/sysdeps/posix/getcwd.c +++ b/sysdeps/posix/getcwd.c @@ -241,6 +241,14 @@ __getcwd (char *buf, size_t size) char *path; #ifndef NO_ALLOCATION size_t allocated = size; + + /* A size of 1 byte is never useful. */ + if (allocated == 1) + { + __set_errno (ERANGE); + return NULL; + } + if (size == 0) { if (buf != NULL) diff --git a/sysdeps/posix/system.c b/sysdeps/posix/system.c index e613e6a34..a03f478fc 100644 --- a/sysdeps/posix/system.c +++ b/sysdeps/posix/system.c @@ -101,7 +101,8 @@ cancel_handler (void *arg) static int do_system (const char *line) { - int status; + int status = -1; + int ret; pid_t pid; struct sigaction sa; #ifndef _LIBC_REENTRANT @@ -144,14 +145,14 @@ do_system (const char *line) __posix_spawnattr_setflags (&spawn_attr, POSIX_SPAWN_SETSIGDEF | POSIX_SPAWN_SETSIGMASK); - status = __posix_spawn (&pid, SHELL_PATH, 0, &spawn_attr, - (char *const[]){ (char*) SHELL_NAME, - (char*) "-c", - (char *) line, NULL }, - __environ); + ret = __posix_spawn (&pid, SHELL_PATH, 0, &spawn_attr, + (char *const[]){ (char *) SHELL_NAME, + (char *) "-c", + (char *) line, NULL }, + __environ); __posix_spawnattr_destroy (&spawn_attr); - if (status == 0) + if (ret == 0) { /* Cancellation results in cleanup handlers running as exceptions in the block where they were installed, so it is safe to reference @@ -186,6 +187,9 @@ do_system (const char *line) } DO_UNLOCK (); + if (ret != 0) + __set_errno (ret); + return status; } diff --git a/sysdeps/powerpc/powerpc32/sysdep.h b/sysdeps/powerpc/powerpc32/sysdep.h index 2ba009e91..829eec266 100644 --- a/sysdeps/powerpc/powerpc32/sysdep.h +++ b/sysdeps/powerpc/powerpc32/sysdep.h @@ -179,8 +179,8 @@ GOT_LABEL: ; \ #else /* Position-dependent code does not require access to the GOT. */ # define __GLRO(rOUT, rGOT, member, offset) \ - lis rOUT,(member+LOWORD)@ha; \ - lwz rOUT,(member+LOWORD)@l(rOUT) + lis rOUT,(member)@ha; \ + lwz rOUT,(member)@l(rOUT) #endif /* PIC */ #endif /* __ASSEMBLER__ */ diff --git a/sysdeps/powerpc/powerpc64/backtrace.c b/sysdeps/powerpc/powerpc64/backtrace.c index 8a53a1088..362a2b713 100644 --- a/sysdeps/powerpc/powerpc64/backtrace.c +++ b/sysdeps/powerpc/powerpc64/backtrace.c @@ -54,11 +54,22 @@ struct signal_frame_64 { /* We don't care about the rest, since the IP value is at 'uc' field. */ }; +/* Test if the address match to the inside the trampoline code. + Up to and including kernel 5.8, returning from an interrupt or syscall to a + signal handler starts execution directly at the handler's entry point, with + LR set to address of the sigreturn trampoline (the vDSO symbol). + Newer kernels will branch to signal handler from the trampoline instead, so + checking the stacktrace against the vDSO entrypoint does not work in such + case. + The vDSO branches with a 'bctrl' instruction, so checking either the + vDSO address itself and the next instruction should cover all kernel + versions. */ static inline bool is_sigtramp_address (void *nip) { #ifdef HAVE_SIGTRAMP_RT64 - if (nip == GLRO (dl_vdso_sigtramp_rt64)) + if (nip == GLRO (dl_vdso_sigtramp_rt64) || + nip == GLRO (dl_vdso_sigtramp_rt64) + 4) return true; #endif return false; diff --git a/sysdeps/s390/configure b/sysdeps/s390/configure index fa46e9e35..e7f576338 100644 --- a/sysdeps/s390/configure +++ b/sysdeps/s390/configure @@ -123,7 +123,9 @@ void testinsn (char *buf) __asm__ (".machine \"arch13\" \n\t" ".machinemode \"zarch_nohighgprs\" \n\t" "lghi %%r0,16 \n\t" - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); + "mvcrl 0(%0),32(%0) \n\t" + "vstrs %%v20,%%v20,%%v20,%%v20,0,2" + : : "a" (buf) : "memory", "r0"); } EOF if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c @@ -271,7 +273,9 @@ else void testinsn (char *buf) { __asm__ ("lghi %%r0,16 \n\t" - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); + "mvcrl 0(%0),32(%0) \n\t" + "vstrs %%v20,%%v20,%%v20,%%v20,0,2" + : : "a" (buf) : "memory", "r0"); } EOF if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c diff --git a/sysdeps/s390/configure.ac b/sysdeps/s390/configure.ac index 3ed5a8ef8..5c3479e8c 100644 --- a/sysdeps/s390/configure.ac +++ b/sysdeps/s390/configure.ac @@ -88,7 +88,9 @@ void testinsn (char *buf) __asm__ (".machine \"arch13\" \n\t" ".machinemode \"zarch_nohighgprs\" \n\t" "lghi %%r0,16 \n\t" - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); + "mvcrl 0(%0),32(%0) \n\t" + "vstrs %%v20,%%v20,%%v20,%%v20,0,2" + : : "a" (buf) : "memory", "r0"); } EOF dnl test, if assembler supports S390 arch13 instructions @@ -195,7 +197,9 @@ cat > conftest.c <<\EOF void testinsn (char *buf) { __asm__ ("lghi %%r0,16 \n\t" - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); + "mvcrl 0(%0),32(%0) \n\t" + "vstrs %%v20,%%v20,%%v20,%%v20,0,2" + : : "a" (buf) : "memory", "r0"); } EOF dnl test, if assembler supports S390 arch13 zarch instructions as default diff --git a/sysdeps/s390/memmove.c b/sysdeps/s390/memmove.c index 5fc85e129..ee59b5de1 100644 --- a/sysdeps/s390/memmove.c +++ b/sysdeps/s390/memmove.c @@ -43,7 +43,7 @@ extern __typeof (__redirect_memmove) MEMMOVE_ARCH13 attribute_hidden; s390_libc_ifunc_expr (__redirect_memmove, memmove, ({ s390_libc_ifunc_expr_stfle_init (); - (HAVE_MEMMOVE_ARCH13 + (HAVE_MEMMOVE_ARCH13 && (hwcap & HWCAP_S390_VXRS_EXT2) && S390_IS_ARCH13_MIE3 (stfle_bits)) ? MEMMOVE_ARCH13 : (HAVE_MEMMOVE_Z13 && (hwcap & HWCAP_S390_VX)) diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c index e6195c6e2..17c0cc395 100644 --- a/sysdeps/s390/multiarch/ifunc-impl-list.c +++ b/sysdeps/s390/multiarch/ifunc-impl-list.c @@ -171,7 +171,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, memmove, # if HAVE_MEMMOVE_ARCH13 IFUNC_IMPL_ADD (array, i, memmove, - S390_IS_ARCH13_MIE3 (stfle_bits), + ((dl_hwcap & HWCAP_S390_VXRS_EXT2) + && S390_IS_ARCH13_MIE3 (stfle_bits)), MEMMOVE_ARCH13) # endif # if HAVE_MEMMOVE_Z13 diff --git a/sysdeps/sh/be/sh4/fpu/Implies b/sysdeps/sh/be/sh4/fpu/Implies new file mode 100644 index 000000000..71b28ee1a --- /dev/null +++ b/sysdeps/sh/be/sh4/fpu/Implies @@ -0,0 +1 @@ +sh/sh4/fpu diff --git a/sysdeps/sh/le/sh4/fpu/Implies b/sysdeps/sh/le/sh4/fpu/Implies new file mode 100644 index 000000000..71b28ee1a --- /dev/null +++ b/sysdeps/sh/le/sh4/fpu/Implies @@ -0,0 +1 @@ +sh/sh4/fpu diff --git a/sysdeps/unix/make-syscalls.sh b/sysdeps/unix/make-syscalls.sh index c07626677..4f6c3490a 100644 --- a/sysdeps/unix/make-syscalls.sh +++ b/sysdeps/unix/make-syscalls.sh @@ -30,6 +30,7 @@ # P: optionally-NULL pointer to typed object (e.g., 3rd argument to sigaction) # s: non-NULL string (e.g., 1st arg to open) # S: optionally-NULL string (e.g., 1st arg to acct) +# U: unsigned long int (32-bit types are zero-extended to 64-bit types) # v: vararg scalar (e.g., optional 3rd arg to open) # V: byte-per-page vector (3rd arg to mincore) # W: wait status, optionally-NULL pointer to int (e.g., 2nd arg of wait4) @@ -184,6 +185,27 @@ while read file srcfile caller syscall args strong weak; do ?:?????????) nargs=9;; esac + # Derive the unsigned long int arguments from the argument signature + ulong_arg_1=0 + ulong_arg_2=0 + ulong_count=0 + for U in $(echo $args | sed -e "s/.*:/:/" | grep -ob U) + do + ulong_count=$(expr $ulong_count + 1) + ulong_arg=$(echo $U | sed -e "s/:U//") + case $ulong_count in + 1) + ulong_arg_1=$ulong_arg + ;; + 2) + ulong_arg_2=$ulong_arg + ;; + *) + echo >&2 "$0: Too many unsigned long int arguments for syscall ($strong $weak)" + exit 2 + esac + done + # Make sure only the first syscall rule is used, if multiple dirs # define the same syscall. echo '' @@ -245,6 +267,8 @@ while read file srcfile caller syscall args strong weak; do \$(make-target-directory) (echo '#define SYSCALL_NAME $syscall'; \\ echo '#define SYSCALL_NARGS $nargs'; \\ + echo '#define SYSCALL_ULONG_ARG_1 $ulong_arg_1'; \\ + echo '#define SYSCALL_ULONG_ARG_2 $ulong_arg_2'; \\ echo '#define SYSCALL_SYMBOL $strong'; \\ echo '#define SYSCALL_NOERRNO $noerrno'; \\ echo '#define SYSCALL_ERRVAL $errval'; \\ diff --git a/sysdeps/unix/syscall-template.S b/sysdeps/unix/syscall-template.S index cf6c7a58f..f807a8603 100644 --- a/sysdeps/unix/syscall-template.S +++ b/sysdeps/unix/syscall-template.S @@ -25,6 +25,12 @@ defining a few macros: SYSCALL_NAME syscall name SYSCALL_NARGS number of arguments this call takes + SYSCALL_ULONG_ARG_1 the first unsigned long int argument this + call takes. 0 means that there are no + unsigned long int arguments. + SYSCALL_ULONG_ARG_2 the second unsigned long int argument this + call takes. 0 means that there is at most + one unsigned long int argument. SYSCALL_SYMBOL primary symbol name SYSCALL_NOERRNO 1 to define a no-errno version (see below) SYSCALL_ERRVAL 1 to define an error-value version (see below) @@ -44,9 +50,31 @@ /* This indirection is needed so that SYMBOL gets macro-expanded. */ #define syscall_hidden_def(SYMBOL) hidden_def (SYMBOL) -#define T_PSEUDO(SYMBOL, NAME, N) PSEUDO (SYMBOL, NAME, N) -#define T_PSEUDO_NOERRNO(SYMBOL, NAME, N) PSEUDO_NOERRNO (SYMBOL, NAME, N) -#define T_PSEUDO_ERRVAL(SYMBOL, NAME, N) PSEUDO_ERRVAL (SYMBOL, NAME, N) +/* If PSEUDOS_HAVE_ULONG_INDICES is defined, PSEUDO and T_PSEUDO macros + have 2 extra arguments for unsigned long int arguments: + Extra argument 1: Position of the first unsigned long int argument. + Extra argument 2: Position of the second unsigned long int argument. + */ +#ifndef PSEUDOS_HAVE_ULONG_INDICES +# undef SYSCALL_ULONG_ARG_1 +# define SYSCALL_ULONG_ARG_1 0 +#endif + +#if SYSCALL_ULONG_ARG_1 +# define T_PSEUDO(SYMBOL, NAME, N, U1, U2) \ + PSEUDO (SYMBOL, NAME, N, U1, U2) +# define T_PSEUDO_NOERRNO(SYMBOL, NAME, N, U1, U2) \ + PSEUDO_NOERRNO (SYMBOL, NAME, N, U1, U2) +# define T_PSEUDO_ERRVAL(SYMBOL, NAME, N, U1, U2) \ + PSEUDO_ERRVAL (SYMBOL, NAME, N, U1, U2) +#else +# define T_PSEUDO(SYMBOL, NAME, N) \ + PSEUDO (SYMBOL, NAME, N) +# define T_PSEUDO_NOERRNO(SYMBOL, NAME, N) \ + PSEUDO_NOERRNO (SYMBOL, NAME, N) +# define T_PSEUDO_ERRVAL(SYMBOL, NAME, N) \ + PSEUDO_ERRVAL (SYMBOL, NAME, N) +#endif #define T_PSEUDO_END(SYMBOL) PSEUDO_END (SYMBOL) #define T_PSEUDO_END_NOERRNO(SYMBOL) PSEUDO_END_NOERRNO (SYMBOL) #define T_PSEUDO_END_ERRVAL(SYMBOL) PSEUDO_END_ERRVAL (SYMBOL) @@ -56,7 +84,12 @@ /* This kind of system call stub never returns an error. We return the return value register to the caller unexamined. */ +# if SYSCALL_ULONG_ARG_1 +T_PSEUDO_NOERRNO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS, + SYSCALL_ULONG_ARG_1, SYSCALL_ULONG_ARG_2) +# else T_PSEUDO_NOERRNO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS) +# endif ret_NOERRNO T_PSEUDO_END_NOERRNO (SYSCALL_SYMBOL) @@ -66,7 +99,12 @@ T_PSEUDO_END_NOERRNO (SYSCALL_SYMBOL) value, or zero for success. We may massage the kernel's return value to meet that ABI, but we never set errno here. */ +# if SYSCALL_ULONG_ARG_1 +T_PSEUDO_ERRVAL (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS, + SYSCALL_ULONG_ARG_1, SYSCALL_ULONG_ARG_2) +# else T_PSEUDO_ERRVAL (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS) +# endif ret_ERRVAL T_PSEUDO_END_ERRVAL (SYSCALL_SYMBOL) @@ -75,7 +113,12 @@ T_PSEUDO_END_ERRVAL (SYSCALL_SYMBOL) /* This is a "normal" system call stub: if there is an error, it returns -1 and sets errno. */ +# if SYSCALL_ULONG_ARG_1 +T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS, + SYSCALL_ULONG_ARG_1, SYSCALL_ULONG_ARG_2) +# else T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS) +# endif ret T_PSEUDO_END (SYSCALL_SYMBOL) diff --git a/sysdeps/unix/syscalls.list b/sysdeps/unix/syscalls.list index e28e801c7..6b22b2cb4 100644 --- a/sysdeps/unix/syscalls.list +++ b/sysdeps/unix/syscalls.list @@ -39,27 +39,27 @@ kill - kill i:ii __kill kill link - link i:ss __link link listen - listen i:ii __listen listen lseek - lseek i:iii __libc_lseek __lseek lseek -madvise - madvise i:pii __madvise madvise +madvise - madvise i:pUi __madvise madvise mkdir - mkdir i:si __mkdir mkdir -mmap - mmap b:aniiii __mmap mmap -mprotect - mprotect i:aii __mprotect mprotect -munmap - munmap i:ai __munmap munmap +mmap - mmap b:aUiiii __mmap mmap +mprotect - mprotect i:aUi __mprotect mprotect +munmap - munmap i:aU __munmap munmap open - open Ci:siv __libc_open __open open profil - profil i:piii __profil profil ptrace - ptrace i:iiii ptrace -read - read Ci:ibn __libc_read __read read -readlink - readlink i:spi __readlink readlink +read - read Ci:ibU __libc_read __read read +readlink - readlink i:spU __readlink readlink readv - readv Ci:ipi __readv readv reboot - reboot i:i reboot -recv - recv Ci:ibni __libc_recv recv -recvfrom - recvfrom Ci:ibniBN __libc_recvfrom __recvfrom recvfrom +recv - recv Ci:ibUi __libc_recv recv +recvfrom - recvfrom Ci:ibUiBN __libc_recvfrom __recvfrom recvfrom recvmsg - recvmsg Ci:ipi __libc_recvmsg __recvmsg recvmsg rename - rename i:ss rename rmdir - rmdir i:s __rmdir rmdir select - select Ci:iPPPP __select __libc_select select -send - send Ci:ibni __libc_send __send send +send - send Ci:ibUi __libc_send __send send sendmsg - sendmsg Ci:ipi __libc_sendmsg __sendmsg sendmsg -sendto - sendto Ci:ibnibn __libc_sendto __sendto sendto +sendto - sendto Ci:ibUibn __libc_sendto __sendto sendto setdomain - setdomainname i:si setdomainname setegid - setegid i:i __setegid setegid seteuid - seteuid i:i __seteuid seteuid @@ -94,5 +94,5 @@ uname - uname i:p __uname uname unlink - unlink i:s __unlink unlink utimes - utimes i:sp __utimes utimes vhangup - vhangup i:i vhangup -write - write Ci:ibn __libc_write __write write +write - write Ci:ibU __libc_write __write write writev - writev Ci:ipi __writev writev diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile index f12b7b1a2..0a0da0015 100644 --- a/sysdeps/unix/sysv/linux/Makefile +++ b/sysdeps/unix/sysv/linux/Makefile @@ -60,7 +60,9 @@ sysdep_routines += adjtimex clone umount umount2 readahead \ setfsuid setfsgid epoll_pwait signalfd \ eventfd eventfd_read eventfd_write prlimit \ personality epoll_wait tee vmsplice splice \ - open_by_handle_at mlock2 pkey_mprotect pkey_set pkey_get + open_by_handle_at mlock2 pkey_mprotect pkey_set pkey_get \ + prctl \ + process_vm_readv process_vm_writev CFLAGS-gethostid.c = -fexceptions CFLAGS-tee.c = -fexceptions -fasynchronous-unwind-tables @@ -273,7 +275,7 @@ sysdep_routines += xstatconv internal_statvfs internal_statvfs64 \ sysdep_headers += bits/fcntl-linux.h -tests += tst-fallocate tst-fallocate64 +tests += tst-fallocate tst-fallocate64 tst-getcwd-smallbuff endif ifeq ($(subdir),elf) diff --git a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h index 937838774..c8471947b 100644 --- a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h +++ b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h @@ -17,6 +17,7 @@ #define __NR_clock_nanosleep 115 #define __NR_clock_settime 112 #define __NR_clone 220 +#define __NR_clone3 435 #define __NR_close 57 #define __NR_connect 203 #define __NR_copy_file_range 285 diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h index 1389cea1b..346d045fb 100644 --- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h +++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h @@ -51,8 +51,12 @@ #define IS_PHECDA(midr) (MIDR_IMPLEMENTOR(midr) == 'h' \ && MIDR_PARTNUM(midr) == 0x000) -#define IS_ARES(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ - && MIDR_PARTNUM(midr) == 0xd0c) +#define IS_NEOVERSE_N1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ + && MIDR_PARTNUM(midr) == 0xd0c) +#define IS_NEOVERSE_N2(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ + && MIDR_PARTNUM(midr) == 0xd49) +#define IS_NEOVERSE_V1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ + && MIDR_PARTNUM(midr) == 0xd40) #define IS_EMAG(midr) (MIDR_IMPLEMENTOR(midr) == 'P' \ && MIDR_PARTNUM(midr) == 0x000) diff --git a/sysdeps/unix/sysv/linux/aarch64/localplt.data b/sysdeps/unix/sysv/linux/aarch64/localplt.data index a60053b91..08af68b5e 100644 --- a/sysdeps/unix/sysv/linux/aarch64/localplt.data +++ b/sysdeps/unix/sysv/linux/aarch64/localplt.data @@ -7,6 +7,9 @@ libc.so: malloc libc.so: memalign libc.so: realloc libm.so: matherr +# If outline atomics are used, libgcc (built outside of glibc) may +# call __getauxval using the PLT. +libc.so: __getauxval ? # The dynamic loader needs __tls_get_addr for TLS. ld.so: __tls_get_addr # The main malloc is interposed into the dynamic linker, for diff --git a/sysdeps/unix/sysv/linux/getpt.c b/sysdeps/unix/sysv/linux/getpt.c index 1803b232c..3cc745e11 100644 --- a/sysdeps/unix/sysv/linux/getpt.c +++ b/sysdeps/unix/sysv/linux/getpt.c @@ -16,69 +16,18 @@ License along with the GNU C Library; if not, see . */ -#include #include -#include #include #include -#include - -#include "linux_fsinfo.h" /* Path to the master pseudo terminal cloning device. */ #define _PATH_DEVPTMX _PATH_DEV "ptmx" -/* Directory containing the UNIX98 pseudo terminals. */ -#define _PATH_DEVPTS _PATH_DEV "pts" - -/* Prototype for function that opens BSD-style master pseudo-terminals. */ -extern int __bsd_getpt (void) attribute_hidden; /* Open a master pseudo terminal and return its file descriptor. */ int __posix_openpt (int oflag) { - static int have_no_dev_ptmx; - int fd; - - if (!have_no_dev_ptmx) - { - fd = __open (_PATH_DEVPTMX, oflag); - if (fd != -1) - { - struct statfs fsbuf; - static int devpts_mounted; - - /* Check that the /dev/pts filesystem is mounted - or if /dev is a devfs filesystem (this implies /dev/pts). */ - if (devpts_mounted - || (__statfs (_PATH_DEVPTS, &fsbuf) == 0 - && fsbuf.f_type == DEVPTS_SUPER_MAGIC) - || (__statfs (_PATH_DEV, &fsbuf) == 0 - && fsbuf.f_type == DEVFS_SUPER_MAGIC)) - { - /* Everything is ok. */ - devpts_mounted = 1; - return fd; - } - - /* If /dev/pts is not mounted then the UNIX98 pseudo terminals - are not usable. */ - __close (fd); - have_no_dev_ptmx = 1; - __set_errno (ENOENT); - } - else - { - if (errno == ENOENT || errno == ENODEV) - have_no_dev_ptmx = 1; - else - return -1; - } - } - else - __set_errno (ENOENT); - - return -1; + return __open (_PATH_DEVPTMX, oflag); } weak_alias (__posix_openpt, posix_openpt) @@ -86,16 +35,6 @@ weak_alias (__posix_openpt, posix_openpt) int __getpt (void) { - int fd = __posix_openpt (O_RDWR); - if (fd == -1) - fd = __bsd_getpt (); - return fd; + return __posix_openpt (O_RDWR); } - - -#define PTYNAME1 "pqrstuvwxyzabcde"; -#define PTYNAME2 "0123456789abcdef"; - -#define __getpt __bsd_getpt -#define HAVE_POSIX_OPENPT -#include +weak_alias (__getpt, getpt) diff --git a/sysdeps/unix/sysv/linux/grantpt.c b/sysdeps/unix/sysv/linux/grantpt.c index 2030e07fa..43122f9a7 100644 --- a/sysdeps/unix/sysv/linux/grantpt.c +++ b/sysdeps/unix/sysv/linux/grantpt.c @@ -1,44 +1,41 @@ -#include -#include -#include -#include -#include -#include -#include -#include +/* grantpt implementation for Linux. + Copyright (C) 1998-2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Zack Weinberg , 1998. -#include + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. -#include "pty-private.h" + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. -#if HAVE_PT_CHOWN -/* Close all file descriptors except the one specified. */ -static void -close_all_fds (void) -{ - DIR *dir = __opendir ("/proc/self/fd"); - if (dir != NULL) - { - struct dirent64 *d; - while ((d = __readdir64 (dir)) != NULL) - if (isdigit (d->d_name[0])) - { - char *endp; - long int fd = strtol (d->d_name, &endp, 10); - if (*endp == '\0' && fd != PTY_FILENO && fd != dirfd (dir)) - __close_nocancel_nostatus (fd); - } + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include - __closedir (dir); +int +grantpt (int fd) +{ + /* Without pt_chown on Linux, we have delegated the creation of the + pty node with the right group and permission mode to the kernel, and + non-root users are unlikely to be able to change it. Therefore let's + consider that POSIX enforcement is the responsibility of the whole + system and not only the GNU libc. */ - int nullfd = __open_nocancel (_PATH_DEVNULL, O_RDONLY); - assert (nullfd == STDIN_FILENO); - nullfd = __open_nocancel (_PATH_DEVNULL, O_WRONLY); - assert (nullfd == STDOUT_FILENO); - __dup2 (STDOUT_FILENO, STDERR_FILENO); - } + /* Verify that fd refers to a ptmx descriptor. */ + unsigned int ptyno; + int ret = __ioctl (fd, TIOCGPTN, &ptyno); + if (ret != 0 && errno == ENOTTY) + /* POSIX requires EINVAL instead of ENOTTY provided by the kernel. */ + __set_errno (EINVAL); + return ret; } -# define CLOSE_ALL_FDS() close_all_fds() -#endif - -#include diff --git a/sysdeps/unix/sysv/linux/hppa/atomic-machine.h b/sysdeps/unix/sysv/linux/hppa/atomic-machine.h index 9d8ffbe86..bf61b66b7 100644 --- a/sysdeps/unix/sysv/linux/hppa/atomic-machine.h +++ b/sysdeps/unix/sysv/linux/hppa/atomic-machine.h @@ -36,9 +36,37 @@ typedef uintptr_t uatomicptr_t; typedef intmax_t atomic_max_t; typedef uintmax_t uatomic_max_t; +#define atomic_full_barrier() __sync_synchronize () + #define __HAVE_64B_ATOMICS 0 #define USE_ATOMIC_COMPILER_BUILTINS 0 +/* We use the compiler atomic load and store builtins as the generic + defines are not atomic. In particular, we need to use compare and + exchange for stores as the implementation is synthesized. */ +void __atomic_link_error (void); +#define __atomic_check_size_ls(mem) \ + if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && sizeof (*mem) != 4) \ + __atomic_link_error (); + +#define atomic_load_relaxed(mem) \ + ({ __atomic_check_size_ls((mem)); \ + __atomic_load_n ((mem), __ATOMIC_RELAXED); }) +#define atomic_load_acquire(mem) \ + ({ __atomic_check_size_ls((mem)); \ + __atomic_load_n ((mem), __ATOMIC_ACQUIRE); }) + +#define atomic_store_relaxed(mem, val) \ + do { \ + __atomic_check_size_ls((mem)); \ + __atomic_store_n ((mem), (val), __ATOMIC_RELAXED); \ + } while (0) +#define atomic_store_release(mem, val) \ + do { \ + __atomic_check_size_ls((mem)); \ + __atomic_store_n ((mem), (val), __ATOMIC_RELEASE); \ + } while (0) + /* XXX Is this actually correct? */ #define ATOMIC_EXCHANGE_USES_CAS 1 diff --git a/sysdeps/unix/sysv/linux/microblaze/sysdep.h b/sysdeps/unix/sysv/linux/microblaze/sysdep.h index ed873d9dd..796663a23 100644 --- a/sysdeps/unix/sysv/linux/microblaze/sysdep.h +++ b/sysdeps/unix/sysv/linux/microblaze/sysdep.h @@ -209,8 +209,8 @@ SYSCALL_ERROR_LABEL_DCL: \ # define inline_syscall0(name,dummy) \ ({ \ - register long __ret __asm__("r3"); \ - register long __r12 __asm__("r12") = name; \ + register long int __ret __asm__("r3"); \ + register long int __r12 __asm__("r12") = name; \ __asm__ __volatile__( "brki r14,8; nop;" \ : "=r"(__ret) \ : "r"(__r12) \ @@ -219,9 +219,10 @@ SYSCALL_ERROR_LABEL_DCL: \ # define inline_syscall1(name,arg1) \ ({ \ - register long __ret __asm__("r3"); \ - register long __r12 __asm__("r12") = name; \ - register long __r5 __asm__("r5") = (long)(arg1); \ + long int __arg1 = (long int) (arg1); \ + register long int __ret __asm__("r3"); \ + register long int __r12 __asm__("r12") = name; \ + register long int __r5 __asm__("r5") = __arg1; \ __asm__ __volatile__( "brki r14,8; nop;" \ : "=r"(__ret) \ : "r"(__r5), "r"(__r12) \ @@ -230,10 +231,12 @@ SYSCALL_ERROR_LABEL_DCL: \ # define inline_syscall2(name,arg1,arg2) \ ({ \ - register long __ret __asm__("r3"); \ - register long __r12 __asm__("r12") = name; \ - register long __r5 __asm__("r5") = (long)(arg1); \ - register long __r6 __asm__("r6") = (long)(arg2); \ + long int __arg1 = (long int) (arg1); \ + long int __arg2 = (long int) (arg2); \ + register long int __ret __asm__("r3"); \ + register long int __r12 __asm__("r12") = name; \ + register long int __r5 __asm__("r5") = __arg1; \ + register long int __r6 __asm__("r6") = __arg2; \ __asm__ __volatile__( "brki r14,8; nop;" \ : "=r"(__ret) \ : "r"(__r5), "r"(__r6), "r"(__r12) \ @@ -243,11 +246,14 @@ SYSCALL_ERROR_LABEL_DCL: \ # define inline_syscall3(name,arg1,arg2,arg3) \ ({ \ - register long __ret __asm__("r3"); \ - register long __r12 __asm__("r12") = name; \ - register long __r5 __asm__("r5") = (long)(arg1); \ - register long __r6 __asm__("r6") = (long)(arg2); \ - register long __r7 __asm__("r7") = (long)(arg3); \ + long int __arg1 = (long int) (arg1); \ + long int __arg2 = (long int) (arg2); \ + long int __arg3 = (long int) (arg3); \ + register long int __ret __asm__("r3"); \ + register long int __r12 __asm__("r12") = name; \ + register long int __r5 __asm__("r5") = __arg1; \ + register long int __r6 __asm__("r6") = __arg2; \ + register long int __r7 __asm__("r7") = __arg3; \ __asm__ __volatile__( "brki r14,8; nop;" \ : "=r"(__ret) \ : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r12) \ @@ -257,12 +263,16 @@ SYSCALL_ERROR_LABEL_DCL: \ # define inline_syscall4(name,arg1,arg2,arg3,arg4) \ ({ \ - register long __ret __asm__("r3"); \ - register long __r12 __asm__("r12") = name; \ - register long __r5 __asm__("r5") = (long)(arg1); \ - register long __r6 __asm__("r6") = (long)(arg2); \ - register long __r7 __asm__("r7") = (long)(arg3); \ - register long __r8 __asm__("r8") = (long)(arg4); \ + long int __arg1 = (long int) (arg1); \ + long int __arg2 = (long int) (arg2); \ + long int __arg3 = (long int) (arg3); \ + long int __arg4 = (long int) (arg4); \ + register long int __ret __asm__("r3"); \ + register long int __r12 __asm__("r12") = name; \ + register long int __r5 __asm__("r5") = __arg1; \ + register long int __r6 __asm__("r6") = __arg2; \ + register long int __r7 __asm__("r7") = __arg3; \ + register long int __r8 __asm__("r8") = __arg4; \ __asm__ __volatile__( "brki r14,8; nop;" \ : "=r"(__ret) \ : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r8),"r"(__r12) \ @@ -272,13 +282,18 @@ SYSCALL_ERROR_LABEL_DCL: \ # define inline_syscall5(name,arg1,arg2,arg3,arg4,arg5) \ ({ \ - register long __ret __asm__("r3"); \ - register long __r12 __asm__("r12") = name; \ - register long __r5 __asm__("r5") = (long)(arg1); \ - register long __r6 __asm__("r6") = (long)(arg2); \ - register long __r7 __asm__("r7") = (long)(arg3); \ - register long __r8 __asm__("r8") = (long)(arg4); \ - register long __r9 __asm__("r9") = (long)(arg5); \ + long int __arg1 = (long int) (arg1); \ + long int __arg2 = (long int) (arg2); \ + long int __arg3 = (long int) (arg3); \ + long int __arg4 = (long int) (arg4); \ + long int __arg5 = (long int) (arg5); \ + register long int __ret __asm__("r3"); \ + register long int __r12 __asm__("r12") = name; \ + register long int __r5 __asm__("r5") = __arg1; \ + register long int __r6 __asm__("r6") = __arg2; \ + register long int __r7 __asm__("r7") = __arg3; \ + register long int __r8 __asm__("r8") = __arg4; \ + register long int __r9 __asm__("r9") = __arg5; \ __asm__ __volatile__( "brki r14,8; nop;" \ : "=r"(__ret) \ : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r8),"r"(__r9), "r"(__r12) \ @@ -288,14 +303,20 @@ SYSCALL_ERROR_LABEL_DCL: \ # define inline_syscall6(name,arg1,arg2,arg3,arg4,arg5,arg6) \ ({ \ - register long __ret __asm__("r3"); \ - register long __r12 __asm__("r12") = name; \ - register long __r5 __asm__("r5") = (long)(arg1); \ - register long __r6 __asm__("r6") = (long)(arg2); \ - register long __r7 __asm__("r7") = (long)(arg3); \ - register long __r8 __asm__("r8") = (long)(arg4); \ - register long __r9 __asm__("r9") = (long)(arg5); \ - register long __r10 __asm__("r10") = (long)(arg6); \ + long int __arg1 = (long int) (arg1); \ + long int __arg2 = (long int) (arg2); \ + long int __arg3 = (long int) (arg3); \ + long int __arg4 = (long int) (arg4); \ + long int __arg5 = (long int) (arg5); \ + long int __arg6 = (long int) (arg6); \ + register long int __ret __asm__("r3"); \ + register long int __r12 __asm__("r12") = name; \ + register long int __r5 __asm__("r5") = __arg1; \ + register long int __r6 __asm__("r6") = __arg2; \ + register long int __r7 __asm__("r7") = __arg3; \ + register long int __r8 __asm__("r8") = __arg4; \ + register long int __r9 __asm__("r9") = __arg5; \ + register long int __r10 __asm__("r10") = __arg6; \ __asm__ __volatile__( "brki r14,8; nop;" \ : "=r"(__ret) \ : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r8),"r"(__r9), "r"(__r10), \ diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S index b2bbf1018..ff445a540 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S @@ -22,9 +22,9 @@ .text .set nomips16 -/* long long __mips_syscall5 (long arg1, long arg2, long arg3, long arg4, - long arg5, - long number) */ +/* long long int __mips_syscall5 (long int arg1, long int arg2, long int arg3, + long int arg4, long int arg5, + long int number) */ ENTRY(__mips_syscall5) lw v0, 20(sp) diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S index 572d7c113..2b4a3117d 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S @@ -22,9 +22,9 @@ .text .set nomips16 -/* long long __mips_syscall6 (long arg1, long arg2, long arg3, long arg4, - long arg5, long arg6, - long number) */ +/* long long int __mips_syscall6 (long int arg1, long int arg2, long int arg3, + long int arg4, long int arg5, long int arg6, + long int number) */ ENTRY(__mips_syscall6) lw v0, 24(sp) diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S index 05164cb25..2723bbb13 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S @@ -22,9 +22,10 @@ .text .set nomips16 -/* long long __mips_syscall7 (long arg1, long arg2, long arg3, long arg4, - long arg5, long arg6, long arg7, - long number) */ +/* long long int __mips_syscall7 (long int arg1, long int arg2, long int arg3, + long int arg4, long int arg5, long int arg6, + long int arg7, + long int number) */ ENTRY(__mips_syscall7) lw v0, 28(sp) diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h index 9bf551ace..f23ede025 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h @@ -19,51 +19,57 @@ #ifndef MIPS16_SYSCALL_H #define MIPS16_SYSCALL_H 1 -long long __nomips16 __mips16_syscall0 (long number); +long long int __nomips16 __mips16_syscall0 (long int number); #define __mips16_syscall0(dummy, number) \ - __mips16_syscall0 ((long) (number)) + __mips16_syscall0 ((long int) (number)) -long long __nomips16 __mips16_syscall1 (long a0, - long number); +long long int __nomips16 __mips16_syscall1 (long int a0, + long int number); #define __mips16_syscall1(a0, number) \ - __mips16_syscall1 ((long) (a0), \ - (long) (number)) + __mips16_syscall1 ((long int) (a0), \ + (long int) (number)) -long long __nomips16 __mips16_syscall2 (long a0, long a1, - long number); +long long int __nomips16 __mips16_syscall2 (long int a0, long int a1, + long int number); #define __mips16_syscall2(a0, a1, number) \ - __mips16_syscall2 ((long) (a0), (long) (a1), \ - (long) (number)) + __mips16_syscall2 ((long int) (a0), (long int) (a1), \ + (long int) (number)) -long long __nomips16 __mips16_syscall3 (long a0, long a1, long a2, - long number); +long long int __nomips16 __mips16_syscall3 (long int a0, long int a1, + long int a2, + long int number); #define __mips16_syscall3(a0, a1, a2, number) \ - __mips16_syscall3 ((long) (a0), (long) (a1), (long) (a2), \ - (long) (number)) + __mips16_syscall3 ((long int) (a0), (long int) (a1), \ + (long int) (a2), \ + (long int) (number)) -long long __nomips16 __mips16_syscall4 (long a0, long a1, long a2, long a3, - long number); +long long int __nomips16 __mips16_syscall4 (long int a0, long int a1, + long int a2, long int a3, + long int number); #define __mips16_syscall4(a0, a1, a2, a3, number) \ - __mips16_syscall4 ((long) (a0), (long) (a1), (long) (a2), \ - (long) (a3), \ - (long) (number)) + __mips16_syscall4 ((long int) (a0), (long int) (a1), \ + (long int) (a2), (long int) (a3), \ + (long int) (number)) /* The remaining ones use regular MIPS wrappers. */ #define __mips16_syscall5(a0, a1, a2, a3, a4, number) \ - __mips_syscall5 ((long) (a0), (long) (a1), (long) (a2), \ - (long) (a3), (long) (a4), \ - (long) (number)) + __mips_syscall5 ((long int) (a0), (long int) (a1), \ + (long int) (a2), (long int) (a3), \ + (long int) (a4), \ + (long int) (number)) #define __mips16_syscall6(a0, a1, a2, a3, a4, a5, number) \ - __mips_syscall6 ((long) (a0), (long) (a1), (long) (a2), \ - (long) (a3), (long) (a4), (long) (a5), \ - (long) (number)) + __mips_syscall6 ((long int) (a0), (long int) (a1), \ + (long int) (a2), (long int) (a3), \ + (long int) (a4), (long int) (a5), \ + (long int) (number)) #define __mips16_syscall7(a0, a1, a2, a3, a4, a5, a6, number) \ - __mips_syscall7 ((long) (a0), (long) (a1), (long) (a2), \ - (long) (a3), (long) (a4), (long) (a5), \ - (long) (a6), \ - (long) (number)) + __mips_syscall7 ((long int) (a0), (long int) (a1), \ + (long int) (a2), (long int) (a3), \ + (long int) (a4), (long int) (a5), \ + (long int) (a6), \ + (long int) (number)) #endif diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c index 92f16e272..43c05f805 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c @@ -20,8 +20,8 @@ #undef __mips16_syscall0 -long long __nomips16 -__mips16_syscall0 (long number) +long long int __nomips16 +__mips16_syscall0 (long int number) { union __mips_syscall_return ret; ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 0); diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c index fa985a96e..16a567e83 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c @@ -20,9 +20,9 @@ #undef __mips16_syscall1 -long long __nomips16 -__mips16_syscall1 (long a0, - long number) +long long int __nomips16 +__mips16_syscall1 (long int a0, + long int number) { union __mips_syscall_return ret; ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 1, diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c index f042ac815..c0a856c34 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c @@ -20,9 +20,9 @@ #undef __mips16_syscall2 -long long __nomips16 -__mips16_syscall2 (long a0, long a1, - long number) +long long int __nomips16 +__mips16_syscall2 (long int a0, long int a1, + long int number) { union __mips_syscall_return ret; ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 2, diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c index dfe2f7feb..042768ebf 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c @@ -20,9 +20,9 @@ #undef __mips16_syscall3 -long long __nomips16 -__mips16_syscall3 (long a0, long a1, long a2, - long number) +long long int __nomips16 +__mips16_syscall3 (long int a0, long int a1, long int a2, + long int number) { union __mips_syscall_return ret; ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 3, diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c index 39de51035..8658d822a 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c @@ -20,9 +20,9 @@ #undef __mips16_syscall4 -long long __nomips16 -__mips16_syscall4 (long a0, long a1, long a2, long a3, - long number) +long long int __nomips16 +__mips16_syscall4 (long int a0, long int a1, long int a2, long int a3, + long int number) { union __mips_syscall_return ret; ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 4, diff --git a/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h index beefcf284..0c6a83e9b 100644 --- a/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h +++ b/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h @@ -52,7 +52,7 @@ #undef INLINE_SYSCALL #define INLINE_SYSCALL(name, nr, args...) \ ({ INTERNAL_SYSCALL_DECL (_sc_err); \ - long result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ + long int result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ if ( INTERNAL_SYSCALL_ERROR_P (result_var, _sc_err) ) \ { \ __set_errno (INTERNAL_SYSCALL_ERRNO (result_var, _sc_err)); \ @@ -61,10 +61,10 @@ result_var; }) #undef INTERNAL_SYSCALL_DECL -#define INTERNAL_SYSCALL_DECL(err) long err __attribute__ ((unused)) +#define INTERNAL_SYSCALL_DECL(err) long int err __attribute__ ((unused)) #undef INTERNAL_SYSCALL_ERROR_P -#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long) (err)) +#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long int) (err)) #undef INTERNAL_SYSCALL_ERRNO #define INTERNAL_SYSCALL_ERRNO(val, err) ((void) (err), val) @@ -103,11 +103,11 @@ union __mips_syscall_return { - long long val; + long long int val; struct { - long v0; - long v1; + long int v0; + long int v1; } reg; }; @@ -152,13 +152,13 @@ union __mips_syscall_return #define internal_syscall0(v0_init, input, number, err, dummy...) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a3 asm ("$7"); \ + register long int __v0 asm ("$2"); \ + register long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -175,14 +175,15 @@ union __mips_syscall_return #define internal_syscall1(v0_init, input, number, err, arg1) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + long int _arg1 = (long int) (arg1); \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a0 asm ("$4") = (long) (arg1); \ - register long __a3 asm ("$7"); \ + register long int __v0 asm ("$2"); \ + register long int __a0 asm ("$4") = _arg1; \ + register long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -199,15 +200,17 @@ union __mips_syscall_return #define internal_syscall2(v0_init, input, number, err, arg1, arg2) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a0 asm ("$4") = (long) (arg1); \ - register long __a1 asm ("$5") = (long) (arg2); \ - register long __a3 asm ("$7"); \ + register long int __v0 asm ("$2"); \ + register long int __a0 asm ("$4") = _arg1; \ + register long int __a1 asm ("$5") = _arg2; \ + register long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -225,16 +228,19 @@ union __mips_syscall_return #define internal_syscall3(v0_init, input, number, err, \ arg1, arg2, arg3) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + long int _arg3 = (long int) (arg3); \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a0 asm ("$4") = (long) (arg1); \ - register long __a1 asm ("$5") = (long) (arg2); \ - register long __a2 asm ("$6") = (long) (arg3); \ - register long __a3 asm ("$7"); \ + register long int __v0 asm ("$2"); \ + register long int __a0 asm ("$4") = _arg1; \ + register long int __a1 asm ("$5") = _arg2; \ + register long int __a2 asm ("$6") = _arg3; \ + register long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -252,16 +258,20 @@ union __mips_syscall_return #define internal_syscall4(v0_init, input, number, err, \ arg1, arg2, arg3, arg4) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + long int _arg3 = (long int) (arg3); \ + long int _arg4 = (long int) (arg4); \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a0 asm ("$4") = (long) (arg1); \ - register long __a1 asm ("$5") = (long) (arg2); \ - register long __a2 asm ("$6") = (long) (arg3); \ - register long __a3 asm ("$7") = (long) (arg4); \ + register long int __v0 asm ("$2"); \ + register long int __a0 asm ("$4") = _arg1; \ + register long int __a1 asm ("$5") = _arg2; \ + register long int __a2 asm ("$6") = _arg3; \ + register long int __a3 asm ("$7") = _arg4; \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -285,63 +295,66 @@ union __mips_syscall_return compiler specifics required for the stack arguments to be pushed, which would be the case if these syscalls were inlined. */ -long long __nomips16 __mips_syscall5 (long arg1, long arg2, long arg3, - long arg4, long arg5, - long number); +long long int __nomips16 __mips_syscall5 (long int arg1, long int arg2, + long int arg3, long int arg4, + long int arg5, + long int number); libc_hidden_proto (__mips_syscall5, nomips16) #define internal_syscall5(v0_init, input, number, err, \ arg1, arg2, arg3, arg4, arg5) \ ({ \ union __mips_syscall_return _sc_ret; \ - _sc_ret.val = __mips_syscall5 ((long) (arg1), \ - (long) (arg2), \ - (long) (arg3), \ - (long) (arg4), \ - (long) (arg5), \ - (long) (number)); \ + _sc_ret.val = __mips_syscall5 ((long int) (arg1), \ + (long int) (arg2), \ + (long int) (arg3), \ + (long int) (arg4), \ + (long int) (arg5), \ + (long int) (number)); \ err = _sc_ret.reg.v1; \ _sc_ret.reg.v0; \ }) -long long __nomips16 __mips_syscall6 (long arg1, long arg2, long arg3, - long arg4, long arg5, long arg6, - long number); +long long int __nomips16 __mips_syscall6 (long int arg1, long int arg2, + long int arg3, long int arg4, + long int arg5, long int arg6, + long int number); libc_hidden_proto (__mips_syscall6, nomips16) #define internal_syscall6(v0_init, input, number, err, \ arg1, arg2, arg3, arg4, arg5, arg6) \ ({ \ union __mips_syscall_return _sc_ret; \ - _sc_ret.val = __mips_syscall6 ((long) (arg1), \ - (long) (arg2), \ - (long) (arg3), \ - (long) (arg4), \ - (long) (arg5), \ - (long) (arg6), \ - (long) (number)); \ + _sc_ret.val = __mips_syscall6 ((long int) (arg1), \ + (long int) (arg2), \ + (long int) (arg3), \ + (long int) (arg4), \ + (long int) (arg5), \ + (long int) (arg6), \ + (long int) (number)); \ err = _sc_ret.reg.v1; \ _sc_ret.reg.v0; \ }) -long long __nomips16 __mips_syscall7 (long arg1, long arg2, long arg3, - long arg4, long arg5, long arg6, - long arg7, - long number); +long long int __nomips16 __mips_syscall7 (long int arg1, long int arg2, + long int arg3, long int arg4, + long int arg5, long int arg6, + long int arg7, + long int number); libc_hidden_proto (__mips_syscall7, nomips16) #define internal_syscall7(v0_init, input, number, err, \ arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ ({ \ union __mips_syscall_return _sc_ret; \ - _sc_ret.val = __mips_syscall7 ((long) (arg1), \ - (long) (arg2), \ - (long) (arg3), \ - (long) (arg4), \ - (long) (arg5), \ - (long) (arg6), \ - (long) (arg7), \ - (long) (number)); \ + _sc_ret.val = __mips_syscall7 ((long int) (arg1), \ + (long int) (arg2), \ + (long int) (arg3), \ + (long int) (arg4), \ + (long int) (arg5), \ + (long int) (arg6), \ + (long int) (arg7), \ + (long int) (number)); \ err = _sc_ret.reg.v1; \ _sc_ret.reg.v0; \ }) diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h index f96636538..4a9d7054f 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h +++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h @@ -47,14 +47,14 @@ /* Convert X to a long long, without losing any bits if it is one already or warning if it is a 32-bit pointer. */ -#define ARGIFY(X) ((long long) (__typeof__ ((X) - (X))) (X)) +#define ARGIFY(X) ((long long int) (__typeof__ ((X) - (X))) (X)) /* Define a macro which expands into the inline wrapper code for a system call. */ #undef INLINE_SYSCALL #define INLINE_SYSCALL(name, nr, args...) \ ({ INTERNAL_SYSCALL_DECL (_sc_err); \ - long result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ + long int result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ if ( INTERNAL_SYSCALL_ERROR_P (result_var, _sc_err) ) \ { \ __set_errno (INTERNAL_SYSCALL_ERRNO (result_var, _sc_err)); \ @@ -63,10 +63,10 @@ result_var; }) #undef INTERNAL_SYSCALL_DECL -#define INTERNAL_SYSCALL_DECL(err) long err __attribute__ ((unused)) +#define INTERNAL_SYSCALL_DECL(err) long int err __attribute__ ((unused)) #undef INTERNAL_SYSCALL_ERROR_P -#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long) (err)) +#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long int) (err)) #undef INTERNAL_SYSCALL_ERRNO #define INTERNAL_SYSCALL_ERRNO(val, err) ((void) (err), val) @@ -112,13 +112,13 @@ #define internal_syscall0(v0_init, input, number, err, dummy...) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + register long long int __s0 asm ("$16") __attribute__ ((unused))\ = (number); \ - register long long __v0 asm ("$2"); \ - register long long __a3 asm ("$7"); \ + register long long int __v0 asm ("$2"); \ + register long long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -135,14 +135,15 @@ #define internal_syscall1(v0_init, input, number, err, arg1) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + long long int _arg1 = ARGIFY (arg1); \ + register long long int __s0 asm ("$16") __attribute__ ((unused))\ = (number); \ - register long long __v0 asm ("$2"); \ - register long long __a0 asm ("$4") = ARGIFY (arg1); \ - register long long __a3 asm ("$7"); \ + register long long int __v0 asm ("$2"); \ + register long long int __a0 asm ("$4") = _arg1; \ + register long long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -159,15 +160,17 @@ #define internal_syscall2(v0_init, input, number, err, arg1, arg2) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + long long int _arg1 = ARGIFY (arg1); \ + long long int _arg2 = ARGIFY (arg2); \ + register long long int __s0 asm ("$16") __attribute__ ((unused))\ = (number); \ - register long long __v0 asm ("$2"); \ - register long long __a0 asm ("$4") = ARGIFY (arg1); \ - register long long __a1 asm ("$5") = ARGIFY (arg2); \ - register long long __a3 asm ("$7"); \ + register long long int __v0 asm ("$2"); \ + register long long int __a0 asm ("$4") = _arg1; \ + register long long int __a1 asm ("$5") = _arg2; \ + register long long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -185,16 +188,19 @@ #define internal_syscall3(v0_init, input, number, err, \ arg1, arg2, arg3) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + long long int _arg1 = ARGIFY (arg1); \ + long long int _arg2 = ARGIFY (arg2); \ + long long int _arg3 = ARGIFY (arg3); \ + register long long int __s0 asm ("$16") __attribute__ ((unused))\ = (number); \ - register long long __v0 asm ("$2"); \ - register long long __a0 asm ("$4") = ARGIFY (arg1); \ - register long long __a1 asm ("$5") = ARGIFY (arg2); \ - register long long __a2 asm ("$6") = ARGIFY (arg3); \ - register long long __a3 asm ("$7"); \ + register long long int __v0 asm ("$2"); \ + register long long int __a0 asm ("$4") = _arg1; \ + register long long int __a1 asm ("$5") = _arg2; \ + register long long int __a2 asm ("$6") = _arg3; \ + register long long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -212,16 +218,20 @@ #define internal_syscall4(v0_init, input, number, err, \ arg1, arg2, arg3, arg4) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + long long int _arg1 = ARGIFY (arg1); \ + long long int _arg2 = ARGIFY (arg2); \ + long long int _arg3 = ARGIFY (arg3); \ + long long int _arg4 = ARGIFY (arg4); \ + register long long int __s0 asm ("$16") __attribute__ ((unused))\ = (number); \ - register long long __v0 asm ("$2"); \ - register long long __a0 asm ("$4") = ARGIFY (arg1); \ - register long long __a1 asm ("$5") = ARGIFY (arg2); \ - register long long __a2 asm ("$6") = ARGIFY (arg3); \ - register long long __a3 asm ("$7") = ARGIFY (arg4); \ + register long long int __v0 asm ("$2"); \ + register long long int __a0 asm ("$4") = _arg1; \ + register long long int __a1 asm ("$5") = _arg2; \ + register long long int __a2 asm ("$6") = _arg3; \ + register long long int __a3 asm ("$7") = _arg4; \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -239,17 +249,22 @@ #define internal_syscall5(v0_init, input, number, err, \ arg1, arg2, arg3, arg4, arg5) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + long long int _arg1 = ARGIFY (arg1); \ + long long int _arg2 = ARGIFY (arg2); \ + long long int _arg3 = ARGIFY (arg3); \ + long long int _arg4 = ARGIFY (arg4); \ + long long int _arg5 = ARGIFY (arg5); \ + register long long int __s0 asm ("$16") __attribute__ ((unused))\ = (number); \ - register long long __v0 asm ("$2"); \ - register long long __a0 asm ("$4") = ARGIFY (arg1); \ - register long long __a1 asm ("$5") = ARGIFY (arg2); \ - register long long __a2 asm ("$6") = ARGIFY (arg3); \ - register long long __a3 asm ("$7") = ARGIFY (arg4); \ - register long long __a4 asm ("$8") = ARGIFY (arg5); \ + register long long int __v0 asm ("$2"); \ + register long long int __a0 asm ("$4") = _arg1; \ + register long long int __a1 asm ("$5") = _arg2; \ + register long long int __a2 asm ("$6") = _arg3; \ + register long long int __a3 asm ("$7") = _arg4; \ + register long long int __a4 asm ("$8") = _arg5; \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -267,18 +282,24 @@ #define internal_syscall6(v0_init, input, number, err, \ arg1, arg2, arg3, arg4, arg5, arg6) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + long long int _arg1 = ARGIFY (arg1); \ + long long int _arg2 = ARGIFY (arg2); \ + long long int _arg3 = ARGIFY (arg3); \ + long long int _arg4 = ARGIFY (arg4); \ + long long int _arg5 = ARGIFY (arg5); \ + long long int _arg6 = ARGIFY (arg6); \ + register long long int __s0 asm ("$16") __attribute__ ((unused))\ = (number); \ - register long long __v0 asm ("$2"); \ - register long long __a0 asm ("$4") = ARGIFY (arg1); \ - register long long __a1 asm ("$5") = ARGIFY (arg2); \ - register long long __a2 asm ("$6") = ARGIFY (arg3); \ - register long long __a3 asm ("$7") = ARGIFY (arg4); \ - register long long __a4 asm ("$8") = ARGIFY (arg5); \ - register long long __a5 asm ("$9") = ARGIFY (arg6); \ + register long long int __v0 asm ("$2"); \ + register long long int __a0 asm ("$4") = _arg1; \ + register long long int __a1 asm ("$5") = _arg2; \ + register long long int __a2 asm ("$6") = _arg3; \ + register long long int __a3 asm ("$7") = _arg4; \ + register long long int __a4 asm ("$8") = _arg5; \ + register long long int __a5 asm ("$9") = _arg6; \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h index 9d30291f8..3e1f1cc3c 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h +++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h @@ -50,7 +50,7 @@ #undef INLINE_SYSCALL #define INLINE_SYSCALL(name, nr, args...) \ ({ INTERNAL_SYSCALL_DECL (_sc_err); \ - long result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ + long int result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ if ( INTERNAL_SYSCALL_ERROR_P (result_var, _sc_err) ) \ { \ __set_errno (INTERNAL_SYSCALL_ERRNO (result_var, _sc_err)); \ @@ -59,10 +59,10 @@ result_var; }) #undef INTERNAL_SYSCALL_DECL -#define INTERNAL_SYSCALL_DECL(err) long err __attribute__ ((unused)) +#define INTERNAL_SYSCALL_DECL(err) long int err __attribute__ ((unused)) #undef INTERNAL_SYSCALL_ERROR_P -#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long) (err)) +#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long int) (err)) #undef INTERNAL_SYSCALL_ERRNO #define INTERNAL_SYSCALL_ERRNO(val, err) ((void) (err), val) @@ -108,13 +108,13 @@ #define internal_syscall0(v0_init, input, number, err, dummy...) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a3 asm ("$7"); \ + register long int __v0 asm ("$2"); \ + register long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -131,14 +131,15 @@ #define internal_syscall1(v0_init, input, number, err, arg1) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + long int _arg1 = (long int) (arg1); \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a0 asm ("$4") = (long) (arg1); \ - register long __a3 asm ("$7"); \ + register long int __v0 asm ("$2"); \ + register long int __a0 asm ("$4") = _arg1; \ + register long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -155,15 +156,17 @@ #define internal_syscall2(v0_init, input, number, err, arg1, arg2) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a0 asm ("$4") = (long) (arg1); \ - register long __a1 asm ("$5") = (long) (arg2); \ - register long __a3 asm ("$7"); \ + register long int __v0 asm ("$2"); \ + register long int __a0 asm ("$4") = _arg1; \ + register long int __a1 asm ("$5") = _arg2; \ + register long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -181,16 +184,19 @@ #define internal_syscall3(v0_init, input, number, err, \ arg1, arg2, arg3) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + long int _arg3 = (long int) (arg3); \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a0 asm ("$4") = (long) (arg1); \ - register long __a1 asm ("$5") = (long) (arg2); \ - register long __a2 asm ("$6") = (long) (arg3); \ - register long __a3 asm ("$7"); \ + register long int __v0 asm ("$2"); \ + register long int __a0 asm ("$4") = _arg1; \ + register long int __a1 asm ("$5") = _arg2; \ + register long int __a2 asm ("$6") = _arg3; \ + register long int __a3 asm ("$7"); \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -208,16 +214,20 @@ #define internal_syscall4(v0_init, input, number, err, \ arg1, arg2, arg3, arg4) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + long int _arg3 = (long int) (arg3); \ + long int _arg4 = (long int) (arg4); \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a0 asm ("$4") = (long) (arg1); \ - register long __a1 asm ("$5") = (long) (arg2); \ - register long __a2 asm ("$6") = (long) (arg3); \ - register long __a3 asm ("$7") = (long) (arg4); \ + register long int __v0 asm ("$2"); \ + register long int __a0 asm ("$4") = _arg1; \ + register long int __a1 asm ("$5") = _arg2; \ + register long int __a2 asm ("$6") = _arg3; \ + register long int __a3 asm ("$7") = _arg4; \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -235,17 +245,22 @@ #define internal_syscall5(v0_init, input, number, err, \ arg1, arg2, arg3, arg4, arg5) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + long int _arg3 = (long int) (arg3); \ + long int _arg4 = (long int) (arg4); \ + long int _arg5 = (long int) (arg5); \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a0 asm ("$4") = (long) (arg1); \ - register long __a1 asm ("$5") = (long) (arg2); \ - register long __a2 asm ("$6") = (long) (arg3); \ - register long __a3 asm ("$7") = (long) (arg4); \ - register long __a4 asm ("$8") = (long) (arg5); \ + register long int __v0 asm ("$2"); \ + register long int __a0 asm ("$4") = _arg1; \ + register long int __a1 asm ("$5") = _arg2; \ + register long int __a2 asm ("$6") = _arg3; \ + register long int __a3 asm ("$7") = _arg4; \ + register long int __a4 asm ("$8") = _arg5; \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ @@ -263,18 +278,24 @@ #define internal_syscall6(v0_init, input, number, err, \ arg1, arg2, arg3, arg4, arg5, arg6) \ ({ \ - long _sys_result; \ + long int _sys_result; \ \ { \ - register long __s0 asm ("$16") __attribute__ ((unused)) \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + long int _arg3 = (long int) (arg3); \ + long int _arg4 = (long int) (arg4); \ + long int _arg5 = (long int) (arg5); \ + long int _arg6 = (long int) (arg6); \ + register long int __s0 asm ("$16") __attribute__ ((unused)) \ = (number); \ - register long __v0 asm ("$2"); \ - register long __a0 asm ("$4") = (long) (arg1); \ - register long __a1 asm ("$5") = (long) (arg2); \ - register long __a2 asm ("$6") = (long) (arg3); \ - register long __a3 asm ("$7") = (long) (arg4); \ - register long __a4 asm ("$8") = (long) (arg5); \ - register long __a5 asm ("$9") = (long) (arg6); \ + register long int __v0 asm ("$2"); \ + register long int __a0 asm ("$4") = _arg1; \ + register long int __a1 asm ("$5") = _arg2; \ + register long int __a2 asm ("$6") = _arg3; \ + register long int __a3 asm ("$7") = _arg4; \ + register long int __a4 asm ("$8") = _arg5; \ + register long int __a5 asm ("$9") = _arg6; \ __asm__ volatile ( \ ".set\tnoreorder\n\t" \ v0_init \ diff --git a/sysdeps/unix/sysv/linux/mips/mips64/syscall.S b/sysdeps/unix/sysv/linux/mips/mips64/syscall.S index 26adf2cd0..a9baff3c1 100644 --- a/sysdeps/unix/sysv/linux/mips/mips64/syscall.S +++ b/sysdeps/unix/sysv/linux/mips/mips64/syscall.S @@ -20,7 +20,7 @@ #include /* Usage: - long syscall (syscall_number, arg1, arg2, arg3, arg4, arg5, arg6, arg7) + long int syscall (syscall_number, arg1, arg2, arg3, arg4, arg5, arg6, arg7) We need to do some arg shifting, syscall_number will be in v0. */ diff --git a/sysdeps/unix/sysv/linux/mips/sysdep.h b/sysdeps/unix/sysv/linux/mips/sysdep.h index cdfc0b1b5..a4cf1540f 100644 --- a/sysdeps/unix/sysv/linux/mips/sysdep.h +++ b/sysdeps/unix/sysv/linux/mips/sysdep.h @@ -36,8 +36,8 @@ the INTERNAL_SYSCALL_{ERROR_P,ERRNO} macros work correctly. */ #define INTERNAL_VSYSCALL_CALL(funcptr, err, nr, args...) \ ({ \ - long _ret = funcptr (args); \ - err = ((unsigned long) (_ret) >= (unsigned long) -4095L); \ + long int _ret = funcptr (args); \ + err = ((unsigned long int) (_ret) >= (unsigned long int) -4095L); \ if (err) \ _ret = -_ret; \ _ret; \ diff --git a/sysdeps/unix/sysv/linux/mips/unwind-arch.h b/sysdeps/unix/sysv/linux/mips/unwind-arch.h new file mode 100644 index 000000000..a00989998 --- /dev/null +++ b/sysdeps/unix/sysv/linux/mips/unwind-arch.h @@ -0,0 +1,67 @@ +/* Return backtrace of current program state. Arch-specific bits. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _UNWIND_ARCH_H +#define _UNWIND_ARCH_H + +#include + +/* MIPS fallback code handle a frame where its FDE can not be obtained + (for instance a signal frame) by reading the kernel allocated signal frame + and adding '2' to the value of 'sc_pc' [1]. The added value is used to + recognize an end of an EH region on mips16 [2]. + + The idea here is to adjust the obtained signal frame ADDR value and remove + the libgcc added value by checking if the previous frame is a signal frame + one. + + [1] libgcc/config/mips/linux-unwind.h from gcc code. + [2] gcc/config/mips/mips.h from gcc code. */ + +static inline void * +unwind_arch_adjustment (void *prev, void *addr) +{ + uint32_t *pc = (uint32_t *) prev; + + if (pc == NULL) + return addr; + + /* For MIPS16 or microMIPS frame libgcc makes no adjustment. */ + if ((uintptr_t) pc & 0x3) + return addr; + + /* The vDSO containes either + + 24021061 li v0, 0x1061 (rt_sigreturn) + 0000000c syscall + or + 24021017 li v0, 0x1017 (sigreturn) + 0000000c syscall */ + if (pc[1] != 0x0000000c) + return addr; +#if _MIPS_SIM == _ABIO32 + if (pc[0] == (0x24020000 | __NR_sigreturn)) + return (void *) ((uintptr_t) addr - 2); +#endif + if (pc[0] == (0x24020000 | __NR_rt_sigreturn)) + return (void *) ((uintptr_t) addr - 2); + + return addr; +} + +#endif diff --git a/sysdeps/unix/sysv/linux/msgctl.c b/sysdeps/unix/sysv/linux/msgctl.c index 27879e76c..fd46aec1a 100644 --- a/sysdeps/unix/sysv/linux/msgctl.c +++ b/sysdeps/unix/sysv/linux/msgctl.c @@ -21,6 +21,7 @@ #include #include #include +#include /* For __kernel_mode_t. */ #ifndef DEFAULT_VERSION # ifndef __ASSUME_SYSVIPC_BROKEN_MODE_T @@ -61,7 +62,6 @@ __new_msgctl (int msqid, int cmd, struct msqid_ds *buf) int ret = msgctl_syscall (msqid, cmd, buf); -#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T if (ret >= 0) { switch (cmd) @@ -69,10 +69,16 @@ __new_msgctl (int msqid, int cmd, struct msqid_ds *buf) case IPC_STAT: case MSG_STAT: case MSG_STAT_ANY: +#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T buf->msg_perm.mode >>= 16; +#else + /* Old Linux kernel versions might not clear the mode padding. */ + if (sizeof ((struct msqid_ds){0}.msg_perm.mode) + != sizeof (__kernel_mode_t)) + buf->msg_perm.mode &= 0xFFFF; +#endif } } -#endif return ret; } diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h b/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h index 725dfafde..ffc150851 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h @@ -134,47 +134,47 @@ # define LOADARGS_0(name, dummy) \ r0 = name # define LOADARGS_1(name, __arg1) \ - long int arg1 = (long int) (__arg1); \ + long int _arg1 = (long int) (__arg1); \ LOADARGS_0(name, 0); \ extern void __illegally_sized_syscall_arg1 (void); \ if (__builtin_classify_type (__arg1) != 5 && sizeof (__arg1) > 4) \ __illegally_sized_syscall_arg1 (); \ - r3 = arg1 + r3 = _arg1 # define LOADARGS_2(name, __arg1, __arg2) \ - long int arg2 = (long int) (__arg2); \ + long int _arg2 = (long int) (__arg2); \ LOADARGS_1(name, __arg1); \ extern void __illegally_sized_syscall_arg2 (void); \ if (__builtin_classify_type (__arg2) != 5 && sizeof (__arg2) > 4) \ __illegally_sized_syscall_arg2 (); \ - r4 = arg2 + r4 = _arg2 # define LOADARGS_3(name, __arg1, __arg2, __arg3) \ - long int arg3 = (long int) (__arg3); \ + long int _arg3 = (long int) (__arg3); \ LOADARGS_2(name, __arg1, __arg2); \ extern void __illegally_sized_syscall_arg3 (void); \ if (__builtin_classify_type (__arg3) != 5 && sizeof (__arg3) > 4) \ __illegally_sized_syscall_arg3 (); \ - r5 = arg3 + r5 = _arg3 # define LOADARGS_4(name, __arg1, __arg2, __arg3, __arg4) \ - long int arg4 = (long int) (__arg4); \ + long int _arg4 = (long int) (__arg4); \ LOADARGS_3(name, __arg1, __arg2, __arg3); \ extern void __illegally_sized_syscall_arg4 (void); \ if (__builtin_classify_type (__arg4) != 5 && sizeof (__arg4) > 4) \ __illegally_sized_syscall_arg4 (); \ - r6 = arg4 + r6 = _arg4 # define LOADARGS_5(name, __arg1, __arg2, __arg3, __arg4, __arg5) \ - long int arg5 = (long int) (__arg5); \ + long int _arg5 = (long int) (__arg5); \ LOADARGS_4(name, __arg1, __arg2, __arg3, __arg4); \ extern void __illegally_sized_syscall_arg5 (void); \ if (__builtin_classify_type (__arg5) != 5 && sizeof (__arg5) > 4) \ __illegally_sized_syscall_arg5 (); \ - r7 = arg5 + r7 = _arg5 # define LOADARGS_6(name, __arg1, __arg2, __arg3, __arg4, __arg5, __arg6) \ - long int arg6 = (long int) (__arg6); \ + long int _arg6 = (long int) (__arg6); \ LOADARGS_5(name, __arg1, __arg2, __arg3, __arg4, __arg5); \ extern void __illegally_sized_syscall_arg6 (void); \ if (__builtin_classify_type (__arg6) != 5 && sizeof (__arg6) > 4) \ __illegally_sized_syscall_arg6 (); \ - r8 = arg6 + r8 = _arg6 # define ASM_INPUT_0 "0" (r0) # define ASM_INPUT_1 ASM_INPUT_0, "1" (r3) diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h b/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h index ee7f43653..8a3f1c43e 100644 --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h @@ -139,47 +139,47 @@ #define LOADARGS_0(name, dummy) \ r0 = name #define LOADARGS_1(name, __arg1) \ - long int arg1 = (long int) (__arg1); \ + long int _arg1 = (long int) (__arg1); \ LOADARGS_0(name, 0); \ extern void __illegally_sized_syscall_arg1 (void); \ if (__builtin_classify_type (__arg1) != 5 && sizeof (__arg1) > 8) \ __illegally_sized_syscall_arg1 (); \ - r3 = arg1 + r3 = _arg1 #define LOADARGS_2(name, __arg1, __arg2) \ - long int arg2 = (long int) (__arg2); \ + long int _arg2 = (long int) (__arg2); \ LOADARGS_1(name, __arg1); \ extern void __illegally_sized_syscall_arg2 (void); \ if (__builtin_classify_type (__arg2) != 5 && sizeof (__arg2) > 8) \ __illegally_sized_syscall_arg2 (); \ - r4 = arg2 + r4 = _arg2 #define LOADARGS_3(name, __arg1, __arg2, __arg3) \ - long int arg3 = (long int) (__arg3); \ + long int _arg3 = (long int) (__arg3); \ LOADARGS_2(name, __arg1, __arg2); \ extern void __illegally_sized_syscall_arg3 (void); \ if (__builtin_classify_type (__arg3) != 5 && sizeof (__arg3) > 8) \ __illegally_sized_syscall_arg3 (); \ - r5 = arg3 + r5 = _arg3 #define LOADARGS_4(name, __arg1, __arg2, __arg3, __arg4) \ - long int arg4 = (long int) (__arg4); \ + long int _arg4 = (long int) (__arg4); \ LOADARGS_3(name, __arg1, __arg2, __arg3); \ extern void __illegally_sized_syscall_arg4 (void); \ if (__builtin_classify_type (__arg4) != 5 && sizeof (__arg4) > 8) \ __illegally_sized_syscall_arg4 (); \ - r6 = arg4 + r6 = _arg4 #define LOADARGS_5(name, __arg1, __arg2, __arg3, __arg4, __arg5) \ - long int arg5 = (long int) (__arg5); \ + long int _arg5 = (long int) (__arg5); \ LOADARGS_4(name, __arg1, __arg2, __arg3, __arg4); \ extern void __illegally_sized_syscall_arg5 (void); \ if (__builtin_classify_type (__arg5) != 5 && sizeof (__arg5) > 8) \ __illegally_sized_syscall_arg5 (); \ - r7 = arg5 + r7 = _arg5 #define LOADARGS_6(name, __arg1, __arg2, __arg3, __arg4, __arg5, __arg6) \ - long int arg6 = (long int) (__arg6); \ + long int _arg6 = (long int) (__arg6); \ LOADARGS_5(name, __arg1, __arg2, __arg3, __arg4, __arg5); \ extern void __illegally_sized_syscall_arg6 (void); \ if (__builtin_classify_type (__arg6) != 5 && sizeof (__arg6) > 8) \ __illegally_sized_syscall_arg6 (); \ - r8 = arg6 + r8 = _arg6 #define ASM_INPUT_0 "0" (r0) #define ASM_INPUT_1 ASM_INPUT_0, "1" (r3) diff --git a/sysdeps/unix/sysv/linux/prctl.c b/sysdeps/unix/sysv/linux/prctl.c new file mode 100644 index 000000000..d5725f14c --- /dev/null +++ b/sysdeps/unix/sysv/linux/prctl.c @@ -0,0 +1,42 @@ +/* prctl - Linux specific syscall. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include + +/* Unconditionally read all potential arguments. This may pass + garbage values to the kernel, but avoids the need for teaching + glibc the argument counts of individual options (including ones + that are added to the kernel in the future). */ + +int +__prctl (int option, ...) +{ + va_list arg; + va_start (arg, option); + unsigned long int arg2 = va_arg (arg, unsigned long int); + unsigned long int arg3 = va_arg (arg, unsigned long int); + unsigned long int arg4 = va_arg (arg, unsigned long int); + unsigned long int arg5 = va_arg (arg, unsigned long int); + va_end (arg); + return INLINE_SYSCALL_CALL (prctl, option, arg2, arg3, arg4, arg5); +} + +libc_hidden_def (__prctl) +weak_alias (__prctl, prctl) diff --git a/sysdeps/unix/sysv/linux/process_vm_readv.c b/sysdeps/unix/sysv/linux/process_vm_readv.c new file mode 100644 index 000000000..e1377f7e5 --- /dev/null +++ b/sysdeps/unix/sysv/linux/process_vm_readv.c @@ -0,0 +1,32 @@ +/* process_vm_readv - Linux specific syscall. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +ssize_t +process_vm_readv (pid_t pid, const struct iovec *local_iov, + unsigned long int liovcnt, + const struct iovec *remote_iov, + unsigned long int riovcnt, unsigned long int flags) +{ + return INLINE_SYSCALL_CALL (process_vm_readv, pid, local_iov, + liovcnt, remote_iov, riovcnt, flags); +} diff --git a/sysdeps/unix/sysv/linux/process_vm_writev.c b/sysdeps/unix/sysv/linux/process_vm_writev.c new file mode 100644 index 000000000..944ab9b7f --- /dev/null +++ b/sysdeps/unix/sysv/linux/process_vm_writev.c @@ -0,0 +1,32 @@ +/* process_vm_writev - Linux specific syscall. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include + +ssize_t +process_vm_writev (pid_t pid, const struct iovec *local_iov, + unsigned long int liovcnt, + const struct iovec *remote_iov, + unsigned long int riovcnt, unsigned long int flags) +{ + return INLINE_SYSCALL_CALL (process_vm_writev, pid, local_iov, + liovcnt, remote_iov, riovcnt, flags); +} diff --git a/sysdeps/unix/sysv/linux/ptsname.c b/sysdeps/unix/sysv/linux/ptsname.c index 81d9d26f1..3e9be3f0d 100644 --- a/sysdeps/unix/sysv/linux/ptsname.c +++ b/sysdeps/unix/sysv/linux/ptsname.c @@ -21,39 +21,14 @@ #include #include #include -#include -#include #include #include #include <_itoa.h> -/* Check if DEV corresponds to a master pseudo terminal device. */ -#define MASTER_P(Dev) \ - (__gnu_dev_major ((Dev)) == 2 \ - || (__gnu_dev_major ((Dev)) == 4 \ - && __gnu_dev_minor ((Dev)) >= 128 && __gnu_dev_minor ((Dev)) < 192) \ - || (__gnu_dev_major ((Dev)) >= 128 && __gnu_dev_major ((Dev)) < 136)) - -/* Check if DEV corresponds to a slave pseudo terminal device. */ -#define SLAVE_P(Dev) \ - (__gnu_dev_major ((Dev)) == 3 \ - || (__gnu_dev_major ((Dev)) == 4 \ - && __gnu_dev_minor ((Dev)) >= 192 && __gnu_dev_minor ((Dev)) < 256) \ - || (__gnu_dev_major ((Dev)) >= 136 && __gnu_dev_major ((Dev)) < 144)) - -/* Note that major number 4 corresponds to the old BSD style pseudo - terminal devices. As of Linux 2.1.115 these are no longer - supported. They have been replaced by major numbers 2 (masters) - and 3 (slaves). */ - /* Directory where we can find the slave pty nodes. */ #define _PATH_DEVPTS "/dev/pts/" -/* The are declared in getpt.c. */ -extern const char __libc_ptyname1[] attribute_hidden; -extern const char __libc_ptyname2[] attribute_hidden; - /* Static buffer for `ptsname'. */ static char buffer[sizeof (_PATH_DEVPTS) + 20]; @@ -68,19 +43,15 @@ ptsname (int fd) } +/* Store at most BUFLEN characters of the pathname of the slave pseudo + terminal associated with the master FD is open on in BUF. + Return 0 on success, otherwise an error number. */ int -__ptsname_internal (int fd, char *buf, size_t buflen, struct stat64 *stp) +__ptsname_r (int fd, char *buf, size_t buflen) { int save_errno = errno; unsigned int ptyno; - if (!__isatty (fd)) - { - __set_errno (ENOTTY); - return ENOTTY; - } - -#ifdef TIOCGPTN if (__ioctl (fd, TIOCGPTN, &ptyno) == 0) { /* Buffer we use to print the number in. For a maximum size for @@ -101,67 +72,11 @@ __ptsname_internal (int fd, char *buf, size_t buflen, struct stat64 *stp) memcpy (__stpcpy (buf, devpts), p, &numbuf[sizeof (numbuf)] - p); } - else if (errno != EINVAL) - return errno; else -#endif - { - char *p; - - if (buflen < strlen (_PATH_TTY) + 3) - { - __set_errno (ERANGE); - return ERANGE; - } - - if (__fxstat64 (_STAT_VER, fd, stp) < 0) - return errno; - - /* Check if FD really is a master pseudo terminal. */ - if (! MASTER_P (stp->st_rdev)) - { - __set_errno (ENOTTY); - return ENOTTY; - } - - ptyno = __gnu_dev_minor (stp->st_rdev); - - if (ptyno / 16 >= strlen (__libc_ptyname1)) - { - __set_errno (ENOTTY); - return ENOTTY; - } - - p = __stpcpy (buf, _PATH_TTY); - p[0] = __libc_ptyname1[ptyno / 16]; - p[1] = __libc_ptyname2[ptyno % 16]; - p[2] = '\0'; - } - - if (__xstat64 (_STAT_VER, buf, stp) < 0) + /* Bad file descriptor, or not a ptmx descriptor. */ return errno; - /* Check if the name we're about to return really corresponds to a - slave pseudo terminal. */ - if (! S_ISCHR (stp->st_mode) || ! SLAVE_P (stp->st_rdev)) - { - /* This really is a configuration problem. */ - __set_errno (ENOTTY); - return ENOTTY; - } - __set_errno (save_errno); return 0; } - - -/* Store at most BUFLEN characters of the pathname of the slave pseudo - terminal associated with the master FD is open on in BUF. - Return 0 on success, otherwise an error number. */ -int -__ptsname_r (int fd, char *buf, size_t buflen) -{ - struct stat64 st; - return __ptsname_internal (fd, buf, buflen, &st); -} weak_alias (__ptsname_r, ptsname_r) diff --git a/sysdeps/unix/sysv/linux/riscv/sysdep.h b/sysdeps/unix/sysv/linux/riscv/sysdep.h index 201bf9a91..2bd9b16f3 100644 --- a/sysdeps/unix/sysv/linux/riscv/sysdep.h +++ b/sysdeps/unix/sysv/linux/riscv/sysdep.h @@ -176,10 +176,11 @@ # define internal_syscall1(number, err, arg0) \ ({ \ long int _sys_result; \ + long int _arg0 = (long int) (arg0); \ \ { \ register long int __a7 asm ("a7") = number; \ - register long int __a0 asm ("a0") = (long int) (arg0); \ + register long int __a0 asm ("a0") = _arg0; \ __asm__ volatile ( \ "scall\n\t" \ : "+r" (__a0) \ @@ -193,11 +194,13 @@ # define internal_syscall2(number, err, arg0, arg1) \ ({ \ long int _sys_result; \ + long int _arg0 = (long int) (arg0); \ + long int _arg1 = (long int) (arg1); \ \ { \ register long int __a7 asm ("a7") = number; \ - register long int __a0 asm ("a0") = (long int) (arg0); \ - register long int __a1 asm ("a1") = (long int) (arg1); \ + register long int __a0 asm ("a0") = _arg0; \ + register long int __a1 asm ("a1") = _arg1; \ __asm__ volatile ( \ "scall\n\t" \ : "+r" (__a0) \ @@ -211,12 +214,15 @@ # define internal_syscall3(number, err, arg0, arg1, arg2) \ ({ \ long int _sys_result; \ + long int _arg0 = (long int) (arg0); \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ \ { \ register long int __a7 asm ("a7") = number; \ - register long int __a0 asm ("a0") = (long int) (arg0); \ - register long int __a1 asm ("a1") = (long int) (arg1); \ - register long int __a2 asm ("a2") = (long int) (arg2); \ + register long int __a0 asm ("a0") = _arg0; \ + register long int __a1 asm ("a1") = _arg1; \ + register long int __a2 asm ("a2") = _arg2; \ __asm__ volatile ( \ "scall\n\t" \ : "+r" (__a0) \ @@ -230,13 +236,17 @@ # define internal_syscall4(number, err, arg0, arg1, arg2, arg3) \ ({ \ long int _sys_result; \ + long int _arg0 = (long int) (arg0); \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + long int _arg3 = (long int) (arg3); \ \ { \ register long int __a7 asm ("a7") = number; \ - register long int __a0 asm ("a0") = (long int) (arg0); \ - register long int __a1 asm ("a1") = (long int) (arg1); \ - register long int __a2 asm ("a2") = (long int) (arg2); \ - register long int __a3 asm ("a3") = (long int) (arg3); \ + register long int __a0 asm ("a0") = _arg0; \ + register long int __a1 asm ("a1") = _arg1; \ + register long int __a2 asm ("a2") = _arg2; \ + register long int __a3 asm ("a3") = _arg3; \ __asm__ volatile ( \ "scall\n\t" \ : "+r" (__a0) \ @@ -250,14 +260,19 @@ # define internal_syscall5(number, err, arg0, arg1, arg2, arg3, arg4) \ ({ \ long int _sys_result; \ + long int _arg0 = (long int) (arg0); \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + long int _arg3 = (long int) (arg3); \ + long int _arg4 = (long int) (arg4); \ \ { \ register long int __a7 asm ("a7") = number; \ - register long int __a0 asm ("a0") = (long int) (arg0); \ - register long int __a1 asm ("a1") = (long int) (arg1); \ - register long int __a2 asm ("a2") = (long int) (arg2); \ - register long int __a3 asm ("a3") = (long int) (arg3); \ - register long int __a4 asm ("a4") = (long int) (arg4); \ + register long int __a0 asm ("a0") = _arg0; \ + register long int __a1 asm ("a1") = _arg1; \ + register long int __a2 asm ("a2") = _arg2; \ + register long int __a3 asm ("a3") = _arg3; \ + register long int __a4 asm ("a4") = _arg4; \ __asm__ volatile ( \ "scall\n\t" \ : "+r" (__a0) \ @@ -271,15 +286,21 @@ # define internal_syscall6(number, err, arg0, arg1, arg2, arg3, arg4, arg5) \ ({ \ long int _sys_result; \ + long int _arg0 = (long int) (arg0); \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + long int _arg3 = (long int) (arg3); \ + long int _arg4 = (long int) (arg4); \ + long int _arg5 = (long int) (arg5); \ \ { \ register long int __a7 asm ("a7") = number; \ - register long int __a0 asm ("a0") = (long int) (arg0); \ - register long int __a1 asm ("a1") = (long int) (arg1); \ - register long int __a2 asm ("a2") = (long int) (arg2); \ - register long int __a3 asm ("a3") = (long int) (arg3); \ - register long int __a4 asm ("a4") = (long int) (arg4); \ - register long int __a5 asm ("a5") = (long int) (arg5); \ + register long int __a0 asm ("a0") = _arg0; \ + register long int __a1 asm ("a1") = _arg1; \ + register long int __a2 asm ("a2") = _arg2; \ + register long int __a3 asm ("a3") = _arg3; \ + register long int __a4 asm ("a4") = _arg4; \ + register long int __a5 asm ("a5") = _arg5; \ __asm__ volatile ( \ "scall\n\t" \ : "+r" (__a0) \ @@ -294,16 +315,23 @@ # define internal_syscall7(number, err, arg0, arg1, arg2, arg3, arg4, arg5, arg6) \ ({ \ long int _sys_result; \ + long int _arg0 = (long int) (arg0); \ + long int _arg1 = (long int) (arg1); \ + long int _arg2 = (long int) (arg2); \ + long int _arg3 = (long int) (arg3); \ + long int _arg4 = (long int) (arg4); \ + long int _arg5 = (long int) (arg5); \ + long int _arg6 = (long int) (arg6); \ \ { \ register long int __a7 asm ("a7") = number; \ - register long int __a0 asm ("a0") = (long int) (arg0); \ - register long int __a1 asm ("a1") = (long int) (arg1); \ - register long int __a2 asm ("a2") = (long int) (arg2); \ - register long int __a3 asm ("a3") = (long int) (arg3); \ - register long int __a4 asm ("a4") = (long int) (arg4); \ - register long int __a5 asm ("a5") = (long int) (arg5); \ - register long int __a6 asm ("a6") = (long int) (arg6); \ + register long int __a0 asm ("a0") = _arg0; \ + register long int __a1 asm ("a1") = _arg1; \ + register long int __a2 asm ("a2") = _arg2; \ + register long int __a3 asm ("a3") = _arg3; \ + register long int __a4 asm ("a4") = _arg4; \ + register long int __a5 asm ("a5") = _arg5; \ + register long int __a6 asm ("a6") = _arg6; \ __asm__ volatile ( \ "scall\n\t" \ : "+r" (__a0) \ diff --git a/sysdeps/unix/sysv/linux/semctl.c b/sysdeps/unix/sysv/linux/semctl.c index 0c3eb0932..30571af49 100644 --- a/sysdeps/unix/sysv/linux/semctl.c +++ b/sysdeps/unix/sysv/linux/semctl.c @@ -22,6 +22,7 @@ #include #include #include +#include /* For __kernel_mode_t. */ /* Define a `union semun' suitable for Linux here. */ union semun @@ -92,7 +93,6 @@ __new_semctl (int semid, int semnum, int cmd, ...) int ret = semctl_syscall (semid, semnum, cmd, arg); -#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T if (ret >= 0) { switch (cmd) @@ -100,10 +100,16 @@ __new_semctl (int semid, int semnum, int cmd, ...) case IPC_STAT: case SEM_STAT: case SEM_STAT_ANY: +#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T arg.buf->sem_perm.mode >>= 16; +#else + /* Old Linux kernel versions might not clear the mode padding. */ + if (sizeof ((struct semid_ds){0}.sem_perm.mode) + != sizeof (__kernel_mode_t)) + arg.buf->sem_perm.mode &= 0xFFFF; +#endif } } -#endif return ret; } diff --git a/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies b/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies new file mode 100644 index 000000000..7eeaf15a5 --- /dev/null +++ b/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies @@ -0,0 +1 @@ +unix/sysv/linux/sh/sh4/fpu diff --git a/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies b/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies new file mode 100644 index 000000000..7eeaf15a5 --- /dev/null +++ b/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies @@ -0,0 +1 @@ +unix/sysv/linux/sh/sh4/fpu diff --git a/sysdeps/unix/sysv/linux/shmctl.c b/sysdeps/unix/sysv/linux/shmctl.c index 39fa861e1..f41b359b8 100644 --- a/sysdeps/unix/sysv/linux/shmctl.c +++ b/sysdeps/unix/sysv/linux/shmctl.c @@ -22,6 +22,7 @@ #include #include #include +#include /* For __kernel_mode_t. */ #ifndef DEFAULT_VERSION # ifndef __ASSUME_SYSVIPC_BROKEN_MODE_T @@ -63,7 +64,6 @@ __new_shmctl (int shmid, int cmd, struct shmid_ds *buf) int ret = shmctl_syscall (shmid, cmd, buf); -#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T if (ret >= 0) { switch (cmd) @@ -71,10 +71,16 @@ __new_shmctl (int shmid, int cmd, struct shmid_ds *buf) case IPC_STAT: case SHM_STAT: case SHM_STAT_ANY: +#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T buf->shm_perm.mode >>= 16; +#else + /* Old Linux kernel versions might not clear the mode padding. */ + if (sizeof ((struct shmid_ds){0}.shm_perm.mode) + != sizeof (__kernel_mode_t)) + buf->shm_perm.mode &= 0xFFFF; +#endif } } -#endif return ret; } diff --git a/sysdeps/unix/sysv/linux/sparc/Makefile b/sysdeps/unix/sysv/linux/sparc/Makefile index b0d182a43..147503967 100644 --- a/sysdeps/unix/sysv/linux/sparc/Makefile +++ b/sysdeps/unix/sysv/linux/sparc/Makefile @@ -11,8 +11,12 @@ ifeq ($(subdir),sysvipc) sysdep_routines += getshmlba endif +ifeq ($(subdir),signal) +sysdep_routines += sigreturn_stub +endif + ifeq ($(subdir),nptl) # pull in __syscall_error routine -libpthread-routines += sysdep -libpthread-shared-only-routines += sysdep +libpthread-routines += sysdep sigreturn_stub +libpthread-shared-only-routines += sysdep sigreturn_stub endif diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/sigaction.c b/sysdeps/unix/sysv/linux/sparc/sparc32/sigaction.c index 6b2f66422..938aa7aa8 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc32/sigaction.c +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/sigaction.c @@ -24,8 +24,8 @@ #include #include -static void __rt_sigreturn_stub (void); -static void __sigreturn_stub (void); +void __rt_sigreturn_stub (void); +void __sigreturn_stub (void); #define STUB(act, sigsetsize) \ (act) ? ((unsigned long)((act->sa_flags & SA_SIGINFO) \ @@ -35,25 +35,3 @@ static void __sigreturn_stub (void); (sigsetsize) #include - -static -inhibit_stack_protector -void -__rt_sigreturn_stub (void) -{ - __asm__ ("mov %0, %%g1\n\t" - "ta 0x10\n\t" - : /* no outputs */ - : "i" (__NR_rt_sigreturn)); -} - -static -inhibit_stack_protector -void -__sigreturn_stub (void) -{ - __asm__ ("mov %0, %%g1\n\t" - "ta 0x10\n\t" - : /* no outputs */ - : "i" (__NR_sigreturn)); -} diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/sigreturn_stub.S b/sysdeps/unix/sysv/linux/sparc/sparc32/sigreturn_stub.S new file mode 100644 index 000000000..727cc9473 --- /dev/null +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/sigreturn_stub.S @@ -0,0 +1,34 @@ +/* Sigreturn stub function used on sa_restore field. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +/* These functions must not change the register window or the stack + pointer [1]. + + [1] https://lkml.org/lkml/2016/5/27/465 */ + +ENTRY (__rt_sigreturn_stub) + mov __NR_rt_sigreturn, %g1 + ta 0x10 +END (__rt_sigreturn_stub) + +ENTRY (__sigreturn_stub) + mov __NR_sigreturn, %g1 + ta 0x10 +END (__sigreturn_stub) diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/sigaction.c b/sysdeps/unix/sysv/linux/sparc/sparc64/sigaction.c index 9c0dc2a63..4e2617232 100644 --- a/sysdeps/unix/sysv/linux/sparc/sparc64/sigaction.c +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/sigaction.c @@ -22,21 +22,11 @@ #include #include -static void __rt_sigreturn_stub (void); +/* Defined on sigreturn_stub.S. */ +void __rt_sigreturn_stub (void); #define STUB(act, sigsetsize) \ (((unsigned long) &__rt_sigreturn_stub) - 8), \ (sigsetsize) #include - -static -inhibit_stack_protector -void -__rt_sigreturn_stub (void) -{ - __asm__ ("mov %0, %%g1\n\t" - "ta 0x6d\n\t" - : /* no outputs */ - : "i" (__NR_rt_sigreturn)); -} diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/sigreturn_stub.S b/sysdeps/unix/sysv/linux/sparc/sparc64/sigreturn_stub.S new file mode 100644 index 000000000..add476683 --- /dev/null +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/sigreturn_stub.S @@ -0,0 +1,29 @@ +/* Sigreturn stub function used on sa_restore field. + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +/* This function must not change the register window or the stack + pointer [1]. + + [1] https://lkml.org/lkml/2016/5/27/465 */ + +ENTRY (__rt_sigreturn_stub) + mov __NR_rt_sigreturn, %g1 + ta 0x6d +END (__rt_sigreturn_stub) diff --git a/sysdeps/unix/sysv/linux/syscall-names.list b/sysdeps/unix/sysv/linux/syscall-names.list index 36e087d8f..3d8981400 100644 --- a/sysdeps/unix/sysv/linux/syscall-names.list +++ b/sysdeps/unix/sysv/linux/syscall-names.list @@ -21,8 +21,8 @@ # This file can list all potential system calls. The names are only # used if the installed kernel headers also provide them. -# The list of system calls is current as of Linux 5.4. -kernel 5.4 +# The list of system calls is current as of Linux 5.5. +kernel 5.5 FAST_atomic_update FAST_cmpxchg diff --git a/sysdeps/unix/sysv/linux/syscalls.list b/sysdeps/unix/sysv/linux/syscalls.list index 5f1352ad4..52e6dafc8 100644 --- a/sysdeps/unix/sysv/linux/syscalls.list +++ b/sysdeps/unix/sysv/linux/syscalls.list @@ -28,25 +28,24 @@ inotify_add_watch EXTRA inotify_add_watch i:isi inotify_add_watch inotify_init EXTRA inotify_init i: inotify_init inotify_init1 EXTRA inotify_init1 i:I inotify_init1 inotify_rm_watch EXTRA inotify_rm_watch i:ii inotify_rm_watch -ioperm - ioperm i:iii ioperm +ioperm - ioperm i:UUi ioperm iopl - iopl i:i iopl klogctl EXTRA syslog i:isi klogctl lchown - lchown i:sii __lchown lchown -mincore - mincore i:anV mincore -mlock - mlock i:bn mlock +mincore - mincore i:aUV mincore +mlock - mlock i:bU mlock mlockall - mlockall i:i mlockall -mount EXTRA mount i:sssip __mount mount -mremap EXTRA mremap b:ainip __mremap mremap -munlock - munlock i:ai munlock +mount EXTRA mount i:sssUp __mount mount +mremap EXTRA mremap b:aUUip __mremap mremap +munlock - munlock i:aU munlock munlockall - munlockall i: munlockall nfsservctl EXTRA nfsservctl i:ipp __compat_nfsservctl nfsservctl@GLIBC_2.0:GLIBC_2.28 pipe - pipe i:f __pipe pipe pipe2 - pipe2 i:fi __pipe2 pipe2 pivot_root EXTRA pivot_root i:ss pivot_root -prctl EXTRA prctl i:iiiii __prctl prctl query_module EXTRA query_module i:sipip __compat_query_module query_module@GLIBC_2.0:GLIBC_2.23 quotactl EXTRA quotactl i:isip quotactl -remap_file_pages - remap_file_pages i:piiii __remap_file_pages remap_file_pages +remap_file_pages - remap_file_pages i:pUiUi __remap_file_pages remap_file_pages sched_getp - sched_getparam i:ip __sched_getparam sched_getparam sched_gets - sched_getscheduler i:i __sched_getscheduler sched_getscheduler sched_primax - sched_get_priority_max i:i __sched_get_priority_max sched_get_priority_max @@ -55,8 +54,8 @@ sched_rr_gi - sched_rr_get_interval i:ip __sched_rr_get_interval sched_rr_get_in sched_setp - sched_setparam i:ip __sched_setparam sched_setparam sched_sets - sched_setscheduler i:iip __sched_setscheduler sched_setscheduler sched_yield - sched_yield i: __sched_yield sched_yield -sendfile - sendfile i:iipi sendfile -sendfile64 - sendfile64 i:iipi sendfile64 +sendfile - sendfile i:iipU sendfile +sendfile64 - sendfile64 i:iipU sendfile64 setfsgid EXTRA setfsgid i:i setfsgid setfsuid EXTRA setfsuid i:i setfsuid setpgid - setpgid i:ii __setpgid setpgid @@ -73,19 +72,19 @@ chown - chown i:sii __libc_chown __chown chown fchownat - fchownat i:isiii fchownat linkat - linkat i:isisi linkat mkdirat - mkdirat i:isi mkdirat -readlinkat - readlinkat i:issi readlinkat +readlinkat - readlinkat i:issU readlinkat symlinkat - symlinkat i:sis symlinkat unlinkat - unlinkat i:isi unlinkat -setxattr - setxattr i:sspii setxattr -lsetxattr - lsetxattr i:sspii lsetxattr -fsetxattr - fsetxattr i:ispii fsetxattr -getxattr - getxattr i:sspi getxattr -lgetxattr - lgetxattr i:sspi lgetxattr -fgetxattr - fgetxattr i:ispi fgetxattr -listxattr - listxattr i:ssi listxattr -llistxattr - llistxattr i:ssi llistxattr -flistxattr - flistxattr i:isi flistxattr +setxattr - setxattr i:sspUi setxattr +lsetxattr - lsetxattr i:sspUi lsetxattr +fsetxattr - fsetxattr i:ispUi fsetxattr +getxattr - getxattr i:sspU getxattr +lgetxattr - lgetxattr i:sspU lgetxattr +fgetxattr - fgetxattr i:ispU fgetxattr +listxattr - listxattr i:ssU listxattr +llistxattr - llistxattr i:ssU llistxattr +flistxattr - flistxattr i:isU flistxattr removexattr - removexattr i:ss removexattr lremovexattr - lremovexattr i:ss lremovexattr fremovexattr - fremovexattr i:is fremovexattr @@ -102,8 +101,6 @@ name_to_handle_at EXTRA name_to_handle_at i:isppi name_to_handle_at setns EXTRA setns i:ii setns -process_vm_readv EXTRA process_vm_readv i:ipipii process_vm_readv -process_vm_writev EXTRA process_vm_writev i:ipipii process_vm_writev memfd_create EXTRA memfd_create i:si memfd_create pkey_alloc EXTRA pkey_alloc i:ii pkey_alloc pkey_free EXTRA pkey_free i:i pkey_free diff --git a/sysdeps/unix/sysv/linux/tst-getcwd-smallbuff.c b/sysdeps/unix/sysv/linux/tst-getcwd-smallbuff.c new file mode 100644 index 000000000..55362f606 --- /dev/null +++ b/sysdeps/unix/sysv/linux/tst-getcwd-smallbuff.c @@ -0,0 +1,259 @@ +/* Verify that getcwd returns ERANGE for size 1 byte and does not underflow + buffer when the CWD is too long and is also a mount target of /. See bug + #28769 or CVE-2021-3999 for more context. + Copyright The GNU Toolchain Authors. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +static char *base; +#define BASENAME "tst-getcwd-smallbuff" +#define MOUNT_NAME "mpoint" +static int sockfd[2]; + +static void +do_cleanup (void) +{ + support_chdir_toolong_temp_directory (base); + TEST_VERIFY_EXIT (rmdir (MOUNT_NAME) == 0); + free (base); +} + +static void +send_fd (const int sock, const int fd) +{ + struct msghdr msg = {0}; + union + { + struct cmsghdr hdr; + char buf[CMSG_SPACE (sizeof (int))]; + } cmsgbuf = {0}; + struct cmsghdr *cmsg; + struct iovec vec; + char ch = 'A'; + ssize_t n; + + msg.msg_control = &cmsgbuf.buf; + msg.msg_controllen = sizeof (cmsgbuf.buf); + + cmsg = CMSG_FIRSTHDR (&msg); + cmsg->cmsg_len = CMSG_LEN (sizeof (int)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy (CMSG_DATA (cmsg), &fd, sizeof (fd)); + + vec.iov_base = &ch; + vec.iov_len = 1; + msg.msg_iov = &vec; + msg.msg_iovlen = 1; + + while ((n = sendmsg (sock, &msg, 0)) == -1 && errno == EINTR); + + TEST_VERIFY_EXIT (n == 1); +} + +static int +recv_fd (const int sock) +{ + struct msghdr msg = {0}; + union + { + struct cmsghdr hdr; + char buf[CMSG_SPACE(sizeof(int))]; + } cmsgbuf = {0}; + struct cmsghdr *cmsg; + struct iovec vec; + ssize_t n; + char ch = '\0'; + int fd = -1; + + vec.iov_base = &ch; + vec.iov_len = 1; + msg.msg_iov = &vec; + msg.msg_iovlen = 1; + + msg.msg_control = &cmsgbuf.buf; + msg.msg_controllen = sizeof (cmsgbuf.buf); + + while ((n = recvmsg (sock, &msg, 0)) == -1 && errno == EINTR); + if (n != 1 || ch != 'A') + return -1; + + cmsg = CMSG_FIRSTHDR (&msg); + if (cmsg == NULL) + return -1; + if (cmsg->cmsg_type != SCM_RIGHTS) + return -1; + memcpy (&fd, CMSG_DATA (cmsg), sizeof (fd)); + if (fd < 0) + return -1; + return fd; +} + +static int +child_func (void * const arg) +{ + xclose (sockfd[0]); + const int sock = sockfd[1]; + char ch; + + TEST_VERIFY_EXIT (read (sock, &ch, 1) == 1); + TEST_VERIFY_EXIT (ch == '1'); + + if (mount ("/", MOUNT_NAME, NULL, MS_BIND | MS_REC, NULL)) + FAIL_EXIT1 ("mount failed: %m\n"); + const int fd = xopen ("mpoint", + O_RDONLY | O_PATH | O_DIRECTORY | O_NOFOLLOW, 0); + + send_fd (sock, fd); + xclose (fd); + + TEST_VERIFY_EXIT (read (sock, &ch, 1) == 1); + TEST_VERIFY_EXIT (ch == 'a'); + + xclose (sock); + return 0; +} + +static void +update_map (char * const mapping, const char * const map_file) +{ + const size_t map_len = strlen (mapping); + + const int fd = xopen (map_file, O_WRONLY, 0); + xwrite (fd, mapping, map_len); + xclose (fd); +} + +static void +proc_setgroups_write (const long child_pid, const char * const str) +{ + const size_t str_len = strlen(str); + + char setgroups_path[sizeof ("/proc//setgroups") + INT_STRLEN_BOUND (long)]; + + snprintf (setgroups_path, sizeof (setgroups_path), + "/proc/%ld/setgroups", child_pid); + + const int fd = open (setgroups_path, O_WRONLY); + + if (fd < 0) + { + TEST_VERIFY_EXIT (errno == ENOENT); + FAIL_UNSUPPORTED ("/proc/%ld/setgroups not found\n", child_pid); + } + + xwrite (fd, str, str_len); + xclose(fd); +} + +static char child_stack[1024 * 1024]; + +int +do_test (void) +{ + base = support_create_and_chdir_toolong_temp_directory (BASENAME); + + xmkdir (MOUNT_NAME, S_IRWXU); + atexit (do_cleanup); + + /* Check whether user namespaces are supported. */ + { + pid_t pid = xfork (); + if (pid == 0) + { + if (unshare (CLONE_NEWUSER | CLONE_NEWNS) != 0) + _exit (EXIT_UNSUPPORTED); + else + _exit (0); + } + int status; + xwaitpid (pid, &status, 0); + TEST_VERIFY_EXIT (WIFEXITED (status)); + if (WEXITSTATUS (status) != 0) + return WEXITSTATUS (status); + } + + TEST_VERIFY_EXIT (socketpair (AF_UNIX, SOCK_STREAM, 0, sockfd) == 0); + pid_t child_pid = xclone (child_func, NULL, child_stack, + sizeof (child_stack), + CLONE_NEWUSER | CLONE_NEWNS | SIGCHLD); + + xclose (sockfd[1]); + const int sock = sockfd[0]; + + char map_path[sizeof ("/proc//uid_map") + INT_STRLEN_BOUND (long)]; + char map_buf[sizeof ("0 1") + INT_STRLEN_BOUND (long)]; + + snprintf (map_path, sizeof (map_path), "/proc/%ld/uid_map", + (long) child_pid); + snprintf (map_buf, sizeof (map_buf), "0 %ld 1", (long) getuid()); + update_map (map_buf, map_path); + + proc_setgroups_write ((long) child_pid, "deny"); + snprintf (map_path, sizeof (map_path), "/proc/%ld/gid_map", + (long) child_pid); + snprintf (map_buf, sizeof (map_buf), "0 %ld 1", (long) getgid()); + update_map (map_buf, map_path); + + TEST_VERIFY_EXIT (send (sock, "1", 1, MSG_NOSIGNAL) == 1); + const int fd = recv_fd (sock); + TEST_VERIFY_EXIT (fd >= 0); + TEST_VERIFY_EXIT (fchdir (fd) == 0); + + static char buf[2 * 10 + 1]; + memset (buf, 'A', sizeof (buf)); + + /* Finally, call getcwd and check if it resulted in a buffer underflow. */ + char * cwd = getcwd (buf + sizeof (buf) / 2, 1); + TEST_VERIFY (cwd == NULL); + TEST_VERIFY (errno == ERANGE); + + for (int i = 0; i < sizeof (buf); i++) + if (buf[i] != 'A') + { + printf ("buf[%d] = %02x\n", i, (unsigned int) buf[i]); + support_record_failure (); + } + + TEST_VERIFY_EXIT (send (sock, "a", 1, MSG_NOSIGNAL) == 1); + xclose (sock); + TEST_VERIFY_EXIT (xwaitpid (child_pid, NULL, 0) == child_pid); + + return 0; +} + +#define CLEANUP_HANDLER do_cleanup +#include diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h index c2eb37e57..c7f740a1d 100644 --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h @@ -61,13 +61,31 @@ # define SYSCALL_ERROR_LABEL syscall_error # endif +/* PSEUDO and T_PSEUDO macros have 2 extra arguments for unsigned long + int arguments. */ +# define PSEUDOS_HAVE_ULONG_INDICES 1 + +# ifndef SYSCALL_ULONG_ARG_1 +# define SYSCALL_ULONG_ARG_1 0 +# define SYSCALL_ULONG_ARG_2 0 +# endif + # undef PSEUDO -# define PSEUDO(name, syscall_name, args) \ - .text; \ - ENTRY (name) \ - DO_CALL (syscall_name, args); \ - cmpq $-4095, %rax; \ +# if SYSCALL_ULONG_ARG_1 +# define PSEUDO(name, syscall_name, args, ulong_arg_1, ulong_arg_2) \ + .text; \ + ENTRY (name) \ + DO_CALL (syscall_name, args, ulong_arg_1, ulong_arg_2); \ + cmpq $-4095, %rax; \ jae SYSCALL_ERROR_LABEL +# else +# define PSEUDO(name, syscall_name, args) \ + .text; \ + ENTRY (name) \ + DO_CALL (syscall_name, args, 0, 0); \ + cmpq $-4095, %rax; \ + jae SYSCALL_ERROR_LABEL +# endif # undef PSEUDO_END # define PSEUDO_END(name) \ @@ -75,10 +93,17 @@ END (name) # undef PSEUDO_NOERRNO -# define PSEUDO_NOERRNO(name, syscall_name, args) \ - .text; \ - ENTRY (name) \ - DO_CALL (syscall_name, args) +# if SYSCALL_ULONG_ARG_1 +# define PSEUDO_NOERRNO(name, syscall_name, args, ulong_arg_1, ulong_arg_2) \ + .text; \ + ENTRY (name) \ + DO_CALL (syscall_name, args, ulong_arg_1, ulong_arg_2) +# else +# define PSEUDO_NOERRNO(name, syscall_name, args) \ + .text; \ + ENTRY (name) \ + DO_CALL (syscall_name, args, 0, 0) +# endif # undef PSEUDO_END_NOERRNO # define PSEUDO_END_NOERRNO(name) \ @@ -87,11 +112,19 @@ # define ret_NOERRNO ret # undef PSEUDO_ERRVAL -# define PSEUDO_ERRVAL(name, syscall_name, args) \ - .text; \ - ENTRY (name) \ - DO_CALL (syscall_name, args); \ +# if SYSCALL_ULONG_ARG_1 +# define PSEUDO_ERRVAL(name, syscall_name, args, ulong_arg_1, ulong_arg_2) \ + .text; \ + ENTRY (name) \ + DO_CALL (syscall_name, args, ulong_arg_1, ulong_arg_2); \ + negq %rax +# else +# define PSEUDO_ERRVAL(name, syscall_name, args) \ + .text; \ + ENTRY (name) \ + DO_CALL (syscall_name, args, 0, 0); \ negq %rax +# endif # undef PSEUDO_END_ERRVAL # define PSEUDO_END_ERRVAL(name) \ @@ -163,8 +196,10 @@ Syscalls of more than 6 arguments are not supported. */ # undef DO_CALL -# define DO_CALL(syscall_name, args) \ +# define DO_CALL(syscall_name, args, ulong_arg_1, ulong_arg_2) \ DOARGS_##args \ + ZERO_EXTEND_##ulong_arg_1 \ + ZERO_EXTEND_##ulong_arg_2 \ movl $SYS_ify (syscall_name), %eax; \ syscall; @@ -176,6 +211,14 @@ # define DOARGS_5 DOARGS_4 # define DOARGS_6 DOARGS_5 +# define ZERO_EXTEND_0 /* nothing */ +# define ZERO_EXTEND_1 /* nothing */ +# define ZERO_EXTEND_2 /* nothing */ +# define ZERO_EXTEND_3 /* nothing */ +# define ZERO_EXTEND_4 /* nothing */ +# define ZERO_EXTEND_5 /* nothing */ +# define ZERO_EXTEND_6 /* nothing */ + #else /* !__ASSEMBLER__ */ /* Define a macro which expands inline into the wrapper code for a system call. */ @@ -210,12 +253,15 @@ /* Registers clobbered by syscall. */ # define REGISTERS_CLOBBERED_BY_SYSCALL "cc", "r11", "cx" -/* Create a variable 'name' based on type 'X' to avoid explicit types. - This is mainly used set use 64-bits arguments in x32. */ -#define TYPEFY(X, name) __typeof__ ((X) - (X)) name -/* Explicit cast the argument to avoid integer from pointer warning on - x32. */ -#define ARGIFY(X) ((__typeof__ ((X) - (X))) (X)) +/* NB: This also works when X is an array. For an array X, type of + (X) - (X) is ptrdiff_t, which is signed, since size of ptrdiff_t + == size of pointer, cast is a NOP. */ +#define TYPEFY1(X) __typeof__ ((X) - (X)) +/* Explicit cast the argument. */ +#define ARGIFY(X) ((TYPEFY1 (X)) (X)) +/* Create a variable 'name' based on type of variable 'X' to avoid + explicit types. */ +#define TYPEFY(X, name) __typeof__ (ARGIFY (X)) name #undef INTERNAL_SYSCALL #define INTERNAL_SYSCALL(name, err, nr, args...) \ diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h index 5bf9eed80..62e6f8fe1 100644 --- a/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h +++ b/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h @@ -26,4 +26,39 @@ #undef LO_HI_LONG #define LO_HI_LONG(val) (val) +#ifdef __ASSEMBLER__ +/* Zero-extend 32-bit unsigned long int arguments to 64 bits. */ +# undef ZERO_EXTEND_1 +# define ZERO_EXTEND_1 movl %edi, %edi; +# undef ZERO_EXTEND_2 +# define ZERO_EXTEND_2 movl %esi, %esi; +# undef ZERO_EXTEND_3 +# define ZERO_EXTEND_3 movl %edx, %edx; +# if SYSCALL_ULONG_ARG_1 == 4 || SYSCALL_ULONG_ARG_2 == 4 +# undef DOARGS_4 +# define DOARGS_4 movl %ecx, %r10d; +# else +# undef ZERO_EXTEND_4 +# define ZERO_EXTEND_4 movl %r10d, %r10d; +# endif +# undef ZERO_EXTEND_5 +# define ZERO_EXTEND_5 movl %r8d, %r8d; +# undef ZERO_EXTEND_6 +# define ZERO_EXTEND_6 movl %r9d, %r9d; +#else /* !__ASSEMBLER__ */ +# undef ARGIFY +/* Enforce zero-extension for pointers and array system call arguments. + For integer types, extend to int64_t (the full register) using a + regular cast, resulting in zero or sign extension based on the + signedness of the original type. */ +# define ARGIFY(X) \ + ({ \ + _Pragma ("GCC diagnostic push"); \ + _Pragma ("GCC diagnostic ignored \"-Wpointer-to-int-cast\""); \ + (__builtin_classify_type (X) == 5 \ + ? (uintptr_t) (X) : (int64_t) (X)); \ + _Pragma ("GCC diagnostic pop"); \ + }) +#endif /* __ASSEMBLER__ */ + #endif /* linux/x86_64/x32/sysdep.h */ diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile index 95182a508..b7aec5df2 100644 --- a/sysdeps/x86/Makefile +++ b/sysdeps/x86/Makefile @@ -12,6 +12,42 @@ endif ifeq ($(subdir),setjmp) gen-as-const-headers += jmp_buf-ssp.sym sysdep_routines += __longjmp_cancel +ifneq ($(enable-cet),no) +ifneq ($(have-tunables),no) +tests += tst-setjmp-cet +tst-setjmp-cet-ENV = GLIBC_TUNABLES=glibc.cpu.x86_ibt=on:glibc.cpu.x86_shstk=on +endif +endif +endif + +ifeq ($(subdir),string) +sysdep_routines += cacheinfo + +tests += \ + tst-memchr-rtm \ + tst-memcmp-rtm \ + tst-memmove-rtm \ + tst-memrchr-rtm \ + tst-memset-rtm \ + tst-strchr-rtm \ + tst-strcpy-rtm \ + tst-strlen-rtm \ + tst-strncmp-rtm \ + tst-strrchr-rtm \ + tst-wcsncmp-rtm \ +# tests + +CFLAGS-tst-memchr-rtm.c += -mrtm +CFLAGS-tst-memcmp-rtm.c += -mrtm +CFLAGS-tst-memmove-rtm.c += -mrtm +CFLAGS-tst-memrchr-rtm.c += -mrtm +CFLAGS-tst-memset-rtm.c += -mrtm +CFLAGS-tst-strchr-rtm.c += -mrtm +CFLAGS-tst-strcpy-rtm.c += -mrtm +CFLAGS-tst-strlen-rtm.c += -mrtm +CFLAGS-tst-strncmp-rtm.c += -mrtm -Wno-error +CFLAGS-tst-strrchr-rtm.c += -mrtm +CFLAGS-tst-wcsncmp-rtm.c += -mrtm -Wno-error endif ifeq ($(enable-cet),yes) diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c index e3e8ef27b..17db51783 100644 --- a/sysdeps/x86/cacheinfo.c +++ b/sysdeps/x86/cacheinfo.c @@ -494,6 +494,7 @@ init_cacheinfo (void) int max_cpuid_ex; long int data = -1; long int shared = -1; + long int shared_per_thread = -1; unsigned int level; unsigned int threads = 0; const struct cpu_features *cpu_features = __get_cpu_features (); @@ -509,7 +510,7 @@ init_cacheinfo (void) /* Try L3 first. */ level = 3; shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features); - + shared_per_thread = shared; /* Number of logical processors sharing L2 cache. */ int threads_l2; @@ -521,6 +522,7 @@ init_cacheinfo (void) /* Try L2 otherwise. */ level = 2; shared = core; + shared_per_thread = core; threads_l2 = 0; threads_l3 = -1; } @@ -688,15 +690,15 @@ intel_bug_no_cache_info: /* Cap usage of highest cache level to the number of supported threads. */ - if (shared > 0 && threads > 0) - shared /= threads; + if (shared_per_thread > 0 && threads > 0) + shared_per_thread /= threads; } /* Account for non-inclusive L2 and L3 caches. */ if (!inclusive_cache) { - if (threads_l2 > 0) - core /= threads_l2; + long int core_per_thread = threads_l2 > 0 ? (core / threads_l2) : core; + shared_per_thread += core_per_thread; shared += core; } } @@ -705,13 +707,17 @@ intel_bug_no_cache_info: data = handle_amd (_SC_LEVEL1_DCACHE_SIZE); long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE); shared = handle_amd (_SC_LEVEL3_CACHE_SIZE); + shared_per_thread = shared; /* Get maximum extended function. */ __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx); if (shared <= 0) - /* No shared L3 cache. All we have is the L2 cache. */ - shared = core; + { + /* No shared L3 cache. All we have is the L2 cache. */ + shared = core; + shared_per_thread = core; + } else { /* Figure out the number of logical threads that share L3. */ @@ -722,7 +728,7 @@ intel_bug_no_cache_info: threads = 1 << ((ecx >> 12) & 0x0f); } - if (threads == 0) + if (threads == 0 || cpu_features->basic.family >= 0x17) { /* If APIC ID width is not available, use logical processor count. */ @@ -735,10 +741,25 @@ intel_bug_no_cache_info: /* Cap usage of highest cache level to the number of supported threads. */ if (threads > 0) - shared /= threads; + shared_per_thread /= threads; - /* Account for exclusive L2 and L3 caches. */ - shared += core; + /* Get shared cache per ccx for Zen architectures. */ + if (cpu_features->basic.family >= 0x17) + { + unsigned int eax; + + /* Get number of threads share the L3 cache in CCX. */ + __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx); + + unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1; + shared_per_thread *= threads_per_ccx; + } + else + { + /* Account for exclusive L2 and L3 caches. */ + shared += core; + shared_per_thread += core; + } } #ifndef DISABLE_PREFETCHW @@ -766,26 +787,51 @@ intel_bug_no_cache_info: } if (cpu_features->shared_cache_size != 0) - shared = cpu_features->shared_cache_size; + shared_per_thread = cpu_features->shared_cache_size; - if (shared > 0) + if (shared_per_thread > 0) { - __x86_raw_shared_cache_size_half = shared / 2; - __x86_raw_shared_cache_size = shared; + __x86_raw_shared_cache_size_half = shared_per_thread / 2; + __x86_raw_shared_cache_size = shared_per_thread; /* Round shared cache size to multiple of 256 bytes. */ - shared = shared & ~255L; - __x86_shared_cache_size_half = shared / 2; - __x86_shared_cache_size = shared; + shared_per_thread = shared_per_thread & ~255L; + __x86_shared_cache_size_half = shared_per_thread / 2; + __x86_shared_cache_size = shared_per_thread; } - /* The large memcpy micro benchmark in glibc shows that 6 times of - shared cache size is the approximate value above which non-temporal - store becomes faster on a 8-core processor. This is the 3/4 of the - total shared cache size. */ + /* The default setting for the non_temporal threshold is [1/8, 1/2] of size + of the chip's cache (depending on `cachesize_non_temporal_divisor` which + is microarch specific. The default is 1/4). For most Intel processors + with an initial release date between 2017 and 2023, a thread's + typical share of the cache is from 18-64MB. Using a reasonable size + fraction of L3 is meant to estimate the point where non-temporal stores + begin out-competing REP MOVSB. As well the point where the fact that + non-temporal stores are forced back to main memory would already occurred + to the majority of the lines in the copy. Note, concerns about the entire + L3 cache being evicted by the copy are mostly alleviated by the fact that + modern HW detects streaming patterns and provides proper LRU hints so that + the maximum thrashing capped at 1/associativity. */ + unsigned long int non_temporal_threshold = shared / 4; + + /* If the computed non_temporal_threshold <= 3/4 * per-thread L3, we most + likely have incorrect/incomplete cache info in which case, default to + 3/4 * per-thread L3 to avoid regressions. */ + unsigned long int non_temporal_threshold_lowbound + = shared_per_thread * 3 / 4; + if (non_temporal_threshold < non_temporal_threshold_lowbound) + non_temporal_threshold = non_temporal_threshold_lowbound; + + /* If no ERMS, we use the per-thread L3 chunking. Normal cacheable stores run + a higher risk of actually thrashing the cache as they don't have a HW LRU + hint. As well, their performance in highly parallel situations is + noticeably worse. */ + if (!CPU_FEATURES_CPU_P (cpu_features, ERMS)) + non_temporal_threshold = non_temporal_threshold_lowbound; + __x86_shared_non_temporal_threshold = (cpu_features->non_temporal_threshold != 0 ? cpu_features->non_temporal_threshold - : __x86_shared_cache_size * threads * 3 / 4); + : non_temporal_threshold); } #endif diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 81a170a81..e1c22e3e5 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -333,6 +333,9 @@ init_cpu_features (struct cpu_features *cpu_features) get_extended_indices (cpu_features); + if (CPU_FEATURES_CPU_P (cpu_features, RTM_ALWAYS_ABORT)) + cpu_features->cpuid[index_cpu_RTM].reg_RTM &= ~bit_cpu_RTM; + if (family == 0x06) { model += extended_model; @@ -394,11 +397,42 @@ init_cpu_features (struct cpu_features *cpu_features) break; } - /* Disable TSX on some Haswell processors to avoid TSX on kernels that - weren't updated with the latest microcode package (which disables - broken feature by default). */ + /* Disable TSX on some processors to avoid TSX on kernels that + weren't updated with the latest microcode package (which + disables broken feature by default). */ switch (model) { + case 0x55: + if (stepping <= 5) + goto disable_tsx; + break; + case 0x8e: + /* NB: Although the errata documents that for model == 0x8e, + only 0xb stepping or lower are impacted, the intention of + the errata was to disable TSX on all client processors on + all steppings. Include 0xc stepping which is an Intel + Core i7-8665U, a client mobile processor. */ + case 0x9e: + if (stepping > 0xc) + break; + /* Fall through. */ + case 0x4e: + case 0x5e: + { + /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for + processors listed in: + +https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html + */ +disable_tsx: + cpu_features->cpuid[index_cpu_HLE].reg_HLE + &= ~bit_cpu_HLE; + cpu_features->cpuid[index_cpu_RTM].reg_RTM + &= ~bit_cpu_RTM; + cpu_features->cpuid[index_cpu_RTM_ALWAYS_ABORT].reg_RTM_ALWAYS_ABORT + |= bit_cpu_RTM_ALWAYS_ABORT; + } + break; case 0x3f: /* Xeon E7 v3 with stepping >= 4 has working TSX. */ if (stepping >= 4) @@ -424,8 +458,24 @@ init_cpu_features (struct cpu_features *cpu_features) cpu_features->feature[index_arch_Prefer_No_VZEROUPPER] |= bit_arch_Prefer_No_VZEROUPPER; else - cpu_features->feature[index_arch_Prefer_No_AVX512] - |= bit_arch_Prefer_No_AVX512; + { + cpu_features->feature[index_arch_Prefer_No_AVX512] + |= bit_arch_Prefer_No_AVX512; + + /* Avoid RTM abort triggered by VZEROUPPER inside a + transactionally executing RTM region. */ + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + cpu_features->feature[index_arch_Prefer_No_VZEROUPPER] + |= bit_arch_Prefer_No_VZEROUPPER; + + /* Since to compare 2 32-byte strings, 256-bit EVEX strcmp + requires 2 loads, 3 VPCMPs and 2 KORDs while AVX2 strcmp + requires 1 load, 2 VPCMPEQs, 1 VPMINU and 1 VPMOVMSKB, + AVX2 strcmp is faster than EVEX strcmp. */ + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)) + cpu_features->feature[index_arch_Prefer_AVX2_STRCMP] + |= bit_arch_Prefer_AVX2_STRCMP; + } } /* This spells out "AuthenticAMD" or "HygonGenuine". */ else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h index aea83e6e3..9fb97907b 100644 --- a/sysdeps/x86/cpu-features.h +++ b/sysdeps/x86/cpu-features.h @@ -499,6 +499,7 @@ extern const struct cpu_features *__get_cpu_features (void) #define bit_cpu_AVX512_4VNNIW (1u << 2) #define bit_cpu_AVX512_4FMAPS (1u << 3) #define bit_cpu_FSRM (1u << 4) +#define bit_cpu_RTM_ALWAYS_ABORT (1u << 11) #define bit_cpu_PCONFIG (1u << 18) #define bit_cpu_IBT (1u << 20) #define bit_cpu_IBRS_IBPB (1u << 26) @@ -667,6 +668,7 @@ extern const struct cpu_features *__get_cpu_features (void) #define index_cpu_AVX512_4VNNIW COMMON_CPUID_INDEX_7 #define index_cpu_AVX512_4FMAPS COMMON_CPUID_INDEX_7 #define index_cpu_FSRM COMMON_CPUID_INDEX_7 +#define index_cpu_RTM_ALWAYS_ABORT COMMON_CPUID_INDEX_7 #define index_cpu_PCONFIG COMMON_CPUID_INDEX_7 #define index_cpu_IBT COMMON_CPUID_INDEX_7 #define index_cpu_IBRS_IBPB COMMON_CPUID_INDEX_7 @@ -835,6 +837,7 @@ extern const struct cpu_features *__get_cpu_features (void) #define reg_AVX512_4VNNIW edx #define reg_AVX512_4FMAPS edx #define reg_FSRM edx +#define reg_RTM_ALWAYS_ABORT edx #define reg_PCONFIG edx #define reg_IBT edx #define reg_IBRS_IBPB edx @@ -897,6 +900,7 @@ extern const struct cpu_features *__get_cpu_features (void) #define bit_arch_Prefer_FSRM (1u << 13) #define bit_arch_Prefer_No_AVX512 (1u << 14) #define bit_arch_MathVec_Prefer_No_AVX512 (1u << 15) +#define bit_arch_Prefer_AVX2_STRCMP (1u << 16) #define index_arch_Fast_Rep_String FEATURE_INDEX_2 #define index_arch_Fast_Copy_Backward FEATURE_INDEX_2 @@ -914,6 +918,7 @@ extern const struct cpu_features *__get_cpu_features (void) #define index_arch_Prefer_No_AVX512 FEATURE_INDEX_2 #define index_arch_MathVec_Prefer_No_AVX512 FEATURE_INDEX_2 #define index_arch_Prefer_FSRM FEATURE_INDEX_2 +#define index_arch_Prefer_AVX2_STRCMP FEATURE_INDEX_2 /* XCR0 Feature flags. */ #define bit_XMM_state (1u << 1) diff --git a/sysdeps/x86/cpu-tunables.c b/sysdeps/x86/cpu-tunables.c index 861bd7bca..cb83ecc3b 100644 --- a/sysdeps/x86/cpu-tunables.c +++ b/sysdeps/x86/cpu-tunables.c @@ -282,6 +282,9 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features, Fast_Copy_Backward, disable, 18); + CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH + (n, cpu_features, Prefer_AVX2_STRCMP, AVX2_Usable, + disable, 18); } break; case 19: diff --git a/sysdeps/x86/dl-cet.c b/sysdeps/x86/dl-cet.c index ca3b5849b..8ffaf94a0 100644 --- a/sysdeps/x86/dl-cet.c +++ b/sysdeps/x86/dl-cet.c @@ -105,7 +105,11 @@ dl_cet_check (struct link_map *m, const char *program) /* No legacy object check if both IBT and SHSTK are always on. */ if (enable_ibt_type == CET_ALWAYS_ON && enable_shstk_type == CET_ALWAYS_ON) - return; + { + THREAD_SETMEM (THREAD_SELF, header.feature_1, + GL(dl_x86_feature_1)[0]); + return; + } /* Check if IBT is enabled by kernel. */ bool ibt_enabled diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c index 0f55987ae..bbb5cd356 100644 --- a/sysdeps/x86/tst-get-cpu-features.c +++ b/sysdeps/x86/tst-get-cpu-features.c @@ -176,6 +176,7 @@ do_test (void) CHECK_CPU_FEATURE (AVX512_4VNNIW); CHECK_CPU_FEATURE (AVX512_4FMAPS); CHECK_CPU_FEATURE (FSRM); + CHECK_CPU_FEATURE (RTM_ALWAYS_ABORT); CHECK_CPU_FEATURE (PCONFIG); CHECK_CPU_FEATURE (IBT); CHECK_CPU_FEATURE (IBRS_IBPB); diff --git a/sysdeps/x86/tst-memchr-rtm.c b/sysdeps/x86/tst-memchr-rtm.c new file mode 100644 index 000000000..e47494011 --- /dev/null +++ b/sysdeps/x86/tst-memchr-rtm.c @@ -0,0 +1,54 @@ +/* Test case for memchr inside a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#define LOOP 3000 +#define STRING_SIZE 1024 +char string1[STRING_SIZE]; + +__attribute__ ((noinline, noclone)) +static int +prepare (void) +{ + memset (string1, 'a', STRING_SIZE); + string1[100] = 'c'; + string1[STRING_SIZE - 100] = 'c'; + char *p = memchr (string1, 'c', STRING_SIZE); + if (p == &string1[100]) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; +} + +__attribute__ ((noinline, noclone)) +static int +function (void) +{ + char *p = memchr (string1, 'c', STRING_SIZE); + if (p == &string1[100]) + return 0; + else + return 1; +} + +static int +do_test (void) +{ + return do_test_1 ("memchr", LOOP, prepare, function); +} diff --git a/sysdeps/x86/tst-memcmp-rtm.c b/sysdeps/x86/tst-memcmp-rtm.c new file mode 100644 index 000000000..e4c8a623b --- /dev/null +++ b/sysdeps/x86/tst-memcmp-rtm.c @@ -0,0 +1,52 @@ +/* Test case for memcmp inside a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#define LOOP 3000 +#define STRING_SIZE 1024 +char string1[STRING_SIZE]; +char string2[STRING_SIZE]; + +__attribute__ ((noinline, noclone)) +static int +prepare (void) +{ + memset (string1, 'a', STRING_SIZE); + memset (string2, 'a', STRING_SIZE); + if (memcmp (string1, string2, STRING_SIZE) == 0) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; +} + +__attribute__ ((noinline, noclone)) +static int +function (void) +{ + if (memcmp (string1, string2, STRING_SIZE) == 0) + return 0; + else + return 1; +} + +static int +do_test (void) +{ + return do_test_1 ("memcmp", LOOP, prepare, function); +} diff --git a/sysdeps/x86/tst-memmove-rtm.c b/sysdeps/x86/tst-memmove-rtm.c new file mode 100644 index 000000000..4bf97ef1e --- /dev/null +++ b/sysdeps/x86/tst-memmove-rtm.c @@ -0,0 +1,53 @@ +/* Test case for memmove inside a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#define LOOP 3000 +#define STRING_SIZE 1024 +char string1[STRING_SIZE]; +char string2[STRING_SIZE]; + +__attribute__ ((noinline, noclone)) +static int +prepare (void) +{ + memset (string1, 'a', STRING_SIZE); + if (memmove (string2, string1, STRING_SIZE) == string2 + && memcmp (string2, string1, STRING_SIZE) == 0) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; +} + +__attribute__ ((noinline, noclone)) +static int +function (void) +{ + if (memmove (string2, string1, STRING_SIZE) == string2 + && memcmp (string2, string1, STRING_SIZE) == 0) + return 0; + else + return 1; +} + +static int +do_test (void) +{ + return do_test_1 ("memmove", LOOP, prepare, function); +} diff --git a/sysdeps/x86/tst-memrchr-rtm.c b/sysdeps/x86/tst-memrchr-rtm.c new file mode 100644 index 000000000..a57a5a8eb --- /dev/null +++ b/sysdeps/x86/tst-memrchr-rtm.c @@ -0,0 +1,54 @@ +/* Test case for memrchr inside a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#define LOOP 3000 +#define STRING_SIZE 1024 +char string1[STRING_SIZE]; + +__attribute__ ((noinline, noclone)) +static int +prepare (void) +{ + memset (string1, 'a', STRING_SIZE); + string1[100] = 'c'; + string1[STRING_SIZE - 100] = 'c'; + char *p = memrchr (string1, 'c', STRING_SIZE); + if (p == &string1[STRING_SIZE - 100]) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; +} + +__attribute__ ((noinline, noclone)) +static int +function (void) +{ + char *p = memrchr (string1, 'c', STRING_SIZE); + if (p == &string1[STRING_SIZE - 100]) + return 0; + else + return 1; +} + +static int +do_test (void) +{ + return do_test_1 ("memrchr", LOOP, prepare, function); +} diff --git a/sysdeps/x86/tst-memset-rtm.c b/sysdeps/x86/tst-memset-rtm.c new file mode 100644 index 000000000..bf343a4da --- /dev/null +++ b/sysdeps/x86/tst-memset-rtm.c @@ -0,0 +1,45 @@ +/* Test case for memset inside a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#define LOOP 3000 +#define STRING_SIZE 1024 +char string1[STRING_SIZE]; + +__attribute__ ((noinline, noclone)) +static int +prepare (void) +{ + memset (string1, 'a', STRING_SIZE); + return EXIT_SUCCESS; +} + +__attribute__ ((noinline, noclone)) +static int +function (void) +{ + memset (string1, 'a', STRING_SIZE); + return 0; +} + +static int +do_test (void) +{ + return do_test_1 ("memset", LOOP, prepare, function); +} diff --git a/sysdeps/x86/tst-setjmp-cet.c b/sysdeps/x86/tst-setjmp-cet.c new file mode 100644 index 000000000..42c795d2a --- /dev/null +++ b/sysdeps/x86/tst-setjmp-cet.c @@ -0,0 +1 @@ +#include diff --git a/sysdeps/x86/tst-strchr-rtm.c b/sysdeps/x86/tst-strchr-rtm.c new file mode 100644 index 000000000..a82e29c07 --- /dev/null +++ b/sysdeps/x86/tst-strchr-rtm.c @@ -0,0 +1,54 @@ +/* Test case for strchr inside a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#define LOOP 3000 +#define STRING_SIZE 1024 +char string1[STRING_SIZE]; + +__attribute__ ((noinline, noclone)) +static int +prepare (void) +{ + memset (string1, 'a', STRING_SIZE - 1); + string1[100] = 'c'; + string1[STRING_SIZE - 100] = 'c'; + char *p = strchr (string1, 'c'); + if (p == &string1[100]) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; +} + +__attribute__ ((noinline, noclone)) +static int +function (void) +{ + char *p = strchr (string1, 'c'); + if (p == &string1[100]) + return 0; + else + return 1; +} + +static int +do_test (void) +{ + return do_test_1 ("strchr", LOOP, prepare, function); +} diff --git a/sysdeps/x86/tst-strcpy-rtm.c b/sysdeps/x86/tst-strcpy-rtm.c new file mode 100644 index 000000000..2b2a583fb --- /dev/null +++ b/sysdeps/x86/tst-strcpy-rtm.c @@ -0,0 +1,53 @@ +/* Test case for strcpy inside a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#define LOOP 3000 +#define STRING_SIZE 1024 +char string1[STRING_SIZE]; +char string2[STRING_SIZE]; + +__attribute__ ((noinline, noclone)) +static int +prepare (void) +{ + memset (string1, 'a', STRING_SIZE - 1); + if (strcpy (string2, string1) == string2 + && strcmp (string2, string1) == 0) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; +} + +__attribute__ ((noinline, noclone)) +static int +function (void) +{ + if (strcpy (string2, string1) == string2 + && strcmp (string2, string1) == 0) + return 0; + else + return 1; +} + +static int +do_test (void) +{ + return do_test_1 ("strcpy", LOOP, prepare, function); +} diff --git a/sysdeps/x86/tst-string-rtm.h b/sysdeps/x86/tst-string-rtm.h new file mode 100644 index 000000000..6ed9eca01 --- /dev/null +++ b/sysdeps/x86/tst-string-rtm.h @@ -0,0 +1,72 @@ +/* Test string function in a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include + +static int +do_test_1 (const char *name, unsigned int loop, int (*prepare) (void), + int (*function) (void)) +{ + if (!CPU_FEATURE_USABLE (RTM)) + return EXIT_UNSUPPORTED; + + int status = prepare (); + if (status != EXIT_SUCCESS) + return status; + + unsigned int i; + unsigned int naborts = 0; + unsigned int failed = 0; + for (i = 0; i < loop; i++) + { + failed |= function (); + if (_xbegin() == _XBEGIN_STARTED) + { + failed |= function (); + _xend(); + } + else + { + failed |= function (); + ++naborts; + } + } + + if (failed) + FAIL_EXIT1 ("%s() failed", name); + + if (naborts) + { + /* NB: Low single digit (<= 5%) noise-level aborts are normal for + TSX. */ + double rate = 100 * ((double) naborts) / ((double) loop); + if (rate > 5) + FAIL_EXIT1 ("TSX abort rate: %.2f%% (%d out of %d)", + rate, naborts, loop); + } + + return EXIT_SUCCESS; +} + +static int do_test (void); + +#include diff --git a/sysdeps/x86/tst-strlen-rtm.c b/sysdeps/x86/tst-strlen-rtm.c new file mode 100644 index 000000000..0dcf14db8 --- /dev/null +++ b/sysdeps/x86/tst-strlen-rtm.c @@ -0,0 +1,53 @@ +/* Test case for strlen inside a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#define LOOP 3000 +#define STRING_SIZE 1024 +char string1[STRING_SIZE]; + +__attribute__ ((noinline, noclone)) +static int +prepare (void) +{ + memset (string1, 'a', STRING_SIZE - 1); + string1[STRING_SIZE - 100] = '\0'; + size_t len = strlen (string1); + if (len == STRING_SIZE - 100) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; +} + +__attribute__ ((noinline, noclone)) +static int +function (void) +{ + size_t len = strlen (string1); + if (len == STRING_SIZE - 100) + return 0; + else + return 1; +} + +static int +do_test (void) +{ + return do_test_1 ("strlen", LOOP, prepare, function); +} diff --git a/sysdeps/x86/tst-strncmp-rtm.c b/sysdeps/x86/tst-strncmp-rtm.c new file mode 100644 index 000000000..aef9866cf --- /dev/null +++ b/sysdeps/x86/tst-strncmp-rtm.c @@ -0,0 +1,81 @@ +/* Test case for strncmp inside a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include + +#ifdef WIDE +# define CHAR wchar_t +# define MEMSET wmemset +# define STRNCMP wcsncmp +# define TEST_NAME "wcsncmp" +#else /* !WIDE */ +# define CHAR char +# define MEMSET memset +# define STRNCMP strncmp +# define TEST_NAME "strncmp" +#endif /* !WIDE */ + + + +#define LOOP 3000 +#define STRING_SIZE 1024 +CHAR string1[STRING_SIZE]; +CHAR string2[STRING_SIZE]; + +__attribute__ ((noinline, noclone)) +static int +prepare (void) +{ + MEMSET (string1, 'a', STRING_SIZE - 1); + MEMSET (string2, 'a', STRING_SIZE - 1); + if (STRNCMP (string1, string2, STRING_SIZE) == 0) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; +} + +__attribute__ ((noinline, noclone)) +static int +function (void) +{ + if (STRNCMP (string1, string2, STRING_SIZE) == 0) + return 0; + else + return 1; +} + +__attribute__ ((noinline, noclone)) +static int +function_overflow (void) +{ + if (STRNCMP (string1, string2, SIZE_MAX) == 0) + return 0; + else + return 1; +} + +static int +do_test (void) +{ + int status = do_test_1 (TEST_NAME, LOOP, prepare, function); + if (status != EXIT_SUCCESS) + return status; + status = do_test_1 (TEST_NAME, LOOP, prepare, function_overflow); + return status; +} diff --git a/sysdeps/x86/tst-strrchr-rtm.c b/sysdeps/x86/tst-strrchr-rtm.c new file mode 100644 index 000000000..e32bfaf5f --- /dev/null +++ b/sysdeps/x86/tst-strrchr-rtm.c @@ -0,0 +1,53 @@ +/* Test case for strrchr inside a transactionally executing RTM region. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#define LOOP 3000 +#define STRING_SIZE 1024 +char string1[STRING_SIZE]; + +__attribute__ ((noinline, noclone)) +static int +prepare (void) +{ + memset (string1, 'a', STRING_SIZE - 1); + string1[STRING_SIZE - 100] = 'c'; + char *p = strrchr (string1, 'c'); + if (p == &string1[STRING_SIZE - 100]) + return EXIT_SUCCESS; + else + return EXIT_FAILURE; +} + +__attribute__ ((noinline, noclone)) +static int +function (void) +{ + char *p = strrchr (string1, 'c'); + if (p == &string1[STRING_SIZE - 100]) + return 0; + else + return 1; +} + +static int +do_test (void) +{ + return do_test_1 ("strrchr", LOOP, prepare, function); +} diff --git a/sysdeps/unix/sysv/linux/nios2/kernel-features.h b/sysdeps/x86/tst-wcsncmp-rtm.c similarity index 74% rename from sysdeps/unix/sysv/linux/nios2/kernel-features.h rename to sysdeps/x86/tst-wcsncmp-rtm.c index d68d11498..bad3b8637 100644 --- a/sysdeps/unix/sysv/linux/nios2/kernel-features.h +++ b/sysdeps/x86/tst-wcsncmp-rtm.c @@ -1,6 +1,5 @@ -/* Set flags signalling availability of kernel features based on given - kernel version number. NIOS2 version. - Copyright (C) 2019-2020 Free Software Foundation, Inc. +/* Test case for wcsncmp inside a transactionally executing RTM region. + Copyright (C) 2022 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,6 +16,6 @@ License along with the GNU C Library; if not, see . */ -#include_next - -#undef __ASSUME_SYSVIPC_DEFAULT_IPC_64 +#define WIDE 1 +#include +#include "tst-strncmp-rtm.c" diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile index d51cf03ac..b1951adce 100644 --- a/sysdeps/x86_64/Makefile +++ b/sysdeps/x86_64/Makefile @@ -20,6 +20,8 @@ endif ifeq ($(subdir),string) sysdep_routines += cacheinfo strcasecmp_l-nonascii strncase_l-nonascii gen-as-const-headers += locale-defines.sym +tests += \ + tst-rsi-strlen endif ifeq ($(subdir),elf) @@ -150,6 +152,11 @@ ifeq ($(subdir),csu) gen-as-const-headers += tlsdesc.sym rtld-offsets.sym endif +ifeq ($(subdir),wcsmbs) +tests += \ + tst-rsi-wcslen +endif + $(objpfx)x86_64/tst-x86_64mod-1.os: $(objpfx)tst-x86_64mod-1.os $(make-target-directory) rm -f $@ diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure old mode 100644 new mode 100755 index 84f82c240..fc1840e23 --- a/sysdeps/x86_64/configure +++ b/sysdeps/x86_64/configure @@ -107,39 +107,6 @@ if test x"$build_mathvec" = xnotset; then build_mathvec=yes fi -if test "$static_pie" = yes; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for linker static PIE support" >&5 -$as_echo_n "checking for linker static PIE support... " >&6; } -if ${libc_cv_ld_static_pie+:} false; then : - $as_echo_n "(cached) " >&6 -else - cat > conftest.s <<\EOF - .text - .global _start - .weak foo -_start: - leaq foo(%rip), %rax -EOF - libc_cv_pie_option="-Wl,-pie" - if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&5' - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 - (eval $ac_try) 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; }; then - libc_cv_ld_static_pie=yes - else - libc_cv_ld_static_pie=no - fi -rm -f conftest* -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ld_static_pie" >&5 -$as_echo "$libc_cv_ld_static_pie" >&6; } - if test "$libc_cv_ld_static_pie" != yes; then - as_fn_error $? "linker support for static PIE needed" "$LINENO" 5 - fi -fi - $as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac index cdaba0c07..611a7d9ba 100644 --- a/sysdeps/x86_64/configure.ac +++ b/sysdeps/x86_64/configure.ac @@ -53,31 +53,6 @@ if test x"$build_mathvec" = xnotset; then build_mathvec=yes fi -dnl Check if linker supports static PIE with the fix for -dnl -dnl https://sourceware.org/bugzilla/show_bug.cgi?id=21782 -dnl -if test "$static_pie" = yes; then - AC_CACHE_CHECK(for linker static PIE support, libc_cv_ld_static_pie, [dnl -cat > conftest.s <<\EOF - .text - .global _start - .weak foo -_start: - leaq foo(%rip), %rax -EOF - libc_cv_pie_option="-Wl,-pie" - if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&AS_MESSAGE_LOG_FD); then - libc_cv_ld_static_pie=yes - else - libc_cv_ld_static_pie=no - fi -rm -f conftest*]) - if test "$libc_cv_ld_static_pie" != yes; then - AC_MSG_ERROR([linker support for static PIE needed]) - fi -fi - dnl It is always possible to access static and hidden symbols in an dnl position independent way. AC_DEFINE(PI_STATIC_AND_HIDDEN) diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h index 8e9baffeb..74029871d 100644 --- a/sysdeps/x86_64/dl-machine.h +++ b/sysdeps/x86_64/dl-machine.h @@ -315,16 +315,22 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, { # ifndef RTLD_BOOTSTRAP if (sym_map != map - && sym_map->l_type != lt_executable && !sym_map->l_relocated) { const char *strtab = (const char *) D_PTR (map, l_info[DT_STRTAB]); - _dl_error_printf ("\ + if (sym_map->l_type == lt_executable) + _dl_fatal_printf ("\ +%s: IFUNC symbol '%s' referenced in '%s' is defined in the executable \ +and creates an unsatisfiable circular dependency.\n", + RTLD_PROGNAME, strtab + refsym->st_name, + map->l_name); + else + _dl_error_printf ("\ %s: Relink `%s' with `%s' for IFUNC symbol `%s'\n", - RTLD_PROGNAME, map->l_name, - sym_map->l_name, - strtab + refsym->st_name); + RTLD_PROGNAME, map->l_name, + sym_map->l_name, + strtab + refsym->st_name); } # endif value = ((ElfW(Addr) (*) (void)) value) (); diff --git a/sysdeps/x86_64/ffsll.c b/sysdeps/x86_64/ffsll.c index 594ee5c68..e056ac4b4 100644 --- a/sysdeps/x86_64/ffsll.c +++ b/sysdeps/x86_64/ffsll.c @@ -27,13 +27,13 @@ int ffsll (long long int x) { long long int cnt; - long long int tmp; - asm ("bsfq %2,%0\n" /* Count low bits in X and store in %1. */ - "cmoveq %1,%0\n" /* If number was zero, use -1 as result. */ - : "=&r" (cnt), "=r" (tmp) : "rm" (x), "1" (-1)); + asm ("mov $-1,%k0\n" /* Initialize cnt to -1. */ + "bsf %1,%0\n" /* Count low bits in x and store in cnt. */ + "inc %k0\n" /* Increment cnt by 1. */ + : "=&r" (cnt) : "r" (x)); - return cnt + 1; + return cnt; } #ifndef __ILP32__ diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S index a5c879d2a..070e5ef90 100644 --- a/sysdeps/x86_64/memchr.S +++ b/sysdeps/x86_64/memchr.S @@ -21,9 +21,11 @@ #ifdef USE_AS_WMEMCHR # define MEMCHR wmemchr # define PCMPEQ pcmpeqd +# define CHAR_PER_VEC 4 #else # define MEMCHR memchr # define PCMPEQ pcmpeqb +# define CHAR_PER_VEC 16 #endif /* fast SSE2 version with using pmaxub and 64 byte loop */ @@ -33,15 +35,14 @@ ENTRY(MEMCHR) movd %esi, %xmm1 mov %edi, %ecx +#ifdef __ILP32__ + /* Clear the upper 32 bits. */ + movl %edx, %edx +#endif #ifdef USE_AS_WMEMCHR test %RDX_LP, %RDX_LP jz L(return_null) - shl $2, %RDX_LP #else -# ifdef __ILP32__ - /* Clear the upper 32 bits. */ - movl %edx, %edx -# endif punpcklbw %xmm1, %xmm1 test %RDX_LP, %RDX_LP jz L(return_null) @@ -60,13 +61,16 @@ ENTRY(MEMCHR) test %eax, %eax jnz L(matches_1) - sub $16, %rdx + sub $CHAR_PER_VEC, %rdx jbe L(return_null) add $16, %rdi and $15, %ecx and $-16, %rdi +#ifdef USE_AS_WMEMCHR + shr $2, %ecx +#endif add %rcx, %rdx - sub $64, %rdx + sub $(CHAR_PER_VEC * 4), %rdx jbe L(exit_loop) jmp L(loop_prolog) @@ -77,16 +81,21 @@ L(crosscache): movdqa (%rdi), %xmm0 PCMPEQ %xmm1, %xmm0 -/* Check if there is a match. */ + /* Check if there is a match. */ pmovmskb %xmm0, %eax -/* Remove the leading bytes. */ + /* Remove the leading bytes. */ sar %cl, %eax test %eax, %eax je L(unaligned_no_match) -/* Check which byte is a match. */ + /* Check which byte is a match. */ bsf %eax, %eax - +#ifdef USE_AS_WMEMCHR + mov %eax, %esi + shr $2, %esi + sub %rsi, %rdx +#else sub %rax, %rdx +#endif jbe L(return_null) add %rdi, %rax add %rcx, %rax @@ -94,15 +103,18 @@ L(crosscache): .p2align 4 L(unaligned_no_match): - /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using + /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using "rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to void possible addition overflow. */ neg %rcx add $16, %rcx +#ifdef USE_AS_WMEMCHR + shr $2, %ecx +#endif sub %rcx, %rdx jbe L(return_null) add $16, %rdi - sub $64, %rdx + sub $(CHAR_PER_VEC * 4), %rdx jbe L(exit_loop) .p2align 4 @@ -135,7 +147,7 @@ L(loop_prolog): test $0x3f, %rdi jz L(align64_loop) - sub $64, %rdx + sub $(CHAR_PER_VEC * 4), %rdx jbe L(exit_loop) movdqa (%rdi), %xmm0 @@ -167,11 +179,14 @@ L(loop_prolog): mov %rdi, %rcx and $-64, %rdi and $63, %ecx +#ifdef USE_AS_WMEMCHR + shr $2, %ecx +#endif add %rcx, %rdx .p2align 4 L(align64_loop): - sub $64, %rdx + sub $(CHAR_PER_VEC * 4), %rdx jbe L(exit_loop) movdqa (%rdi), %xmm0 movdqa 16(%rdi), %xmm2 @@ -218,7 +233,7 @@ L(align64_loop): .p2align 4 L(exit_loop): - add $32, %edx + add $(CHAR_PER_VEC * 2), %edx jle L(exit_loop_32) movdqa (%rdi), %xmm0 @@ -238,7 +253,7 @@ L(exit_loop): pmovmskb %xmm3, %eax test %eax, %eax jnz L(matches32_1) - sub $16, %edx + sub $CHAR_PER_VEC, %edx jle L(return_null) PCMPEQ 48(%rdi), %xmm1 @@ -250,13 +265,13 @@ L(exit_loop): .p2align 4 L(exit_loop_32): - add $32, %edx + add $(CHAR_PER_VEC * 2), %edx movdqa (%rdi), %xmm0 PCMPEQ %xmm1, %xmm0 pmovmskb %xmm0, %eax test %eax, %eax jnz L(matches_1) - sub $16, %edx + sub $CHAR_PER_VEC, %edx jbe L(return_null) PCMPEQ 16(%rdi), %xmm1 @@ -293,7 +308,13 @@ L(matches32): .p2align 4 L(matches_1): bsf %eax, %eax +#ifdef USE_AS_WMEMCHR + mov %eax, %esi + shr $2, %esi + sub %rsi, %rdx +#else sub %rax, %rdx +#endif jbe L(return_null) add %rdi, %rax ret @@ -301,7 +322,13 @@ L(matches_1): .p2align 4 L(matches16_1): bsf %eax, %eax +#ifdef USE_AS_WMEMCHR + mov %eax, %esi + shr $2, %esi + sub %rsi, %rdx +#else sub %rax, %rdx +#endif jbe L(return_null) lea 16(%rdi, %rax), %rax ret @@ -309,7 +336,13 @@ L(matches16_1): .p2align 4 L(matches32_1): bsf %eax, %eax +#ifdef USE_AS_WMEMCHR + mov %eax, %esi + shr $2, %esi + sub %rsi, %rdx +#else sub %rax, %rdx +#endif jbe L(return_null) lea 32(%rdi, %rax), %rax ret @@ -317,7 +350,13 @@ L(matches32_1): .p2align 4 L(matches48_1): bsf %eax, %eax +#ifdef USE_AS_WMEMCHR + mov %eax, %esi + shr $2, %esi + sub %rsi, %rdx +#else sub %rax, %rdx +#endif jbe L(return_null) lea 48(%rdi, %rax), %rax ret diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 395e432c0..da1446d73 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -43,7 +43,45 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c \ memmove-avx512-unaligned-erms \ memset-sse2-unaligned-erms \ memset-avx2-unaligned-erms \ - memset-avx512-unaligned-erms + memset-avx512-unaligned-erms \ + memchr-avx2-rtm \ + memcmp-avx2-movbe-rtm \ + memmove-avx-unaligned-erms-rtm \ + memrchr-avx2-rtm \ + memset-avx2-unaligned-erms-rtm \ + rawmemchr-avx2-rtm \ + strchr-avx2-rtm \ + strcmp-avx2-rtm \ + strchrnul-avx2-rtm \ + stpcpy-avx2-rtm \ + stpncpy-avx2-rtm \ + strcat-avx2-rtm \ + strcpy-avx2-rtm \ + strlen-avx2-rtm \ + strncat-avx2-rtm \ + strncmp-avx2-rtm \ + strncpy-avx2-rtm \ + strnlen-avx2-rtm \ + strrchr-avx2-rtm \ + memchr-evex \ + memcmp-evex-movbe \ + memmove-evex-unaligned-erms \ + memrchr-evex \ + memset-evex-unaligned-erms \ + rawmemchr-evex \ + stpcpy-evex \ + stpncpy-evex \ + strcat-evex \ + strchr-evex \ + strchrnul-evex \ + strcmp-evex \ + strcpy-evex \ + strlen-evex \ + strncat-evex \ + strncmp-evex \ + strncpy-evex \ + strnlen-evex \ + strrchr-evex CFLAGS-varshift.c += -msse4 CFLAGS-strcspn-c.c += -msse4 CFLAGS-strpbrk-c.c += -msse4 @@ -59,8 +97,24 @@ sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \ wcscpy-ssse3 wcscpy-c \ wcschr-sse2 wcschr-avx2 \ wcsrchr-sse2 wcsrchr-avx2 \ - wcsnlen-sse4_1 wcsnlen-c \ - wcslen-sse2 wcslen-avx2 wcsnlen-avx2 + wcslen-sse2 wcslen-sse4_1 wcslen-avx2 \ + wcsnlen-c wcsnlen-sse4_1 wcsnlen-avx2 \ + wcschr-avx2-rtm \ + wcscmp-avx2-rtm \ + wcslen-avx2-rtm \ + wcsncmp-avx2-rtm \ + wcsnlen-avx2-rtm \ + wcsrchr-avx2-rtm \ + wmemchr-avx2-rtm \ + wmemcmp-avx2-movbe-rtm \ + wcschr-evex \ + wcscmp-evex \ + wcslen-evex \ + wcsncmp-evex \ + wcsnlen-evex \ + wcsrchr-evex \ + wmemchr-evex \ + wmemcmp-evex-movbe endif ifeq ($(subdir),debug) diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h index 69f30398a..925e5b61e 100644 --- a/sysdeps/x86_64/multiarch/ifunc-avx2.h +++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h @@ -21,16 +21,28 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; static inline void * IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + && CPU_FEATURES_CPU_P (cpu_features, BMI2) && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) - return OPTIMIZE (avx2); + { + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + return OPTIMIZE (evex); + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + return OPTIMIZE (avx2_rtm); + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE (avx2); + } return OPTIMIZE (sse2); } diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c index ce7eb1eec..e712b148f 100644 --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c @@ -41,8 +41,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/memchr.c. */ IFUNC_IMPL (i, name, memchr, IFUNC_IMPL_ADD (array, i, memchr, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __memchr_avx2) + IFUNC_IMPL_ADD (array, i, memchr, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __memchr_avx2_rtm) + IFUNC_IMPL_ADD (array, i, memchr, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __memchr_evex) IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_sse2)) /* Support sysdeps/x86_64/multiarch/memcmp.c. */ @@ -51,6 +62,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, (HAS_ARCH_FEATURE (AVX2_Usable) && HAS_CPU_FEATURE (MOVBE)), __memcmp_avx2_movbe) + IFUNC_IMPL_ADD (array, i, memcmp, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (MOVBE) + && HAS_CPU_FEATURE (RTM)), + __memcmp_avx2_movbe_rtm) + IFUNC_IMPL_ADD (array, i, memcmp, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (MOVBE)), + __memcmp_evex_movbe) IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSE4_1), __memcmp_sse4_1) IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSSE3), @@ -64,10 +85,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_ARCH_FEATURE (AVX512F_Usable), __memmove_chk_avx512_no_vzeroupper) IFUNC_IMPL_ADD (array, i, __memmove_chk, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __memmove_chk_avx512_unaligned) IFUNC_IMPL_ADD (array, i, __memmove_chk, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __memmove_chk_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_ARCH_FEATURE (AVX_Usable), @@ -75,6 +96,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_ARCH_FEATURE (AVX_Usable), __memmove_chk_avx_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memmove_chk, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __memmove_chk_avx_unaligned_rtm) + IFUNC_IMPL_ADD (array, i, __memmove_chk, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __memmove_chk_avx_unaligned_erms_rtm) + IFUNC_IMPL_ADD (array, i, __memmove_chk, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memmove_chk_evex_unaligned) + IFUNC_IMPL_ADD (array, i, __memmove_chk, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memmove_chk_evex_unaligned_erms) IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_CPU_FEATURE (SSSE3), __memmove_chk_ssse3_back) @@ -97,14 +132,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memmove, HAS_ARCH_FEATURE (AVX_Usable), __memmove_avx_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memmove, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __memmove_avx_unaligned_rtm) + IFUNC_IMPL_ADD (array, i, memmove, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __memmove_avx_unaligned_erms_rtm) + IFUNC_IMPL_ADD (array, i, memmove, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memmove_evex_unaligned) + IFUNC_IMPL_ADD (array, i, memmove, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memmove_evex_unaligned_erms) IFUNC_IMPL_ADD (array, i, memmove, HAS_ARCH_FEATURE (AVX512F_Usable), __memmove_avx512_no_vzeroupper) IFUNC_IMPL_ADD (array, i, memmove, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __memmove_avx512_unaligned) IFUNC_IMPL_ADD (array, i, memmove, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __memmove_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3), __memmove_ssse3_back) @@ -119,8 +168,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/memrchr.c. */ IFUNC_IMPL (i, name, memrchr, IFUNC_IMPL_ADD (array, i, memrchr, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __memrchr_avx2) + IFUNC_IMPL_ADD (array, i, memrchr, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __memrchr_avx2_rtm) + IFUNC_IMPL_ADD (array, i, memrchr, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __memrchr_evex) + IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_sse2)) #ifdef SHARED @@ -139,10 +200,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_ARCH_FEATURE (AVX2_Usable), __memset_chk_avx2_unaligned_erms) IFUNC_IMPL_ADD (array, i, __memset_chk, - HAS_ARCH_FEATURE (AVX512F_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __memset_chk_avx2_unaligned_rtm) + IFUNC_IMPL_ADD (array, i, __memset_chk, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __memset_chk_avx2_unaligned_erms_rtm) + IFUNC_IMPL_ADD (array, i, __memset_chk, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __memset_chk_evex_unaligned) + IFUNC_IMPL_ADD (array, i, __memset_chk, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __memset_chk_evex_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memset_chk, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), __memset_chk_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, __memset_chk, - HAS_ARCH_FEATURE (AVX512F_Usable), + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), __memset_chk_avx512_unaligned) IFUNC_IMPL_ADD (array, i, __memset_chk, HAS_ARCH_FEATURE (AVX512F_Usable), @@ -164,10 +243,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_ARCH_FEATURE (AVX2_Usable), __memset_avx2_unaligned_erms) IFUNC_IMPL_ADD (array, i, memset, - HAS_ARCH_FEATURE (AVX512F_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __memset_avx2_unaligned_rtm) + IFUNC_IMPL_ADD (array, i, memset, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __memset_avx2_unaligned_erms_rtm) + IFUNC_IMPL_ADD (array, i, memset, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __memset_evex_unaligned) + IFUNC_IMPL_ADD (array, i, memset, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __memset_evex_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memset, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), __memset_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, memset, - HAS_ARCH_FEATURE (AVX512F_Usable), + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), __memset_avx512_unaligned) IFUNC_IMPL_ADD (array, i, memset, HAS_ARCH_FEATURE (AVX512F_Usable), @@ -177,22 +274,55 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/rawmemchr.c. */ IFUNC_IMPL (i, name, rawmemchr, IFUNC_IMPL_ADD (array, i, rawmemchr, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __rawmemchr_avx2) + IFUNC_IMPL_ADD (array, i, rawmemchr, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __rawmemchr_avx2_rtm) + IFUNC_IMPL_ADD (array, i, rawmemchr, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __rawmemchr_evex) IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_sse2)) /* Support sysdeps/x86_64/multiarch/strlen.c. */ IFUNC_IMPL (i, name, strlen, IFUNC_IMPL_ADD (array, i, strlen, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __strlen_avx2) + IFUNC_IMPL_ADD (array, i, strlen, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __strlen_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strlen, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __strlen_evex) IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2)) /* Support sysdeps/x86_64/multiarch/strnlen.c. */ IFUNC_IMPL (i, name, strnlen, IFUNC_IMPL_ADD (array, i, strnlen, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __strnlen_avx2) + IFUNC_IMPL_ADD (array, i, strnlen, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __strnlen_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strnlen, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __strnlen_evex) IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2)) /* Support sysdeps/x86_64/multiarch/stpncpy.c. */ @@ -201,6 +331,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __stpncpy_ssse3) IFUNC_IMPL_ADD (array, i, stpncpy, HAS_ARCH_FEATURE (AVX2_Usable), __stpncpy_avx2) + IFUNC_IMPL_ADD (array, i, stpncpy, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __stpncpy_avx2_rtm) + IFUNC_IMPL_ADD (array, i, stpncpy, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __stpncpy_evex) IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2)) @@ -211,6 +349,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __stpcpy_ssse3) IFUNC_IMPL_ADD (array, i, stpcpy, HAS_ARCH_FEATURE (AVX2_Usable), __stpcpy_avx2) + IFUNC_IMPL_ADD (array, i, stpcpy, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __stpcpy_avx2_rtm) + IFUNC_IMPL_ADD (array, i, stpcpy, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __stpcpy_evex) IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2)) @@ -245,6 +391,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, strcat, IFUNC_IMPL_ADD (array, i, strcat, HAS_ARCH_FEATURE (AVX2_Usable), __strcat_avx2) + IFUNC_IMPL_ADD (array, i, strcat, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __strcat_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strcat, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __strcat_evex) IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSSE3), __strcat_ssse3) IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2_unaligned) @@ -253,23 +407,56 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/strchr.c. */ IFUNC_IMPL (i, name, strchr, IFUNC_IMPL_ADD (array, i, strchr, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __strchr_avx2) + IFUNC_IMPL_ADD (array, i, strchr, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __strchr_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strchr, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __strchr_evex) IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf) IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2)) /* Support sysdeps/x86_64/multiarch/strchrnul.c. */ IFUNC_IMPL (i, name, strchrnul, IFUNC_IMPL_ADD (array, i, strchrnul, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __strchrnul_avx2) + IFUNC_IMPL_ADD (array, i, strchrnul, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __strchrnul_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strchrnul, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __strchrnul_evex) IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2)) /* Support sysdeps/x86_64/multiarch/strrchr.c. */ IFUNC_IMPL (i, name, strrchr, IFUNC_IMPL_ADD (array, i, strrchr, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __strrchr_avx2) + IFUNC_IMPL_ADD (array, i, strrchr, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __strrchr_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strrchr, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __strrchr_evex) IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2)) /* Support sysdeps/x86_64/multiarch/strcmp.c. */ @@ -277,6 +464,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, strcmp, HAS_ARCH_FEATURE (AVX2_Usable), __strcmp_avx2) + IFUNC_IMPL_ADD (array, i, strcmp, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __strcmp_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strcmp, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __strcmp_evex) IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2), __strcmp_sse42) IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSSE3), @@ -288,6 +484,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, strcpy, IFUNC_IMPL_ADD (array, i, strcpy, HAS_ARCH_FEATURE (AVX2_Usable), __strcpy_avx2) + IFUNC_IMPL_ADD (array, i, strcpy, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __strcpy_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strcpy, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __strcpy_evex) IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSSE3), __strcpy_ssse3) IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_sse2_unaligned) @@ -331,6 +535,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, strncat, IFUNC_IMPL_ADD (array, i, strncat, HAS_ARCH_FEATURE (AVX2_Usable), __strncat_avx2) + IFUNC_IMPL_ADD (array, i, strncat, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __strncat_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strncat, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __strncat_evex) IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSSE3), __strncat_ssse3) IFUNC_IMPL_ADD (array, i, strncat, 1, @@ -341,6 +553,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL (i, name, strncpy, IFUNC_IMPL_ADD (array, i, strncpy, HAS_ARCH_FEATURE (AVX2_Usable), __strncpy_avx2) + IFUNC_IMPL_ADD (array, i, strncpy, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __strncpy_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strncpy, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __strncpy_evex) IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSSE3), __strncpy_ssse3) IFUNC_IMPL_ADD (array, i, strncpy, 1, @@ -368,29 +588,73 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/wcschr.c. */ IFUNC_IMPL (i, name, wcschr, IFUNC_IMPL_ADD (array, i, wcschr, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __wcschr_avx2) + IFUNC_IMPL_ADD (array, i, wcschr, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __wcschr_avx2_rtm) + IFUNC_IMPL_ADD (array, i, wcschr, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __wcschr_evex) IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2)) /* Support sysdeps/x86_64/multiarch/wcsrchr.c. */ IFUNC_IMPL (i, name, wcsrchr, IFUNC_IMPL_ADD (array, i, wcsrchr, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __wcsrchr_avx2) + IFUNC_IMPL_ADD (array, i, wcsrchr, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __wcsrchr_avx2_rtm) + IFUNC_IMPL_ADD (array, i, wcsrchr, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __wcsrchr_evex) IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2)) /* Support sysdeps/x86_64/multiarch/wcscmp.c. */ IFUNC_IMPL (i, name, wcscmp, IFUNC_IMPL_ADD (array, i, wcscmp, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __wcscmp_avx2) + IFUNC_IMPL_ADD (array, i, wcscmp, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __wcscmp_avx2_rtm) + IFUNC_IMPL_ADD (array, i, wcscmp, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __wcscmp_evex) IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_sse2)) /* Support sysdeps/x86_64/multiarch/wcsncmp.c. */ IFUNC_IMPL (i, name, wcsncmp, IFUNC_IMPL_ADD (array, i, wcsncmp, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __wcsncmp_avx2) + IFUNC_IMPL_ADD (array, i, wcsncmp, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __wcsncmp_avx2_rtm) + IFUNC_IMPL_ADD (array, i, wcsncmp, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __wcsncmp_evex) IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_sse2)) /* Support sysdeps/x86_64/multiarch/wcscpy.c. */ @@ -402,15 +666,40 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/wcslen.c. */ IFUNC_IMPL (i, name, wcslen, IFUNC_IMPL_ADD (array, i, wcslen, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __wcslen_avx2) + IFUNC_IMPL_ADD (array, i, wcslen, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __wcslen_avx2_rtm) + IFUNC_IMPL_ADD (array, i, wcslen, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __wcslen_evex) + IFUNC_IMPL_ADD (array, i, wcslen, + CPU_FEATURE_USABLE (SSE4_1), + __wcslen_sse4_1) IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_sse2)) /* Support sysdeps/x86_64/multiarch/wcsnlen.c. */ IFUNC_IMPL (i, name, wcsnlen, IFUNC_IMPL_ADD (array, i, wcsnlen, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __wcsnlen_avx2) + IFUNC_IMPL_ADD (array, i, wcsnlen, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __wcsnlen_avx2_rtm) + IFUNC_IMPL_ADD (array, i, wcsnlen, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __wcsnlen_evex) IFUNC_IMPL_ADD (array, i, wcsnlen, HAS_CPU_FEATURE (SSE4_1), __wcsnlen_sse4_1) @@ -419,8 +708,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, /* Support sysdeps/x86_64/multiarch/wmemchr.c. */ IFUNC_IMPL (i, name, wmemchr, IFUNC_IMPL_ADD (array, i, wmemchr, - HAS_ARCH_FEATURE (AVX2_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2)), __wmemchr_avx2) + IFUNC_IMPL_ADD (array, i, wmemchr, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (BMI2) + && HAS_CPU_FEATURE (RTM)), + __wmemchr_avx2_rtm) + IFUNC_IMPL_ADD (array, i, wmemchr, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (BMI2)), + __wmemchr_evex) IFUNC_IMPL_ADD (array, i, wmemchr, 1, __wmemchr_sse2)) /* Support sysdeps/x86_64/multiarch/wmemcmp.c. */ @@ -429,6 +729,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, (HAS_ARCH_FEATURE (AVX2_Usable) && HAS_CPU_FEATURE (MOVBE)), __wmemcmp_avx2_movbe) + IFUNC_IMPL_ADD (array, i, wmemcmp, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (MOVBE) + && HAS_CPU_FEATURE (RTM)), + __wmemcmp_avx2_movbe_rtm) + IFUNC_IMPL_ADD (array, i, wmemcmp, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable) + && HAS_CPU_FEATURE (MOVBE)), + __wmemcmp_evex_movbe) IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSE4_1), __wmemcmp_sse4_1) IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSSE3), @@ -443,7 +753,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_ARCH_FEATURE (AVX2_Usable), __wmemset_avx2_unaligned) IFUNC_IMPL_ADD (array, i, wmemset, - HAS_ARCH_FEATURE (AVX512F_Usable), + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __wmemset_avx2_unaligned_rtm) + IFUNC_IMPL_ADD (array, i, wmemset, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __wmemset_evex_unaligned) + IFUNC_IMPL_ADD (array, i, wmemset, + HAS_ARCH_FEATURE (AVX512VL_Usable), __wmemset_avx512_unaligned)) #ifdef SHARED @@ -453,10 +770,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_ARCH_FEATURE (AVX512F_Usable), __memcpy_chk_avx512_no_vzeroupper) IFUNC_IMPL_ADD (array, i, __memcpy_chk, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __memcpy_chk_avx512_unaligned) IFUNC_IMPL_ADD (array, i, __memcpy_chk, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __memcpy_chk_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_ARCH_FEATURE (AVX_Usable), @@ -464,6 +781,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_ARCH_FEATURE (AVX_Usable), __memcpy_chk_avx_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __memcpy_chk_avx_unaligned_rtm) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __memcpy_chk_avx_unaligned_erms_rtm) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memcpy_chk_evex_unaligned) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memcpy_chk_evex_unaligned_erms) IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_CPU_FEATURE (SSSE3), __memcpy_chk_ssse3_back) @@ -486,6 +817,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memcpy, HAS_ARCH_FEATURE (AVX_Usable), __memcpy_avx_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memcpy, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __memcpy_avx_unaligned_rtm) + IFUNC_IMPL_ADD (array, i, memcpy, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __memcpy_avx_unaligned_erms_rtm) + IFUNC_IMPL_ADD (array, i, memcpy, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memcpy_evex_unaligned) + IFUNC_IMPL_ADD (array, i, memcpy, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memcpy_evex_unaligned_erms) IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3), __memcpy_ssse3_back) IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3), @@ -494,10 +839,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_ARCH_FEATURE (AVX512F_Usable), __memcpy_avx512_no_vzeroupper) IFUNC_IMPL_ADD (array, i, memcpy, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __memcpy_avx512_unaligned) IFUNC_IMPL_ADD (array, i, memcpy, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __memcpy_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned) IFUNC_IMPL_ADD (array, i, memcpy, 1, @@ -511,10 +856,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_ARCH_FEATURE (AVX512F_Usable), __mempcpy_chk_avx512_no_vzeroupper) IFUNC_IMPL_ADD (array, i, __mempcpy_chk, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __mempcpy_chk_avx512_unaligned) IFUNC_IMPL_ADD (array, i, __mempcpy_chk, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __mempcpy_chk_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_ARCH_FEATURE (AVX_Usable), @@ -522,6 +867,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_ARCH_FEATURE (AVX_Usable), __mempcpy_chk_avx_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __mempcpy_chk_avx_unaligned_rtm) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __mempcpy_chk_avx_unaligned_erms_rtm) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __mempcpy_chk_evex_unaligned) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __mempcpy_chk_evex_unaligned_erms) IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_CPU_FEATURE (SSSE3), __mempcpy_chk_ssse3_back) @@ -542,10 +901,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, HAS_ARCH_FEATURE (AVX512F_Usable), __mempcpy_avx512_no_vzeroupper) IFUNC_IMPL_ADD (array, i, mempcpy, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __mempcpy_avx512_unaligned) IFUNC_IMPL_ADD (array, i, mempcpy, - HAS_ARCH_FEATURE (AVX512F_Usable), + HAS_ARCH_FEATURE (AVX512VL_Usable), __mempcpy_avx512_unaligned_erms) IFUNC_IMPL_ADD (array, i, mempcpy, HAS_ARCH_FEATURE (AVX_Usable), @@ -553,6 +912,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, mempcpy, HAS_ARCH_FEATURE (AVX_Usable), __mempcpy_avx_unaligned_erms) + IFUNC_IMPL_ADD (array, i, mempcpy, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __mempcpy_avx_unaligned_rtm) + IFUNC_IMPL_ADD (array, i, mempcpy, + (HAS_ARCH_FEATURE (AVX_Usable) + && HAS_CPU_FEATURE (RTM)), + __mempcpy_avx_unaligned_erms_rtm) + IFUNC_IMPL_ADD (array, i, mempcpy, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __mempcpy_evex_unaligned) + IFUNC_IMPL_ADD (array, i, mempcpy, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __mempcpy_evex_unaligned_erms) IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3), __mempcpy_ssse3_back) IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3), @@ -568,6 +941,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, strncmp, HAS_ARCH_FEATURE (AVX2_Usable), __strncmp_avx2) + IFUNC_IMPL_ADD (array, i, strncmp, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (RTM)), + __strncmp_avx2_rtm) + IFUNC_IMPL_ADD (array, i, strncmp, + (HAS_ARCH_FEATURE (AVX512VL_Usable) + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __strncmp_evex) IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSE4_2), __strncmp_sse42) IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSSE3), @@ -582,6 +963,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, __wmemset_chk, HAS_ARCH_FEATURE (AVX2_Usable), __wmemset_chk_avx2_unaligned) + IFUNC_IMPL_ADD (array, i, __wmemset_chk, + HAS_ARCH_FEATURE (AVX512VL_Usable), + __wmemset_chk_evex_unaligned) IFUNC_IMPL_ADD (array, i, __wmemset_chk, HAS_ARCH_FEATURE (AVX512F_Usable), __wmemset_chk_avx512_unaligned)) diff --git a/sysdeps/x86_64/multiarch/ifunc-memcmp.h b/sysdeps/x86_64/multiarch/ifunc-memcmp.h index c14db39cf..ebbb0c01c 100644 --- a/sysdeps/x86_64/multiarch/ifunc-memcmp.h +++ b/sysdeps/x86_64/multiarch/ifunc-memcmp.h @@ -23,17 +23,28 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe_rtm) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_movbe) attribute_hidden; static inline void * IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) && CPU_FEATURES_CPU_P (cpu_features, MOVBE) && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) - return OPTIMIZE (avx2_movbe); + { + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + return OPTIMIZE (evex_movbe); + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + return OPTIMIZE (avx2_movbe_rtm); + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE (avx2_movbe); + } if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1)) return OPTIMIZE (sse4_1); diff --git a/sysdeps/x86_64/multiarch/ifunc-memmove.h b/sysdeps/x86_64/multiarch/ifunc-memmove.h index 81673d201..dfc5a2848 100644 --- a/sysdeps/x86_64/multiarch/ifunc-memmove.h +++ b/sysdeps/x86_64/multiarch/ifunc-memmove.h @@ -29,6 +29,14 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3_back) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_rtm) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms_rtm) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms) + attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms) @@ -48,21 +56,42 @@ IFUNC_SELECTOR (void) if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) { - if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) - return OPTIMIZE (avx512_no_vzeroupper); + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (avx512_unaligned_erms); - if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) - return OPTIMIZE (avx512_unaligned_erms); + return OPTIMIZE (avx512_unaligned); + } - return OPTIMIZE (avx512_unaligned); + return OPTIMIZE (avx512_no_vzeroupper); } if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) { - if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) - return OPTIMIZE (avx_unaligned_erms); + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (evex_unaligned_erms); + + return OPTIMIZE (evex_unaligned); + } + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (avx_unaligned_erms_rtm); + + return OPTIMIZE (avx_unaligned_rtm); + } + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (avx_unaligned_erms); - return OPTIMIZE (avx_unaligned); + return OPTIMIZE (avx_unaligned); + } } if (!CPU_FEATURES_CPU_P (cpu_features, SSSE3) diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h index d69029338..48fdb24b0 100644 --- a/sysdeps/x86_64/multiarch/ifunc-memset.h +++ b/sysdeps/x86_64/multiarch/ifunc-memset.h @@ -27,6 +27,14 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms) extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms_rtm) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms) + attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms) @@ -45,21 +53,44 @@ IFUNC_SELECTOR (void) if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) { - if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) - return OPTIMIZE (avx512_no_vzeroupper); + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (avx512_unaligned_erms); - if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) - return OPTIMIZE (avx512_unaligned_erms); + return OPTIMIZE (avx512_unaligned); + } - return OPTIMIZE (avx512_unaligned); + return OPTIMIZE (avx512_no_vzeroupper); } if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)) { - if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) - return OPTIMIZE (avx2_unaligned_erms); - else - return OPTIMIZE (avx2_unaligned); + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (evex_unaligned_erms); + + return OPTIMIZE (evex_unaligned); + } + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (avx2_unaligned_erms_rtm); + + return OPTIMIZE (avx2_unaligned_rtm); + } + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE (avx2_unaligned_erms); + + return OPTIMIZE (avx2_unaligned); + } } if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h index ae4f45180..f38a3b750 100644 --- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h +++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h @@ -25,16 +25,27 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; static inline void * IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) - return OPTIMIZE (avx2); + { + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + return OPTIMIZE (evex); + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + return OPTIMIZE (avx2_rtm); + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE (avx2); + } if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load)) return OPTIMIZE (sse2_unaligned); diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h new file mode 100644 index 000000000..564cc8cbe --- /dev/null +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h @@ -0,0 +1,52 @@ +/* Common definition for ifunc selections for wcslen and wcsnlen + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017-2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + +static inline void * +IFUNC_SELECTOR (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + && CPU_FEATURES_CPU_P (cpu_features, BMI2) + && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + { + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + return OPTIMIZE (evex); + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + return OPTIMIZE (avx2_rtm); + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE (avx2); + } + + if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1)) + return OPTIMIZE (sse4_1); + + return OPTIMIZE (sse2); +} diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h index 583f6310a..0ce29a229 100644 --- a/sysdeps/x86_64/multiarch/ifunc-wmemset.h +++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h @@ -20,6 +20,9 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden; static inline void * @@ -27,14 +30,21 @@ IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) { - if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) - && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) - return OPTIMIZE (avx512_unaligned); - else + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)) + { + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) + return OPTIMIZE (avx512_unaligned); + + return OPTIMIZE (evex_unaligned); + } + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + return OPTIMIZE (avx2_unaligned_rtm); + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) return OPTIMIZE (avx2_unaligned); } diff --git a/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S new file mode 100644 index 000000000..87b076c7c --- /dev/null +++ b/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S @@ -0,0 +1,12 @@ +#ifndef MEMCHR +# define MEMCHR __memchr_avx2_rtm +#endif + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + +#define SECTION(p) p##.avx.rtm + +#include "memchr-avx2.S" diff --git a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S index e5a9abd21..0987616a1 100644 --- a/sysdeps/x86_64/multiarch/memchr-avx2.S +++ b/sysdeps/x86_64/multiarch/memchr-avx2.S @@ -26,319 +26,407 @@ # ifdef USE_AS_WMEMCHR # define VPCMPEQ vpcmpeqd +# define VPBROADCAST vpbroadcastd +# define CHAR_SIZE 4 # else # define VPCMPEQ vpcmpeqb +# define VPBROADCAST vpbroadcastb +# define CHAR_SIZE 1 +# endif + +# ifdef USE_AS_RAWMEMCHR +# define ERAW_PTR_REG ecx +# define RRAW_PTR_REG rcx +# define ALGN_PTR_REG rdi +# else +# define ERAW_PTR_REG edi +# define RRAW_PTR_REG rdi +# define ALGN_PTR_REG rcx # endif # ifndef VZEROUPPER # define VZEROUPPER vzeroupper # endif +# ifndef SECTION +# define SECTION(p) p##.avx +# endif + # define VEC_SIZE 32 +# define PAGE_SIZE 4096 +# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) - .section .text.avx,"ax",@progbits + .section SECTION(.text),"ax",@progbits ENTRY (MEMCHR) # ifndef USE_AS_RAWMEMCHR /* Check for zero length. */ +# ifdef __ILP32__ + /* Clear upper bits. */ + and %RDX_LP, %RDX_LP +# else test %RDX_LP, %RDX_LP +# endif jz L(null) # endif - movl %edi, %ecx - /* Broadcast CHAR to YMM0. */ + /* Broadcast CHAR to YMMMATCH. */ vmovd %esi, %xmm0 -# ifdef USE_AS_WMEMCHR - shl $2, %RDX_LP - vpbroadcastd %xmm0, %ymm0 -# else -# ifdef __ILP32__ - /* Clear the upper 32 bits. */ - movl %edx, %edx -# endif - vpbroadcastb %xmm0, %ymm0 -# endif + VPBROADCAST %xmm0, %ymm0 /* Check if we may cross page boundary with one vector load. */ - andl $(2 * VEC_SIZE - 1), %ecx - cmpl $VEC_SIZE, %ecx - ja L(cros_page_boundary) + movl %edi, %eax + andl $(PAGE_SIZE - 1), %eax + cmpl $(PAGE_SIZE - VEC_SIZE), %eax + ja L(cross_page_boundary) /* Check the first VEC_SIZE bytes. */ - VPCMPEQ (%rdi), %ymm0, %ymm1 + VPCMPEQ (%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax - testl %eax, %eax - # ifndef USE_AS_RAWMEMCHR - jnz L(first_vec_x0_check) - /* Adjust length and check the end of data. */ - subq $VEC_SIZE, %rdx - jbe L(zero) -# else - jnz L(first_vec_x0) + /* If length < CHAR_PER_VEC handle special. */ + cmpq $CHAR_PER_VEC, %rdx + jbe L(first_vec_x0) # endif - - /* Align data for aligned loads in the loop. */ - addq $VEC_SIZE, %rdi - andl $(VEC_SIZE - 1), %ecx - andq $-VEC_SIZE, %rdi + testl %eax, %eax + jz L(aligned_more) + tzcntl %eax, %eax + addq %rdi, %rax + VZEROUPPER_RETURN # ifndef USE_AS_RAWMEMCHR - /* Adjust length. */ - addq %rcx, %rdx + .p2align 5 +L(first_vec_x0): + /* Check if first match was before length. */ + tzcntl %eax, %eax +# ifdef USE_AS_WMEMCHR + /* NB: Multiply length by 4 to get byte count. */ + sall $2, %edx +# endif + xorl %ecx, %ecx + cmpl %eax, %edx + leaq (%rdi, %rax), %rax + cmovle %rcx, %rax + VZEROUPPER_RETURN - subq $(VEC_SIZE * 4), %rdx - jbe L(last_4x_vec_or_less) +L(null): + xorl %eax, %eax + ret # endif - jmp L(more_4x_vec) - .p2align 4 -L(cros_page_boundary): - andl $(VEC_SIZE - 1), %ecx - andq $-VEC_SIZE, %rdi - VPCMPEQ (%rdi), %ymm0, %ymm1 +L(cross_page_boundary): + /* Save pointer before aligning as its original value is + necessary for computer return address if byte is found or + adjusting length if it is not and this is memchr. */ + movq %rdi, %rcx + /* Align data to VEC_SIZE - 1. ALGN_PTR_REG is rcx for memchr + and rdi for rawmemchr. */ + orq $(VEC_SIZE - 1), %ALGN_PTR_REG + VPCMPEQ -(VEC_SIZE - 1)(%ALGN_PTR_REG), %ymm0, %ymm1 vpmovmskb %ymm1, %eax +# ifndef USE_AS_RAWMEMCHR + /* Calculate length until end of page (length checked for a + match). */ + leaq 1(%ALGN_PTR_REG), %rsi + subq %RRAW_PTR_REG, %rsi +# ifdef USE_AS_WMEMCHR + /* NB: Divide bytes by 4 to get wchar_t count. */ + shrl $2, %esi +# endif +# endif /* Remove the leading bytes. */ - sarl %cl, %eax - testl %eax, %eax - jz L(aligned_more) - tzcntl %eax, %eax + sarxl %ERAW_PTR_REG, %eax, %eax # ifndef USE_AS_RAWMEMCHR /* Check the end of data. */ - cmpq %rax, %rdx - jbe L(zero) + cmpq %rsi, %rdx + jbe L(first_vec_x0) # endif + testl %eax, %eax + jz L(cross_page_continue) + tzcntl %eax, %eax + addq %RRAW_PTR_REG, %rax +L(return_vzeroupper): + ZERO_UPPER_VEC_REGISTERS_RETURN + + .p2align 4 +L(first_vec_x1): + tzcntl %eax, %eax + incq %rdi addq %rdi, %rax - addq %rcx, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 -L(aligned_more): -# ifndef USE_AS_RAWMEMCHR - /* Calculate "rdx + rcx - VEC_SIZE" with "rdx - (VEC_SIZE - rcx)" - instead of "(rdx + rcx) - VEC_SIZE" to void possible addition - overflow. */ - negq %rcx - addq $VEC_SIZE, %rcx +L(first_vec_x2): + tzcntl %eax, %eax + addq $(VEC_SIZE + 1), %rdi + addq %rdi, %rax + VZEROUPPER_RETURN - /* Check the end of data. */ - subq %rcx, %rdx - jbe L(zero) -# endif + .p2align 4 +L(first_vec_x3): + tzcntl %eax, %eax + addq $(VEC_SIZE * 2 + 1), %rdi + addq %rdi, %rax + VZEROUPPER_RETURN - addq $VEC_SIZE, %rdi -# ifndef USE_AS_RAWMEMCHR - subq $(VEC_SIZE * 4), %rdx - jbe L(last_4x_vec_or_less) -# endif + .p2align 4 +L(first_vec_x4): + tzcntl %eax, %eax + addq $(VEC_SIZE * 3 + 1), %rdi + addq %rdi, %rax + VZEROUPPER_RETURN -L(more_4x_vec): + .p2align 4 +L(aligned_more): /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time since data is only aligned to VEC_SIZE. */ - VPCMPEQ (%rdi), %ymm0, %ymm1 - vpmovmskb %ymm1, %eax - testl %eax, %eax - jnz L(first_vec_x0) - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 +# ifndef USE_AS_RAWMEMCHR +L(cross_page_continue): + /* Align data to VEC_SIZE - 1. */ + xorl %ecx, %ecx + subl %edi, %ecx + orq $(VEC_SIZE - 1), %rdi + /* esi is for adjusting length to see if near the end. */ + leal (VEC_SIZE * 4 + 1)(%rdi, %rcx), %esi +# ifdef USE_AS_WMEMCHR + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %esi +# endif +# else + orq $(VEC_SIZE - 1), %rdi +L(cross_page_continue): +# endif + /* Load first VEC regardless. */ + VPCMPEQ 1(%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax +# ifndef USE_AS_RAWMEMCHR + /* Adjust length. If near end handle specially. */ + subq %rsi, %rdx + jbe L(last_4x_vec_or_less) +# endif testl %eax, %eax jnz L(first_vec_x1) - VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1 + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax testl %eax, %eax jnz L(first_vec_x2) - VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1 + VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax testl %eax, %eax jnz L(first_vec_x3) - addq $(VEC_SIZE * 4), %rdi - -# ifndef USE_AS_RAWMEMCHR - subq $(VEC_SIZE * 4), %rdx - jbe L(last_4x_vec_or_less) -# endif - - /* Align data to 4 * VEC_SIZE. */ - movq %rdi, %rcx - andl $(4 * VEC_SIZE - 1), %ecx - andq $-(4 * VEC_SIZE), %rdi + VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + testl %eax, %eax + jnz L(first_vec_x4) # ifndef USE_AS_RAWMEMCHR - /* Adjust length. */ + /* Check if at last VEC_SIZE * 4 length. */ + subq $(CHAR_PER_VEC * 4), %rdx + jbe L(last_4x_vec_or_less_cmpeq) + /* Align data to VEC_SIZE * 4 - 1 for the loop and readjust + length. */ + incq %rdi + movl %edi, %ecx + orq $(VEC_SIZE * 4 - 1), %rdi + andl $(VEC_SIZE * 4 - 1), %ecx +# ifdef USE_AS_WMEMCHR + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %ecx +# endif addq %rcx, %rdx +# else + /* Align data to VEC_SIZE * 4 - 1 for loop. */ + incq %rdi + orq $(VEC_SIZE * 4 - 1), %rdi # endif + /* Compare 4 * VEC at a time forward. */ .p2align 4 L(loop_4x_vec): - /* Compare 4 * VEC at a time forward. */ - VPCMPEQ (%rdi), %ymm0, %ymm1 - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm2 - VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm3 - VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm4 - + VPCMPEQ 1(%rdi), %ymm0, %ymm1 + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm2 + VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm3 + VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm4 vpor %ymm1, %ymm2, %ymm5 vpor %ymm3, %ymm4, %ymm6 vpor %ymm5, %ymm6, %ymm5 - vpmovmskb %ymm5, %eax - testl %eax, %eax - jnz L(4x_vec_end) - - addq $(VEC_SIZE * 4), %rdi - + vpmovmskb %ymm5, %ecx # ifdef USE_AS_RAWMEMCHR - jmp L(loop_4x_vec) + subq $-(VEC_SIZE * 4), %rdi + testl %ecx, %ecx + jz L(loop_4x_vec) # else - subq $(VEC_SIZE * 4), %rdx - ja L(loop_4x_vec) + testl %ecx, %ecx + jnz L(loop_4x_vec_end) -L(last_4x_vec_or_less): - /* Less than 4 * VEC and aligned to VEC_SIZE. */ - addl $(VEC_SIZE * 2), %edx - jle L(last_2x_vec) + subq $-(VEC_SIZE * 4), %rdi - VPCMPEQ (%rdi), %ymm0, %ymm1 - vpmovmskb %ymm1, %eax - testl %eax, %eax - jnz L(first_vec_x0) + subq $(CHAR_PER_VEC * 4), %rdx + ja L(loop_4x_vec) - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 + /* Fall through into less than 4 remaining vectors of length + case. */ + VPCMPEQ (VEC_SIZE * 0 + 1)(%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax + .p2align 4 +L(last_4x_vec_or_less): +# ifdef USE_AS_WMEMCHR + /* NB: Multiply length by 4 to get byte count. */ + sall $2, %edx +# endif + /* Check if first VEC contained match. */ testl %eax, %eax - jnz L(first_vec_x1) + jnz L(first_vec_x1_check) - VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1 - vpmovmskb %ymm1, %eax - testl %eax, %eax + /* If remaining length > VEC_SIZE * 2. */ + addl $(VEC_SIZE * 2), %edx + jg L(last_4x_vec) - jnz L(first_vec_x2_check) - subl $VEC_SIZE, %edx - jle L(zero) +L(last_2x_vec): + /* If remaining length < VEC_SIZE. */ + addl $VEC_SIZE, %edx + jle L(zero_end) - VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1 + /* Check VEC2 and compare any match with remaining length. */ + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax - testl %eax, %eax - - jnz L(first_vec_x3_check) - xorl %eax, %eax - VZEROUPPER - ret + tzcntl %eax, %eax + cmpl %eax, %edx + jbe L(set_zero_end) + addq $(VEC_SIZE + 1), %rdi + addq %rdi, %rax +L(zero_end): + VZEROUPPER_RETURN .p2align 4 -L(last_2x_vec): - addl $(VEC_SIZE * 2), %edx - VPCMPEQ (%rdi), %ymm0, %ymm1 +L(loop_4x_vec_end): +# endif + /* rawmemchr will fall through into this if match was found in + loop. */ + vpmovmskb %ymm1, %eax testl %eax, %eax + jnz L(last_vec_x1_return) - jnz L(first_vec_x0_check) - subl $VEC_SIZE, %edx - jle L(zero) - - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 - vpmovmskb %ymm1, %eax + vpmovmskb %ymm2, %eax testl %eax, %eax - jnz L(first_vec_x1_check) - xorl %eax, %eax - VZEROUPPER - ret + jnz L(last_vec_x2_return) - .p2align 4 -L(first_vec_x0_check): - tzcntl %eax, %eax - /* Check the end of data. */ - cmpq %rax, %rdx - jbe L(zero) + vpmovmskb %ymm3, %eax + /* Combine VEC3 matches (eax) with VEC4 matches (ecx). */ + salq $32, %rcx + orq %rcx, %rax + tzcntq %rax, %rax +# ifdef USE_AS_RAWMEMCHR + subq $(VEC_SIZE * 2 - 1), %rdi +# else + subq $-(VEC_SIZE * 2 + 1), %rdi +# endif addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN +# ifndef USE_AS_RAWMEMCHR .p2align 4 L(first_vec_x1_check): tzcntl %eax, %eax - /* Check the end of data. */ - cmpq %rax, %rdx - jbe L(zero) - addq $VEC_SIZE, %rax + /* Adjust length. */ + subl $-(VEC_SIZE * 4), %edx + /* Check if match within remaining length. */ + cmpl %eax, %edx + jbe L(set_zero_end) + incq %rdi addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN + .p2align 4 +L(set_zero_end): + xorl %eax, %eax + VZEROUPPER_RETURN +# endif .p2align 4 -L(first_vec_x2_check): +L(last_vec_x1_return): tzcntl %eax, %eax - /* Check the end of data. */ - cmpq %rax, %rdx - jbe L(zero) - addq $(VEC_SIZE * 2), %rax +# ifdef USE_AS_RAWMEMCHR + subq $(VEC_SIZE * 4 - 1), %rdi +# else + incq %rdi +# endif addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 -L(first_vec_x3_check): +L(last_vec_x2_return): tzcntl %eax, %eax - /* Check the end of data. */ - cmpq %rax, %rdx - jbe L(zero) - addq $(VEC_SIZE * 3), %rax +# ifdef USE_AS_RAWMEMCHR + subq $(VEC_SIZE * 3 - 1), %rdi +# else + subq $-(VEC_SIZE + 1), %rdi +# endif addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN +# ifndef USE_AS_RAWMEMCHR .p2align 4 -L(zero): - VZEROUPPER -L(null): - xorl %eax, %eax - ret -# endif +L(last_4x_vec_or_less_cmpeq): + VPCMPEQ (VEC_SIZE * 4 + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax +# ifdef USE_AS_WMEMCHR + /* NB: Multiply length by 4 to get byte count. */ + sall $2, %edx +# endif + subq $-(VEC_SIZE * 4), %rdi + /* Check first VEC regardless. */ + testl %eax, %eax + jnz L(first_vec_x1_check) + /* If remaining length <= CHAR_PER_VEC * 2. */ + addl $(VEC_SIZE * 2), %edx + jle L(last_2x_vec) .p2align 4 -L(first_vec_x0): - tzcntl %eax, %eax - addq %rdi, %rax - VZEROUPPER - ret +L(last_4x_vec): + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + testl %eax, %eax + jnz L(last_vec_x2_return) - .p2align 4 -L(first_vec_x1): - tzcntl %eax, %eax - addq $VEC_SIZE, %rax - addq %rdi, %rax - VZEROUPPER - ret + VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax - .p2align 4 -L(first_vec_x2): + /* Create mask for possible matches within remaining length. */ + movq $-1, %rcx + bzhiq %rdx, %rcx, %rcx + + /* Test matches in data against length match. */ + andl %ecx, %eax + jnz L(last_vec_x3) + + /* if remaining length <= VEC_SIZE * 3 (Note this is after + remaining length was found to be > VEC_SIZE * 2. */ + subl $VEC_SIZE, %edx + jbe L(zero_end2) + + VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + /* Shift remaining length mask for last VEC. */ + shrq $32, %rcx + andl %ecx, %eax + jz L(zero_end2) tzcntl %eax, %eax - addq $(VEC_SIZE * 2), %rax + addq $(VEC_SIZE * 3 + 1), %rdi addq %rdi, %rax - VZEROUPPER - ret +L(zero_end2): + VZEROUPPER_RETURN .p2align 4 -L(4x_vec_end): - vpmovmskb %ymm1, %eax - testl %eax, %eax - jnz L(first_vec_x0) - vpmovmskb %ymm2, %eax - testl %eax, %eax - jnz L(first_vec_x1) - vpmovmskb %ymm3, %eax - testl %eax, %eax - jnz L(first_vec_x2) - vpmovmskb %ymm4, %eax - testl %eax, %eax -L(first_vec_x3): +L(last_vec_x3): tzcntl %eax, %eax - addq $(VEC_SIZE * 3), %rax + subq $-(VEC_SIZE * 2 + 1), %rdi addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN +# endif END (MEMCHR) #endif diff --git a/sysdeps/x86_64/multiarch/memchr-evex.S b/sysdeps/x86_64/multiarch/memchr-evex.S new file mode 100644 index 000000000..f3fdad4fd --- /dev/null +++ b/sysdeps/x86_64/multiarch/memchr-evex.S @@ -0,0 +1,478 @@ +/* memchr/wmemchr optimized with 256-bit EVEX instructions. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +# include + +# ifndef MEMCHR +# define MEMCHR __memchr_evex +# endif + +# ifdef USE_AS_WMEMCHR +# define VPBROADCAST vpbroadcastd +# define VPMINU vpminud +# define VPCMP vpcmpd +# define VPCMPEQ vpcmpeqd +# define CHAR_SIZE 4 +# else +# define VPBROADCAST vpbroadcastb +# define VPMINU vpminub +# define VPCMP vpcmpb +# define VPCMPEQ vpcmpeqb +# define CHAR_SIZE 1 +# endif + +# ifdef USE_AS_RAWMEMCHR +# define RAW_PTR_REG rcx +# define ALGN_PTR_REG rdi +# else +# define RAW_PTR_REG rdi +# define ALGN_PTR_REG rcx +# endif + +# define XMMZERO xmm23 +# define YMMZERO ymm23 +# define XMMMATCH xmm16 +# define YMMMATCH ymm16 +# define YMM1 ymm17 +# define YMM2 ymm18 +# define YMM3 ymm19 +# define YMM4 ymm20 +# define YMM5 ymm21 +# define YMM6 ymm22 + +# define VEC_SIZE 32 +# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) +# define PAGE_SIZE 4096 + + .section .text.evex,"ax",@progbits +ENTRY (MEMCHR) +# ifndef USE_AS_RAWMEMCHR + /* Check for zero length. */ + test %RDX_LP, %RDX_LP + jz L(zero) + +# ifdef __ILP32__ + /* Clear the upper 32 bits. */ + movl %edx, %edx +# endif +# endif + /* Broadcast CHAR to YMMMATCH. */ + VPBROADCAST %esi, %YMMMATCH + /* Check if we may cross page boundary with one vector load. */ + movl %edi, %eax + andl $(PAGE_SIZE - 1), %eax + cmpl $(PAGE_SIZE - VEC_SIZE), %eax + ja L(cross_page_boundary) + + /* Check the first VEC_SIZE bytes. */ + VPCMP $0, (%rdi), %YMMMATCH, %k0 + kmovd %k0, %eax +# ifndef USE_AS_RAWMEMCHR + /* If length < CHAR_PER_VEC handle special. */ + cmpq $CHAR_PER_VEC, %rdx + jbe L(first_vec_x0) +# endif + testl %eax, %eax + jz L(aligned_more) + tzcntl %eax, %eax +# ifdef USE_AS_WMEMCHR + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + leaq (%rdi, %rax, CHAR_SIZE), %rax +# else + addq %rdi, %rax +# endif + ret + +# ifndef USE_AS_RAWMEMCHR +L(zero): + xorl %eax, %eax + ret + + .p2align 5 +L(first_vec_x0): + /* Check if first match was before length. */ + tzcntl %eax, %eax + xorl %ecx, %ecx + cmpl %eax, %edx + leaq (%rdi, %rax, CHAR_SIZE), %rax + cmovle %rcx, %rax + ret +# else + /* NB: first_vec_x0 is 17 bytes which will leave + cross_page_boundary (which is relatively cold) close enough + to ideal alignment. So only realign L(cross_page_boundary) if + rawmemchr. */ + .p2align 4 +# endif +L(cross_page_boundary): + /* Save pointer before aligning as its original value is + necessary for computer return address if byte is found or + adjusting length if it is not and this is memchr. */ + movq %rdi, %rcx + /* Align data to VEC_SIZE. ALGN_PTR_REG is rcx for memchr and rdi + for rawmemchr. */ + andq $-VEC_SIZE, %ALGN_PTR_REG + VPCMP $0, (%ALGN_PTR_REG), %YMMMATCH, %k0 + kmovd %k0, %r8d +# ifdef USE_AS_WMEMCHR + /* NB: Divide shift count by 4 since each bit in K0 represent 4 + bytes. */ + sarl $2, %eax +# endif +# ifndef USE_AS_RAWMEMCHR + movl $(PAGE_SIZE / CHAR_SIZE), %esi + subl %eax, %esi +# endif +# ifdef USE_AS_WMEMCHR + andl $(CHAR_PER_VEC - 1), %eax +# endif + /* Remove the leading bytes. */ + sarxl %eax, %r8d, %eax +# ifndef USE_AS_RAWMEMCHR + /* Check the end of data. */ + cmpq %rsi, %rdx + jbe L(first_vec_x0) +# endif + testl %eax, %eax + jz L(cross_page_continue) + tzcntl %eax, %eax +# ifdef USE_AS_WMEMCHR + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + leaq (%RAW_PTR_REG, %rax, CHAR_SIZE), %rax +# else + addq %RAW_PTR_REG, %rax +# endif + ret + + .p2align 4 +L(first_vec_x1): + tzcntl %eax, %eax + leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax + ret + + .p2align 4 +L(first_vec_x2): + tzcntl %eax, %eax + leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax + ret + + .p2align 4 +L(first_vec_x3): + tzcntl %eax, %eax + leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax + ret + + .p2align 4 +L(first_vec_x4): + tzcntl %eax, %eax + leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax + ret + + .p2align 5 +L(aligned_more): + /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time + since data is only aligned to VEC_SIZE. */ + +# ifndef USE_AS_RAWMEMCHR + /* Align data to VEC_SIZE. */ +L(cross_page_continue): + xorl %ecx, %ecx + subl %edi, %ecx + andq $-VEC_SIZE, %rdi + /* esi is for adjusting length to see if near the end. */ + leal (VEC_SIZE * 5)(%rdi, %rcx), %esi +# ifdef USE_AS_WMEMCHR + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %esi +# endif +# else + andq $-VEC_SIZE, %rdi +L(cross_page_continue): +# endif + /* Load first VEC regardless. */ + VPCMP $0, (VEC_SIZE)(%rdi), %YMMMATCH, %k0 + kmovd %k0, %eax +# ifndef USE_AS_RAWMEMCHR + /* Adjust length. If near end handle specially. */ + subq %rsi, %rdx + jbe L(last_4x_vec_or_less) +# endif + testl %eax, %eax + jnz L(first_vec_x1) + + VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(first_vec_x2) + + VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(first_vec_x3) + + VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(first_vec_x4) + + +# ifndef USE_AS_RAWMEMCHR + /* Check if at last CHAR_PER_VEC * 4 length. */ + subq $(CHAR_PER_VEC * 4), %rdx + jbe L(last_4x_vec_or_less_cmpeq) + addq $VEC_SIZE, %rdi + + /* Align data to VEC_SIZE * 4 for the loop and readjust length. + */ +# ifdef USE_AS_WMEMCHR + movl %edi, %ecx + andq $-(4 * VEC_SIZE), %rdi + andl $(VEC_SIZE * 4 - 1), %ecx + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %ecx + addq %rcx, %rdx +# else + addq %rdi, %rdx + andq $-(4 * VEC_SIZE), %rdi + subq %rdi, %rdx +# endif +# else + addq $VEC_SIZE, %rdi + andq $-(4 * VEC_SIZE), %rdi +# endif + + vpxorq %XMMZERO, %XMMZERO, %XMMZERO + + /* Compare 4 * VEC at a time forward. */ + .p2align 4 +L(loop_4x_vec): + /* It would be possible to save some instructions using 4x VPCMP + but bottleneck on port 5 makes it not woth it. */ + VPCMP $4, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k1 + /* xor will set bytes match esi to zero. */ + vpxorq (VEC_SIZE * 5)(%rdi), %YMMMATCH, %YMM2 + vpxorq (VEC_SIZE * 6)(%rdi), %YMMMATCH, %YMM3 + VPCMP $0, (VEC_SIZE * 7)(%rdi), %YMMMATCH, %k3 + /* Reduce VEC2 / VEC3 with min and VEC1 with zero mask. */ + VPMINU %YMM2, %YMM3, %YMM3{%k1}{z} + VPCMP $0, %YMM3, %YMMZERO, %k2 +# ifdef USE_AS_RAWMEMCHR + subq $-(VEC_SIZE * 4), %rdi + kortestd %k2, %k3 + jz L(loop_4x_vec) +# else + kortestd %k2, %k3 + jnz L(loop_4x_vec_end) + + subq $-(VEC_SIZE * 4), %rdi + + subq $(CHAR_PER_VEC * 4), %rdx + ja L(loop_4x_vec) + + /* Fall through into less than 4 remaining vectors of length case. + */ + VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0 + kmovd %k0, %eax + addq $(VEC_SIZE * 3), %rdi + .p2align 4 +L(last_4x_vec_or_less): + /* Check if first VEC contained match. */ + testl %eax, %eax + jnz L(first_vec_x1_check) + + /* If remaining length > CHAR_PER_VEC * 2. */ + addl $(CHAR_PER_VEC * 2), %edx + jg L(last_4x_vec) + +L(last_2x_vec): + /* If remaining length < CHAR_PER_VEC. */ + addl $CHAR_PER_VEC, %edx + jle L(zero_end) + + /* Check VEC2 and compare any match with remaining length. */ + VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0 + kmovd %k0, %eax + tzcntl %eax, %eax + cmpl %eax, %edx + jbe L(set_zero_end) + leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax +L(zero_end): + ret + + + .p2align 4 +L(first_vec_x1_check): + tzcntl %eax, %eax + /* Adjust length. */ + subl $-(CHAR_PER_VEC * 4), %edx + /* Check if match within remaining length. */ + cmpl %eax, %edx + jbe L(set_zero_end) + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax + ret +L(set_zero_end): + xorl %eax, %eax + ret + + .p2align 4 +L(loop_4x_vec_end): +# endif + /* rawmemchr will fall through into this if match was found in + loop. */ + + /* k1 has not of matches with VEC1. */ + kmovd %k1, %eax +# ifdef USE_AS_WMEMCHR + subl $((1 << CHAR_PER_VEC) - 1), %eax +# else + incl %eax +# endif + jnz L(last_vec_x1_return) + + VPCMP $0, %YMM2, %YMMZERO, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(last_vec_x2_return) + + kmovd %k2, %eax + testl %eax, %eax + jnz L(last_vec_x3_return) + + kmovd %k3, %eax + tzcntl %eax, %eax +# ifdef USE_AS_RAWMEMCHR + leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax +# else + leaq (VEC_SIZE * 7)(%rdi, %rax, CHAR_SIZE), %rax +# endif + ret + + .p2align 4 +L(last_vec_x1_return): + tzcntl %eax, %eax +# ifdef USE_AS_RAWMEMCHR +# ifdef USE_AS_WMEMCHR + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + leaq (%rdi, %rax, CHAR_SIZE), %rax +# else + addq %rdi, %rax +# endif +# else + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax +# endif + ret + + .p2align 4 +L(last_vec_x2_return): + tzcntl %eax, %eax +# ifdef USE_AS_RAWMEMCHR + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax +# else + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + leaq (VEC_SIZE * 5)(%rdi, %rax, CHAR_SIZE), %rax +# endif + ret + + .p2align 4 +L(last_vec_x3_return): + tzcntl %eax, %eax +# ifdef USE_AS_RAWMEMCHR + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax +# else + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + leaq (VEC_SIZE * 6)(%rdi, %rax, CHAR_SIZE), %rax +# endif + ret + + +# ifndef USE_AS_RAWMEMCHR +L(last_4x_vec_or_less_cmpeq): + VPCMP $0, (VEC_SIZE * 5)(%rdi), %YMMMATCH, %k0 + kmovd %k0, %eax + subq $-(VEC_SIZE * 4), %rdi + /* Check first VEC regardless. */ + testl %eax, %eax + jnz L(first_vec_x1_check) + + /* If remaining length <= CHAR_PER_VEC * 2. */ + addl $(CHAR_PER_VEC * 2), %edx + jle L(last_2x_vec) + + .p2align 4 +L(last_4x_vec): + VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(last_vec_x2) + + + VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k0 + kmovd %k0, %eax + /* Create mask for possible matches within remaining length. */ +# ifdef USE_AS_WMEMCHR + movl $((1 << (CHAR_PER_VEC * 2)) - 1), %ecx + bzhil %edx, %ecx, %ecx +# else + movq $-1, %rcx + bzhiq %rdx, %rcx, %rcx +# endif + /* Test matches in data against length match. */ + andl %ecx, %eax + jnz L(last_vec_x3) + + /* if remaining length <= CHAR_PER_VEC * 3 (Note this is after + remaining length was found to be > CHAR_PER_VEC * 2. */ + subl $CHAR_PER_VEC, %edx + jbe L(zero_end2) + + + VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0 + kmovd %k0, %eax + /* Shift remaining length mask for last VEC. */ +# ifdef USE_AS_WMEMCHR + shrl $CHAR_PER_VEC, %ecx +# else + shrq $CHAR_PER_VEC, %rcx +# endif + andl %ecx, %eax + jz L(zero_end2) + tzcntl %eax, %eax + leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax +L(zero_end2): + ret + +L(last_vec_x2): + tzcntl %eax, %eax + leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax + ret + + .p2align 4 +L(last_vec_x3): + tzcntl %eax, %eax + leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax + ret +# endif + +END (MEMCHR) +#endif diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S new file mode 100644 index 000000000..cf4eff5d4 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S @@ -0,0 +1,12 @@ +#ifndef MEMCMP +# define MEMCMP __memcmp_avx2_movbe_rtm +#endif + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + +#define SECTION(p) p##.avx.rtm + +#include "memcmp-avx2-movbe.S" diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S index 67fc575b5..87f9478ea 100644 --- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S +++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S @@ -47,6 +47,10 @@ # define VZEROUPPER vzeroupper # endif +# ifndef SECTION +# define SECTION(p) p##.avx +# endif + # define VEC_SIZE 32 # define VEC_MASK ((1 << VEC_SIZE) - 1) @@ -55,7 +59,7 @@ memcmp has to use UNSIGNED comparison for elemnts. */ - .section .text.avx,"ax",@progbits + .section SECTION(.text),"ax",@progbits ENTRY (MEMCMP) # ifdef USE_AS_WMEMCMP shl $2, %RDX_LP @@ -123,8 +127,8 @@ ENTRY (MEMCMP) vptest %ymm0, %ymm5 jnc L(4x_vec_end) xorl %eax, %eax - VZEROUPPER - ret +L(return_vzeroupper): + ZERO_UPPER_VEC_REGISTERS_RETURN .p2align 4 L(last_2x_vec): @@ -144,8 +148,7 @@ L(last_vec): vpmovmskb %ymm2, %eax subl $VEC_MASK, %eax jnz L(first_vec) - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(first_vec): @@ -164,8 +167,7 @@ L(wmemcmp_return): movzbl (%rsi, %rcx), %edx sub %edx, %eax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN # ifdef USE_AS_WMEMCMP .p2align 4 @@ -367,8 +369,7 @@ L(last_4x_vec): vpmovmskb %ymm2, %eax subl $VEC_MASK, %eax jnz L(first_vec) - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(4x_vec_end): @@ -394,8 +395,7 @@ L(4x_vec_end): movzbl (VEC_SIZE * 3)(%rsi, %rcx), %edx sub %edx, %eax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(first_vec_x1): @@ -410,8 +410,7 @@ L(first_vec_x1): movzbl VEC_SIZE(%rsi, %rcx), %edx sub %edx, %eax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(first_vec_x2): @@ -426,7 +425,6 @@ L(first_vec_x2): movzbl (VEC_SIZE * 2)(%rsi, %rcx), %edx sub %edx, %eax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN END (MEMCMP) #endif diff --git a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S new file mode 100644 index 000000000..9c093972e --- /dev/null +++ b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S @@ -0,0 +1,440 @@ +/* memcmp/wmemcmp optimized with 256-bit EVEX instructions. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +/* memcmp/wmemcmp is implemented as: + 1. For size from 2 to 7 bytes, load as big endian with movbe and bswap + to avoid branches. + 2. Use overlapping compare to avoid branch. + 3. Use vector compare when size >= 4 bytes for memcmp or size >= 8 + bytes for wmemcmp. + 4. If size is 8 * VEC_SIZE or less, unroll the loop. + 5. Compare 4 * VEC_SIZE at a time with the aligned first memory + area. + 6. Use 2 vector compares when size is 2 * VEC_SIZE or less. + 7. Use 4 vector compares when size is 4 * VEC_SIZE or less. + 8. Use 8 vector compares when size is 8 * VEC_SIZE or less. */ + +# include + +# ifndef MEMCMP +# define MEMCMP __memcmp_evex_movbe +# endif + +# define VMOVU vmovdqu64 + +# ifdef USE_AS_WMEMCMP +# define VPCMPEQ vpcmpeqd +# else +# define VPCMPEQ vpcmpeqb +# endif + +# define XMM1 xmm17 +# define XMM2 xmm18 +# define YMM1 ymm17 +# define YMM2 ymm18 +# define YMM3 ymm19 +# define YMM4 ymm20 +# define YMM5 ymm21 +# define YMM6 ymm22 + +# define VEC_SIZE 32 +# ifdef USE_AS_WMEMCMP +# define VEC_MASK 0xff +# define XMM_MASK 0xf +# else +# define VEC_MASK 0xffffffff +# define XMM_MASK 0xffff +# endif + +/* Warning! + wmemcmp has to use SIGNED comparison for elements. + memcmp has to use UNSIGNED comparison for elemnts. +*/ + + .section .text.evex,"ax",@progbits +ENTRY (MEMCMP) +# ifdef USE_AS_WMEMCMP + shl $2, %RDX_LP +# elif defined __ILP32__ + /* Clear the upper 32 bits. */ + movl %edx, %edx +# endif + cmp $VEC_SIZE, %RDX_LP + jb L(less_vec) + + /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */ + VMOVU (%rsi), %YMM2 + VPCMPEQ (%rdi), %YMM2, %k1 + kmovd %k1, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) + + cmpq $(VEC_SIZE * 2), %rdx + jbe L(last_vec) + + /* More than 2 * VEC. */ + cmpq $(VEC_SIZE * 8), %rdx + ja L(more_8x_vec) + cmpq $(VEC_SIZE * 4), %rdx + jb L(last_4x_vec) + + /* From 4 * VEC to 8 * VEC, inclusively. */ + VMOVU (%rsi), %YMM1 + VPCMPEQ (%rdi), %YMM1, %k1 + + VMOVU VEC_SIZE(%rsi), %YMM2 + VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2 + + VMOVU (VEC_SIZE * 2)(%rsi), %YMM3 + VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3 + + VMOVU (VEC_SIZE * 3)(%rsi), %YMM4 + VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4 + + kandd %k1, %k2, %k5 + kandd %k3, %k4, %k6 + kandd %k5, %k6, %k6 + + kmovd %k6, %eax + cmpl $VEC_MASK, %eax + jne L(4x_vec_end) + + leaq -(4 * VEC_SIZE)(%rdi, %rdx), %rdi + leaq -(4 * VEC_SIZE)(%rsi, %rdx), %rsi + VMOVU (%rsi), %YMM1 + VPCMPEQ (%rdi), %YMM1, %k1 + + VMOVU VEC_SIZE(%rsi), %YMM2 + VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2 + kandd %k1, %k2, %k5 + + VMOVU (VEC_SIZE * 2)(%rsi), %YMM3 + VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3 + kandd %k3, %k5, %k5 + + VMOVU (VEC_SIZE * 3)(%rsi), %YMM4 + VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4 + kandd %k4, %k5, %k5 + + kmovd %k5, %eax + cmpl $VEC_MASK, %eax + jne L(4x_vec_end) + xorl %eax, %eax + ret + + .p2align 4 +L(last_2x_vec): + /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */ + VMOVU (%rsi), %YMM2 + VPCMPEQ (%rdi), %YMM2, %k2 + kmovd %k2, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) + +L(last_vec): + /* Use overlapping loads to avoid branches. */ + leaq -VEC_SIZE(%rdi, %rdx), %rdi + leaq -VEC_SIZE(%rsi, %rdx), %rsi + VMOVU (%rsi), %YMM2 + VPCMPEQ (%rdi), %YMM2, %k2 + kmovd %k2, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) + ret + + .p2align 4 +L(first_vec): + /* A byte or int32 is different within 16 or 32 bytes. */ + tzcntl %eax, %ecx +# ifdef USE_AS_WMEMCMP + xorl %eax, %eax + movl (%rdi, %rcx, 4), %edx + cmpl (%rsi, %rcx, 4), %edx +L(wmemcmp_return): + setl %al + negl %eax + orl $1, %eax +# else + movzbl (%rdi, %rcx), %eax + movzbl (%rsi, %rcx), %edx + sub %edx, %eax +# endif + ret + +# ifdef USE_AS_WMEMCMP + .p2align 4 +L(4): + xorl %eax, %eax + movl (%rdi), %edx + cmpl (%rsi), %edx + jne L(wmemcmp_return) + ret +# else + .p2align 4 +L(between_4_7): + /* Load as big endian with overlapping movbe to avoid branches. */ + movbe (%rdi), %eax + movbe (%rsi), %ecx + shlq $32, %rax + shlq $32, %rcx + movbe -4(%rdi, %rdx), %edi + movbe -4(%rsi, %rdx), %esi + orq %rdi, %rax + orq %rsi, %rcx + subq %rcx, %rax + je L(exit) + sbbl %eax, %eax + orl $1, %eax + ret + + .p2align 4 +L(exit): + ret + + .p2align 4 +L(between_2_3): + /* Load as big endian to avoid branches. */ + movzwl (%rdi), %eax + movzwl (%rsi), %ecx + shll $8, %eax + shll $8, %ecx + bswap %eax + bswap %ecx + movb -1(%rdi, %rdx), %al + movb -1(%rsi, %rdx), %cl + /* Subtraction is okay because the upper 8 bits are zero. */ + subl %ecx, %eax + ret + + .p2align 4 +L(1): + movzbl (%rdi), %eax + movzbl (%rsi), %ecx + subl %ecx, %eax + ret +# endif + + .p2align 4 +L(zero): + xorl %eax, %eax + ret + + .p2align 4 +L(less_vec): +# ifdef USE_AS_WMEMCMP + /* It can only be 0, 4, 8, 12, 16, 20, 24, 28 bytes. */ + cmpb $4, %dl + je L(4) + jb L(zero) +# else + cmpb $1, %dl + je L(1) + jb L(zero) + cmpb $4, %dl + jb L(between_2_3) + cmpb $8, %dl + jb L(between_4_7) +# endif + cmpb $16, %dl + jae L(between_16_31) + /* It is between 8 and 15 bytes. */ + vmovq (%rdi), %XMM1 + vmovq (%rsi), %XMM2 + VPCMPEQ %XMM1, %XMM2, %k2 + kmovw %k2, %eax + subl $XMM_MASK, %eax + jnz L(first_vec) + /* Use overlapping loads to avoid branches. */ + leaq -8(%rdi, %rdx), %rdi + leaq -8(%rsi, %rdx), %rsi + vmovq (%rdi), %XMM1 + vmovq (%rsi), %XMM2 + VPCMPEQ %XMM1, %XMM2, %k2 + kmovw %k2, %eax + subl $XMM_MASK, %eax + jnz L(first_vec) + ret + + .p2align 4 +L(between_16_31): + /* From 16 to 31 bytes. No branch when size == 16. */ + VMOVU (%rsi), %XMM2 + VPCMPEQ (%rdi), %XMM2, %k2 + kmovw %k2, %eax + subl $XMM_MASK, %eax + jnz L(first_vec) + + /* Use overlapping loads to avoid branches. */ + leaq -16(%rdi, %rdx), %rdi + leaq -16(%rsi, %rdx), %rsi + VMOVU (%rsi), %XMM2 + VPCMPEQ (%rdi), %XMM2, %k2 + kmovw %k2, %eax + subl $XMM_MASK, %eax + jnz L(first_vec) + ret + + .p2align 4 +L(more_8x_vec): + /* More than 8 * VEC. Check the first VEC. */ + VMOVU (%rsi), %YMM2 + VPCMPEQ (%rdi), %YMM2, %k2 + kmovd %k2, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) + + /* Align the first memory area for aligned loads in the loop. + Compute how much the first memory area is misaligned. */ + movq %rdi, %rcx + andl $(VEC_SIZE - 1), %ecx + /* Get the negative of offset for alignment. */ + subq $VEC_SIZE, %rcx + /* Adjust the second memory area. */ + subq %rcx, %rsi + /* Adjust the first memory area which should be aligned now. */ + subq %rcx, %rdi + /* Adjust length. */ + addq %rcx, %rdx + +L(loop_4x_vec): + /* Compare 4 * VEC at a time forward. */ + VMOVU (%rsi), %YMM1 + VPCMPEQ (%rdi), %YMM1, %k1 + + VMOVU VEC_SIZE(%rsi), %YMM2 + VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2 + kandd %k2, %k1, %k5 + + VMOVU (VEC_SIZE * 2)(%rsi), %YMM3 + VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3 + kandd %k3, %k5, %k5 + + VMOVU (VEC_SIZE * 3)(%rsi), %YMM4 + VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4 + kandd %k4, %k5, %k5 + + kmovd %k5, %eax + cmpl $VEC_MASK, %eax + jne L(4x_vec_end) + + addq $(VEC_SIZE * 4), %rdi + addq $(VEC_SIZE * 4), %rsi + + subq $(VEC_SIZE * 4), %rdx + cmpq $(VEC_SIZE * 4), %rdx + jae L(loop_4x_vec) + + /* Less than 4 * VEC. */ + cmpq $VEC_SIZE, %rdx + jbe L(last_vec) + cmpq $(VEC_SIZE * 2), %rdx + jbe L(last_2x_vec) + +L(last_4x_vec): + /* From 2 * VEC to 4 * VEC. */ + VMOVU (%rsi), %YMM2 + VPCMPEQ (%rdi), %YMM2, %k2 + kmovd %k2, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) + + addq $VEC_SIZE, %rdi + addq $VEC_SIZE, %rsi + VMOVU (%rsi), %YMM2 + VPCMPEQ (%rdi), %YMM2, %k2 + kmovd %k2, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) + + /* Use overlapping loads to avoid branches. */ + leaq -(3 * VEC_SIZE)(%rdi, %rdx), %rdi + leaq -(3 * VEC_SIZE)(%rsi, %rdx), %rsi + VMOVU (%rsi), %YMM2 + VPCMPEQ (%rdi), %YMM2, %k2 + kmovd %k2, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) + + addq $VEC_SIZE, %rdi + addq $VEC_SIZE, %rsi + VMOVU (%rsi), %YMM2 + VPCMPEQ (%rdi), %YMM2, %k2 + kmovd %k2, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) + ret + + .p2align 4 +L(4x_vec_end): + kmovd %k1, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) + kmovd %k2, %eax + subl $VEC_MASK, %eax + jnz L(first_vec_x1) + kmovd %k3, %eax + subl $VEC_MASK, %eax + jnz L(first_vec_x2) + kmovd %k4, %eax + subl $VEC_MASK, %eax + tzcntl %eax, %ecx +# ifdef USE_AS_WMEMCMP + xorl %eax, %eax + movl (VEC_SIZE * 3)(%rdi, %rcx, 4), %edx + cmpl (VEC_SIZE * 3)(%rsi, %rcx, 4), %edx + jmp L(wmemcmp_return) +# else + movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax + movzbl (VEC_SIZE * 3)(%rsi, %rcx), %edx + sub %edx, %eax +# endif + ret + + .p2align 4 +L(first_vec_x1): + tzcntl %eax, %ecx +# ifdef USE_AS_WMEMCMP + xorl %eax, %eax + movl VEC_SIZE(%rdi, %rcx, 4), %edx + cmpl VEC_SIZE(%rsi, %rcx, 4), %edx + jmp L(wmemcmp_return) +# else + movzbl VEC_SIZE(%rdi, %rcx), %eax + movzbl VEC_SIZE(%rsi, %rcx), %edx + sub %edx, %eax +# endif + ret + + .p2align 4 +L(first_vec_x2): + tzcntl %eax, %ecx +# ifdef USE_AS_WMEMCMP + xorl %eax, %eax + movl (VEC_SIZE * 2)(%rdi, %rcx, 4), %edx + cmpl (VEC_SIZE * 2)(%rsi, %rcx, 4), %edx + jmp L(wmemcmp_return) +# else + movzbl (VEC_SIZE * 2)(%rdi, %rcx), %eax + movzbl (VEC_SIZE * 2)(%rsi, %rcx), %edx + sub %edx, %eax +# endif + ret +END (MEMCMP) +#endif diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S new file mode 100644 index 000000000..1ec1962e8 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S @@ -0,0 +1,17 @@ +#if IS_IN (libc) +# define VEC_SIZE 32 +# define VEC(i) ymm##i +# define VMOVNT vmovntdq +# define VMOVU vmovdqu +# define VMOVA vmovdqa + +# define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +# define VZEROUPPER_RETURN jmp L(return) + +# define SECTION(p) p##.avx.rtm +# define MEMMOVE_SYMBOL(p,s) p##_avx_##s##_rtm + +# include "memmove-vec-unaligned-erms.S" +#endif diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S index aac1515cf..7dad1ad74 100644 --- a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S +++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S @@ -1,11 +1,25 @@ #if IS_IN (libc) # define VEC_SIZE 64 -# define VEC(i) zmm##i +# define XMM0 xmm16 +# define XMM1 xmm17 +# define YMM0 ymm16 +# define YMM1 ymm17 +# define VEC0 zmm16 +# define VEC1 zmm17 +# define VEC2 zmm18 +# define VEC3 zmm19 +# define VEC4 zmm20 +# define VEC5 zmm21 +# define VEC6 zmm22 +# define VEC7 zmm23 +# define VEC8 zmm24 +# define VEC(i) VEC##i # define VMOVNT vmovntdq # define VMOVU vmovdqu64 # define VMOVA vmovdqa64 +# define VZEROUPPER -# define SECTION(p) p##.avx512 +# define SECTION(p) p##.evex512 # define MEMMOVE_SYMBOL(p,s) p##_avx512_##s # include "memmove-vec-unaligned-erms.S" diff --git a/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S new file mode 100644 index 000000000..b879007e8 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S @@ -0,0 +1,26 @@ +#if IS_IN (libc) +# define VEC_SIZE 32 +# define XMM0 xmm16 +# define XMM1 xmm17 +# define YMM0 ymm16 +# define YMM1 ymm17 +# define VEC0 ymm16 +# define VEC1 ymm17 +# define VEC2 ymm18 +# define VEC3 ymm19 +# define VEC4 ymm20 +# define VEC5 ymm21 +# define VEC6 ymm22 +# define VEC7 ymm23 +# define VEC8 ymm24 +# define VEC(i) VEC##i +# define VMOVNT vmovntdq +# define VMOVU vmovdqu64 +# define VMOVA vmovdqa64 +# define VZEROUPPER + +# define SECTION(p) p##.evex +# define MEMMOVE_SYMBOL(p,s) p##_evex_##s + +# include "memmove-vec-unaligned-erms.S" +#endif diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S index c763b7d87..d13d23d6c 100644 --- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S +++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S @@ -48,6 +48,14 @@ # define MEMMOVE_CHK_SYMBOL(p,s) MEMMOVE_SYMBOL(p, s) #endif +#ifndef XMM0 +# define XMM0 xmm0 +#endif + +#ifndef YMM0 +# define YMM0 ymm0 +#endif + #ifndef VZEROUPPER # if VEC_SIZE > 16 # define VZEROUPPER vzeroupper @@ -67,6 +75,13 @@ # define REP_MOVSB_THRESHOLD (2048 * (VEC_SIZE / 16)) #endif +/* Avoid short distance rep movsb only with non-SSE vector. */ +#ifndef AVOID_SHORT_DISTANCE_REP_MOVSB +# define AVOID_SHORT_DISTANCE_REP_MOVSB (VEC_SIZE > 16) +#else +# define AVOID_SHORT_DISTANCE_REP_MOVSB 0 +#endif + #ifndef PREFETCH # define PREFETCH(addr) prefetcht0 addr #endif @@ -143,11 +158,12 @@ L(last_2x_vec): VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(1) VMOVU %VEC(0), (%rdi) VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx) - VZEROUPPER #if !defined USE_MULTIARCH || !IS_IN (libc) L(nop): -#endif ret +#else + VZEROUPPER_RETURN +#endif #if defined USE_MULTIARCH && IS_IN (libc) END (MEMMOVE_SYMBOL (__memmove, unaligned)) @@ -240,11 +256,14 @@ L(last_2x_vec): VMOVU %VEC(0), (%rdi) VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx) L(return): - VZEROUPPER +#if VEC_SIZE > 16 + ZERO_UPPER_VEC_REGISTERS_RETURN +#else ret +#endif L(movsb): - cmpq __x86_shared_non_temporal_threshold(%rip), %rdx + cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP jae L(more_8x_vec) cmpq %rsi, %rdi jb 1f @@ -257,7 +276,21 @@ L(movsb): # error Unsupported REP_MOVSB_THRESHOLD and VEC_SIZE! # endif jb L(more_8x_vec_backward) +# if AVOID_SHORT_DISTANCE_REP_MOVSB + movq %rdi, %rcx + subq %rsi, %rcx + jmp 2f +# endif 1: +# if AVOID_SHORT_DISTANCE_REP_MOVSB + movq %rsi, %rcx + subq %rdi, %rcx +2: +/* Avoid "rep movsb" if RCX, the distance between source and destination, + is N*4GB + [1..63] with N >= 0. */ + cmpl $63, %ecx + jbe L(more_2x_vec) /* Avoid "rep movsb" if ECX <= 63. */ +# endif mov %RDX_LP, %RCX_LP rep movsb L(nop): @@ -291,21 +324,20 @@ L(less_vec): #if VEC_SIZE > 32 L(between_32_63): /* From 32 to 63. No branch when size == 32. */ - vmovdqu (%rsi), %ymm0 - vmovdqu -32(%rsi,%rdx), %ymm1 - vmovdqu %ymm0, (%rdi) - vmovdqu %ymm1, -32(%rdi,%rdx) - VZEROUPPER - ret + VMOVU (%rsi), %YMM0 + VMOVU -32(%rsi,%rdx), %YMM1 + VMOVU %YMM0, (%rdi) + VMOVU %YMM1, -32(%rdi,%rdx) + VZEROUPPER_RETURN #endif #if VEC_SIZE > 16 /* From 16 to 31. No branch when size == 16. */ L(between_16_31): - vmovdqu (%rsi), %xmm0 - vmovdqu -16(%rsi,%rdx), %xmm1 - vmovdqu %xmm0, (%rdi) - vmovdqu %xmm1, -16(%rdi,%rdx) - ret + VMOVU (%rsi), %XMM0 + VMOVU -16(%rsi,%rdx), %XMM1 + VMOVU %XMM0, (%rdi) + VMOVU %XMM1, -16(%rdi,%rdx) + VZEROUPPER_RETURN #endif L(between_8_15): /* From 8 to 15. No branch when size == 8. */ @@ -358,8 +390,7 @@ L(more_2x_vec): VMOVU %VEC(5), -(VEC_SIZE * 2)(%rdi,%rdx) VMOVU %VEC(6), -(VEC_SIZE * 3)(%rdi,%rdx) VMOVU %VEC(7), -(VEC_SIZE * 4)(%rdi,%rdx) - VZEROUPPER - ret + VZEROUPPER_RETURN L(last_4x_vec): /* Copy from 2 * VEC to 4 * VEC. */ VMOVU (%rsi), %VEC(0) @@ -370,8 +401,7 @@ L(last_4x_vec): VMOVU %VEC(1), VEC_SIZE(%rdi) VMOVU %VEC(2), -VEC_SIZE(%rdi,%rdx) VMOVU %VEC(3), -(VEC_SIZE * 2)(%rdi,%rdx) - VZEROUPPER - ret + VZEROUPPER_RETURN L(more_8x_vec): cmpq %rsi, %rdi @@ -402,7 +432,7 @@ L(more_8x_vec): addq %r8, %rdx #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc) /* Check non-temporal store threshold. */ - cmpq __x86_shared_non_temporal_threshold(%rip), %rdx + cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP ja L(large_forward) #endif L(loop_4x_vec_forward): @@ -427,8 +457,7 @@ L(loop_4x_vec_forward): VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx) /* Store the first VEC. */ VMOVU %VEC(4), (%r11) - VZEROUPPER - ret + VZEROUPPER_RETURN L(more_8x_vec_backward): /* Load the first 4 * VEC and last VEC to support overlapping @@ -454,7 +483,7 @@ L(more_8x_vec_backward): subq %r8, %rdx #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc) /* Check non-temporal store threshold. */ - cmpq __x86_shared_non_temporal_threshold(%rip), %rdx + cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP ja L(large_backward) #endif L(loop_4x_vec_backward): @@ -479,8 +508,7 @@ L(loop_4x_vec_backward): VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi) /* Store the last VEC. */ VMOVU %VEC(8), (%r11) - VZEROUPPER - ret + VZEROUPPER_RETURN #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc) L(large_forward): @@ -515,8 +543,7 @@ L(loop_large_forward): VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx) /* Store the first VEC. */ VMOVU %VEC(4), (%r11) - VZEROUPPER - ret + VZEROUPPER_RETURN L(large_backward): /* Don't use non-temporal store if there is overlap between @@ -550,8 +577,7 @@ L(loop_large_backward): VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi) /* Store the last VEC. */ VMOVU %VEC(8), (%r11) - VZEROUPPER - ret + VZEROUPPER_RETURN #endif END (MEMMOVE_SYMBOL (__memmove, unaligned_erms)) diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S new file mode 100644 index 000000000..cea2d2a72 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S @@ -0,0 +1,12 @@ +#ifndef MEMRCHR +# define MEMRCHR __memrchr_avx2_rtm +#endif + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + +#define SECTION(p) p##.avx.rtm + +#include "memrchr-avx2.S" diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S index f5437b54d..c8d54c08d 100644 --- a/sysdeps/x86_64/multiarch/memrchr-avx2.S +++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S @@ -20,14 +20,22 @@ # include +# ifndef MEMRCHR +# define MEMRCHR __memrchr_avx2 +# endif + # ifndef VZEROUPPER # define VZEROUPPER vzeroupper # endif +# ifndef SECTION +# define SECTION(p) p##.avx +# endif + # define VEC_SIZE 32 - .section .text.avx,"ax",@progbits -ENTRY (__memrchr_avx2) + .section SECTION(.text),"ax",@progbits +ENTRY (MEMRCHR) /* Broadcast CHAR to YMM0. */ vmovd %esi, %xmm0 vpbroadcastb %xmm0, %ymm0 @@ -134,8 +142,8 @@ L(loop_4x_vec): vpmovmskb %ymm1, %eax bsrl %eax, %eax addq %rdi, %rax - VZEROUPPER - ret +L(return_vzeroupper): + ZERO_UPPER_VEC_REGISTERS_RETURN .p2align 4 L(last_4x_vec_or_less): @@ -169,8 +177,7 @@ L(last_4x_vec_or_less): addq %rax, %rdx jl L(zero) addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(last_2x_vec): @@ -191,31 +198,27 @@ L(last_2x_vec): jl L(zero) addl $(VEC_SIZE * 2), %eax addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(last_vec_x0): bsrl %eax, %eax addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(last_vec_x1): bsrl %eax, %eax addl $VEC_SIZE, %eax addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(last_vec_x2): bsrl %eax, %eax addl $(VEC_SIZE * 2), %eax addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(last_vec_x3): @@ -232,8 +235,7 @@ L(last_vec_x1_check): jl L(zero) addl $VEC_SIZE, %eax addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(last_vec_x3_check): @@ -243,12 +245,14 @@ L(last_vec_x3_check): jl L(zero) addl $(VEC_SIZE * 3), %eax addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(zero): - VZEROUPPER + xorl %eax, %eax + VZEROUPPER_RETURN + + .p2align 4 L(null): xorl %eax, %eax ret @@ -273,8 +277,7 @@ L(last_vec_or_less_aligned): bsrl %eax, %eax addq %rdi, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(last_vec_or_less): @@ -315,8 +318,7 @@ L(last_vec_or_less): bsrl %eax, %eax addq %rdi, %rax addq %r8, %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(last_vec_2x_aligned): @@ -353,7 +355,6 @@ L(last_vec_2x_aligned): bsrl %eax, %eax addq %rdi, %rax addq %r8, %rax - VZEROUPPER - ret -END (__memrchr_avx2) + VZEROUPPER_RETURN +END (MEMRCHR) #endif diff --git a/sysdeps/x86_64/multiarch/memrchr-evex.S b/sysdeps/x86_64/multiarch/memrchr-evex.S new file mode 100644 index 000000000..16bf8e02b --- /dev/null +++ b/sysdeps/x86_64/multiarch/memrchr-evex.S @@ -0,0 +1,337 @@ +/* memrchr optimized with 256-bit EVEX instructions. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +# include + +# define VMOVA vmovdqa64 + +# define YMMMATCH ymm16 + +# define VEC_SIZE 32 + + .section .text.evex,"ax",@progbits +ENTRY (__memrchr_evex) + /* Broadcast CHAR to YMMMATCH. */ + vpbroadcastb %esi, %YMMMATCH + + sub $VEC_SIZE, %RDX_LP + jbe L(last_vec_or_less) + + add %RDX_LP, %RDI_LP + + /* Check the last VEC_SIZE bytes. */ + vpcmpb $0, (%rdi), %YMMMATCH, %k1 + kmovd %k1, %eax + testl %eax, %eax + jnz L(last_vec_x0) + + subq $(VEC_SIZE * 4), %rdi + movl %edi, %ecx + andl $(VEC_SIZE - 1), %ecx + jz L(aligned_more) + + /* Align data for aligned loads in the loop. */ + addq $VEC_SIZE, %rdi + addq $VEC_SIZE, %rdx + andq $-VEC_SIZE, %rdi + subq %rcx, %rdx + + .p2align 4 +L(aligned_more): + subq $(VEC_SIZE * 4), %rdx + jbe L(last_4x_vec_or_less) + + /* Check the last 4 * VEC_SIZE. Only one VEC_SIZE at a time + since data is only aligned to VEC_SIZE. */ + vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1 + kmovd %k1, %eax + testl %eax, %eax + jnz L(last_vec_x3) + + vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k2 + kmovd %k2, %eax + testl %eax, %eax + jnz L(last_vec_x2) + + vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k3 + kmovd %k3, %eax + testl %eax, %eax + jnz L(last_vec_x1) + + vpcmpb $0, (%rdi), %YMMMATCH, %k4 + kmovd %k4, %eax + testl %eax, %eax + jnz L(last_vec_x0) + + /* Align data to 4 * VEC_SIZE for loop with fewer branches. + There are some overlaps with above if data isn't aligned + to 4 * VEC_SIZE. */ + movl %edi, %ecx + andl $(VEC_SIZE * 4 - 1), %ecx + jz L(loop_4x_vec) + + addq $(VEC_SIZE * 4), %rdi + addq $(VEC_SIZE * 4), %rdx + andq $-(VEC_SIZE * 4), %rdi + subq %rcx, %rdx + + .p2align 4 +L(loop_4x_vec): + /* Compare 4 * VEC at a time forward. */ + subq $(VEC_SIZE * 4), %rdi + subq $(VEC_SIZE * 4), %rdx + jbe L(last_4x_vec_or_less) + + vpcmpb $0, (%rdi), %YMMMATCH, %k1 + vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k2 + kord %k1, %k2, %k5 + vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k3 + vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k4 + + kord %k3, %k4, %k6 + kortestd %k5, %k6 + jz L(loop_4x_vec) + + /* There is a match. */ + kmovd %k4, %eax + testl %eax, %eax + jnz L(last_vec_x3) + + kmovd %k3, %eax + testl %eax, %eax + jnz L(last_vec_x2) + + kmovd %k2, %eax + testl %eax, %eax + jnz L(last_vec_x1) + + kmovd %k1, %eax + bsrl %eax, %eax + addq %rdi, %rax + ret + + .p2align 4 +L(last_4x_vec_or_less): + addl $(VEC_SIZE * 4), %edx + cmpl $(VEC_SIZE * 2), %edx + jbe L(last_2x_vec) + + vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1 + kmovd %k1, %eax + testl %eax, %eax + jnz L(last_vec_x3) + + vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k2 + kmovd %k2, %eax + testl %eax, %eax + jnz L(last_vec_x2) + + vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k3 + kmovd %k3, %eax + testl %eax, %eax + jnz L(last_vec_x1_check) + cmpl $(VEC_SIZE * 3), %edx + jbe L(zero) + + vpcmpb $0, (%rdi), %YMMMATCH, %k4 + kmovd %k4, %eax + testl %eax, %eax + jz L(zero) + bsrl %eax, %eax + subq $(VEC_SIZE * 4), %rdx + addq %rax, %rdx + jl L(zero) + addq %rdi, %rax + ret + + .p2align 4 +L(last_2x_vec): + vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1 + kmovd %k1, %eax + testl %eax, %eax + jnz L(last_vec_x3_check) + cmpl $VEC_SIZE, %edx + jbe L(zero) + + vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k1 + kmovd %k1, %eax + testl %eax, %eax + jz L(zero) + bsrl %eax, %eax + subq $(VEC_SIZE * 2), %rdx + addq %rax, %rdx + jl L(zero) + addl $(VEC_SIZE * 2), %eax + addq %rdi, %rax + ret + + .p2align 4 +L(last_vec_x0): + bsrl %eax, %eax + addq %rdi, %rax + ret + + .p2align 4 +L(last_vec_x1): + bsrl %eax, %eax + addl $VEC_SIZE, %eax + addq %rdi, %rax + ret + + .p2align 4 +L(last_vec_x2): + bsrl %eax, %eax + addl $(VEC_SIZE * 2), %eax + addq %rdi, %rax + ret + + .p2align 4 +L(last_vec_x3): + bsrl %eax, %eax + addl $(VEC_SIZE * 3), %eax + addq %rdi, %rax + ret + + .p2align 4 +L(last_vec_x1_check): + bsrl %eax, %eax + subq $(VEC_SIZE * 3), %rdx + addq %rax, %rdx + jl L(zero) + addl $VEC_SIZE, %eax + addq %rdi, %rax + ret + + .p2align 4 +L(last_vec_x3_check): + bsrl %eax, %eax + subq $VEC_SIZE, %rdx + addq %rax, %rdx + jl L(zero) + addl $(VEC_SIZE * 3), %eax + addq %rdi, %rax + ret + + .p2align 4 +L(zero): + xorl %eax, %eax + ret + + .p2align 4 +L(last_vec_or_less_aligned): + movl %edx, %ecx + + vpcmpb $0, (%rdi), %YMMMATCH, %k1 + + movl $1, %edx + /* Support rdx << 32. */ + salq %cl, %rdx + subq $1, %rdx + + kmovd %k1, %eax + + /* Remove the trailing bytes. */ + andl %edx, %eax + testl %eax, %eax + jz L(zero) + + bsrl %eax, %eax + addq %rdi, %rax + ret + + .p2align 4 +L(last_vec_or_less): + addl $VEC_SIZE, %edx + + /* Check for zero length. */ + testl %edx, %edx + jz L(zero) + + movl %edi, %ecx + andl $(VEC_SIZE - 1), %ecx + jz L(last_vec_or_less_aligned) + + movl %ecx, %esi + movl %ecx, %r8d + addl %edx, %esi + andq $-VEC_SIZE, %rdi + + subl $VEC_SIZE, %esi + ja L(last_vec_2x_aligned) + + /* Check the last VEC. */ + vpcmpb $0, (%rdi), %YMMMATCH, %k1 + kmovd %k1, %eax + + /* Remove the leading and trailing bytes. */ + sarl %cl, %eax + movl %edx, %ecx + + movl $1, %edx + sall %cl, %edx + subl $1, %edx + + andl %edx, %eax + testl %eax, %eax + jz L(zero) + + bsrl %eax, %eax + addq %rdi, %rax + addq %r8, %rax + ret + + .p2align 4 +L(last_vec_2x_aligned): + movl %esi, %ecx + + /* Check the last VEC. */ + vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k1 + + movl $1, %edx + sall %cl, %edx + subl $1, %edx + + kmovd %k1, %eax + + /* Remove the trailing bytes. */ + andl %edx, %eax + + testl %eax, %eax + jnz L(last_vec_x1) + + /* Check the second last VEC. */ + vpcmpb $0, (%rdi), %YMMMATCH, %k1 + + movl %r8d, %ecx + + kmovd %k1, %eax + + /* Remove the leading bytes. Must use unsigned right shift for + bsrl below. */ + shrl %cl, %eax + testl %eax, %eax + jz L(zero) + + bsrl %eax, %eax + addq %rdi, %rax + addq %r8, %rax + ret +END (__memrchr_evex) +#endif diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S new file mode 100644 index 000000000..8ac3e479b --- /dev/null +++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S @@ -0,0 +1,10 @@ +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return) + +#define SECTION(p) p##.avx.rtm +#define MEMSET_SYMBOL(p,s) p##_avx2_##s##_rtm +#define WMEMSET_SYMBOL(p,s) p##_avx2_##s##_rtm + +#include "memset-avx2-unaligned-erms.S" diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S index 7ab3d8984..ae0860f36 100644 --- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S +++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S @@ -14,9 +14,15 @@ movq r, %rax; \ vpbroadcastd %xmm0, %ymm0 -# define SECTION(p) p##.avx -# define MEMSET_SYMBOL(p,s) p##_avx2_##s -# define WMEMSET_SYMBOL(p,s) p##_avx2_##s +# ifndef SECTION +# define SECTION(p) p##.avx +# endif +# ifndef MEMSET_SYMBOL +# define MEMSET_SYMBOL(p,s) p##_avx2_##s +# endif +# ifndef WMEMSET_SYMBOL +# define WMEMSET_SYMBOL(p,s) p##_avx2_##s +# endif # include "memset-vec-unaligned-erms.S" #endif diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S index 0783979ca..22e7b187c 100644 --- a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S +++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S @@ -1,22 +1,22 @@ #if IS_IN (libc) # define VEC_SIZE 64 -# define VEC(i) zmm##i +# define XMM0 xmm16 +# define YMM0 ymm16 +# define VEC0 zmm16 +# define VEC(i) VEC##i # define VMOVU vmovdqu64 # define VMOVA vmovdqa64 +# define VZEROUPPER # define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ - vmovd d, %xmm0; \ movq r, %rax; \ - vpbroadcastb %xmm0, %xmm0; \ - vpbroadcastq %xmm0, %zmm0 + vpbroadcastb d, %VEC0 # define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ - vmovd d, %xmm0; \ movq r, %rax; \ - vpbroadcastd %xmm0, %xmm0; \ - vpbroadcastq %xmm0, %zmm0 + vpbroadcastd d, %VEC0 -# define SECTION(p) p##.avx512 +# define SECTION(p) p##.evex512 # define MEMSET_SYMBOL(p,s) p##_avx512_##s # define WMEMSET_SYMBOL(p,s) p##_avx512_##s diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S new file mode 100644 index 000000000..ae0a4d6e4 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S @@ -0,0 +1,24 @@ +#if IS_IN (libc) +# define VEC_SIZE 32 +# define XMM0 xmm16 +# define YMM0 ymm16 +# define VEC0 ymm16 +# define VEC(i) VEC##i +# define VMOVU vmovdqu64 +# define VMOVA vmovdqa64 +# define VZEROUPPER + +# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ + movq r, %rax; \ + vpbroadcastb d, %VEC0 + +# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ + movq r, %rax; \ + vpbroadcastd d, %VEC0 + +# define SECTION(p) p##.evex +# define MEMSET_SYMBOL(p,s) p##_evex_##s +# define WMEMSET_SYMBOL(p,s) p##_evex_##s + +# include "memset-vec-unaligned-erms.S" +#endif diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S index af2299709..16bed6ec1 100644 --- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S @@ -34,20 +34,25 @@ # define WMEMSET_CHK_SYMBOL(p,s) WMEMSET_SYMBOL(p, s) #endif +#ifndef XMM0 +# define XMM0 xmm0 +#endif + +#ifndef YMM0 +# define YMM0 ymm0 +#endif + #ifndef VZEROUPPER # if VEC_SIZE > 16 # define VZEROUPPER vzeroupper +# define VZEROUPPER_SHORT_RETURN vzeroupper; ret # else # define VZEROUPPER # endif #endif #ifndef VZEROUPPER_SHORT_RETURN -# if VEC_SIZE > 16 -# define VZEROUPPER_SHORT_RETURN vzeroupper -# else -# define VZEROUPPER_SHORT_RETURN rep -# endif +# define VZEROUPPER_SHORT_RETURN rep; ret #endif #ifndef MOVQ @@ -77,7 +82,7 @@ ENTRY (__bzero) mov %RDI_LP, %RAX_LP /* Set return value. */ mov %RSI_LP, %RDX_LP /* Set n. */ - pxor %xmm0, %xmm0 + pxor %XMM0, %XMM0 jmp L(entry_from_bzero) END (__bzero) weak_alias (__bzero, bzero) @@ -119,8 +124,7 @@ L(entry_from_bzero): /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */ VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx) VMOVU %VEC(0), (%rdi) - VZEROUPPER - ret + VZEROUPPER_RETURN #if defined USE_MULTIARCH && IS_IN (libc) END (MEMSET_SYMBOL (__memset, unaligned)) @@ -143,14 +147,12 @@ ENTRY (__memset_erms) ENTRY (MEMSET_SYMBOL (__memset, erms)) # endif L(stosb): - /* Issue vzeroupper before rep stosb. */ - VZEROUPPER mov %RDX_LP, %RCX_LP movzbl %sil, %eax mov %RDI_LP, %RDX_LP rep stosb mov %RDX_LP, %RAX_LP - ret + VZEROUPPER_RETURN # if VEC_SIZE == 16 END (__memset_erms) # else @@ -177,8 +179,7 @@ ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms)) /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */ VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx) VMOVU %VEC(0), (%rdi) - VZEROUPPER - ret + VZEROUPPER_RETURN L(stosb_more_2x_vec): cmpq $REP_STOSB_THRESHOLD, %rdx @@ -192,8 +193,11 @@ L(more_2x_vec): VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx) VMOVU %VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx) L(return): - VZEROUPPER +#if VEC_SIZE > 16 + ZERO_UPPER_VEC_REGISTERS_RETURN +#else ret +#endif L(loop_start): leaq (VEC_SIZE * 4)(%rdi), %rcx @@ -219,7 +223,6 @@ L(loop): cmpq %rcx, %rdx jne L(loop) VZEROUPPER_SHORT_RETURN - ret L(less_vec): /* Less than 1 VEC. */ # if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64 @@ -233,7 +236,7 @@ L(less_vec): cmpb $16, %dl jae L(between_16_31) # endif - MOVQ %xmm0, %rcx + MOVQ %XMM0, %rcx cmpb $8, %dl jae L(between_8_15) cmpb $4, %dl @@ -243,40 +246,34 @@ L(less_vec): jb 1f movb %cl, (%rdi) 1: - VZEROUPPER - ret + VZEROUPPER_RETURN # if VEC_SIZE > 32 /* From 32 to 63. No branch when size == 32. */ L(between_32_63): - vmovdqu %ymm0, -32(%rdi,%rdx) - vmovdqu %ymm0, (%rdi) - VZEROUPPER - ret + VMOVU %YMM0, -32(%rdi,%rdx) + VMOVU %YMM0, (%rdi) + VZEROUPPER_RETURN # endif # if VEC_SIZE > 16 /* From 16 to 31. No branch when size == 16. */ L(between_16_31): - vmovdqu %xmm0, -16(%rdi,%rdx) - vmovdqu %xmm0, (%rdi) - VZEROUPPER - ret + VMOVU %XMM0, -16(%rdi,%rdx) + VMOVU %XMM0, (%rdi) + VZEROUPPER_RETURN # endif /* From 8 to 15. No branch when size == 8. */ L(between_8_15): movq %rcx, -8(%rdi,%rdx) movq %rcx, (%rdi) - VZEROUPPER - ret + VZEROUPPER_RETURN L(between_4_7): /* From 4 to 7. No branch when size == 4. */ movl %ecx, -4(%rdi,%rdx) movl %ecx, (%rdi) - VZEROUPPER - ret + VZEROUPPER_RETURN L(between_2_3): /* From 2 to 3. No branch when size == 2. */ movw %cx, -2(%rdi,%rdx) movw %cx, (%rdi) - VZEROUPPER - ret + VZEROUPPER_RETURN END (MEMSET_SYMBOL (__memset, unaligned_erms)) diff --git a/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S new file mode 100644 index 000000000..acc5f6e2f --- /dev/null +++ b/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S @@ -0,0 +1,4 @@ +#define MEMCHR __rawmemchr_avx2_rtm +#define USE_AS_RAWMEMCHR 1 + +#include "memchr-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/rawmemchr-evex.S b/sysdeps/x86_64/multiarch/rawmemchr-evex.S new file mode 100644 index 000000000..ec942b77b --- /dev/null +++ b/sysdeps/x86_64/multiarch/rawmemchr-evex.S @@ -0,0 +1,4 @@ +#define MEMCHR __rawmemchr_evex +#define USE_AS_RAWMEMCHR 1 + +#include "memchr-evex.S" diff --git a/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S new file mode 100644 index 000000000..2b9c07a59 --- /dev/null +++ b/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S @@ -0,0 +1,3 @@ +#define USE_AS_STPCPY +#define STRCPY __stpcpy_avx2_rtm +#include "strcpy-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/stpcpy-evex.S b/sysdeps/x86_64/multiarch/stpcpy-evex.S new file mode 100644 index 000000000..7c6f26cd9 --- /dev/null +++ b/sysdeps/x86_64/multiarch/stpcpy-evex.S @@ -0,0 +1,3 @@ +#define USE_AS_STPCPY +#define STRCPY __stpcpy_evex +#include "strcpy-evex.S" diff --git a/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S new file mode 100644 index 000000000..60a2ccfe5 --- /dev/null +++ b/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S @@ -0,0 +1,4 @@ +#define USE_AS_STPCPY +#define USE_AS_STRNCPY +#define STRCPY __stpncpy_avx2_rtm +#include "strcpy-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/stpncpy-evex.S b/sysdeps/x86_64/multiarch/stpncpy-evex.S new file mode 100644 index 000000000..1570014d1 --- /dev/null +++ b/sysdeps/x86_64/multiarch/stpncpy-evex.S @@ -0,0 +1,4 @@ +#define USE_AS_STPCPY +#define USE_AS_STRNCPY +#define STRCPY __stpncpy_evex +#include "strcpy-evex.S" diff --git a/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S new file mode 100644 index 000000000..637fb557c --- /dev/null +++ b/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S @@ -0,0 +1,12 @@ +#ifndef STRCAT +# define STRCAT __strcat_avx2_rtm +#endif + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + +#define SECTION(p) p##.avx.rtm + +#include "strcat-avx2.S" diff --git a/sysdeps/x86_64/multiarch/strcat-avx2.S b/sysdeps/x86_64/multiarch/strcat-avx2.S index a4143bf8f..1e6d4827e 100644 --- a/sysdeps/x86_64/multiarch/strcat-avx2.S +++ b/sysdeps/x86_64/multiarch/strcat-avx2.S @@ -30,7 +30,11 @@ /* Number of bytes in a vector register */ # define VEC_SIZE 32 - .section .text.avx,"ax",@progbits +# ifndef SECTION +# define SECTION(p) p##.avx +# endif + + .section SECTION(.text),"ax",@progbits ENTRY (STRCAT) mov %rdi, %r9 # ifdef USE_AS_STRNCAT diff --git a/sysdeps/x86_64/multiarch/strcat-evex.S b/sysdeps/x86_64/multiarch/strcat-evex.S new file mode 100644 index 000000000..97c3d85b6 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strcat-evex.S @@ -0,0 +1,283 @@ +/* strcat with 256-bit EVEX instructions. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +# include + +# ifndef STRCAT +# define STRCAT __strcat_evex +# endif + +# define VMOVU vmovdqu64 +# define VMOVA vmovdqa64 + +/* zero register */ +# define XMMZERO xmm16 +# define YMMZERO ymm16 +# define YMM0 ymm17 +# define YMM1 ymm18 + +# define USE_AS_STRCAT + +/* Number of bytes in a vector register */ +# define VEC_SIZE 32 + + .section .text.evex,"ax",@progbits +ENTRY (STRCAT) + mov %rdi, %r9 +# ifdef USE_AS_STRNCAT + mov %rdx, %r8 +# endif + + xor %eax, %eax + mov %edi, %ecx + and $((VEC_SIZE * 4) - 1), %ecx + vpxorq %XMMZERO, %XMMZERO, %XMMZERO + cmp $(VEC_SIZE * 3), %ecx + ja L(fourth_vector_boundary) + vpcmpb $0, (%rdi), %YMMZERO, %k0 + kmovd %k0, %edx + test %edx, %edx + jnz L(exit_null_on_first_vector) + mov %rdi, %rax + and $-VEC_SIZE, %rax + jmp L(align_vec_size_start) +L(fourth_vector_boundary): + mov %rdi, %rax + and $-VEC_SIZE, %rax + vpcmpb $0, (%rax), %YMMZERO, %k0 + mov $-1, %r10d + sub %rax, %rcx + shl %cl, %r10d + kmovd %k0, %edx + and %r10d, %edx + jnz L(exit) + +L(align_vec_size_start): + vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0 + kmovd %k0, %edx + test %edx, %edx + jnz L(exit_null_on_second_vector) + + vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1 + kmovd %k1, %edx + test %edx, %edx + jnz L(exit_null_on_third_vector) + + vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2 + kmovd %k2, %edx + test %edx, %edx + jnz L(exit_null_on_fourth_vector) + + vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3 + kmovd %k3, %edx + test %edx, %edx + jnz L(exit_null_on_fifth_vector) + + vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4 + add $(VEC_SIZE * 4), %rax + kmovd %k4, %edx + test %edx, %edx + jnz L(exit_null_on_second_vector) + + vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1 + kmovd %k1, %edx + test %edx, %edx + jnz L(exit_null_on_third_vector) + + vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2 + kmovd %k2, %edx + test %edx, %edx + jnz L(exit_null_on_fourth_vector) + + vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3 + kmovd %k3, %edx + test %edx, %edx + jnz L(exit_null_on_fifth_vector) + + vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4 + kmovd %k4, %edx + add $(VEC_SIZE * 4), %rax + test %edx, %edx + jnz L(exit_null_on_second_vector) + + vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1 + kmovd %k1, %edx + test %edx, %edx + jnz L(exit_null_on_third_vector) + + vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2 + kmovd %k2, %edx + test %edx, %edx + jnz L(exit_null_on_fourth_vector) + + vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3 + kmovd %k3, %edx + test %edx, %edx + jnz L(exit_null_on_fifth_vector) + + vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4 + add $(VEC_SIZE * 4), %rax + kmovd %k4, %edx + test %edx, %edx + jnz L(exit_null_on_second_vector) + + vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1 + kmovd %k1, %edx + test %edx, %edx + jnz L(exit_null_on_third_vector) + + vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2 + kmovd %k2, %edx + test %edx, %edx + jnz L(exit_null_on_fourth_vector) + + vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3 + kmovd %k3, %edx + test %edx, %edx + jnz L(exit_null_on_fifth_vector) + + test $((VEC_SIZE * 4) - 1), %rax + jz L(align_four_vec_loop) + + vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4 + add $(VEC_SIZE * 5), %rax + kmovd %k4, %edx + test %edx, %edx + jnz L(exit) + + test $((VEC_SIZE * 4) - 1), %rax + jz L(align_four_vec_loop) + + vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0 + add $VEC_SIZE, %rax + kmovd %k0, %edx + test %edx, %edx + jnz L(exit) + + test $((VEC_SIZE * 4) - 1), %rax + jz L(align_four_vec_loop) + + vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0 + add $VEC_SIZE, %rax + kmovd %k0, %edx + test %edx, %edx + jnz L(exit) + + test $((VEC_SIZE * 4) - 1), %rax + jz L(align_four_vec_loop) + + vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k1 + add $VEC_SIZE, %rax + kmovd %k1, %edx + test %edx, %edx + jnz L(exit) + + add $VEC_SIZE, %rax + + .p2align 4 +L(align_four_vec_loop): + VMOVA (%rax), %YMM0 + VMOVA (VEC_SIZE * 2)(%rax), %YMM1 + vpminub VEC_SIZE(%rax), %YMM0, %YMM0 + vpminub (VEC_SIZE * 3)(%rax), %YMM1, %YMM1 + vpminub %YMM0, %YMM1, %YMM0 + /* If K0 != 0, there is a null byte. */ + vpcmpb $0, %YMM0, %YMMZERO, %k0 + add $(VEC_SIZE * 4), %rax + ktestd %k0, %k0 + jz L(align_four_vec_loop) + + vpcmpb $0, -(VEC_SIZE * 4)(%rax), %YMMZERO, %k0 + sub $(VEC_SIZE * 5), %rax + kmovd %k0, %edx + test %edx, %edx + jnz L(exit_null_on_second_vector) + + vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1 + kmovd %k1, %edx + test %edx, %edx + jnz L(exit_null_on_third_vector) + + vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2 + kmovd %k2, %edx + test %edx, %edx + jnz L(exit_null_on_fourth_vector) + + vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3 + kmovd %k3, %edx + sub %rdi, %rax + bsf %rdx, %rdx + add %rdx, %rax + add $(VEC_SIZE * 4), %rax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit): + sub %rdi, %rax +L(exit_null_on_first_vector): + bsf %rdx, %rdx + add %rdx, %rax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_null_on_second_vector): + sub %rdi, %rax + bsf %rdx, %rdx + add %rdx, %rax + add $VEC_SIZE, %rax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_null_on_third_vector): + sub %rdi, %rax + bsf %rdx, %rdx + add %rdx, %rax + add $(VEC_SIZE * 2), %rax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_null_on_fourth_vector): + sub %rdi, %rax + bsf %rdx, %rdx + add %rdx, %rax + add $(VEC_SIZE * 3), %rax + jmp L(StartStrcpyPart) + + .p2align 4 +L(exit_null_on_fifth_vector): + sub %rdi, %rax + bsf %rdx, %rdx + add %rdx, %rax + add $(VEC_SIZE * 4), %rax + + .p2align 4 +L(StartStrcpyPart): + lea (%r9, %rax), %rdi + mov %rsi, %rcx + mov %r9, %rax /* save result */ + +# ifdef USE_AS_STRNCAT + test %r8, %r8 + jz L(ExitZero) +# define USE_AS_STRNCPY +# endif + +# include "strcpy-evex.S" +#endif diff --git a/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S new file mode 100644 index 000000000..81f20d1d8 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S @@ -0,0 +1,12 @@ +#ifndef STRCHR +# define STRCHR __strchr_avx2_rtm +#endif + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + +#define SECTION(p) p##.avx.rtm + +#include "strchr-avx2.S" diff --git a/sysdeps/x86_64/multiarch/strchr-avx2.S b/sysdeps/x86_64/multiarch/strchr-avx2.S index 39fc69da7..0a5217514 100644 --- a/sysdeps/x86_64/multiarch/strchr-avx2.S +++ b/sysdeps/x86_64/multiarch/strchr-avx2.S @@ -38,9 +38,13 @@ # define VZEROUPPER vzeroupper # endif +# ifndef SECTION +# define SECTION(p) p##.avx +# endif + # define VEC_SIZE 32 - .section .text.avx,"ax",@progbits + .section SECTION(.text),"ax",@progbits ENTRY (STRCHR) movl %edi, %ecx /* Broadcast CHAR to YMM0. */ @@ -93,8 +97,8 @@ L(cros_page_boundary): cmp (%rax), %CHAR_REG cmovne %rdx, %rax # endif - VZEROUPPER - ret +L(return_vzeroupper): + ZERO_UPPER_VEC_REGISTERS_RETURN .p2align 4 L(aligned_more): @@ -190,8 +194,7 @@ L(first_vec_x0): cmp (%rax), %CHAR_REG cmovne %rdx, %rax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(first_vec_x1): @@ -205,8 +208,7 @@ L(first_vec_x1): cmp (%rax), %CHAR_REG cmovne %rdx, %rax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(first_vec_x2): @@ -220,8 +222,7 @@ L(first_vec_x2): cmp (%rax), %CHAR_REG cmovne %rdx, %rax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(4x_vec_end): @@ -247,8 +248,7 @@ L(first_vec_x3): cmp (%rax), %CHAR_REG cmovne %rdx, %rax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN END (STRCHR) #endif diff --git a/sysdeps/x86_64/multiarch/strchr-evex.S b/sysdeps/x86_64/multiarch/strchr-evex.S new file mode 100644 index 000000000..ddc86a705 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strchr-evex.S @@ -0,0 +1,335 @@ +/* strchr/strchrnul optimized with 256-bit EVEX instructions. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +# include + +# ifndef STRCHR +# define STRCHR __strchr_evex +# endif + +# define VMOVU vmovdqu64 +# define VMOVA vmovdqa64 + +# ifdef USE_AS_WCSCHR +# define VPBROADCAST vpbroadcastd +# define VPCMP vpcmpd +# define VPMINU vpminud +# define CHAR_REG esi +# define SHIFT_REG r8d +# else +# define VPBROADCAST vpbroadcastb +# define VPCMP vpcmpb +# define VPMINU vpminub +# define CHAR_REG sil +# define SHIFT_REG ecx +# endif + +# define XMMZERO xmm16 + +# define YMMZERO ymm16 +# define YMM0 ymm17 +# define YMM1 ymm18 +# define YMM2 ymm19 +# define YMM3 ymm20 +# define YMM4 ymm21 +# define YMM5 ymm22 +# define YMM6 ymm23 +# define YMM7 ymm24 +# define YMM8 ymm25 + +# define VEC_SIZE 32 +# define PAGE_SIZE 4096 + + .section .text.evex,"ax",@progbits +ENTRY (STRCHR) + movl %edi, %ecx +# ifndef USE_AS_STRCHRNUL + xorl %edx, %edx +# endif + + /* Broadcast CHAR to YMM0. */ + VPBROADCAST %esi, %YMM0 + + vpxorq %XMMZERO, %XMMZERO, %XMMZERO + + /* Check if we cross page boundary with one vector load. */ + andl $(PAGE_SIZE - 1), %ecx + cmpl $(PAGE_SIZE - VEC_SIZE), %ecx + ja L(cross_page_boundary) + + /* Check the first VEC_SIZE bytes. Search for both CHAR and the + null bytes. */ + VMOVU (%rdi), %YMM1 + + /* Leaves only CHARS matching esi as 0. */ + vpxorq %YMM1, %YMM0, %YMM2 + VPMINU %YMM2, %YMM1, %YMM2 + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM2, %k0 + ktestd %k0, %k0 + jz L(more_vecs) + kmovd %k0, %eax + tzcntl %eax, %eax + /* Found CHAR or the null byte. */ +# ifdef USE_AS_WCSCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + leaq (%rdi, %rax, 4), %rax +# else + addq %rdi, %rax +# endif +# ifndef USE_AS_STRCHRNUL + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax +# endif + ret + + .p2align 4 +L(more_vecs): + /* Align data for aligned loads in the loop. */ + andq $-VEC_SIZE, %rdi +L(aligned_more): + + /* Check the next 4 * VEC_SIZE. Only one VEC_SIZE at a time + since data is only aligned to VEC_SIZE. */ + VMOVA VEC_SIZE(%rdi), %YMM1 + addq $VEC_SIZE, %rdi + + /* Leaves only CHARS matching esi as 0. */ + vpxorq %YMM1, %YMM0, %YMM2 + VPMINU %YMM2, %YMM1, %YMM2 + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM2, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(first_vec_x0) + + VMOVA VEC_SIZE(%rdi), %YMM1 + /* Leaves only CHARS matching esi as 0. */ + vpxorq %YMM1, %YMM0, %YMM2 + VPMINU %YMM2, %YMM1, %YMM2 + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM2, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(first_vec_x1) + + VMOVA (VEC_SIZE * 2)(%rdi), %YMM1 + /* Leaves only CHARS matching esi as 0. */ + vpxorq %YMM1, %YMM0, %YMM2 + VPMINU %YMM2, %YMM1, %YMM2 + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM2, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(first_vec_x2) + + VMOVA (VEC_SIZE * 3)(%rdi), %YMM1 + /* Leaves only CHARS matching esi as 0. */ + vpxorq %YMM1, %YMM0, %YMM2 + VPMINU %YMM2, %YMM1, %YMM2 + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM2, %k0 + ktestd %k0, %k0 + jz L(prep_loop_4x) + + kmovd %k0, %eax + tzcntl %eax, %eax + /* Found CHAR or the null byte. */ +# ifdef USE_AS_WCSCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + leaq (VEC_SIZE * 3)(%rdi, %rax, 4), %rax +# else + leaq (VEC_SIZE * 3)(%rdi, %rax), %rax +# endif +# ifndef USE_AS_STRCHRNUL + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax +# endif + ret + + .p2align 4 +L(first_vec_x0): + tzcntl %eax, %eax + /* Found CHAR or the null byte. */ +# ifdef USE_AS_WCSCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + leaq (%rdi, %rax, 4), %rax +# else + addq %rdi, %rax +# endif +# ifndef USE_AS_STRCHRNUL + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax +# endif + ret + + .p2align 4 +L(first_vec_x1): + tzcntl %eax, %eax + /* Found CHAR or the null byte. */ +# ifdef USE_AS_WCSCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + leaq VEC_SIZE(%rdi, %rax, 4), %rax +# else + leaq VEC_SIZE(%rdi, %rax), %rax +# endif +# ifndef USE_AS_STRCHRNUL + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax +# endif + ret + + .p2align 4 +L(first_vec_x2): + tzcntl %eax, %eax + /* Found CHAR or the null byte. */ +# ifdef USE_AS_WCSCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + leaq (VEC_SIZE * 2)(%rdi, %rax, 4), %rax +# else + leaq (VEC_SIZE * 2)(%rdi, %rax), %rax +# endif +# ifndef USE_AS_STRCHRNUL + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax +# endif + ret + +L(prep_loop_4x): + /* Align data to 4 * VEC_SIZE. */ + andq $-(VEC_SIZE * 4), %rdi + + .p2align 4 +L(loop_4x_vec): + /* Compare 4 * VEC at a time forward. */ + VMOVA (VEC_SIZE * 4)(%rdi), %YMM1 + VMOVA (VEC_SIZE * 5)(%rdi), %YMM2 + VMOVA (VEC_SIZE * 6)(%rdi), %YMM3 + VMOVA (VEC_SIZE * 7)(%rdi), %YMM4 + + /* Leaves only CHARS matching esi as 0. */ + vpxorq %YMM1, %YMM0, %YMM5 + vpxorq %YMM2, %YMM0, %YMM6 + vpxorq %YMM3, %YMM0, %YMM7 + vpxorq %YMM4, %YMM0, %YMM8 + + VPMINU %YMM5, %YMM1, %YMM5 + VPMINU %YMM6, %YMM2, %YMM6 + VPMINU %YMM7, %YMM3, %YMM7 + VPMINU %YMM8, %YMM4, %YMM8 + + VPMINU %YMM5, %YMM6, %YMM1 + VPMINU %YMM7, %YMM8, %YMM2 + + VPMINU %YMM1, %YMM2, %YMM1 + + /* Each bit in K0 represents a CHAR or a null byte. */ + VPCMP $0, %YMMZERO, %YMM1, %k0 + + addq $(VEC_SIZE * 4), %rdi + + ktestd %k0, %k0 + jz L(loop_4x_vec) + + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM5, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(first_vec_x0) + + /* Each bit in K1 represents a CHAR or a null byte in YMM2. */ + VPCMP $0, %YMMZERO, %YMM6, %k1 + kmovd %k1, %eax + testl %eax, %eax + jnz L(first_vec_x1) + + /* Each bit in K2 represents a CHAR or a null byte in YMM3. */ + VPCMP $0, %YMMZERO, %YMM7, %k2 + /* Each bit in K3 represents a CHAR or a null byte in YMM4. */ + VPCMP $0, %YMMZERO, %YMM8, %k3 + +# ifdef USE_AS_WCSCHR + /* NB: Each bit in K2/K3 represents 4-byte element. */ + kshiftlw $8, %k3, %k1 +# else + kshiftlq $32, %k3, %k1 +# endif + + /* Each bit in K1 represents a NULL or a mismatch. */ + korq %k1, %k2, %k1 + kmovq %k1, %rax + + tzcntq %rax, %rax +# ifdef USE_AS_WCSCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + leaq (VEC_SIZE * 2)(%rdi, %rax, 4), %rax +# else + leaq (VEC_SIZE * 2)(%rdi, %rax), %rax +# endif +# ifndef USE_AS_STRCHRNUL + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax +# endif + ret + + /* Cold case for crossing page with first load. */ + .p2align 4 +L(cross_page_boundary): + andq $-VEC_SIZE, %rdi + andl $(VEC_SIZE - 1), %ecx + + VMOVA (%rdi), %YMM1 + + /* Leaves only CHARS matching esi as 0. */ + vpxorq %YMM1, %YMM0, %YMM2 + VPMINU %YMM2, %YMM1, %YMM2 + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM2, %k0 + kmovd %k0, %eax + testl %eax, %eax + +# ifdef USE_AS_WCSCHR + /* NB: Divide shift count by 4 since each bit in K1 represent 4 + bytes. */ + movl %ecx, %SHIFT_REG + sarl $2, %SHIFT_REG +# endif + + /* Remove the leading bits. */ + sarxl %SHIFT_REG, %eax, %eax + testl %eax, %eax + + jz L(aligned_more) + tzcntl %eax, %eax + addq %rcx, %rdi +# ifdef USE_AS_WCSCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + leaq (%rdi, %rax, 4), %rax +# else + addq %rdi, %rax +# endif +# ifndef USE_AS_STRCHRNUL + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax +# endif + ret + +END (STRCHR) +# endif diff --git a/sysdeps/x86_64/multiarch/strchr.c b/sysdeps/x86_64/multiarch/strchr.c index f27980dd3..a04ac8eb1 100644 --- a/sysdeps/x86_64/multiarch/strchr.c +++ b/sysdeps/x86_64/multiarch/strchr.c @@ -29,16 +29,28 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; static inline void * IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) - return OPTIMIZE (avx2); + { + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable) + && CPU_FEATURES_CPU_P (cpu_features, BMI2)) + return OPTIMIZE (evex); + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + return OPTIMIZE (avx2_rtm); + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE (avx2); + } if (CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF)) return OPTIMIZE (sse2_no_bsf); diff --git a/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S b/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S new file mode 100644 index 000000000..cdcf818b9 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S @@ -0,0 +1,3 @@ +#define STRCHR __strchrnul_avx2_rtm +#define USE_AS_STRCHRNUL 1 +#include "strchr-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/strchrnul-evex.S b/sysdeps/x86_64/multiarch/strchrnul-evex.S new file mode 100644 index 000000000..064fe7ca9 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strchrnul-evex.S @@ -0,0 +1,3 @@ +#define STRCHR __strchrnul_evex +#define USE_AS_STRCHRNUL 1 +#include "strchr-evex.S" diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S new file mode 100644 index 000000000..aecd30d97 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S @@ -0,0 +1,12 @@ +#ifndef STRCMP +# define STRCMP __strcmp_avx2_rtm +#endif + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + +#define SECTION(p) p##.avx.rtm + +#include "strcmp-avx2.S" diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S index 48d03a9f4..4d434fd14 100644 --- a/sysdeps/x86_64/multiarch/strcmp-avx2.S +++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S @@ -55,6 +55,10 @@ # define VZEROUPPER vzeroupper # endif +# ifndef SECTION +# define SECTION(p) p##.avx +# endif + /* Warning! wcscmp/wcsncmp have to use SIGNED comparison for elements. strcmp/strncmp have to use UNSIGNED comparison for elements. @@ -75,7 +79,7 @@ the maximum offset is reached before a difference is found, zero is returned. */ - .section .text.avx,"ax",@progbits + .section SECTION(.text),"ax",@progbits ENTRY (STRCMP) # ifdef USE_AS_STRNCMP /* Check for simple cases (0 or 1) in offset. */ @@ -83,6 +87,16 @@ ENTRY (STRCMP) je L(char0) jb L(zero) # ifdef USE_AS_WCSCMP +# ifndef __ILP32__ + movq %rdx, %rcx + /* Check if length could overflow when multiplied by + sizeof(wchar_t). Checking top 8 bits will cover all potential + overflow cases as well as redirect cases where its impossible to + length to bound a valid memory region. In these cases just use + 'wcscmp'. */ + shrq $56, %rcx + jnz OVERFLOW_STRCMP +# endif /* Convert units: from wide to byte char. */ shl $2, %RDX_LP # endif @@ -127,8 +141,8 @@ L(return): movzbl (%rsi, %rdx), %edx subl %edx, %eax # endif - VZEROUPPER - ret +L(return_vzeroupper): + ZERO_UPPER_VEC_REGISTERS_RETURN .p2align 4 L(return_vec_size): @@ -161,8 +175,7 @@ L(return_vec_size): subl %edx, %eax # endif # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(return_2_vec_size): @@ -195,8 +208,7 @@ L(return_2_vec_size): subl %edx, %eax # endif # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(return_3_vec_size): @@ -229,8 +241,7 @@ L(return_3_vec_size): subl %edx, %eax # endif # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(next_3_vectors): @@ -356,8 +367,7 @@ L(back_to_loop): subl %edx, %eax # endif # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(test_vec): @@ -400,8 +410,7 @@ L(test_vec): subl %edx, %eax # endif # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(test_2_vec): @@ -444,8 +453,7 @@ L(test_2_vec): subl %edx, %eax # endif # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(test_3_vec): @@ -486,8 +494,7 @@ L(test_3_vec): subl %edx, %eax # endif # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(loop_cross_page): @@ -556,8 +563,7 @@ L(loop_cross_page): subl %edx, %eax # endif # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(loop_cross_page_2_vec): @@ -591,7 +597,14 @@ L(loop_cross_page_2_vec): movl $(PAGE_SIZE / (VEC_SIZE * 4) - 1), %esi testq %rdi, %rdi +# ifdef USE_AS_STRNCMP + /* At this point, if %rdi value is 0, it already tested + VEC_SIZE*4+%r10 byte starting from %rax. This label + checks whether strncmp maximum offset reached or not. */ + je L(string_nbyte_offset_check) +# else je L(back_to_loop) +# endif tzcntq %rdi, %rcx addq %r10, %rcx /* Adjust for number of bytes skipped. */ @@ -624,8 +637,15 @@ L(loop_cross_page_2_vec): subl %edx, %eax # endif # endif - VZEROUPPER - ret + VZEROUPPER_RETURN + +# ifdef USE_AS_STRNCMP +L(string_nbyte_offset_check): + leaq (VEC_SIZE * 4)(%r10), %r10 + cmpq %r10, %r11 + jbe L(zero) + jmp L(back_to_loop) +# endif .p2align 4 L(cross_page_loop): @@ -659,8 +679,7 @@ L(cross_page_loop): # ifndef USE_AS_WCSCMP L(different): # endif - VZEROUPPER - ret + VZEROUPPER_RETURN # ifdef USE_AS_WCSCMP .p2align 4 @@ -670,16 +689,14 @@ L(different): setl %al negl %eax orl $1, %eax - VZEROUPPER - ret + VZEROUPPER_RETURN # endif # ifdef USE_AS_STRNCMP .p2align 4 L(zero): xorl %eax, %eax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(char0): @@ -693,8 +710,7 @@ L(char0): movzbl (%rdi), %eax subl %ecx, %eax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN # endif .p2align 4 @@ -719,8 +735,7 @@ L(last_vector): movzbl (%rsi, %rdx), %edx subl %edx, %eax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN /* Comparing on page boundary region requires special treatment: It must done one vector at the time, starting with the wider @@ -841,7 +856,6 @@ L(cross_page_4bytes): testl %eax, %eax jne L(cross_page_loop) subl %ecx, %eax - VZEROUPPER - ret + VZEROUPPER_RETURN END (STRCMP) #endif diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S new file mode 100644 index 000000000..459eeed09 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strcmp-evex.S @@ -0,0 +1,1043 @@ +/* strcmp/wcscmp/strncmp/wcsncmp optimized with 256-bit EVEX instructions. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +# include + +# ifndef STRCMP +# define STRCMP __strcmp_evex +# endif + +# define PAGE_SIZE 4096 + +/* VEC_SIZE = Number of bytes in a ymm register */ +# define VEC_SIZE 32 + +/* Shift for dividing by (VEC_SIZE * 4). */ +# define DIVIDE_BY_VEC_4_SHIFT 7 +# if (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT) +# error (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT) +# endif + +# define VMOVU vmovdqu64 +# define VMOVA vmovdqa64 + +# ifdef USE_AS_WCSCMP +/* Compare packed dwords. */ +# define VPCMP vpcmpd +# define SHIFT_REG32 r8d +# define SHIFT_REG64 r8 +/* 1 dword char == 4 bytes. */ +# define SIZE_OF_CHAR 4 +# else +/* Compare packed bytes. */ +# define VPCMP vpcmpb +# define SHIFT_REG32 ecx +# define SHIFT_REG64 rcx +/* 1 byte char == 1 byte. */ +# define SIZE_OF_CHAR 1 +# endif + +# define XMMZERO xmm16 +# define XMM0 xmm17 +# define XMM1 xmm18 + +# define YMMZERO ymm16 +# define YMM0 ymm17 +# define YMM1 ymm18 +# define YMM2 ymm19 +# define YMM3 ymm20 +# define YMM4 ymm21 +# define YMM5 ymm22 +# define YMM6 ymm23 +# define YMM7 ymm24 + +/* Warning! + wcscmp/wcsncmp have to use SIGNED comparison for elements. + strcmp/strncmp have to use UNSIGNED comparison for elements. +*/ + +/* The main idea of the string comparison (byte or dword) using 256-bit + EVEX instructions consists of comparing (VPCMP) two ymm vectors. The + latter can be on either packed bytes or dwords depending on + USE_AS_WCSCMP. In order to check the null char, algorithm keeps the + matched bytes/dwords, requiring 5 EVEX instructions (3 VPCMP and 2 + KORD). In general, the costs of comparing VEC_SIZE bytes (32-bytes) + are 3 VPCMP and 2 KORD instructions, together with VMOVU and ktestd + instructions. Main loop (away from from page boundary) compares 4 + vectors are a time, effectively comparing 4 x VEC_SIZE bytes (128 + bytes) on each loop. + + The routine strncmp/wcsncmp (enabled by defining USE_AS_STRNCMP) logic + is the same as strcmp, except that an a maximum offset is tracked. If + the maximum offset is reached before a difference is found, zero is + returned. */ + + .section .text.evex,"ax",@progbits +ENTRY (STRCMP) +# ifdef USE_AS_STRNCMP + /* Check for simple cases (0 or 1) in offset. */ + cmp $1, %RDX_LP + je L(char0) + jb L(zero) +# ifdef USE_AS_WCSCMP + /* Convert units: from wide to byte char. */ + shl $2, %RDX_LP +# endif + /* Register %r11 tracks the maximum offset. */ + mov %RDX_LP, %R11_LP +# endif + movl %edi, %eax + xorl %edx, %edx + /* Make %XMMZERO (%YMMZERO) all zeros in this function. */ + vpxorq %XMMZERO, %XMMZERO, %XMMZERO + orl %esi, %eax + andl $(PAGE_SIZE - 1), %eax + cmpl $(PAGE_SIZE - (VEC_SIZE * 4)), %eax + jg L(cross_page) + /* Start comparing 4 vectors. */ + VMOVU (%rdi), %YMM0 + VMOVU (%rsi), %YMM1 + + /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */ + VPCMP $4, %YMM0, %YMM1, %k0 + + /* Check for NULL in YMM0. */ + VPCMP $0, %YMMZERO, %YMM0, %k1 + /* Check for NULL in YMM1. */ + VPCMP $0, %YMMZERO, %YMM1, %k2 + /* Each bit in K1 represents a NULL in YMM0 or YMM1. */ + kord %k1, %k2, %k1 + + /* Each bit in K1 represents: + 1. A mismatch in YMM0 and YMM1. Or + 2. A NULL in YMM0 or YMM1. + */ + kord %k0, %k1, %k1 + + ktestd %k1, %k1 + je L(next_3_vectors) + kmovd %k1, %ecx + tzcntl %ecx, %edx +# ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %edx +# endif +# ifdef USE_AS_STRNCMP + /* Return 0 if the mismatched index (%rdx) is after the maximum + offset (%r11). */ + cmpq %r11, %rdx + jae L(zero) +# endif +# ifdef USE_AS_WCSCMP + xorl %eax, %eax + movl (%rdi, %rdx), %ecx + cmpl (%rsi, %rdx), %ecx + je L(return) +L(wcscmp_return): + setl %al + negl %eax + orl $1, %eax +L(return): +# else + movzbl (%rdi, %rdx), %eax + movzbl (%rsi, %rdx), %edx + subl %edx, %eax +# endif + ret + + .p2align 4 +L(return_vec_size): + kmovd %k1, %ecx + tzcntl %ecx, %edx +# ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %edx +# endif +# ifdef USE_AS_STRNCMP + /* Return 0 if the mismatched index (%rdx + VEC_SIZE) is after + the maximum offset (%r11). */ + addq $VEC_SIZE, %rdx + cmpq %r11, %rdx + jae L(zero) +# ifdef USE_AS_WCSCMP + xorl %eax, %eax + movl (%rdi, %rdx), %ecx + cmpl (%rsi, %rdx), %ecx + jne L(wcscmp_return) +# else + movzbl (%rdi, %rdx), %eax + movzbl (%rsi, %rdx), %edx + subl %edx, %eax +# endif +# else +# ifdef USE_AS_WCSCMP + xorl %eax, %eax + movl VEC_SIZE(%rdi, %rdx), %ecx + cmpl VEC_SIZE(%rsi, %rdx), %ecx + jne L(wcscmp_return) +# else + movzbl VEC_SIZE(%rdi, %rdx), %eax + movzbl VEC_SIZE(%rsi, %rdx), %edx + subl %edx, %eax +# endif +# endif + ret + + .p2align 4 +L(return_2_vec_size): + kmovd %k1, %ecx + tzcntl %ecx, %edx +# ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %edx +# endif +# ifdef USE_AS_STRNCMP + /* Return 0 if the mismatched index (%rdx + 2 * VEC_SIZE) is + after the maximum offset (%r11). */ + addq $(VEC_SIZE * 2), %rdx + cmpq %r11, %rdx + jae L(zero) +# ifdef USE_AS_WCSCMP + xorl %eax, %eax + movl (%rdi, %rdx), %ecx + cmpl (%rsi, %rdx), %ecx + jne L(wcscmp_return) +# else + movzbl (%rdi, %rdx), %eax + movzbl (%rsi, %rdx), %edx + subl %edx, %eax +# endif +# else +# ifdef USE_AS_WCSCMP + xorl %eax, %eax + movl (VEC_SIZE * 2)(%rdi, %rdx), %ecx + cmpl (VEC_SIZE * 2)(%rsi, %rdx), %ecx + jne L(wcscmp_return) +# else + movzbl (VEC_SIZE * 2)(%rdi, %rdx), %eax + movzbl (VEC_SIZE * 2)(%rsi, %rdx), %edx + subl %edx, %eax +# endif +# endif + ret + + .p2align 4 +L(return_3_vec_size): + kmovd %k1, %ecx + tzcntl %ecx, %edx +# ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %edx +# endif +# ifdef USE_AS_STRNCMP + /* Return 0 if the mismatched index (%rdx + 3 * VEC_SIZE) is + after the maximum offset (%r11). */ + addq $(VEC_SIZE * 3), %rdx + cmpq %r11, %rdx + jae L(zero) +# ifdef USE_AS_WCSCMP + xorl %eax, %eax + movl (%rdi, %rdx), %ecx + cmpl (%rsi, %rdx), %ecx + jne L(wcscmp_return) +# else + movzbl (%rdi, %rdx), %eax + movzbl (%rsi, %rdx), %edx + subl %edx, %eax +# endif +# else +# ifdef USE_AS_WCSCMP + xorl %eax, %eax + movl (VEC_SIZE * 3)(%rdi, %rdx), %ecx + cmpl (VEC_SIZE * 3)(%rsi, %rdx), %ecx + jne L(wcscmp_return) +# else + movzbl (VEC_SIZE * 3)(%rdi, %rdx), %eax + movzbl (VEC_SIZE * 3)(%rsi, %rdx), %edx + subl %edx, %eax +# endif +# endif + ret + + .p2align 4 +L(next_3_vectors): + VMOVU VEC_SIZE(%rdi), %YMM0 + VMOVU VEC_SIZE(%rsi), %YMM1 + /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */ + VPCMP $4, %YMM0, %YMM1, %k0 + VPCMP $0, %YMMZERO, %YMM0, %k1 + VPCMP $0, %YMMZERO, %YMM1, %k2 + /* Each bit in K1 represents a NULL in YMM0 or YMM1. */ + kord %k1, %k2, %k1 + /* Each bit in K1 represents a NULL or a mismatch. */ + kord %k0, %k1, %k1 + ktestd %k1, %k1 + jne L(return_vec_size) + + VMOVU (VEC_SIZE * 2)(%rdi), %YMM2 + VMOVU (VEC_SIZE * 3)(%rdi), %YMM3 + VMOVU (VEC_SIZE * 2)(%rsi), %YMM4 + VMOVU (VEC_SIZE * 3)(%rsi), %YMM5 + + /* Each bit in K0 represents a mismatch in YMM2 and YMM4. */ + VPCMP $4, %YMM2, %YMM4, %k0 + VPCMP $0, %YMMZERO, %YMM2, %k1 + VPCMP $0, %YMMZERO, %YMM4, %k2 + /* Each bit in K1 represents a NULL in YMM2 or YMM4. */ + kord %k1, %k2, %k1 + /* Each bit in K1 represents a NULL or a mismatch. */ + kord %k0, %k1, %k1 + ktestd %k1, %k1 + jne L(return_2_vec_size) + + /* Each bit in K0 represents a mismatch in YMM3 and YMM5. */ + VPCMP $4, %YMM3, %YMM5, %k0 + VPCMP $0, %YMMZERO, %YMM3, %k1 + VPCMP $0, %YMMZERO, %YMM5, %k2 + /* Each bit in K1 represents a NULL in YMM3 or YMM5. */ + kord %k1, %k2, %k1 + /* Each bit in K1 represents a NULL or a mismatch. */ + kord %k0, %k1, %k1 + ktestd %k1, %k1 + jne L(return_3_vec_size) +L(main_loop_header): + leaq (VEC_SIZE * 4)(%rdi), %rdx + movl $PAGE_SIZE, %ecx + /* Align load via RAX. */ + andq $-(VEC_SIZE * 4), %rdx + subq %rdi, %rdx + leaq (%rdi, %rdx), %rax +# ifdef USE_AS_STRNCMP + /* Starting from this point, the maximum offset, or simply the + 'offset', DECREASES by the same amount when base pointers are + moved forward. Return 0 when: + 1) On match: offset <= the matched vector index. + 2) On mistmach, offset is before the mistmatched index. + */ + subq %rdx, %r11 + jbe L(zero) +# endif + addq %rsi, %rdx + movq %rdx, %rsi + andl $(PAGE_SIZE - 1), %esi + /* Number of bytes before page crossing. */ + subq %rsi, %rcx + /* Number of VEC_SIZE * 4 blocks before page crossing. */ + shrq $DIVIDE_BY_VEC_4_SHIFT, %rcx + /* ESI: Number of VEC_SIZE * 4 blocks before page crossing. */ + movl %ecx, %esi + jmp L(loop_start) + + .p2align 4 +L(loop): +# ifdef USE_AS_STRNCMP + /* Base pointers are moved forward by 4 * VEC_SIZE. Decrease + the maximum offset (%r11) by the same amount. */ + subq $(VEC_SIZE * 4), %r11 + jbe L(zero) +# endif + addq $(VEC_SIZE * 4), %rax + addq $(VEC_SIZE * 4), %rdx +L(loop_start): + testl %esi, %esi + leal -1(%esi), %esi + je L(loop_cross_page) +L(back_to_loop): + /* Main loop, comparing 4 vectors are a time. */ + VMOVA (%rax), %YMM0 + VMOVA VEC_SIZE(%rax), %YMM2 + VMOVA (VEC_SIZE * 2)(%rax), %YMM4 + VMOVA (VEC_SIZE * 3)(%rax), %YMM6 + VMOVU (%rdx), %YMM1 + VMOVU VEC_SIZE(%rdx), %YMM3 + VMOVU (VEC_SIZE * 2)(%rdx), %YMM5 + VMOVU (VEC_SIZE * 3)(%rdx), %YMM7 + + VPCMP $4, %YMM0, %YMM1, %k0 + VPCMP $0, %YMMZERO, %YMM0, %k1 + VPCMP $0, %YMMZERO, %YMM1, %k2 + kord %k1, %k2, %k1 + /* Each bit in K4 represents a NULL or a mismatch in YMM0 and + YMM1. */ + kord %k0, %k1, %k4 + + VPCMP $4, %YMM2, %YMM3, %k0 + VPCMP $0, %YMMZERO, %YMM2, %k1 + VPCMP $0, %YMMZERO, %YMM3, %k2 + kord %k1, %k2, %k1 + /* Each bit in K5 represents a NULL or a mismatch in YMM2 and + YMM3. */ + kord %k0, %k1, %k5 + + VPCMP $4, %YMM4, %YMM5, %k0 + VPCMP $0, %YMMZERO, %YMM4, %k1 + VPCMP $0, %YMMZERO, %YMM5, %k2 + kord %k1, %k2, %k1 + /* Each bit in K6 represents a NULL or a mismatch in YMM4 and + YMM5. */ + kord %k0, %k1, %k6 + + VPCMP $4, %YMM6, %YMM7, %k0 + VPCMP $0, %YMMZERO, %YMM6, %k1 + VPCMP $0, %YMMZERO, %YMM7, %k2 + kord %k1, %k2, %k1 + /* Each bit in K7 represents a NULL or a mismatch in YMM6 and + YMM7. */ + kord %k0, %k1, %k7 + + kord %k4, %k5, %k0 + kord %k6, %k7, %k1 + + /* Test each mask (32 bits) individually because for VEC_SIZE + == 32 is not possible to OR the four masks and keep all bits + in a 64-bit integer register, differing from SSE2 strcmp + where ORing is possible. */ + kortestd %k0, %k1 + je L(loop) + ktestd %k4, %k4 + je L(test_vec) + kmovd %k4, %edi + tzcntl %edi, %ecx +# ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %ecx +# endif +# ifdef USE_AS_STRNCMP + cmpq %rcx, %r11 + jbe L(zero) +# ifdef USE_AS_WCSCMP + movq %rax, %rsi + xorl %eax, %eax + movl (%rsi, %rcx), %edi + cmpl (%rdx, %rcx), %edi + jne L(wcscmp_return) +# else + movzbl (%rax, %rcx), %eax + movzbl (%rdx, %rcx), %edx + subl %edx, %eax +# endif +# else +# ifdef USE_AS_WCSCMP + movq %rax, %rsi + xorl %eax, %eax + movl (%rsi, %rcx), %edi + cmpl (%rdx, %rcx), %edi + jne L(wcscmp_return) +# else + movzbl (%rax, %rcx), %eax + movzbl (%rdx, %rcx), %edx + subl %edx, %eax +# endif +# endif + ret + + .p2align 4 +L(test_vec): +# ifdef USE_AS_STRNCMP + /* The first vector matched. Return 0 if the maximum offset + (%r11) <= VEC_SIZE. */ + cmpq $VEC_SIZE, %r11 + jbe L(zero) +# endif + ktestd %k5, %k5 + je L(test_2_vec) + kmovd %k5, %ecx + tzcntl %ecx, %edi +# ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %edi +# endif +# ifdef USE_AS_STRNCMP + addq $VEC_SIZE, %rdi + cmpq %rdi, %r11 + jbe L(zero) +# ifdef USE_AS_WCSCMP + movq %rax, %rsi + xorl %eax, %eax + movl (%rsi, %rdi), %ecx + cmpl (%rdx, %rdi), %ecx + jne L(wcscmp_return) +# else + movzbl (%rax, %rdi), %eax + movzbl (%rdx, %rdi), %edx + subl %edx, %eax +# endif +# else +# ifdef USE_AS_WCSCMP + movq %rax, %rsi + xorl %eax, %eax + movl VEC_SIZE(%rsi, %rdi), %ecx + cmpl VEC_SIZE(%rdx, %rdi), %ecx + jne L(wcscmp_return) +# else + movzbl VEC_SIZE(%rax, %rdi), %eax + movzbl VEC_SIZE(%rdx, %rdi), %edx + subl %edx, %eax +# endif +# endif + ret + + .p2align 4 +L(test_2_vec): +# ifdef USE_AS_STRNCMP + /* The first 2 vectors matched. Return 0 if the maximum offset + (%r11) <= 2 * VEC_SIZE. */ + cmpq $(VEC_SIZE * 2), %r11 + jbe L(zero) +# endif + ktestd %k6, %k6 + je L(test_3_vec) + kmovd %k6, %ecx + tzcntl %ecx, %edi +# ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %edi +# endif +# ifdef USE_AS_STRNCMP + addq $(VEC_SIZE * 2), %rdi + cmpq %rdi, %r11 + jbe L(zero) +# ifdef USE_AS_WCSCMP + movq %rax, %rsi + xorl %eax, %eax + movl (%rsi, %rdi), %ecx + cmpl (%rdx, %rdi), %ecx + jne L(wcscmp_return) +# else + movzbl (%rax, %rdi), %eax + movzbl (%rdx, %rdi), %edx + subl %edx, %eax +# endif +# else +# ifdef USE_AS_WCSCMP + movq %rax, %rsi + xorl %eax, %eax + movl (VEC_SIZE * 2)(%rsi, %rdi), %ecx + cmpl (VEC_SIZE * 2)(%rdx, %rdi), %ecx + jne L(wcscmp_return) +# else + movzbl (VEC_SIZE * 2)(%rax, %rdi), %eax + movzbl (VEC_SIZE * 2)(%rdx, %rdi), %edx + subl %edx, %eax +# endif +# endif + ret + + .p2align 4 +L(test_3_vec): +# ifdef USE_AS_STRNCMP + /* The first 3 vectors matched. Return 0 if the maximum offset + (%r11) <= 3 * VEC_SIZE. */ + cmpq $(VEC_SIZE * 3), %r11 + jbe L(zero) +# endif + kmovd %k7, %esi + tzcntl %esi, %ecx +# ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %ecx +# endif +# ifdef USE_AS_STRNCMP + addq $(VEC_SIZE * 3), %rcx + cmpq %rcx, %r11 + jbe L(zero) +# ifdef USE_AS_WCSCMP + movq %rax, %rsi + xorl %eax, %eax + movl (%rsi, %rcx), %esi + cmpl (%rdx, %rcx), %esi + jne L(wcscmp_return) +# else + movzbl (%rax, %rcx), %eax + movzbl (%rdx, %rcx), %edx + subl %edx, %eax +# endif +# else +# ifdef USE_AS_WCSCMP + movq %rax, %rsi + xorl %eax, %eax + movl (VEC_SIZE * 3)(%rsi, %rcx), %esi + cmpl (VEC_SIZE * 3)(%rdx, %rcx), %esi + jne L(wcscmp_return) +# else + movzbl (VEC_SIZE * 3)(%rax, %rcx), %eax + movzbl (VEC_SIZE * 3)(%rdx, %rcx), %edx + subl %edx, %eax +# endif +# endif + ret + + .p2align 4 +L(loop_cross_page): + xorl %r10d, %r10d + movq %rdx, %rcx + /* Align load via RDX. We load the extra ECX bytes which should + be ignored. */ + andl $((VEC_SIZE * 4) - 1), %ecx + /* R10 is -RCX. */ + subq %rcx, %r10 + + /* This works only if VEC_SIZE * 2 == 64. */ +# if (VEC_SIZE * 2) != 64 +# error (VEC_SIZE * 2) != 64 +# endif + + /* Check if the first VEC_SIZE * 2 bytes should be ignored. */ + cmpl $(VEC_SIZE * 2), %ecx + jge L(loop_cross_page_2_vec) + + VMOVU (%rax, %r10), %YMM2 + VMOVU VEC_SIZE(%rax, %r10), %YMM3 + VMOVU (%rdx, %r10), %YMM4 + VMOVU VEC_SIZE(%rdx, %r10), %YMM5 + + VPCMP $4, %YMM4, %YMM2, %k0 + VPCMP $0, %YMMZERO, %YMM2, %k1 + VPCMP $0, %YMMZERO, %YMM4, %k2 + kord %k1, %k2, %k1 + /* Each bit in K1 represents a NULL or a mismatch in YMM2 and + YMM4. */ + kord %k0, %k1, %k1 + + VPCMP $4, %YMM5, %YMM3, %k3 + VPCMP $0, %YMMZERO, %YMM3, %k4 + VPCMP $0, %YMMZERO, %YMM5, %k5 + kord %k4, %k5, %k4 + /* Each bit in K3 represents a NULL or a mismatch in YMM3 and + YMM5. */ + kord %k3, %k4, %k3 + +# ifdef USE_AS_WCSCMP + /* NB: Each bit in K1/K3 represents 4-byte element. */ + kshiftlw $8, %k3, %k2 + /* NB: Divide shift count by 4 since each bit in K1 represent 4 + bytes. */ + movl %ecx, %SHIFT_REG32 + sarl $2, %SHIFT_REG32 +# else + kshiftlq $32, %k3, %k2 +# endif + + /* Each bit in K1 represents a NULL or a mismatch. */ + korq %k1, %k2, %k1 + kmovq %k1, %rdi + + /* Since ECX < VEC_SIZE * 2, simply skip the first ECX bytes. */ + shrxq %SHIFT_REG64, %rdi, %rdi + testq %rdi, %rdi + je L(loop_cross_page_2_vec) + tzcntq %rdi, %rcx +# ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %ecx +# endif +# ifdef USE_AS_STRNCMP + cmpq %rcx, %r11 + jbe L(zero) +# ifdef USE_AS_WCSCMP + movq %rax, %rsi + xorl %eax, %eax + movl (%rsi, %rcx), %edi + cmpl (%rdx, %rcx), %edi + jne L(wcscmp_return) +# else + movzbl (%rax, %rcx), %eax + movzbl (%rdx, %rcx), %edx + subl %edx, %eax +# endif +# else +# ifdef USE_AS_WCSCMP + movq %rax, %rsi + xorl %eax, %eax + movl (%rsi, %rcx), %edi + cmpl (%rdx, %rcx), %edi + jne L(wcscmp_return) +# else + movzbl (%rax, %rcx), %eax + movzbl (%rdx, %rcx), %edx + subl %edx, %eax +# endif +# endif + ret + + .p2align 4 +L(loop_cross_page_2_vec): + /* The first VEC_SIZE * 2 bytes match or are ignored. */ + VMOVU (VEC_SIZE * 2)(%rax, %r10), %YMM0 + VMOVU (VEC_SIZE * 3)(%rax, %r10), %YMM1 + VMOVU (VEC_SIZE * 2)(%rdx, %r10), %YMM2 + VMOVU (VEC_SIZE * 3)(%rdx, %r10), %YMM3 + + VPCMP $4, %YMM0, %YMM2, %k0 + VPCMP $0, %YMMZERO, %YMM0, %k1 + VPCMP $0, %YMMZERO, %YMM2, %k2 + kord %k1, %k2, %k1 + /* Each bit in K1 represents a NULL or a mismatch in YMM0 and + YMM2. */ + kord %k0, %k1, %k1 + + VPCMP $4, %YMM1, %YMM3, %k3 + VPCMP $0, %YMMZERO, %YMM1, %k4 + VPCMP $0, %YMMZERO, %YMM3, %k5 + kord %k4, %k5, %k4 + /* Each bit in K3 represents a NULL or a mismatch in YMM1 and + YMM3. */ + kord %k3, %k4, %k3 + +# ifdef USE_AS_WCSCMP + /* NB: Each bit in K1/K3 represents 4-byte element. */ + kshiftlw $8, %k3, %k2 +# else + kshiftlq $32, %k3, %k2 +# endif + + /* Each bit in K1 represents a NULL or a mismatch. */ + korq %k1, %k2, %k1 + kmovq %k1, %rdi + + xorl %r8d, %r8d + /* If ECX > VEC_SIZE * 2, skip ECX - (VEC_SIZE * 2) bytes. */ + subl $(VEC_SIZE * 2), %ecx + jle 1f + /* R8 has number of bytes skipped. */ + movl %ecx, %r8d +# ifdef USE_AS_WCSCMP + /* NB: Divide shift count by 4 since each bit in K1 represent 4 + bytes. */ + sarl $2, %ecx +# endif + /* Skip ECX bytes. */ + shrq %cl, %rdi +1: + /* Before jumping back to the loop, set ESI to the number of + VEC_SIZE * 4 blocks before page crossing. */ + movl $(PAGE_SIZE / (VEC_SIZE * 4) - 1), %esi + + testq %rdi, %rdi +# ifdef USE_AS_STRNCMP + /* At this point, if %rdi value is 0, it already tested + VEC_SIZE*4+%r10 byte starting from %rax. This label + checks whether strncmp maximum offset reached or not. */ + je L(string_nbyte_offset_check) +# else + je L(back_to_loop) +# endif + tzcntq %rdi, %rcx +# ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %ecx +# endif + addq %r10, %rcx + /* Adjust for number of bytes skipped. */ + addq %r8, %rcx +# ifdef USE_AS_STRNCMP + addq $(VEC_SIZE * 2), %rcx + subq %rcx, %r11 + jbe L(zero) +# ifdef USE_AS_WCSCMP + movq %rax, %rsi + xorl %eax, %eax + movl (%rsi, %rcx), %edi + cmpl (%rdx, %rcx), %edi + jne L(wcscmp_return) +# else + movzbl (%rax, %rcx), %eax + movzbl (%rdx, %rcx), %edx + subl %edx, %eax +# endif +# else +# ifdef USE_AS_WCSCMP + movq %rax, %rsi + xorl %eax, %eax + movl (VEC_SIZE * 2)(%rsi, %rcx), %edi + cmpl (VEC_SIZE * 2)(%rdx, %rcx), %edi + jne L(wcscmp_return) +# else + movzbl (VEC_SIZE * 2)(%rax, %rcx), %eax + movzbl (VEC_SIZE * 2)(%rdx, %rcx), %edx + subl %edx, %eax +# endif +# endif + ret + +# ifdef USE_AS_STRNCMP +L(string_nbyte_offset_check): + leaq (VEC_SIZE * 4)(%r10), %r10 + cmpq %r10, %r11 + jbe L(zero) + jmp L(back_to_loop) +# endif + + .p2align 4 +L(cross_page_loop): + /* Check one byte/dword at a time. */ +# ifdef USE_AS_WCSCMP + cmpl %ecx, %eax +# else + subl %ecx, %eax +# endif + jne L(different) + addl $SIZE_OF_CHAR, %edx + cmpl $(VEC_SIZE * 4), %edx + je L(main_loop_header) +# ifdef USE_AS_STRNCMP + cmpq %r11, %rdx + jae L(zero) +# endif +# ifdef USE_AS_WCSCMP + movl (%rdi, %rdx), %eax + movl (%rsi, %rdx), %ecx +# else + movzbl (%rdi, %rdx), %eax + movzbl (%rsi, %rdx), %ecx +# endif + /* Check null char. */ + testl %eax, %eax + jne L(cross_page_loop) + /* Since %eax == 0, subtract is OK for both SIGNED and UNSIGNED + comparisons. */ + subl %ecx, %eax +# ifndef USE_AS_WCSCMP +L(different): +# endif + ret + +# ifdef USE_AS_WCSCMP + .p2align 4 +L(different): + /* Use movl to avoid modifying EFLAGS. */ + movl $0, %eax + setl %al + negl %eax + orl $1, %eax + ret +# endif + +# ifdef USE_AS_STRNCMP + .p2align 4 +L(zero): + xorl %eax, %eax + ret + + .p2align 4 +L(char0): +# ifdef USE_AS_WCSCMP + xorl %eax, %eax + movl (%rdi), %ecx + cmpl (%rsi), %ecx + jne L(wcscmp_return) +# else + movzbl (%rsi), %ecx + movzbl (%rdi), %eax + subl %ecx, %eax +# endif + ret +# endif + + .p2align 4 +L(last_vector): + addq %rdx, %rdi + addq %rdx, %rsi +# ifdef USE_AS_STRNCMP + subq %rdx, %r11 +# endif + tzcntl %ecx, %edx +# ifdef USE_AS_WCSCMP + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + sall $2, %edx +# endif +# ifdef USE_AS_STRNCMP + cmpq %r11, %rdx + jae L(zero) +# endif +# ifdef USE_AS_WCSCMP + xorl %eax, %eax + movl (%rdi, %rdx), %ecx + cmpl (%rsi, %rdx), %ecx + jne L(wcscmp_return) +# else + movzbl (%rdi, %rdx), %eax + movzbl (%rsi, %rdx), %edx + subl %edx, %eax +# endif + ret + + /* Comparing on page boundary region requires special treatment: + It must done one vector at the time, starting with the wider + ymm vector if possible, if not, with xmm. If fetching 16 bytes + (xmm) still passes the boundary, byte comparison must be done. + */ + .p2align 4 +L(cross_page): + /* Try one ymm vector at a time. */ + cmpl $(PAGE_SIZE - VEC_SIZE), %eax + jg L(cross_page_1_vector) +L(loop_1_vector): + VMOVU (%rdi, %rdx), %YMM0 + VMOVU (%rsi, %rdx), %YMM1 + + /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */ + VPCMP $4, %YMM0, %YMM1, %k0 + VPCMP $0, %YMMZERO, %YMM0, %k1 + VPCMP $0, %YMMZERO, %YMM1, %k2 + /* Each bit in K1 represents a NULL in YMM0 or YMM1. */ + kord %k1, %k2, %k1 + /* Each bit in K1 represents a NULL or a mismatch. */ + kord %k0, %k1, %k1 + kmovd %k1, %ecx + testl %ecx, %ecx + jne L(last_vector) + + addl $VEC_SIZE, %edx + + addl $VEC_SIZE, %eax +# ifdef USE_AS_STRNCMP + /* Return 0 if the current offset (%rdx) >= the maximum offset + (%r11). */ + cmpq %r11, %rdx + jae L(zero) +# endif + cmpl $(PAGE_SIZE - VEC_SIZE), %eax + jle L(loop_1_vector) +L(cross_page_1_vector): + /* Less than 32 bytes to check, try one xmm vector. */ + cmpl $(PAGE_SIZE - 16), %eax + jg L(cross_page_1_xmm) + VMOVU (%rdi, %rdx), %XMM0 + VMOVU (%rsi, %rdx), %XMM1 + + /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */ + VPCMP $4, %XMM0, %XMM1, %k0 + VPCMP $0, %XMMZERO, %XMM0, %k1 + VPCMP $0, %XMMZERO, %XMM1, %k2 + /* Each bit in K1 represents a NULL in XMM0 or XMM1. */ + korw %k1, %k2, %k1 + /* Each bit in K1 represents a NULL or a mismatch. */ + korw %k0, %k1, %k1 + kmovw %k1, %ecx + testl %ecx, %ecx + jne L(last_vector) + + addl $16, %edx +# ifndef USE_AS_WCSCMP + addl $16, %eax +# endif +# ifdef USE_AS_STRNCMP + /* Return 0 if the current offset (%rdx) >= the maximum offset + (%r11). */ + cmpq %r11, %rdx + jae L(zero) +# endif + +L(cross_page_1_xmm): +# ifndef USE_AS_WCSCMP + /* Less than 16 bytes to check, try 8 byte vector. NB: No need + for wcscmp nor wcsncmp since wide char is 4 bytes. */ + cmpl $(PAGE_SIZE - 8), %eax + jg L(cross_page_8bytes) + vmovq (%rdi, %rdx), %XMM0 + vmovq (%rsi, %rdx), %XMM1 + + /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */ + VPCMP $4, %XMM0, %XMM1, %k0 + VPCMP $0, %XMMZERO, %XMM0, %k1 + VPCMP $0, %XMMZERO, %XMM1, %k2 + /* Each bit in K1 represents a NULL in XMM0 or XMM1. */ + kord %k1, %k2, %k1 + /* Each bit in K1 represents a NULL or a mismatch. */ + kord %k0, %k1, %k1 + kmovd %k1, %ecx + +# ifdef USE_AS_WCSCMP + /* Only last 2 bits are valid. */ + andl $0x3, %ecx +# else + /* Only last 8 bits are valid. */ + andl $0xff, %ecx +# endif + + testl %ecx, %ecx + jne L(last_vector) + + addl $8, %edx + addl $8, %eax +# ifdef USE_AS_STRNCMP + /* Return 0 if the current offset (%rdx) >= the maximum offset + (%r11). */ + cmpq %r11, %rdx + jae L(zero) +# endif + +L(cross_page_8bytes): + /* Less than 8 bytes to check, try 4 byte vector. */ + cmpl $(PAGE_SIZE - 4), %eax + jg L(cross_page_4bytes) + vmovd (%rdi, %rdx), %XMM0 + vmovd (%rsi, %rdx), %XMM1 + + /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */ + VPCMP $4, %XMM0, %XMM1, %k0 + VPCMP $0, %XMMZERO, %XMM0, %k1 + VPCMP $0, %XMMZERO, %XMM1, %k2 + /* Each bit in K1 represents a NULL in XMM0 or XMM1. */ + kord %k1, %k2, %k1 + /* Each bit in K1 represents a NULL or a mismatch. */ + kord %k0, %k1, %k1 + kmovd %k1, %ecx + +# ifdef USE_AS_WCSCMP + /* Only the last bit is valid. */ + andl $0x1, %ecx +# else + /* Only last 4 bits are valid. */ + andl $0xf, %ecx +# endif + + testl %ecx, %ecx + jne L(last_vector) + + addl $4, %edx +# ifdef USE_AS_STRNCMP + /* Return 0 if the current offset (%rdx) >= the maximum offset + (%r11). */ + cmpq %r11, %rdx + jae L(zero) +# endif + +L(cross_page_4bytes): +# endif + /* Less than 4 bytes to check, try one byte/dword at a time. */ +# ifdef USE_AS_STRNCMP + cmpq %r11, %rdx + jae L(zero) +# endif +# ifdef USE_AS_WCSCMP + movl (%rdi, %rdx), %eax + movl (%rsi, %rdx), %ecx +# else + movzbl (%rdi, %rdx), %eax + movzbl (%rsi, %rdx), %ecx +# endif + testl %eax, %eax + jne L(cross_page_loop) + subl %ecx, %eax + ret +END (STRCMP) +#endif diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c index 4db7332ac..358fa9015 100644 --- a/sysdeps/x86_64/multiarch/strcmp.c +++ b/sysdeps/x86_64/multiarch/strcmp.c @@ -30,16 +30,29 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; static inline void * IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) - return OPTIMIZE (avx2); + { + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable) + && CPU_FEATURES_CPU_P (cpu_features, BMI2) + && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP)) + return OPTIMIZE (evex); + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + return OPTIMIZE (avx2_rtm); + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE (avx2); + } if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load)) return OPTIMIZE (sse2_unaligned); diff --git a/sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S new file mode 100644 index 000000000..c2c581ecf --- /dev/null +++ b/sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S @@ -0,0 +1,12 @@ +#ifndef STRCPY +# define STRCPY __strcpy_avx2_rtm +#endif + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + +#define SECTION(p) p##.avx.rtm + +#include "strcpy-avx2.S" diff --git a/sysdeps/x86_64/multiarch/strcpy-avx2.S b/sysdeps/x86_64/multiarch/strcpy-avx2.S index 3f2f9e817..1ce17253a 100644 --- a/sysdeps/x86_64/multiarch/strcpy-avx2.S +++ b/sysdeps/x86_64/multiarch/strcpy-avx2.S @@ -37,6 +37,10 @@ # define VZEROUPPER vzeroupper # endif +# ifndef SECTION +# define SECTION(p) p##.avx +# endif + /* zero register */ #define xmmZ xmm0 #define ymmZ ymm0 @@ -46,7 +50,7 @@ # ifndef USE_AS_STRCAT - .section .text.avx,"ax",@progbits + .section SECTION(.text),"ax",@progbits ENTRY (STRCPY) # ifdef USE_AS_STRNCPY mov %RDX_LP, %R8_LP @@ -369,8 +373,8 @@ L(CopyVecSizeExit): lea 1(%rdi), %rdi jnz L(StrncpyFillTailWithZero) # endif - VZEROUPPER - ret +L(return_vzeroupper): + ZERO_UPPER_VEC_REGISTERS_RETURN .p2align 4 L(CopyTwoVecSize1): @@ -553,8 +557,7 @@ L(Exit1): lea 2(%rdi), %rdi jnz L(StrncpyFillTailWithZero) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Exit2): @@ -569,8 +572,7 @@ L(Exit2): lea 3(%rdi), %rdi jnz L(StrncpyFillTailWithZero) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Exit3): @@ -584,8 +586,7 @@ L(Exit3): lea 4(%rdi), %rdi jnz L(StrncpyFillTailWithZero) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Exit4_7): @@ -602,8 +603,7 @@ L(Exit4_7): lea 1(%rdi, %rdx), %rdi jnz L(StrncpyFillTailWithZero) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Exit8_15): @@ -620,8 +620,7 @@ L(Exit8_15): lea 1(%rdi, %rdx), %rdi jnz L(StrncpyFillTailWithZero) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Exit16_31): @@ -638,8 +637,7 @@ L(Exit16_31): lea 1(%rdi, %rdx), %rdi jnz L(StrncpyFillTailWithZero) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Exit32_63): @@ -656,8 +654,7 @@ L(Exit32_63): lea 1(%rdi, %rdx), %rdi jnz L(StrncpyFillTailWithZero) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN # ifdef USE_AS_STRNCPY @@ -671,8 +668,7 @@ L(StrncpyExit1): # ifdef USE_AS_STRCAT movb $0, 1(%rdi) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(StrncpyExit2): @@ -684,8 +680,7 @@ L(StrncpyExit2): # ifdef USE_AS_STRCAT movb $0, 2(%rdi) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(StrncpyExit3_4): @@ -699,8 +694,7 @@ L(StrncpyExit3_4): # ifdef USE_AS_STRCAT movb $0, (%rdi, %r8) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(StrncpyExit5_8): @@ -714,8 +708,7 @@ L(StrncpyExit5_8): # ifdef USE_AS_STRCAT movb $0, (%rdi, %r8) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(StrncpyExit9_16): @@ -729,8 +722,7 @@ L(StrncpyExit9_16): # ifdef USE_AS_STRCAT movb $0, (%rdi, %r8) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(StrncpyExit17_32): @@ -744,8 +736,7 @@ L(StrncpyExit17_32): # ifdef USE_AS_STRCAT movb $0, (%rdi, %r8) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(StrncpyExit33_64): @@ -760,8 +751,7 @@ L(StrncpyExit33_64): # ifdef USE_AS_STRCAT movb $0, (%rdi, %r8) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(StrncpyExit65): @@ -778,50 +768,43 @@ L(StrncpyExit65): # ifdef USE_AS_STRCAT movb $0, 65(%rdi) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN # ifndef USE_AS_STRCAT .p2align 4 L(Fill1): mov %dl, (%rdi) - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Fill2): mov %dx, (%rdi) - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Fill3_4): mov %dx, (%rdi) mov %dx, -2(%rdi, %r8) - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Fill5_8): mov %edx, (%rdi) mov %edx, -4(%rdi, %r8) - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Fill9_16): mov %rdx, (%rdi) mov %rdx, -8(%rdi, %r8) - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(Fill17_32): vmovdqu %xmmZ, (%rdi) vmovdqu %xmmZ, -16(%rdi, %r8) - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(CopyVecSizeUnalignedVec2): @@ -898,8 +881,7 @@ L(Fill): cmp $1, %r8d ja L(Fill2) je L(Fill1) - VZEROUPPER - ret + VZEROUPPER_RETURN /* end of ifndef USE_AS_STRCAT */ # endif @@ -929,8 +911,7 @@ L(UnalignedFourVecSizeLeaveCase3): # ifdef USE_AS_STRCAT movb $0, (VEC_SIZE * 4)(%rdi) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(UnalignedFourVecSizeLeaveCase2): @@ -1001,16 +982,14 @@ L(StrncpyExit): # ifdef USE_AS_STRCAT movb $0, (%rdi) # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(ExitZero): # ifndef USE_AS_STRCAT mov %rdi, %rax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN # endif diff --git a/sysdeps/x86_64/multiarch/strcpy-evex.S b/sysdeps/x86_64/multiarch/strcpy-evex.S new file mode 100644 index 000000000..a343a1a69 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strcpy-evex.S @@ -0,0 +1,1003 @@ +/* strcpy with 256-bit EVEX instructions. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +# ifndef USE_AS_STRCAT +# include + +# ifndef STRCPY +# define STRCPY __strcpy_evex +# endif + +# endif + +# define VMOVU vmovdqu64 +# define VMOVA vmovdqa64 + +/* Number of bytes in a vector register */ +# ifndef VEC_SIZE +# define VEC_SIZE 32 +# endif + +# define XMM2 xmm18 +# define XMM3 xmm19 + +# define YMM2 ymm18 +# define YMM3 ymm19 +# define YMM4 ymm20 +# define YMM5 ymm21 +# define YMM6 ymm22 +# define YMM7 ymm23 + +# ifndef USE_AS_STRCAT + +/* zero register */ +# define XMMZERO xmm16 +# define YMMZERO ymm16 +# define YMM1 ymm17 + + .section .text.evex,"ax",@progbits +ENTRY (STRCPY) +# ifdef USE_AS_STRNCPY + mov %RDX_LP, %R8_LP + test %R8_LP, %R8_LP + jz L(ExitZero) +# endif + mov %rsi, %rcx +# ifndef USE_AS_STPCPY + mov %rdi, %rax /* save result */ +# endif + + vpxorq %XMMZERO, %XMMZERO, %XMMZERO +# endif + + and $((VEC_SIZE * 4) - 1), %ecx + cmp $(VEC_SIZE * 2), %ecx + jbe L(SourceStringAlignmentLessTwoVecSize) + + and $-VEC_SIZE, %rsi + and $(VEC_SIZE - 1), %ecx + + vpcmpb $0, (%rsi), %YMMZERO, %k0 + kmovd %k0, %edx + shr %cl, %rdx + +# ifdef USE_AS_STRNCPY +# if defined USE_AS_STPCPY || defined USE_AS_STRCAT + mov $VEC_SIZE, %r10 + sub %rcx, %r10 + cmp %r10, %r8 +# else + mov $(VEC_SIZE + 1), %r10 + sub %rcx, %r10 + cmp %r10, %r8 +# endif + jbe L(CopyVecSizeTailCase2OrCase3) +# endif + test %edx, %edx + jnz L(CopyVecSizeTail) + + vpcmpb $0, VEC_SIZE(%rsi), %YMMZERO, %k1 + kmovd %k1, %edx + +# ifdef USE_AS_STRNCPY + add $VEC_SIZE, %r10 + cmp %r10, %r8 + jbe L(CopyTwoVecSizeCase2OrCase3) +# endif + test %edx, %edx + jnz L(CopyTwoVecSize) + + VMOVU (%rsi, %rcx), %YMM2 /* copy VEC_SIZE bytes */ + VMOVU %YMM2, (%rdi) + +/* If source address alignment != destination address alignment */ + .p2align 4 +L(UnalignVecSizeBoth): + sub %rcx, %rdi +# ifdef USE_AS_STRNCPY + add %rcx, %r8 + sbb %rcx, %rcx + or %rcx, %r8 +# endif + mov $VEC_SIZE, %rcx + VMOVA (%rsi, %rcx), %YMM2 + VMOVU %YMM2, (%rdi, %rcx) + VMOVA VEC_SIZE(%rsi, %rcx), %YMM2 + vpcmpb $0, %YMM2, %YMMZERO, %k0 + kmovd %k0, %edx + add $VEC_SIZE, %rcx +# ifdef USE_AS_STRNCPY + sub $(VEC_SIZE * 3), %r8 + jbe L(CopyVecSizeCase2OrCase3) +# endif + test %edx, %edx +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + jnz L(CopyVecSizeUnalignedVec2) +# else + jnz L(CopyVecSize) +# endif + + VMOVU %YMM2, (%rdi, %rcx) + VMOVA VEC_SIZE(%rsi, %rcx), %YMM3 + vpcmpb $0, %YMM3, %YMMZERO, %k0 + kmovd %k0, %edx + add $VEC_SIZE, %rcx +# ifdef USE_AS_STRNCPY + sub $VEC_SIZE, %r8 + jbe L(CopyVecSizeCase2OrCase3) +# endif + test %edx, %edx +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + jnz L(CopyVecSizeUnalignedVec3) +# else + jnz L(CopyVecSize) +# endif + + VMOVU %YMM3, (%rdi, %rcx) + VMOVA VEC_SIZE(%rsi, %rcx), %YMM4 + vpcmpb $0, %YMM4, %YMMZERO, %k0 + kmovd %k0, %edx + add $VEC_SIZE, %rcx +# ifdef USE_AS_STRNCPY + sub $VEC_SIZE, %r8 + jbe L(CopyVecSizeCase2OrCase3) +# endif + test %edx, %edx +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + jnz L(CopyVecSizeUnalignedVec4) +# else + jnz L(CopyVecSize) +# endif + + VMOVU %YMM4, (%rdi, %rcx) + VMOVA VEC_SIZE(%rsi, %rcx), %YMM2 + vpcmpb $0, %YMM2, %YMMZERO, %k0 + kmovd %k0, %edx + add $VEC_SIZE, %rcx +# ifdef USE_AS_STRNCPY + sub $VEC_SIZE, %r8 + jbe L(CopyVecSizeCase2OrCase3) +# endif + test %edx, %edx +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + jnz L(CopyVecSizeUnalignedVec2) +# else + jnz L(CopyVecSize) +# endif + + VMOVU %YMM2, (%rdi, %rcx) + VMOVA VEC_SIZE(%rsi, %rcx), %YMM2 + vpcmpb $0, %YMM2, %YMMZERO, %k0 + kmovd %k0, %edx + add $VEC_SIZE, %rcx +# ifdef USE_AS_STRNCPY + sub $VEC_SIZE, %r8 + jbe L(CopyVecSizeCase2OrCase3) +# endif + test %edx, %edx +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + jnz L(CopyVecSizeUnalignedVec2) +# else + jnz L(CopyVecSize) +# endif + + VMOVA VEC_SIZE(%rsi, %rcx), %YMM3 + VMOVU %YMM2, (%rdi, %rcx) + vpcmpb $0, %YMM3, %YMMZERO, %k0 + kmovd %k0, %edx + add $VEC_SIZE, %rcx +# ifdef USE_AS_STRNCPY + sub $VEC_SIZE, %r8 + jbe L(CopyVecSizeCase2OrCase3) +# endif + test %edx, %edx +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + jnz L(CopyVecSizeUnalignedVec3) +# else + jnz L(CopyVecSize) +# endif + + VMOVU %YMM3, (%rdi, %rcx) + mov %rsi, %rdx + lea VEC_SIZE(%rsi, %rcx), %rsi + and $-(VEC_SIZE * 4), %rsi + sub %rsi, %rdx + sub %rdx, %rdi +# ifdef USE_AS_STRNCPY + lea (VEC_SIZE * 8)(%r8, %rdx), %r8 +# endif +L(UnalignedFourVecSizeLoop): + VMOVA (%rsi), %YMM4 + VMOVA VEC_SIZE(%rsi), %YMM5 + VMOVA (VEC_SIZE * 2)(%rsi), %YMM6 + VMOVA (VEC_SIZE * 3)(%rsi), %YMM7 + vpminub %YMM5, %YMM4, %YMM2 + vpminub %YMM7, %YMM6, %YMM3 + vpminub %YMM2, %YMM3, %YMM2 + /* If K7 != 0, there is a null byte. */ + vpcmpb $0, %YMM2, %YMMZERO, %k7 + kmovd %k7, %edx +# ifdef USE_AS_STRNCPY + sub $(VEC_SIZE * 4), %r8 + jbe L(UnalignedLeaveCase2OrCase3) +# endif + test %edx, %edx + jnz L(UnalignedFourVecSizeLeave) + +L(UnalignedFourVecSizeLoop_start): + add $(VEC_SIZE * 4), %rdi + add $(VEC_SIZE * 4), %rsi + VMOVU %YMM4, -(VEC_SIZE * 4)(%rdi) + VMOVA (%rsi), %YMM4 + VMOVU %YMM5, -(VEC_SIZE * 3)(%rdi) + VMOVA VEC_SIZE(%rsi), %YMM5 + vpminub %YMM5, %YMM4, %YMM2 + VMOVU %YMM6, -(VEC_SIZE * 2)(%rdi) + VMOVA (VEC_SIZE * 2)(%rsi), %YMM6 + VMOVU %YMM7, -VEC_SIZE(%rdi) + VMOVA (VEC_SIZE * 3)(%rsi), %YMM7 + vpminub %YMM7, %YMM6, %YMM3 + vpminub %YMM2, %YMM3, %YMM2 + /* If K7 != 0, there is a null byte. */ + vpcmpb $0, %YMM2, %YMMZERO, %k7 + kmovd %k7, %edx +# ifdef USE_AS_STRNCPY + sub $(VEC_SIZE * 4), %r8 + jbe L(UnalignedLeaveCase2OrCase3) +# endif + test %edx, %edx + jz L(UnalignedFourVecSizeLoop_start) + +L(UnalignedFourVecSizeLeave): + vpcmpb $0, %YMM4, %YMMZERO, %k1 + kmovd %k1, %edx + test %edx, %edx + jnz L(CopyVecSizeUnaligned_0) + + vpcmpb $0, %YMM5, %YMMZERO, %k2 + kmovd %k2, %ecx + test %ecx, %ecx + jnz L(CopyVecSizeUnaligned_16) + + vpcmpb $0, %YMM6, %YMMZERO, %k3 + kmovd %k3, %edx + test %edx, %edx + jnz L(CopyVecSizeUnaligned_32) + + vpcmpb $0, %YMM7, %YMMZERO, %k4 + kmovd %k4, %ecx + bsf %ecx, %edx + VMOVU %YMM4, (%rdi) + VMOVU %YMM5, VEC_SIZE(%rdi) + VMOVU %YMM6, (VEC_SIZE * 2)(%rdi) +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT +# ifdef USE_AS_STPCPY + lea (VEC_SIZE * 3)(%rdi, %rdx), %rax +# endif + VMOVU %YMM7, (VEC_SIZE * 3)(%rdi) + add $(VEC_SIZE - 1), %r8 + sub %rdx, %r8 + lea ((VEC_SIZE * 3) + 1)(%rdi, %rdx), %rdi + jmp L(StrncpyFillTailWithZero) +# else + add $(VEC_SIZE * 3), %rsi + add $(VEC_SIZE * 3), %rdi + jmp L(CopyVecSizeExit) +# endif + +/* If source address alignment == destination address alignment */ + +L(SourceStringAlignmentLessTwoVecSize): + VMOVU (%rsi), %YMM3 + VMOVU VEC_SIZE(%rsi), %YMM2 + vpcmpb $0, %YMM3, %YMMZERO, %k0 + kmovd %k0, %edx + +# ifdef USE_AS_STRNCPY +# if defined USE_AS_STPCPY || defined USE_AS_STRCAT + cmp $VEC_SIZE, %r8 +# else + cmp $(VEC_SIZE + 1), %r8 +# endif + jbe L(CopyVecSizeTail1Case2OrCase3) +# endif + test %edx, %edx + jnz L(CopyVecSizeTail1) + + VMOVU %YMM3, (%rdi) + vpcmpb $0, %YMM2, %YMMZERO, %k0 + kmovd %k0, %edx + +# ifdef USE_AS_STRNCPY +# if defined USE_AS_STPCPY || defined USE_AS_STRCAT + cmp $(VEC_SIZE * 2), %r8 +# else + cmp $((VEC_SIZE * 2) + 1), %r8 +# endif + jbe L(CopyTwoVecSize1Case2OrCase3) +# endif + test %edx, %edx + jnz L(CopyTwoVecSize1) + + and $-VEC_SIZE, %rsi + and $(VEC_SIZE - 1), %ecx + jmp L(UnalignVecSizeBoth) + +/*------End of main part with loops---------------------*/ + +/* Case1 */ + +# if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT) + .p2align 4 +L(CopyVecSize): + add %rcx, %rdi +# endif +L(CopyVecSizeTail): + add %rcx, %rsi +L(CopyVecSizeTail1): + bsf %edx, %edx +L(CopyVecSizeExit): + cmp $32, %edx + jae L(Exit32_63) + cmp $16, %edx + jae L(Exit16_31) + cmp $8, %edx + jae L(Exit8_15) + cmp $4, %edx + jae L(Exit4_7) + cmp $3, %edx + je L(Exit3) + cmp $1, %edx + ja L(Exit2) + je L(Exit1) + movb $0, (%rdi) +# ifdef USE_AS_STPCPY + lea (%rdi), %rax +# endif +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + sub $1, %r8 + lea 1(%rdi), %rdi + jnz L(StrncpyFillTailWithZero) +# endif + ret + + .p2align 4 +L(CopyTwoVecSize1): + add $VEC_SIZE, %rsi + add $VEC_SIZE, %rdi +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + sub $VEC_SIZE, %r8 +# endif + jmp L(CopyVecSizeTail1) + + .p2align 4 +L(CopyTwoVecSize): + bsf %edx, %edx + add %rcx, %rsi + add $VEC_SIZE, %edx + sub %ecx, %edx + jmp L(CopyVecSizeExit) + + .p2align 4 +L(CopyVecSizeUnaligned_0): + bsf %edx, %edx +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT +# ifdef USE_AS_STPCPY + lea (%rdi, %rdx), %rax +# endif + VMOVU %YMM4, (%rdi) + add $((VEC_SIZE * 4) - 1), %r8 + sub %rdx, %r8 + lea 1(%rdi, %rdx), %rdi + jmp L(StrncpyFillTailWithZero) +# else + jmp L(CopyVecSizeExit) +# endif + + .p2align 4 +L(CopyVecSizeUnaligned_16): + bsf %ecx, %edx + VMOVU %YMM4, (%rdi) +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT +# ifdef USE_AS_STPCPY + lea VEC_SIZE(%rdi, %rdx), %rax +# endif + VMOVU %YMM5, VEC_SIZE(%rdi) + add $((VEC_SIZE * 3) - 1), %r8 + sub %rdx, %r8 + lea (VEC_SIZE + 1)(%rdi, %rdx), %rdi + jmp L(StrncpyFillTailWithZero) +# else + add $VEC_SIZE, %rsi + add $VEC_SIZE, %rdi + jmp L(CopyVecSizeExit) +# endif + + .p2align 4 +L(CopyVecSizeUnaligned_32): + bsf %edx, %edx + VMOVU %YMM4, (%rdi) + VMOVU %YMM5, VEC_SIZE(%rdi) +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT +# ifdef USE_AS_STPCPY + lea (VEC_SIZE * 2)(%rdi, %rdx), %rax +# endif + VMOVU %YMM6, (VEC_SIZE * 2)(%rdi) + add $((VEC_SIZE * 2) - 1), %r8 + sub %rdx, %r8 + lea ((VEC_SIZE * 2) + 1)(%rdi, %rdx), %rdi + jmp L(StrncpyFillTailWithZero) +# else + add $(VEC_SIZE * 2), %rsi + add $(VEC_SIZE * 2), %rdi + jmp L(CopyVecSizeExit) +# endif + +# ifdef USE_AS_STRNCPY +# ifndef USE_AS_STRCAT + .p2align 4 +L(CopyVecSizeUnalignedVec6): + VMOVU %YMM6, (%rdi, %rcx) + jmp L(CopyVecSizeVecExit) + + .p2align 4 +L(CopyVecSizeUnalignedVec5): + VMOVU %YMM5, (%rdi, %rcx) + jmp L(CopyVecSizeVecExit) + + .p2align 4 +L(CopyVecSizeUnalignedVec4): + VMOVU %YMM4, (%rdi, %rcx) + jmp L(CopyVecSizeVecExit) + + .p2align 4 +L(CopyVecSizeUnalignedVec3): + VMOVU %YMM3, (%rdi, %rcx) + jmp L(CopyVecSizeVecExit) +# endif + +/* Case2 */ + + .p2align 4 +L(CopyVecSizeCase2): + add $VEC_SIZE, %r8 + add %rcx, %rdi + add %rcx, %rsi + bsf %edx, %edx + cmp %r8d, %edx + jb L(CopyVecSizeExit) + jmp L(StrncpyExit) + + .p2align 4 +L(CopyTwoVecSizeCase2): + add %rcx, %rsi + bsf %edx, %edx + add $VEC_SIZE, %edx + sub %ecx, %edx + cmp %r8d, %edx + jb L(CopyVecSizeExit) + jmp L(StrncpyExit) + +L(CopyVecSizeTailCase2): + add %rcx, %rsi + bsf %edx, %edx + cmp %r8d, %edx + jb L(CopyVecSizeExit) + jmp L(StrncpyExit) + +L(CopyVecSizeTail1Case2): + bsf %edx, %edx + cmp %r8d, %edx + jb L(CopyVecSizeExit) + jmp L(StrncpyExit) + +/* Case2 or Case3, Case3 */ + + .p2align 4 +L(CopyVecSizeCase2OrCase3): + test %rdx, %rdx + jnz L(CopyVecSizeCase2) +L(CopyVecSizeCase3): + add $VEC_SIZE, %r8 + add %rcx, %rdi + add %rcx, %rsi + jmp L(StrncpyExit) + + .p2align 4 +L(CopyTwoVecSizeCase2OrCase3): + test %rdx, %rdx + jnz L(CopyTwoVecSizeCase2) + add %rcx, %rsi + jmp L(StrncpyExit) + + .p2align 4 +L(CopyVecSizeTailCase2OrCase3): + test %rdx, %rdx + jnz L(CopyVecSizeTailCase2) + add %rcx, %rsi + jmp L(StrncpyExit) + + .p2align 4 +L(CopyTwoVecSize1Case2OrCase3): + add $VEC_SIZE, %rdi + add $VEC_SIZE, %rsi + sub $VEC_SIZE, %r8 +L(CopyVecSizeTail1Case2OrCase3): + test %rdx, %rdx + jnz L(CopyVecSizeTail1Case2) + jmp L(StrncpyExit) +# endif + +/*------------End labels regarding with copying 1-VEC_SIZE bytes--and 1-(VEC_SIZE*2) bytes----*/ + + .p2align 4 +L(Exit1): + movzwl (%rsi), %edx + mov %dx, (%rdi) +# ifdef USE_AS_STPCPY + lea 1(%rdi), %rax +# endif +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + sub $2, %r8 + lea 2(%rdi), %rdi + jnz L(StrncpyFillTailWithZero) +# endif + ret + + .p2align 4 +L(Exit2): + movzwl (%rsi), %ecx + mov %cx, (%rdi) + movb $0, 2(%rdi) +# ifdef USE_AS_STPCPY + lea 2(%rdi), %rax +# endif +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + sub $3, %r8 + lea 3(%rdi), %rdi + jnz L(StrncpyFillTailWithZero) +# endif + ret + + .p2align 4 +L(Exit3): + mov (%rsi), %edx + mov %edx, (%rdi) +# ifdef USE_AS_STPCPY + lea 3(%rdi), %rax +# endif +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + sub $4, %r8 + lea 4(%rdi), %rdi + jnz L(StrncpyFillTailWithZero) +# endif + ret + + .p2align 4 +L(Exit4_7): + mov (%rsi), %ecx + mov %ecx, (%rdi) + mov -3(%rsi, %rdx), %ecx + mov %ecx, -3(%rdi, %rdx) +# ifdef USE_AS_STPCPY + lea (%rdi, %rdx), %rax +# endif +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + sub %rdx, %r8 + sub $1, %r8 + lea 1(%rdi, %rdx), %rdi + jnz L(StrncpyFillTailWithZero) +# endif + ret + + .p2align 4 +L(Exit8_15): + mov (%rsi), %rcx + mov -7(%rsi, %rdx), %r9 + mov %rcx, (%rdi) + mov %r9, -7(%rdi, %rdx) +# ifdef USE_AS_STPCPY + lea (%rdi, %rdx), %rax +# endif +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + sub %rdx, %r8 + sub $1, %r8 + lea 1(%rdi, %rdx), %rdi + jnz L(StrncpyFillTailWithZero) +# endif + ret + + .p2align 4 +L(Exit16_31): + VMOVU (%rsi), %XMM2 + VMOVU -15(%rsi, %rdx), %XMM3 + VMOVU %XMM2, (%rdi) + VMOVU %XMM3, -15(%rdi, %rdx) +# ifdef USE_AS_STPCPY + lea (%rdi, %rdx), %rax +# endif +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + sub %rdx, %r8 + sub $1, %r8 + lea 1(%rdi, %rdx), %rdi + jnz L(StrncpyFillTailWithZero) +# endif + ret + + .p2align 4 +L(Exit32_63): + VMOVU (%rsi), %YMM2 + VMOVU -31(%rsi, %rdx), %YMM3 + VMOVU %YMM2, (%rdi) + VMOVU %YMM3, -31(%rdi, %rdx) +# ifdef USE_AS_STPCPY + lea (%rdi, %rdx), %rax +# endif +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT + sub %rdx, %r8 + sub $1, %r8 + lea 1(%rdi, %rdx), %rdi + jnz L(StrncpyFillTailWithZero) +# endif + ret + +# ifdef USE_AS_STRNCPY + + .p2align 4 +L(StrncpyExit1): + movzbl (%rsi), %edx + mov %dl, (%rdi) +# ifdef USE_AS_STPCPY + lea 1(%rdi), %rax +# endif +# ifdef USE_AS_STRCAT + movb $0, 1(%rdi) +# endif + ret + + .p2align 4 +L(StrncpyExit2): + movzwl (%rsi), %edx + mov %dx, (%rdi) +# ifdef USE_AS_STPCPY + lea 2(%rdi), %rax +# endif +# ifdef USE_AS_STRCAT + movb $0, 2(%rdi) +# endif + ret + + .p2align 4 +L(StrncpyExit3_4): + movzwl (%rsi), %ecx + movzwl -2(%rsi, %r8), %edx + mov %cx, (%rdi) + mov %dx, -2(%rdi, %r8) +# ifdef USE_AS_STPCPY + lea (%rdi, %r8), %rax +# endif +# ifdef USE_AS_STRCAT + movb $0, (%rdi, %r8) +# endif + ret + + .p2align 4 +L(StrncpyExit5_8): + mov (%rsi), %ecx + mov -4(%rsi, %r8), %edx + mov %ecx, (%rdi) + mov %edx, -4(%rdi, %r8) +# ifdef USE_AS_STPCPY + lea (%rdi, %r8), %rax +# endif +# ifdef USE_AS_STRCAT + movb $0, (%rdi, %r8) +# endif + ret + + .p2align 4 +L(StrncpyExit9_16): + mov (%rsi), %rcx + mov -8(%rsi, %r8), %rdx + mov %rcx, (%rdi) + mov %rdx, -8(%rdi, %r8) +# ifdef USE_AS_STPCPY + lea (%rdi, %r8), %rax +# endif +# ifdef USE_AS_STRCAT + movb $0, (%rdi, %r8) +# endif + ret + + .p2align 4 +L(StrncpyExit17_32): + VMOVU (%rsi), %XMM2 + VMOVU -16(%rsi, %r8), %XMM3 + VMOVU %XMM2, (%rdi) + VMOVU %XMM3, -16(%rdi, %r8) +# ifdef USE_AS_STPCPY + lea (%rdi, %r8), %rax +# endif +# ifdef USE_AS_STRCAT + movb $0, (%rdi, %r8) +# endif + ret + + .p2align 4 +L(StrncpyExit33_64): + /* 0/32, 31/16 */ + VMOVU (%rsi), %YMM2 + VMOVU -VEC_SIZE(%rsi, %r8), %YMM3 + VMOVU %YMM2, (%rdi) + VMOVU %YMM3, -VEC_SIZE(%rdi, %r8) +# ifdef USE_AS_STPCPY + lea (%rdi, %r8), %rax +# endif +# ifdef USE_AS_STRCAT + movb $0, (%rdi, %r8) +# endif + ret + + .p2align 4 +L(StrncpyExit65): + /* 0/32, 32/32, 64/1 */ + VMOVU (%rsi), %YMM2 + VMOVU 32(%rsi), %YMM3 + mov 64(%rsi), %cl + VMOVU %YMM2, (%rdi) + VMOVU %YMM3, 32(%rdi) + mov %cl, 64(%rdi) +# ifdef USE_AS_STPCPY + lea 65(%rdi), %rax +# endif +# ifdef USE_AS_STRCAT + movb $0, 65(%rdi) +# endif + ret + +# ifndef USE_AS_STRCAT + + .p2align 4 +L(Fill1): + mov %dl, (%rdi) + ret + + .p2align 4 +L(Fill2): + mov %dx, (%rdi) + ret + + .p2align 4 +L(Fill3_4): + mov %dx, (%rdi) + mov %dx, -2(%rdi, %r8) + ret + + .p2align 4 +L(Fill5_8): + mov %edx, (%rdi) + mov %edx, -4(%rdi, %r8) + ret + + .p2align 4 +L(Fill9_16): + mov %rdx, (%rdi) + mov %rdx, -8(%rdi, %r8) + ret + + .p2align 4 +L(Fill17_32): + VMOVU %XMMZERO, (%rdi) + VMOVU %XMMZERO, -16(%rdi, %r8) + ret + + .p2align 4 +L(CopyVecSizeUnalignedVec2): + VMOVU %YMM2, (%rdi, %rcx) + + .p2align 4 +L(CopyVecSizeVecExit): + bsf %edx, %edx + add $(VEC_SIZE - 1), %r8 + add %rcx, %rdi +# ifdef USE_AS_STPCPY + lea (%rdi, %rdx), %rax +# endif + sub %rdx, %r8 + lea 1(%rdi, %rdx), %rdi + + .p2align 4 +L(StrncpyFillTailWithZero): + xor %edx, %edx + sub $VEC_SIZE, %r8 + jbe L(StrncpyFillExit) + + VMOVU %YMMZERO, (%rdi) + add $VEC_SIZE, %rdi + + mov %rdi, %rsi + and $(VEC_SIZE - 1), %esi + sub %rsi, %rdi + add %rsi, %r8 + sub $(VEC_SIZE * 4), %r8 + jb L(StrncpyFillLessFourVecSize) + +L(StrncpyFillLoopVmovdqa): + VMOVA %YMMZERO, (%rdi) + VMOVA %YMMZERO, VEC_SIZE(%rdi) + VMOVA %YMMZERO, (VEC_SIZE * 2)(%rdi) + VMOVA %YMMZERO, (VEC_SIZE * 3)(%rdi) + add $(VEC_SIZE * 4), %rdi + sub $(VEC_SIZE * 4), %r8 + jae L(StrncpyFillLoopVmovdqa) + +L(StrncpyFillLessFourVecSize): + add $(VEC_SIZE * 2), %r8 + jl L(StrncpyFillLessTwoVecSize) + VMOVA %YMMZERO, (%rdi) + VMOVA %YMMZERO, VEC_SIZE(%rdi) + add $(VEC_SIZE * 2), %rdi + sub $VEC_SIZE, %r8 + jl L(StrncpyFillExit) + VMOVA %YMMZERO, (%rdi) + add $VEC_SIZE, %rdi + jmp L(Fill) + + .p2align 4 +L(StrncpyFillLessTwoVecSize): + add $VEC_SIZE, %r8 + jl L(StrncpyFillExit) + VMOVA %YMMZERO, (%rdi) + add $VEC_SIZE, %rdi + jmp L(Fill) + + .p2align 4 +L(StrncpyFillExit): + add $VEC_SIZE, %r8 +L(Fill): + cmp $17, %r8d + jae L(Fill17_32) + cmp $9, %r8d + jae L(Fill9_16) + cmp $5, %r8d + jae L(Fill5_8) + cmp $3, %r8d + jae L(Fill3_4) + cmp $1, %r8d + ja L(Fill2) + je L(Fill1) + ret + +/* end of ifndef USE_AS_STRCAT */ +# endif + + .p2align 4 +L(UnalignedLeaveCase2OrCase3): + test %rdx, %rdx + jnz L(UnalignedFourVecSizeLeaveCase2) +L(UnalignedFourVecSizeLeaveCase3): + lea (VEC_SIZE * 4)(%r8), %rcx + and $-VEC_SIZE, %rcx + add $(VEC_SIZE * 3), %r8 + jl L(CopyVecSizeCase3) + VMOVU %YMM4, (%rdi) + sub $VEC_SIZE, %r8 + jb L(CopyVecSizeCase3) + VMOVU %YMM5, VEC_SIZE(%rdi) + sub $VEC_SIZE, %r8 + jb L(CopyVecSizeCase3) + VMOVU %YMM6, (VEC_SIZE * 2)(%rdi) + sub $VEC_SIZE, %r8 + jb L(CopyVecSizeCase3) + VMOVU %YMM7, (VEC_SIZE * 3)(%rdi) +# ifdef USE_AS_STPCPY + lea (VEC_SIZE * 4)(%rdi), %rax +# endif +# ifdef USE_AS_STRCAT + movb $0, (VEC_SIZE * 4)(%rdi) +# endif + ret + + .p2align 4 +L(UnalignedFourVecSizeLeaveCase2): + xor %ecx, %ecx + vpcmpb $0, %YMM4, %YMMZERO, %k1 + kmovd %k1, %edx + add $(VEC_SIZE * 3), %r8 + jle L(CopyVecSizeCase2OrCase3) + test %edx, %edx +# ifndef USE_AS_STRCAT + jnz L(CopyVecSizeUnalignedVec4) +# else + jnz L(CopyVecSize) +# endif + vpcmpb $0, %YMM5, %YMMZERO, %k2 + kmovd %k2, %edx + VMOVU %YMM4, (%rdi) + add $VEC_SIZE, %rcx + sub $VEC_SIZE, %r8 + jbe L(CopyVecSizeCase2OrCase3) + test %edx, %edx +# ifndef USE_AS_STRCAT + jnz L(CopyVecSizeUnalignedVec5) +# else + jnz L(CopyVecSize) +# endif + + vpcmpb $0, %YMM6, %YMMZERO, %k3 + kmovd %k3, %edx + VMOVU %YMM5, VEC_SIZE(%rdi) + add $VEC_SIZE, %rcx + sub $VEC_SIZE, %r8 + jbe L(CopyVecSizeCase2OrCase3) + test %edx, %edx +# ifndef USE_AS_STRCAT + jnz L(CopyVecSizeUnalignedVec6) +# else + jnz L(CopyVecSize) +# endif + + vpcmpb $0, %YMM7, %YMMZERO, %k4 + kmovd %k4, %edx + VMOVU %YMM6, (VEC_SIZE * 2)(%rdi) + lea VEC_SIZE(%rdi, %rcx), %rdi + lea VEC_SIZE(%rsi, %rcx), %rsi + bsf %edx, %edx + cmp %r8d, %edx + jb L(CopyVecSizeExit) +L(StrncpyExit): + cmp $65, %r8d + je L(StrncpyExit65) + cmp $33, %r8d + jae L(StrncpyExit33_64) + cmp $17, %r8d + jae L(StrncpyExit17_32) + cmp $9, %r8d + jae L(StrncpyExit9_16) + cmp $5, %r8d + jae L(StrncpyExit5_8) + cmp $3, %r8d + jae L(StrncpyExit3_4) + cmp $1, %r8d + ja L(StrncpyExit2) + je L(StrncpyExit1) +# ifdef USE_AS_STPCPY + mov %rdi, %rax +# endif +# ifdef USE_AS_STRCAT + movb $0, (%rdi) +# endif + ret + + .p2align 4 +L(ExitZero): +# ifndef USE_AS_STRCAT + mov %rdi, %rax +# endif + ret + +# endif + +# ifndef USE_AS_STRCAT +END (STRCPY) +# else +END (STRCAT) +# endif +#endif diff --git a/sysdeps/x86_64/multiarch/strlen-avx2-rtm.S b/sysdeps/x86_64/multiarch/strlen-avx2-rtm.S new file mode 100644 index 000000000..75b4b7612 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strlen-avx2-rtm.S @@ -0,0 +1,12 @@ +#ifndef STRLEN +# define STRLEN __strlen_avx2_rtm +#endif + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + +#define SECTION(p) p##.avx.rtm + +#include "strlen-avx2.S" diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S index 73421ec1b..8cfb7391b 100644 --- a/sysdeps/x86_64/multiarch/strlen-avx2.S +++ b/sysdeps/x86_64/multiarch/strlen-avx2.S @@ -27,370 +27,528 @@ # ifdef USE_AS_WCSLEN # define VPCMPEQ vpcmpeqd # define VPMINU vpminud +# define CHAR_SIZE 4 # else # define VPCMPEQ vpcmpeqb # define VPMINU vpminub +# define CHAR_SIZE 1 # endif # ifndef VZEROUPPER # define VZEROUPPER vzeroupper # endif +# ifndef SECTION +# define SECTION(p) p##.avx +# endif + # define VEC_SIZE 32 +# define PAGE_SIZE 4096 +# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) - .section .text.avx,"ax",@progbits + .section SECTION(.text),"ax",@progbits ENTRY (STRLEN) # ifdef USE_AS_STRNLEN - /* Check for zero length. */ + /* Check zero length. */ +# ifdef __ILP32__ + /* Clear upper bits. */ + and %RSI_LP, %RSI_LP +# else test %RSI_LP, %RSI_LP - jz L(zero) -# ifdef USE_AS_WCSLEN - shl $2, %RSI_LP -# elif defined __ILP32__ - /* Clear the upper 32 bits. */ - movl %esi, %esi # endif + jz L(zero) + /* Store max len in R8_LP before adjusting if using WCSLEN. */ mov %RSI_LP, %R8_LP # endif - movl %edi, %ecx + movl %edi, %eax movq %rdi, %rdx vpxor %xmm0, %xmm0, %xmm0 - + /* Clear high bits from edi. Only keeping bits relevant to page + cross check. */ + andl $(PAGE_SIZE - 1), %eax /* Check if we may cross page boundary with one vector load. */ - andl $(2 * VEC_SIZE - 1), %ecx - cmpl $VEC_SIZE, %ecx - ja L(cros_page_boundary) + cmpl $(PAGE_SIZE - VEC_SIZE), %eax + ja L(cross_page_boundary) /* Check the first VEC_SIZE bytes. */ - VPCMPEQ (%rdi), %ymm0, %ymm1 + VPCMPEQ (%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax - testl %eax, %eax - # ifdef USE_AS_STRNLEN - jnz L(first_vec_x0_check) - /* Adjust length and check the end of data. */ - subq $VEC_SIZE, %rsi - jbe L(max) -# else - jnz L(first_vec_x0) + /* If length < VEC_SIZE handle special. */ + cmpq $CHAR_PER_VEC, %rsi + jbe L(first_vec_x0) # endif - - /* Align data for aligned loads in the loop. */ - addq $VEC_SIZE, %rdi - andl $(VEC_SIZE - 1), %ecx - andq $-VEC_SIZE, %rdi + /* If empty continue to aligned_more. Otherwise return bit + position of first match. */ + testl %eax, %eax + jz L(aligned_more) + tzcntl %eax, %eax +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ + shrl $2, %eax +# endif + VZEROUPPER_RETURN # ifdef USE_AS_STRNLEN - /* Adjust length. */ - addq %rcx, %rsi +L(zero): + xorl %eax, %eax + ret - subq $(VEC_SIZE * 4), %rsi - jbe L(last_4x_vec_or_less) + .p2align 4 +L(first_vec_x0): + /* Set bit for max len so that tzcnt will return min of max len + and position of first match. */ +# ifdef USE_AS_WCSLEN + /* NB: Multiply length by 4 to get byte count. */ + sall $2, %esi +# endif + btsq %rsi, %rax + tzcntl %eax, %eax +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ + shrl $2, %eax +# endif + VZEROUPPER_RETURN # endif - jmp L(more_4x_vec) .p2align 4 -L(cros_page_boundary): - andl $(VEC_SIZE - 1), %ecx - andq $-VEC_SIZE, %rdi - VPCMPEQ (%rdi), %ymm0, %ymm1 - vpmovmskb %ymm1, %eax - /* Remove the leading bytes. */ - sarl %cl, %eax - testl %eax, %eax - jz L(aligned_more) +L(first_vec_x1): tzcntl %eax, %eax + /* Safe to use 32 bit instructions as these are only called for + size = [1, 159]. */ # ifdef USE_AS_STRNLEN - /* Check the end of data. */ - cmpq %rax, %rsi - jbe L(max) + /* Use ecx which was computed earlier to compute correct value. + */ +# ifdef USE_AS_WCSLEN + leal -(VEC_SIZE * 4 + 1)(%rax, %rcx, 4), %eax +# else + subl $(VEC_SIZE * 4 + 1), %ecx + addl %ecx, %eax +# endif +# else + subl %edx, %edi + incl %edi + addl %edi, %eax # endif - addq %rdi, %rax - addq %rcx, %rax - subq %rdx, %rax # ifdef USE_AS_WCSLEN - shrq $2, %rax + /* NB: Divide bytes by 4 to get wchar_t count. */ + shrl $2, %eax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 -L(aligned_more): +L(first_vec_x2): + tzcntl %eax, %eax + /* Safe to use 32 bit instructions as these are only called for + size = [1, 159]. */ # ifdef USE_AS_STRNLEN - /* "rcx" is less than VEC_SIZE. Calculate "rdx + rcx - VEC_SIZE" - with "rdx - (VEC_SIZE - rcx)" instead of "(rdx + rcx) - VEC_SIZE" - to void possible addition overflow. */ - negq %rcx - addq $VEC_SIZE, %rcx - - /* Check the end of data. */ - subq %rcx, %rsi - jbe L(max) + /* Use ecx which was computed earlier to compute correct value. + */ +# ifdef USE_AS_WCSLEN + leal -(VEC_SIZE * 3 + 1)(%rax, %rcx, 4), %eax +# else + subl $(VEC_SIZE * 3 + 1), %ecx + addl %ecx, %eax +# endif +# else + subl %edx, %edi + addl $(VEC_SIZE + 1), %edi + addl %edi, %eax # endif +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ + shrl $2, %eax +# endif + VZEROUPPER_RETURN - addq $VEC_SIZE, %rdi + .p2align 4 +L(first_vec_x3): + tzcntl %eax, %eax + /* Safe to use 32 bit instructions as these are only called for + size = [1, 159]. */ +# ifdef USE_AS_STRNLEN + /* Use ecx which was computed earlier to compute correct value. + */ +# ifdef USE_AS_WCSLEN + leal -(VEC_SIZE * 2 + 1)(%rax, %rcx, 4), %eax +# else + subl $(VEC_SIZE * 2 + 1), %ecx + addl %ecx, %eax +# endif +# else + subl %edx, %edi + addl $(VEC_SIZE * 2 + 1), %edi + addl %edi, %eax +# endif +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ + shrl $2, %eax +# endif + VZEROUPPER_RETURN + .p2align 4 +L(first_vec_x4): + tzcntl %eax, %eax + /* Safe to use 32 bit instructions as these are only called for + size = [1, 159]. */ # ifdef USE_AS_STRNLEN - subq $(VEC_SIZE * 4), %rsi - jbe L(last_4x_vec_or_less) + /* Use ecx which was computed earlier to compute correct value. + */ +# ifdef USE_AS_WCSLEN + leal -(VEC_SIZE * 1 + 1)(%rax, %rcx, 4), %eax +# else + subl $(VEC_SIZE + 1), %ecx + addl %ecx, %eax +# endif +# else + subl %edx, %edi + addl $(VEC_SIZE * 3 + 1), %edi + addl %edi, %eax # endif +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ + shrl $2, %eax +# endif + VZEROUPPER_RETURN -L(more_4x_vec): + .p2align 5 +L(aligned_more): + /* Align data to VEC_SIZE - 1. This is the same number of + instructions as using andq with -VEC_SIZE but saves 4 bytes of + code on the x4 check. */ + orq $(VEC_SIZE - 1), %rdi +L(cross_page_continue): /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time since data is only aligned to VEC_SIZE. */ - VPCMPEQ (%rdi), %ymm0, %ymm1 - vpmovmskb %ymm1, %eax - testl %eax, %eax - jnz L(first_vec_x0) - - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 +# ifdef USE_AS_STRNLEN + /* + 1 because rdi is aligned to VEC_SIZE - 1. + CHAR_SIZE + because it simplies the logic in last_4x_vec_or_less. */ + leaq (VEC_SIZE * 4 + CHAR_SIZE + 1)(%rdi), %rcx + subq %rdx, %rcx +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %ecx +# endif +# endif + /* Load first VEC regardless. */ + VPCMPEQ 1(%rdi), %ymm0, %ymm1 +# ifdef USE_AS_STRNLEN + /* Adjust length. If near end handle specially. */ + subq %rcx, %rsi + jb L(last_4x_vec_or_less) +# endif vpmovmskb %ymm1, %eax testl %eax, %eax jnz L(first_vec_x1) - VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1 + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax testl %eax, %eax jnz L(first_vec_x2) - VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1 + VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax testl %eax, %eax jnz L(first_vec_x3) - addq $(VEC_SIZE * 4), %rdi - -# ifdef USE_AS_STRNLEN - subq $(VEC_SIZE * 4), %rsi - jbe L(last_4x_vec_or_less) -# endif - - /* Align data to 4 * VEC_SIZE. */ - movq %rdi, %rcx - andl $(4 * VEC_SIZE - 1), %ecx - andq $-(4 * VEC_SIZE), %rdi + VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + testl %eax, %eax + jnz L(first_vec_x4) + /* Align data to VEC_SIZE * 4 - 1. */ # ifdef USE_AS_STRNLEN - /* Adjust length. */ + /* Before adjusting length check if at last VEC_SIZE * 4. */ + cmpq $(CHAR_PER_VEC * 4 - 1), %rsi + jbe L(last_4x_vec_or_less_load) + incq %rdi + movl %edi, %ecx + orq $(VEC_SIZE * 4 - 1), %rdi + andl $(VEC_SIZE * 4 - 1), %ecx +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %ecx +# endif + /* Readjust length. */ addq %rcx, %rsi +# else + incq %rdi + orq $(VEC_SIZE * 4 - 1), %rdi # endif - + /* Compare 4 * VEC at a time forward. */ .p2align 4 L(loop_4x_vec): - /* Compare 4 * VEC at a time forward. */ - vmovdqa (%rdi), %ymm1 - vmovdqa VEC_SIZE(%rdi), %ymm2 - vmovdqa (VEC_SIZE * 2)(%rdi), %ymm3 - vmovdqa (VEC_SIZE * 3)(%rdi), %ymm4 - VPMINU %ymm1, %ymm2, %ymm5 - VPMINU %ymm3, %ymm4, %ymm6 - VPMINU %ymm5, %ymm6, %ymm5 - +# ifdef USE_AS_STRNLEN + /* Break if at end of length. */ + subq $(CHAR_PER_VEC * 4), %rsi + jb L(last_4x_vec_or_less_cmpeq) +# endif + /* Save some code size by microfusing VPMINU with the load. + Since the matches in ymm2/ymm4 can only be returned if there + where no matches in ymm1/ymm3 respectively there is no issue + with overlap. */ + vmovdqa 1(%rdi), %ymm1 + VPMINU (VEC_SIZE + 1)(%rdi), %ymm1, %ymm2 + vmovdqa (VEC_SIZE * 2 + 1)(%rdi), %ymm3 + VPMINU (VEC_SIZE * 3 + 1)(%rdi), %ymm3, %ymm4 + + VPMINU %ymm2, %ymm4, %ymm5 VPCMPEQ %ymm5, %ymm0, %ymm5 - vpmovmskb %ymm5, %eax - testl %eax, %eax - jnz L(4x_vec_end) + vpmovmskb %ymm5, %ecx - addq $(VEC_SIZE * 4), %rdi + subq $-(VEC_SIZE * 4), %rdi + testl %ecx, %ecx + jz L(loop_4x_vec) -# ifndef USE_AS_STRNLEN - jmp L(loop_4x_vec) -# else - subq $(VEC_SIZE * 4), %rsi - ja L(loop_4x_vec) -L(last_4x_vec_or_less): - /* Less than 4 * VEC and aligned to VEC_SIZE. */ - addl $(VEC_SIZE * 2), %esi - jle L(last_2x_vec) - - VPCMPEQ (%rdi), %ymm0, %ymm1 - vpmovmskb %ymm1, %eax - testl %eax, %eax - jnz L(first_vec_x0) - - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 + VPCMPEQ %ymm1, %ymm0, %ymm1 vpmovmskb %ymm1, %eax + subq %rdx, %rdi testl %eax, %eax - jnz L(first_vec_x1) + jnz L(last_vec_return_x0) - VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1 - vpmovmskb %ymm1, %eax + VPCMPEQ %ymm2, %ymm0, %ymm2 + vpmovmskb %ymm2, %eax testl %eax, %eax + jnz L(last_vec_return_x1) - jnz L(first_vec_x2_check) - subl $VEC_SIZE, %esi - jle L(max) + /* Combine last 2 VEC. */ + VPCMPEQ %ymm3, %ymm0, %ymm3 + vpmovmskb %ymm3, %eax + /* rcx has combined result from all 4 VEC. It will only be used + if the first 3 other VEC all did not contain a match. */ + salq $32, %rcx + orq %rcx, %rax + tzcntq %rax, %rax + subq $(VEC_SIZE * 2 - 1), %rdi + addq %rdi, %rax +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ + shrq $2, %rax +# endif + VZEROUPPER_RETURN - VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1 - vpmovmskb %ymm1, %eax - testl %eax, %eax - jnz L(first_vec_x3_check) - movq %r8, %rax +# ifdef USE_AS_STRNLEN + .p2align 4 +L(last_4x_vec_or_less_load): + /* Depending on entry adjust rdi / prepare first VEC in ymm1. + */ + subq $-(VEC_SIZE * 4), %rdi +L(last_4x_vec_or_less_cmpeq): + VPCMPEQ 1(%rdi), %ymm0, %ymm1 +L(last_4x_vec_or_less): # ifdef USE_AS_WCSLEN - shrq $2, %rax + /* NB: Multiply length by 4 to get byte count. */ + sall $2, %esi # endif - VZEROUPPER - ret - - .p2align 4 -L(last_2x_vec): - addl $(VEC_SIZE * 2), %esi - VPCMPEQ (%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax + /* If remaining length > VEC_SIZE * 2. This works if esi is off + by VEC_SIZE * 4. */ + testl $(VEC_SIZE * 2), %esi + jnz L(last_4x_vec) + + /* length may have been negative or positive by an offset of + VEC_SIZE * 4 depending on where this was called from. This fixes + that. */ + andl $(VEC_SIZE * 4 - 1), %esi testl %eax, %eax + jnz L(last_vec_x1_check) - jnz L(first_vec_x0_check) subl $VEC_SIZE, %esi - jle L(max) + jb L(max) - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax - testl %eax, %eax - jnz L(first_vec_x1_check) - movq %r8, %rax -# ifdef USE_AS_WCSLEN - shrq $2, %rax -# endif - VZEROUPPER - ret - - .p2align 4 -L(first_vec_x0_check): tzcntl %eax, %eax /* Check the end of data. */ - cmpq %rax, %rsi - jbe L(max) + cmpl %eax, %esi + jb L(max) + subq %rdx, %rdi + addl $(VEC_SIZE + 1), %eax addq %rdi, %rax - subq %rdx, %rax # ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ shrq $2, %rax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN +# endif .p2align 4 -L(first_vec_x1_check): +L(last_vec_return_x0): tzcntl %eax, %eax - /* Check the end of data. */ - cmpq %rax, %rsi - jbe L(max) - addq $VEC_SIZE, %rax + subq $(VEC_SIZE * 4 - 1), %rdi addq %rdi, %rax - subq %rdx, %rax -# ifdef USE_AS_WCSLEN +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ shrq $2, %rax -# endif - VZEROUPPER - ret +# endif + VZEROUPPER_RETURN .p2align 4 -L(first_vec_x2_check): +L(last_vec_return_x1): tzcntl %eax, %eax - /* Check the end of data. */ - cmpq %rax, %rsi - jbe L(max) - addq $(VEC_SIZE * 2), %rax + subq $(VEC_SIZE * 3 - 1), %rdi addq %rdi, %rax - subq %rdx, %rax -# ifdef USE_AS_WCSLEN +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ shrq $2, %rax -# endif - VZEROUPPER - ret +# endif + VZEROUPPER_RETURN +# ifdef USE_AS_STRNLEN .p2align 4 -L(first_vec_x3_check): +L(last_vec_x1_check): + tzcntl %eax, %eax /* Check the end of data. */ - cmpq %rax, %rsi - jbe L(max) - addq $(VEC_SIZE * 3), %rax + cmpl %eax, %esi + jb L(max) + subq %rdx, %rdi + incl %eax addq %rdi, %rax - subq %rdx, %rax # ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ shrq $2, %rax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN - .p2align 4 L(max): movq %r8, %rax + VZEROUPPER_RETURN + + .p2align 4 +L(last_4x_vec): + /* Test first 2x VEC normally. */ + testl %eax, %eax + jnz L(last_vec_x1) + + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + testl %eax, %eax + jnz L(last_vec_x2) + + /* Normalize length. */ + andl $(VEC_SIZE * 4 - 1), %esi + VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + testl %eax, %eax + jnz L(last_vec_x3) + + subl $(VEC_SIZE * 3), %esi + jb L(max) + + VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + tzcntl %eax, %eax + /* Check the end of data. */ + cmpl %eax, %esi + jb L(max) + subq %rdx, %rdi + addl $(VEC_SIZE * 3 + 1), %eax + addq %rdi, %rax # ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ shrq $2, %rax # endif - VZEROUPPER - ret + VZEROUPPER_RETURN - .p2align 4 -L(zero): - xorl %eax, %eax - ret -# endif .p2align 4 -L(first_vec_x0): +L(last_vec_x1): + /* essentially duplicates of first_vec_x1 but use 64 bit + instructions. */ tzcntl %eax, %eax + subq %rdx, %rdi + incl %eax addq %rdi, %rax - subq %rdx, %rax -# ifdef USE_AS_WCSLEN +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ shrq $2, %rax -# endif - VZEROUPPER - ret +# endif + VZEROUPPER_RETURN .p2align 4 -L(first_vec_x1): +L(last_vec_x2): + /* essentially duplicates of first_vec_x1 but use 64 bit + instructions. */ tzcntl %eax, %eax - addq $VEC_SIZE, %rax + subq %rdx, %rdi + addl $(VEC_SIZE + 1), %eax addq %rdi, %rax - subq %rdx, %rax -# ifdef USE_AS_WCSLEN +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ shrq $2, %rax -# endif - VZEROUPPER - ret +# endif + VZEROUPPER_RETURN .p2align 4 -L(first_vec_x2): +L(last_vec_x3): tzcntl %eax, %eax - addq $(VEC_SIZE * 2), %rax + subl $(VEC_SIZE * 2), %esi + /* Check the end of data. */ + cmpl %eax, %esi + jb L(max_end) + subq %rdx, %rdi + addl $(VEC_SIZE * 2 + 1), %eax addq %rdi, %rax - subq %rdx, %rax -# ifdef USE_AS_WCSLEN +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ shrq $2, %rax +# endif + VZEROUPPER_RETURN +L(max_end): + movq %r8, %rax + VZEROUPPER_RETURN # endif - VZEROUPPER - ret + /* Cold case for crossing page with first load. */ .p2align 4 -L(4x_vec_end): - VPCMPEQ %ymm1, %ymm0, %ymm1 +L(cross_page_boundary): + /* Align data to VEC_SIZE - 1. */ + orq $(VEC_SIZE - 1), %rdi + VPCMPEQ -(VEC_SIZE - 1)(%rdi), %ymm0, %ymm1 vpmovmskb %ymm1, %eax + /* Remove the leading bytes. sarxl only uses bits [5:0] of COUNT + so no need to manually mod rdx. */ + sarxl %edx, %eax, %eax +# ifdef USE_AS_STRNLEN testl %eax, %eax - jnz L(first_vec_x0) - VPCMPEQ %ymm2, %ymm0, %ymm2 - vpmovmskb %ymm2, %eax - testl %eax, %eax - jnz L(first_vec_x1) - VPCMPEQ %ymm3, %ymm0, %ymm3 - vpmovmskb %ymm3, %eax + jnz L(cross_page_less_vec) + leaq 1(%rdi), %rcx + subq %rdx, %rcx +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get wchar_t count. */ + shrl $2, %ecx +# endif + /* Check length. */ + cmpq %rsi, %rcx + jb L(cross_page_continue) + movq %r8, %rax +# else testl %eax, %eax - jnz L(first_vec_x2) - VPCMPEQ %ymm4, %ymm0, %ymm4 - vpmovmskb %ymm4, %eax -L(first_vec_x3): + jz L(cross_page_continue) tzcntl %eax, %eax - addq $(VEC_SIZE * 3), %rax - addq %rdi, %rax - subq %rdx, %rax -# ifdef USE_AS_WCSLEN - shrq $2, %rax +# ifdef USE_AS_WCSLEN + /* NB: Divide length by 4 to get wchar_t count. */ + shrl $2, %eax +# endif +# endif +L(return_vzeroupper): + ZERO_UPPER_VEC_REGISTERS_RETURN + +# ifdef USE_AS_STRNLEN + .p2align 4 +L(cross_page_less_vec): + tzcntl %eax, %eax +# ifdef USE_AS_WCSLEN + /* NB: Divide by 4 to convert from byte-count to length. */ + shrl $2, %eax +# endif + cmpq %rax, %rsi + cmovb %esi, %eax + VZEROUPPER_RETURN # endif - VZEROUPPER - ret END (STRLEN) #endif diff --git a/sysdeps/x86_64/multiarch/strlen-evex.S b/sysdeps/x86_64/multiarch/strlen-evex.S new file mode 100644 index 000000000..4bf6874b8 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strlen-evex.S @@ -0,0 +1,489 @@ +/* strlen/strnlen/wcslen/wcsnlen optimized with 256-bit EVEX instructions. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +# include + +# ifndef STRLEN +# define STRLEN __strlen_evex +# endif + +# define VMOVA vmovdqa64 + +# ifdef USE_AS_WCSLEN +# define VPCMP vpcmpd +# define VPMINU vpminud +# define SHIFT_REG ecx +# define CHAR_SIZE 4 +# else +# define VPCMP vpcmpb +# define VPMINU vpminub +# define SHIFT_REG edx +# define CHAR_SIZE 1 +# endif + +# define XMMZERO xmm16 +# define YMMZERO ymm16 +# define YMM1 ymm17 +# define YMM2 ymm18 +# define YMM3 ymm19 +# define YMM4 ymm20 +# define YMM5 ymm21 +# define YMM6 ymm22 + +# define VEC_SIZE 32 +# define PAGE_SIZE 4096 +# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) + + .section .text.evex,"ax",@progbits +ENTRY (STRLEN) +# ifdef USE_AS_STRNLEN + /* Check zero length. */ + test %RSI_LP, %RSI_LP + jz L(zero) +# ifdef __ILP32__ + /* Clear the upper 32 bits. */ + movl %esi, %esi +# endif + mov %RSI_LP, %R8_LP +# endif + movl %edi, %eax + vpxorq %XMMZERO, %XMMZERO, %XMMZERO + /* Clear high bits from edi. Only keeping bits relevant to page + cross check. */ + andl $(PAGE_SIZE - 1), %eax + /* Check if we may cross page boundary with one vector load. */ + cmpl $(PAGE_SIZE - VEC_SIZE), %eax + ja L(cross_page_boundary) + + /* Check the first VEC_SIZE bytes. Each bit in K0 represents a + null byte. */ + VPCMP $0, (%rdi), %YMMZERO, %k0 + kmovd %k0, %eax +# ifdef USE_AS_STRNLEN + /* If length < CHAR_PER_VEC handle special. */ + cmpq $CHAR_PER_VEC, %rsi + jbe L(first_vec_x0) +# endif + testl %eax, %eax + jz L(aligned_more) + tzcntl %eax, %eax + ret +# ifdef USE_AS_STRNLEN +L(zero): + xorl %eax, %eax + ret + + .p2align 4 +L(first_vec_x0): + /* Set bit for max len so that tzcnt will return min of max len + and position of first match. */ + btsq %rsi, %rax + tzcntl %eax, %eax + ret +# endif + + .p2align 4 +L(first_vec_x1): + tzcntl %eax, %eax + /* Safe to use 32 bit instructions as these are only called for + size = [1, 159]. */ +# ifdef USE_AS_STRNLEN + /* Use ecx which was computed earlier to compute correct value. + */ + leal -(CHAR_PER_VEC * 4 + 1)(%rcx, %rax), %eax +# else + subl %edx, %edi +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %edi +# endif + leal CHAR_PER_VEC(%rdi, %rax), %eax +# endif + ret + + .p2align 4 +L(first_vec_x2): + tzcntl %eax, %eax + /* Safe to use 32 bit instructions as these are only called for + size = [1, 159]. */ +# ifdef USE_AS_STRNLEN + /* Use ecx which was computed earlier to compute correct value. + */ + leal -(CHAR_PER_VEC * 3 + 1)(%rcx, %rax), %eax +# else + subl %edx, %edi +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %edi +# endif + leal (CHAR_PER_VEC * 2)(%rdi, %rax), %eax +# endif + ret + + .p2align 4 +L(first_vec_x3): + tzcntl %eax, %eax + /* Safe to use 32 bit instructions as these are only called for + size = [1, 159]. */ +# ifdef USE_AS_STRNLEN + /* Use ecx which was computed earlier to compute correct value. + */ + leal -(CHAR_PER_VEC * 2 + 1)(%rcx, %rax), %eax +# else + subl %edx, %edi +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %edi +# endif + leal (CHAR_PER_VEC * 3)(%rdi, %rax), %eax +# endif + ret + + .p2align 4 +L(first_vec_x4): + tzcntl %eax, %eax + /* Safe to use 32 bit instructions as these are only called for + size = [1, 159]. */ +# ifdef USE_AS_STRNLEN + /* Use ecx which was computed earlier to compute correct value. + */ + leal -(CHAR_PER_VEC + 1)(%rcx, %rax), %eax +# else + subl %edx, %edi +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %edi +# endif + leal (CHAR_PER_VEC * 4)(%rdi, %rax), %eax +# endif + ret + + .p2align 5 +L(aligned_more): + movq %rdi, %rdx + /* Align data to VEC_SIZE. */ + andq $-(VEC_SIZE), %rdi +L(cross_page_continue): + /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time + since data is only aligned to VEC_SIZE. */ +# ifdef USE_AS_STRNLEN + /* + CHAR_SIZE because it simplies the logic in + last_4x_vec_or_less. */ + leaq (VEC_SIZE * 5 + CHAR_SIZE)(%rdi), %rcx + subq %rdx, %rcx +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %ecx +# endif +# endif + /* Load first VEC regardless. */ + VPCMP $0, VEC_SIZE(%rdi), %YMMZERO, %k0 +# ifdef USE_AS_STRNLEN + /* Adjust length. If near end handle specially. */ + subq %rcx, %rsi + jb L(last_4x_vec_or_less) +# endif + kmovd %k0, %eax + testl %eax, %eax + jnz L(first_vec_x1) + + VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0 + kmovd %k0, %eax + test %eax, %eax + jnz L(first_vec_x2) + + VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMZERO, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(first_vec_x3) + + VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMZERO, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(first_vec_x4) + + addq $VEC_SIZE, %rdi +# ifdef USE_AS_STRNLEN + /* Check if at last VEC_SIZE * 4 length. */ + cmpq $(CHAR_PER_VEC * 4 - 1), %rsi + jbe L(last_4x_vec_or_less_load) + movl %edi, %ecx + andl $(VEC_SIZE * 4 - 1), %ecx +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarl $2, %ecx +# endif + /* Readjust length. */ + addq %rcx, %rsi +# endif + /* Align data to VEC_SIZE * 4. */ + andq $-(VEC_SIZE * 4), %rdi + + /* Compare 4 * VEC at a time forward. */ + .p2align 4 +L(loop_4x_vec): + /* Load first VEC regardless. */ + VMOVA (VEC_SIZE * 4)(%rdi), %YMM1 +# ifdef USE_AS_STRNLEN + /* Break if at end of length. */ + subq $(CHAR_PER_VEC * 4), %rsi + jb L(last_4x_vec_or_less_cmpeq) +# endif + /* Save some code size by microfusing VPMINU with the load. Since + the matches in ymm2/ymm4 can only be returned if there where no + matches in ymm1/ymm3 respectively there is no issue with overlap. + */ + VPMINU (VEC_SIZE * 5)(%rdi), %YMM1, %YMM2 + VMOVA (VEC_SIZE * 6)(%rdi), %YMM3 + VPMINU (VEC_SIZE * 7)(%rdi), %YMM3, %YMM4 + + VPCMP $0, %YMM2, %YMMZERO, %k0 + VPCMP $0, %YMM4, %YMMZERO, %k1 + subq $-(VEC_SIZE * 4), %rdi + kortestd %k0, %k1 + jz L(loop_4x_vec) + + /* Check if end was in first half. */ + kmovd %k0, %eax + subq %rdx, %rdi +# ifdef USE_AS_WCSLEN + shrq $2, %rdi +# endif + testl %eax, %eax + jz L(second_vec_return) + + VPCMP $0, %YMM1, %YMMZERO, %k2 + kmovd %k2, %edx + /* Combine VEC1 matches (edx) with VEC2 matches (eax). */ +# ifdef USE_AS_WCSLEN + sall $CHAR_PER_VEC, %eax + orl %edx, %eax + tzcntl %eax, %eax +# else + salq $CHAR_PER_VEC, %rax + orq %rdx, %rax + tzcntq %rax, %rax +# endif + addq %rdi, %rax + ret + + +# ifdef USE_AS_STRNLEN + +L(last_4x_vec_or_less_load): + /* Depending on entry adjust rdi / prepare first VEC in YMM1. */ + VMOVA (VEC_SIZE * 4)(%rdi), %YMM1 +L(last_4x_vec_or_less_cmpeq): + VPCMP $0, %YMM1, %YMMZERO, %k0 + addq $(VEC_SIZE * 3), %rdi +L(last_4x_vec_or_less): + kmovd %k0, %eax + /* If remaining length > VEC_SIZE * 2. This works if esi is off by + VEC_SIZE * 4. */ + testl $(CHAR_PER_VEC * 2), %esi + jnz L(last_4x_vec) + + /* length may have been negative or positive by an offset of + CHAR_PER_VEC * 4 depending on where this was called from. This + fixes that. */ + andl $(CHAR_PER_VEC * 4 - 1), %esi + testl %eax, %eax + jnz L(last_vec_x1_check) + + /* Check the end of data. */ + subl $CHAR_PER_VEC, %esi + jb L(max) + + VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0 + kmovd %k0, %eax + tzcntl %eax, %eax + /* Check the end of data. */ + cmpl %eax, %esi + jb L(max) + + subq %rdx, %rdi +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarq $2, %rdi +# endif + leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax + ret +L(max): + movq %r8, %rax + ret +# endif + + /* Placed here in strnlen so that the jcc L(last_4x_vec_or_less) + in the 4x VEC loop can use 2 byte encoding. */ + .p2align 4 +L(second_vec_return): + VPCMP $0, %YMM3, %YMMZERO, %k0 + /* Combine YMM3 matches (k0) with YMM4 matches (k1). */ +# ifdef USE_AS_WCSLEN + kunpckbw %k0, %k1, %k0 + kmovd %k0, %eax + tzcntl %eax, %eax +# else + kunpckdq %k0, %k1, %k0 + kmovq %k0, %rax + tzcntq %rax, %rax +# endif + leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax + ret + + +# ifdef USE_AS_STRNLEN +L(last_vec_x1_check): + tzcntl %eax, %eax + /* Check the end of data. */ + cmpl %eax, %esi + jb L(max) + subq %rdx, %rdi +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarq $2, %rdi +# endif + leaq (CHAR_PER_VEC)(%rdi, %rax), %rax + ret + + .p2align 4 +L(last_4x_vec): + /* Test first 2x VEC normally. */ + testl %eax, %eax + jnz L(last_vec_x1) + + VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(last_vec_x2) + + /* Normalize length. */ + andl $(CHAR_PER_VEC * 4 - 1), %esi + VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMZERO, %k0 + kmovd %k0, %eax + testl %eax, %eax + jnz L(last_vec_x3) + + /* Check the end of data. */ + subl $(CHAR_PER_VEC * 3), %esi + jb L(max) + + VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMZERO, %k0 + kmovd %k0, %eax + tzcntl %eax, %eax + /* Check the end of data. */ + cmpl %eax, %esi + jb L(max_end) + + subq %rdx, %rdi +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarq $2, %rdi +# endif + leaq (CHAR_PER_VEC * 4)(%rdi, %rax), %rax + ret + + .p2align 4 +L(last_vec_x1): + tzcntl %eax, %eax + subq %rdx, %rdi +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarq $2, %rdi +# endif + leaq (CHAR_PER_VEC)(%rdi, %rax), %rax + ret + + .p2align 4 +L(last_vec_x2): + tzcntl %eax, %eax + subq %rdx, %rdi +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarq $2, %rdi +# endif + leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax + ret + + .p2align 4 +L(last_vec_x3): + tzcntl %eax, %eax + subl $(CHAR_PER_VEC * 2), %esi + /* Check the end of data. */ + cmpl %eax, %esi + jb L(max_end) + subq %rdx, %rdi +# ifdef USE_AS_WCSLEN + /* NB: Divide bytes by 4 to get the wchar_t count. */ + sarq $2, %rdi +# endif + leaq (CHAR_PER_VEC * 3)(%rdi, %rax), %rax + ret +L(max_end): + movq %r8, %rax + ret +# endif + + /* Cold case for crossing page with first load. */ + .p2align 4 +L(cross_page_boundary): + movq %rdi, %rdx + /* Align data to VEC_SIZE. */ + andq $-VEC_SIZE, %rdi + VPCMP $0, (%rdi), %YMMZERO, %k0 + kmovd %k0, %eax + /* Remove the leading bytes. */ +# ifdef USE_AS_WCSLEN + /* NB: Divide shift count by 4 since each bit in K0 represent 4 + bytes. */ + movl %edx, %ecx + shrl $2, %ecx + andl $(CHAR_PER_VEC - 1), %ecx +# endif + /* SHIFT_REG is ecx for USE_AS_WCSLEN and edx otherwise. */ + sarxl %SHIFT_REG, %eax, %eax + testl %eax, %eax +# ifndef USE_AS_STRNLEN + jz L(cross_page_continue) + tzcntl %eax, %eax + ret +# else + jnz L(cross_page_less_vec) +# ifndef USE_AS_WCSLEN + movl %edx, %ecx + andl $(CHAR_PER_VEC - 1), %ecx +# endif + movl $CHAR_PER_VEC, %eax + subl %ecx, %eax + /* Check the end of data. */ + cmpq %rax, %rsi + ja L(cross_page_continue) + movl %esi, %eax + ret +L(cross_page_less_vec): + tzcntl %eax, %eax + /* Select min of length and position of first null. */ + cmpq %rax, %rsi + cmovb %esi, %eax + ret +# endif + +END (STRLEN) +#endif diff --git a/sysdeps/x86_64/multiarch/strlen-sse2.S b/sysdeps/x86_64/multiarch/strlen-sse2.S index 055fbbc69..812af73c1 100644 --- a/sysdeps/x86_64/multiarch/strlen-sse2.S +++ b/sysdeps/x86_64/multiarch/strlen-sse2.S @@ -20,4 +20,4 @@ # define strlen __strlen_sse2 #endif -#include "../strlen.S" +#include "strlen-vec.S" diff --git a/sysdeps/x86_64/multiarch/strlen-vec.S b/sysdeps/x86_64/multiarch/strlen-vec.S new file mode 100644 index 000000000..439e486a4 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strlen-vec.S @@ -0,0 +1,270 @@ +/* SSE2 version of strlen and SSE4.1 version of wcslen. + Copyright (C) 2012-2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#ifdef AS_WCSLEN +# define PMINU pminud +# define PCMPEQ pcmpeqd +# define SHIFT_RETURN shrq $2, %rax +#else +# define PMINU pminub +# define PCMPEQ pcmpeqb +# define SHIFT_RETURN +#endif + +/* Long lived register in strlen(s), strnlen(s, n) are: + + %xmm3 - zero + %rdi - s + %r10 (s+n) & (~(64-1)) + %r11 s+n +*/ + + +.text +ENTRY(strlen) + +/* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx. */ +#define FIND_ZERO \ + PCMPEQ (%rax), %xmm0; \ + PCMPEQ 16(%rax), %xmm1; \ + PCMPEQ 32(%rax), %xmm2; \ + PCMPEQ 48(%rax), %xmm3; \ + pmovmskb %xmm0, %esi; \ + pmovmskb %xmm1, %edx; \ + pmovmskb %xmm2, %r8d; \ + pmovmskb %xmm3, %ecx; \ + salq $16, %rdx; \ + salq $16, %rcx; \ + orq %rsi, %rdx; \ + orq %r8, %rcx; \ + salq $32, %rcx; \ + orq %rcx, %rdx; + +#ifdef AS_STRNLEN +/* Do not read anything when n==0. */ + test %RSI_LP, %RSI_LP + jne L(n_nonzero) + xor %rax, %rax + ret +L(n_nonzero): +# ifdef AS_WCSLEN +/* Check for overflow from maxlen * sizeof(wchar_t). If it would + overflow the only way this program doesn't have undefined behavior + is if there is a null terminator in valid memory so wcslen will + suffice. */ + mov %RSI_LP, %R10_LP + sar $62, %R10_LP + test %R10_LP, %R10_LP + jnz __wcslen_sse4_1 + sal $2, %RSI_LP +# endif + + +/* Initialize long lived registers. */ + + add %RDI_LP, %RSI_LP +# ifdef AS_WCSLEN +/* Check for overflow again from s + maxlen * sizeof(wchar_t). */ + jbe __wcslen_sse4_1 +# endif + mov %RSI_LP, %R10_LP + and $-64, %R10_LP + mov %RSI_LP, %R11_LP +#endif + + pxor %xmm0, %xmm0 + pxor %xmm1, %xmm1 + pxor %xmm2, %xmm2 + pxor %xmm3, %xmm3 + movq %rdi, %rax + movq %rdi, %rcx + andq $4095, %rcx +/* Offsets 4032-4047 will be aligned into 4032 thus fit into page. */ + cmpq $4047, %rcx +/* We cannot unify this branching as it would be ~6 cycles slower. */ + ja L(cross_page) + +#ifdef AS_STRNLEN +/* Test if end is among first 64 bytes. */ +# define STRNLEN_PROLOG \ + mov %r11, %rsi; \ + subq %rax, %rsi; \ + andq $-64, %rax; \ + testq $-64, %rsi; \ + je L(strnlen_ret) +#else +# define STRNLEN_PROLOG andq $-64, %rax; +#endif + +/* Ignore bits in mask that come before start of string. */ +#define PROLOG(lab) \ + movq %rdi, %rcx; \ + xorq %rax, %rcx; \ + STRNLEN_PROLOG; \ + sarq %cl, %rdx; \ + test %rdx, %rdx; \ + je L(lab); \ + bsfq %rdx, %rax; \ + SHIFT_RETURN; \ + ret + +#ifdef AS_STRNLEN + andq $-16, %rax + FIND_ZERO +#else + /* Test first 16 bytes unaligned. */ + movdqu (%rax), %xmm4 + PCMPEQ %xmm0, %xmm4 + pmovmskb %xmm4, %edx + test %edx, %edx + je L(next48_bytes) + bsf %edx, %eax /* If eax is zeroed 16bit bsf can be used. */ + SHIFT_RETURN + ret + +L(next48_bytes): +/* Same as FIND_ZERO except we do not check first 16 bytes. */ + andq $-16, %rax + PCMPEQ 16(%rax), %xmm1 + PCMPEQ 32(%rax), %xmm2 + PCMPEQ 48(%rax), %xmm3 + pmovmskb %xmm1, %edx + pmovmskb %xmm2, %r8d + pmovmskb %xmm3, %ecx + salq $16, %rdx + salq $16, %rcx + orq %r8, %rcx + salq $32, %rcx + orq %rcx, %rdx +#endif + + /* When no zero byte is found xmm1-3 are zero so we do not have to + zero them. */ + PROLOG(loop) + + .p2align 4 +L(cross_page): + andq $-64, %rax + FIND_ZERO + PROLOG(loop_init) + +#ifdef AS_STRNLEN +/* We must do this check to correctly handle strnlen (s, -1). */ +L(strnlen_ret): + bts %rsi, %rdx + sarq %cl, %rdx + test %rdx, %rdx + je L(loop_init) + bsfq %rdx, %rax + SHIFT_RETURN + ret +#endif + .p2align 4 +L(loop_init): + pxor %xmm1, %xmm1 + pxor %xmm2, %xmm2 + pxor %xmm3, %xmm3 +#ifdef AS_STRNLEN + .p2align 4 +L(loop): + + addq $64, %rax + cmpq %rax, %r10 + je L(exit_end) + + movdqa (%rax), %xmm0 + PMINU 16(%rax), %xmm0 + PMINU 32(%rax), %xmm0 + PMINU 48(%rax), %xmm0 + PCMPEQ %xmm3, %xmm0 + pmovmskb %xmm0, %edx + testl %edx, %edx + jne L(exit) + jmp L(loop) + + .p2align 4 +L(exit_end): + cmp %rax, %r11 + je L(first) /* Do not read when end is at page boundary. */ + pxor %xmm0, %xmm0 + FIND_ZERO + +L(first): + bts %r11, %rdx + bsfq %rdx, %rdx + addq %rdx, %rax + subq %rdi, %rax + SHIFT_RETURN + ret + + .p2align 4 +L(exit): + pxor %xmm0, %xmm0 + FIND_ZERO + + bsfq %rdx, %rdx + addq %rdx, %rax + subq %rdi, %rax + SHIFT_RETURN + ret + +#else + + /* Main loop. Unrolled twice to improve L2 cache performance on core2. */ + .p2align 4 +L(loop): + + movdqa 64(%rax), %xmm0 + PMINU 80(%rax), %xmm0 + PMINU 96(%rax), %xmm0 + PMINU 112(%rax), %xmm0 + PCMPEQ %xmm3, %xmm0 + pmovmskb %xmm0, %edx + testl %edx, %edx + jne L(exit64) + + subq $-128, %rax + + movdqa (%rax), %xmm0 + PMINU 16(%rax), %xmm0 + PMINU 32(%rax), %xmm0 + PMINU 48(%rax), %xmm0 + PCMPEQ %xmm3, %xmm0 + pmovmskb %xmm0, %edx + testl %edx, %edx + jne L(exit0) + jmp L(loop) + + .p2align 4 +L(exit64): + addq $64, %rax +L(exit0): + pxor %xmm0, %xmm0 + FIND_ZERO + + bsfq %rdx, %rdx + addq %rdx, %rax + subq %rdi, %rax + SHIFT_RETURN + ret + +#endif + +END(strlen) diff --git a/sysdeps/x86_64/multiarch/strncat-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncat-avx2-rtm.S new file mode 100644 index 000000000..0dcea18db --- /dev/null +++ b/sysdeps/x86_64/multiarch/strncat-avx2-rtm.S @@ -0,0 +1,3 @@ +#define USE_AS_STRNCAT +#define STRCAT __strncat_avx2_rtm +#include "strcat-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/strncat-evex.S b/sysdeps/x86_64/multiarch/strncat-evex.S new file mode 100644 index 000000000..8884f0237 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strncat-evex.S @@ -0,0 +1,3 @@ +#define USE_AS_STRNCAT +#define STRCAT __strncat_evex +#include "strcat-evex.S" diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S new file mode 100644 index 000000000..68bad365b --- /dev/null +++ b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S @@ -0,0 +1,4 @@ +#define STRCMP __strncmp_avx2_rtm +#define USE_AS_STRNCMP 1 +#define OVERFLOW_STRCMP __strcmp_avx2_rtm +#include "strcmp-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2.S b/sysdeps/x86_64/multiarch/strncmp-avx2.S index 1678bcc23..f138e9f1f 100644 --- a/sysdeps/x86_64/multiarch/strncmp-avx2.S +++ b/sysdeps/x86_64/multiarch/strncmp-avx2.S @@ -1,3 +1,4 @@ #define STRCMP __strncmp_avx2 #define USE_AS_STRNCMP 1 +#define OVERFLOW_STRCMP __strcmp_avx2 #include "strcmp-avx2.S" diff --git a/sysdeps/x86_64/multiarch/strncmp-evex.S b/sysdeps/x86_64/multiarch/strncmp-evex.S new file mode 100644 index 000000000..a1d53e8c9 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strncmp-evex.S @@ -0,0 +1,3 @@ +#define STRCMP __strncmp_evex +#define USE_AS_STRNCMP 1 +#include "strcmp-evex.S" diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c index 6b63b0ac2..dee2a41b0 100644 --- a/sysdeps/x86_64/multiarch/strncmp.c +++ b/sysdeps/x86_64/multiarch/strncmp.c @@ -30,16 +30,29 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden; extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; static inline void * IFUNC_SELECTOR (void) { const struct cpu_features* cpu_features = __get_cpu_features (); - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) - return OPTIMIZE (avx2); + { + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable) + && CPU_FEATURES_CPU_P (cpu_features, BMI2) + && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP)) + return OPTIMIZE (evex); + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + return OPTIMIZE (avx2_rtm); + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE (avx2); + } if (CPU_FEATURES_CPU_P (cpu_features, SSE4_2) && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2)) diff --git a/sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S new file mode 100644 index 000000000..79e708329 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S @@ -0,0 +1,3 @@ +#define USE_AS_STRNCPY +#define STRCPY __strncpy_avx2_rtm +#include "strcpy-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/strncpy-evex.S b/sysdeps/x86_64/multiarch/strncpy-evex.S new file mode 100644 index 000000000..40e391f0d --- /dev/null +++ b/sysdeps/x86_64/multiarch/strncpy-evex.S @@ -0,0 +1,3 @@ +#define USE_AS_STRNCPY +#define STRCPY __strncpy_evex +#include "strcpy-evex.S" diff --git a/sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S b/sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S new file mode 100644 index 000000000..04f1626a5 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S @@ -0,0 +1,4 @@ +#define STRLEN __strnlen_avx2_rtm +#define USE_AS_STRNLEN 1 + +#include "strlen-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/strnlen-evex.S b/sysdeps/x86_64/multiarch/strnlen-evex.S new file mode 100644 index 000000000..722022f30 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strnlen-evex.S @@ -0,0 +1,4 @@ +#define STRLEN __strnlen_evex +#define USE_AS_STRNLEN 1 + +#include "strlen-evex.S" diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S new file mode 100644 index 000000000..5def14ec1 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S @@ -0,0 +1,12 @@ +#ifndef STRRCHR +# define STRRCHR __strrchr_avx2_rtm +#endif + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + +#define SECTION(p) p##.avx.rtm + +#include "strrchr-avx2.S" diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2.S b/sysdeps/x86_64/multiarch/strrchr-avx2.S index 23077b4c4..bfb860ebb 100644 --- a/sysdeps/x86_64/multiarch/strrchr-avx2.S +++ b/sysdeps/x86_64/multiarch/strrchr-avx2.S @@ -36,9 +36,13 @@ # define VZEROUPPER vzeroupper # endif +# ifndef SECTION +# define SECTION(p) p##.avx +# endif + # define VEC_SIZE 32 - .section .text.avx,"ax",@progbits + .section SECTION(.text),"ax",@progbits ENTRY (STRRCHR) movd %esi, %xmm4 movl %edi, %ecx @@ -166,8 +170,8 @@ L(return_value): # endif bsrl %eax, %eax leaq -VEC_SIZE(%rdi, %rax), %rax - VZEROUPPER - ret +L(return_vzeroupper): + ZERO_UPPER_VEC_REGISTERS_RETURN .p2align 4 L(match): @@ -198,8 +202,7 @@ L(find_nul): jz L(return_value) bsrl %eax, %eax leaq -VEC_SIZE(%rdi, %rax), %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(char_and_nul): @@ -222,14 +225,12 @@ L(char_and_nul_in_first_vec): jz L(return_null) bsrl %eax, %eax leaq -VEC_SIZE(%rdi, %rax), %rax - VZEROUPPER - ret + VZEROUPPER_RETURN .p2align 4 L(return_null): xorl %eax, %eax - VZEROUPPER - ret + VZEROUPPER_RETURN END (STRRCHR) #endif diff --git a/sysdeps/x86_64/multiarch/strrchr-evex.S b/sysdeps/x86_64/multiarch/strrchr-evex.S new file mode 100644 index 000000000..f920b5a58 --- /dev/null +++ b/sysdeps/x86_64/multiarch/strrchr-evex.S @@ -0,0 +1,265 @@ +/* strrchr/wcsrchr optimized with 256-bit EVEX instructions. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if IS_IN (libc) + +# include + +# ifndef STRRCHR +# define STRRCHR __strrchr_evex +# endif + +# define VMOVU vmovdqu64 +# define VMOVA vmovdqa64 + +# ifdef USE_AS_WCSRCHR +# define VPBROADCAST vpbroadcastd +# define VPCMP vpcmpd +# define SHIFT_REG r8d +# else +# define VPBROADCAST vpbroadcastb +# define VPCMP vpcmpb +# define SHIFT_REG ecx +# endif + +# define XMMZERO xmm16 +# define YMMZERO ymm16 +# define YMMMATCH ymm17 +# define YMM1 ymm18 + +# define VEC_SIZE 32 + + .section .text.evex,"ax",@progbits +ENTRY (STRRCHR) + movl %edi, %ecx + /* Broadcast CHAR to YMMMATCH. */ + VPBROADCAST %esi, %YMMMATCH + + vpxorq %XMMZERO, %XMMZERO, %XMMZERO + + /* Check if we may cross page boundary with one vector load. */ + andl $(2 * VEC_SIZE - 1), %ecx + cmpl $VEC_SIZE, %ecx + ja L(cros_page_boundary) + + VMOVU (%rdi), %YMM1 + + /* Each bit in K0 represents a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM1, %k0 + /* Each bit in K1 represents a CHAR in YMM1. */ + VPCMP $0, %YMMMATCH, %YMM1, %k1 + kmovd %k0, %ecx + kmovd %k1, %eax + + addq $VEC_SIZE, %rdi + + testl %eax, %eax + jnz L(first_vec) + + testl %ecx, %ecx + jnz L(return_null) + + andq $-VEC_SIZE, %rdi + xorl %edx, %edx + jmp L(aligned_loop) + + .p2align 4 +L(first_vec): + /* Check if there is a null byte. */ + testl %ecx, %ecx + jnz L(char_and_nul_in_first_vec) + + /* Remember the match and keep searching. */ + movl %eax, %edx + movq %rdi, %rsi + andq $-VEC_SIZE, %rdi + jmp L(aligned_loop) + + .p2align 4 +L(cros_page_boundary): + andl $(VEC_SIZE - 1), %ecx + andq $-VEC_SIZE, %rdi + +# ifdef USE_AS_WCSRCHR + /* NB: Divide shift count by 4 since each bit in K1 represent 4 + bytes. */ + movl %ecx, %SHIFT_REG + sarl $2, %SHIFT_REG +# endif + + VMOVA (%rdi), %YMM1 + + /* Each bit in K0 represents a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM1, %k0 + /* Each bit in K1 represents a CHAR in YMM1. */ + VPCMP $0, %YMMMATCH, %YMM1, %k1 + kmovd %k0, %edx + kmovd %k1, %eax + + shrxl %SHIFT_REG, %edx, %edx + shrxl %SHIFT_REG, %eax, %eax + addq $VEC_SIZE, %rdi + + /* Check if there is a CHAR. */ + testl %eax, %eax + jnz L(found_char) + + testl %edx, %edx + jnz L(return_null) + + jmp L(aligned_loop) + + .p2align 4 +L(found_char): + testl %edx, %edx + jnz L(char_and_nul) + + /* Remember the match and keep searching. */ + movl %eax, %edx + leaq (%rdi, %rcx), %rsi + + .p2align 4 +L(aligned_loop): + VMOVA (%rdi), %YMM1 + addq $VEC_SIZE, %rdi + + /* Each bit in K0 represents a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM1, %k0 + /* Each bit in K1 represents a CHAR in YMM1. */ + VPCMP $0, %YMMMATCH, %YMM1, %k1 + kmovd %k0, %ecx + kmovd %k1, %eax + orl %eax, %ecx + jnz L(char_nor_null) + + VMOVA (%rdi), %YMM1 + add $VEC_SIZE, %rdi + + /* Each bit in K0 represents a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM1, %k0 + /* Each bit in K1 represents a CHAR in YMM1. */ + VPCMP $0, %YMMMATCH, %YMM1, %k1 + kmovd %k0, %ecx + kmovd %k1, %eax + orl %eax, %ecx + jnz L(char_nor_null) + + VMOVA (%rdi), %YMM1 + addq $VEC_SIZE, %rdi + + /* Each bit in K0 represents a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM1, %k0 + /* Each bit in K1 represents a CHAR in YMM1. */ + VPCMP $0, %YMMMATCH, %YMM1, %k1 + kmovd %k0, %ecx + kmovd %k1, %eax + orl %eax, %ecx + jnz L(char_nor_null) + + VMOVA (%rdi), %YMM1 + addq $VEC_SIZE, %rdi + + /* Each bit in K0 represents a null byte in YMM1. */ + VPCMP $0, %YMMZERO, %YMM1, %k0 + /* Each bit in K1 represents a CHAR in YMM1. */ + VPCMP $0, %YMMMATCH, %YMM1, %k1 + kmovd %k0, %ecx + kmovd %k1, %eax + orl %eax, %ecx + jz L(aligned_loop) + + .p2align 4 +L(char_nor_null): + /* Find a CHAR or a null byte in a loop. */ + testl %eax, %eax + jnz L(match) +L(return_value): + testl %edx, %edx + jz L(return_null) + movl %edx, %eax + movq %rsi, %rdi + bsrl %eax, %eax +# ifdef USE_AS_WCSRCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + leaq -VEC_SIZE(%rdi, %rax, 4), %rax +# else + leaq -VEC_SIZE(%rdi, %rax), %rax +# endif + ret + + .p2align 4 +L(match): + /* Find a CHAR. Check if there is a null byte. */ + kmovd %k0, %ecx + testl %ecx, %ecx + jnz L(find_nul) + + /* Remember the match and keep searching. */ + movl %eax, %edx + movq %rdi, %rsi + jmp L(aligned_loop) + + .p2align 4 +L(find_nul): + /* Mask out any matching bits after the null byte. */ + movl %ecx, %r8d + subl $1, %r8d + xorl %ecx, %r8d + andl %r8d, %eax + testl %eax, %eax + /* If there is no CHAR here, return the remembered one. */ + jz L(return_value) + bsrl %eax, %eax +# ifdef USE_AS_WCSRCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + leaq -VEC_SIZE(%rdi, %rax, 4), %rax +# else + leaq -VEC_SIZE(%rdi, %rax), %rax +# endif + ret + + .p2align 4 +L(char_and_nul): + /* Find both a CHAR and a null byte. */ + addq %rcx, %rdi + movl %edx, %ecx +L(char_and_nul_in_first_vec): + /* Mask out any matching bits after the null byte. */ + movl %ecx, %r8d + subl $1, %r8d + xorl %ecx, %r8d + andl %r8d, %eax + testl %eax, %eax + /* Return null pointer if the null byte comes first. */ + jz L(return_null) + bsrl %eax, %eax +# ifdef USE_AS_WCSRCHR + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + leaq -VEC_SIZE(%rdi, %rax, 4), %rax +# else + leaq -VEC_SIZE(%rdi, %rax), %rax +# endif + ret + + .p2align 4 +L(return_null): + xorl %eax, %eax + ret + +END (STRRCHR) +#endif diff --git a/sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S new file mode 100644 index 000000000..d49dbbf0b --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S @@ -0,0 +1,3 @@ +#define STRCHR __wcschr_avx2_rtm +#define USE_AS_WCSCHR 1 +#include "strchr-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/wcschr-evex.S b/sysdeps/x86_64/multiarch/wcschr-evex.S new file mode 100644 index 000000000..7cb8f1e41 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcschr-evex.S @@ -0,0 +1,3 @@ +#define STRCHR __wcschr_evex +#define USE_AS_WCSCHR 1 +#include "strchr-evex.S" diff --git a/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S new file mode 100644 index 000000000..d6ca2b806 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S @@ -0,0 +1,4 @@ +#define STRCMP __wcscmp_avx2_rtm +#define USE_AS_WCSCMP 1 + +#include "strcmp-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/wcscmp-evex.S b/sysdeps/x86_64/multiarch/wcscmp-evex.S new file mode 100644 index 000000000..42e73e51e --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcscmp-evex.S @@ -0,0 +1,4 @@ +#define STRCMP __wcscmp_evex +#define USE_AS_WCSCMP 1 + +#include "strcmp-evex.S" diff --git a/sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S new file mode 100644 index 000000000..35658d736 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S @@ -0,0 +1,4 @@ +#define STRLEN __wcslen_avx2_rtm +#define USE_AS_WCSLEN 1 + +#include "strlen-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/wcslen-evex.S b/sysdeps/x86_64/multiarch/wcslen-evex.S new file mode 100644 index 000000000..bdafa83bd --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcslen-evex.S @@ -0,0 +1,4 @@ +#define STRLEN __wcslen_evex +#define USE_AS_WCSLEN 1 + +#include "strlen-evex.S" diff --git a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S new file mode 100644 index 000000000..7e62621af --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S @@ -0,0 +1,4 @@ +#define AS_WCSLEN +#define strlen __wcslen_sse4_1 + +#include "strlen-vec.S" diff --git a/sysdeps/x86_64/multiarch/wcslen.c b/sysdeps/x86_64/multiarch/wcslen.c index bb97438c7..26b5fdffd 100644 --- a/sysdeps/x86_64/multiarch/wcslen.c +++ b/sysdeps/x86_64/multiarch/wcslen.c @@ -24,7 +24,7 @@ # undef __wcslen # define SYMBOL_NAME wcslen -# include "ifunc-avx2.h" +# include "ifunc-wcslen.h" libc_ifunc_redirected (__redirect_wcslen, __wcslen, IFUNC_SELECTOR ()); weak_alias (__wcslen, wcslen); diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S new file mode 100644 index 000000000..f467582cb --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S @@ -0,0 +1,5 @@ +#define STRCMP __wcsncmp_avx2_rtm +#define USE_AS_STRNCMP 1 +#define USE_AS_WCSCMP 1 +#define OVERFLOW_STRCMP __wcscmp_avx2_rtm +#include "strcmp-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S index 4fa1de4d3..e9ede522b 100644 --- a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S +++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S @@ -1,5 +1,5 @@ #define STRCMP __wcsncmp_avx2 #define USE_AS_STRNCMP 1 #define USE_AS_WCSCMP 1 - +#define OVERFLOW_STRCMP __wcscmp_avx2 #include "strcmp-avx2.S" diff --git a/sysdeps/x86_64/multiarch/wcsncmp-evex.S b/sysdeps/x86_64/multiarch/wcsncmp-evex.S new file mode 100644 index 000000000..8a8e31071 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcsncmp-evex.S @@ -0,0 +1,5 @@ +#define STRCMP __wcsncmp_evex +#define USE_AS_STRNCMP 1 +#define USE_AS_WCSCMP 1 + +#include "strcmp-evex.S" diff --git a/sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S new file mode 100644 index 000000000..7437ebee2 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S @@ -0,0 +1,5 @@ +#define STRLEN __wcsnlen_avx2_rtm +#define USE_AS_WCSLEN 1 +#define USE_AS_STRNLEN 1 + +#include "strlen-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/wcsnlen-evex.S b/sysdeps/x86_64/multiarch/wcsnlen-evex.S new file mode 100644 index 000000000..24773bb4e --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcsnlen-evex.S @@ -0,0 +1,5 @@ +#define STRLEN __wcsnlen_evex +#define USE_AS_WCSLEN 1 +#define USE_AS_STRNLEN 1 + +#include "strlen-evex.S" diff --git a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S index a8cab0cb0..5fa51fe07 100644 --- a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S +++ b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S @@ -2,4 +2,4 @@ #define AS_STRNLEN #define strlen __wcsnlen_sse4_1 -#include "../strlen.S" +#include "strlen-vec.S" diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c index 8c1fc1a57..f15c1b328 100644 --- a/sysdeps/x86_64/multiarch/wcsnlen.c +++ b/sysdeps/x86_64/multiarch/wcsnlen.c @@ -24,27 +24,7 @@ # undef __wcsnlen # define SYMBOL_NAME wcsnlen -# include - -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; -extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; -extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; - -static inline void * -IFUNC_SELECTOR (void) -{ - const struct cpu_features* cpu_features = __get_cpu_features (); - - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) - && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) - return OPTIMIZE (avx2); - - if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1)) - return OPTIMIZE (sse4_1); - - return OPTIMIZE (sse2); -} +# include "ifunc-wcslen.h" libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ()); weak_alias (__wcsnlen, wcsnlen); diff --git a/sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S new file mode 100644 index 000000000..9bf760833 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S @@ -0,0 +1,3 @@ +#define STRRCHR __wcsrchr_avx2_rtm +#define USE_AS_WCSRCHR 1 +#include "strrchr-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/wcsrchr-evex.S b/sysdeps/x86_64/multiarch/wcsrchr-evex.S new file mode 100644 index 000000000..c64602f7d --- /dev/null +++ b/sysdeps/x86_64/multiarch/wcsrchr-evex.S @@ -0,0 +1,3 @@ +#define STRRCHR __wcsrchr_evex +#define USE_AS_WCSRCHR 1 +#include "strrchr-evex.S" diff --git a/sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S new file mode 100644 index 000000000..58ed21db0 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S @@ -0,0 +1,4 @@ +#define MEMCHR __wmemchr_avx2_rtm +#define USE_AS_WMEMCHR 1 + +#include "memchr-avx2-rtm.S" diff --git a/sysdeps/x86_64/multiarch/wmemchr-evex.S b/sysdeps/x86_64/multiarch/wmemchr-evex.S new file mode 100644 index 000000000..06cd0f9f5 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wmemchr-evex.S @@ -0,0 +1,4 @@ +#define MEMCHR __wmemchr_evex +#define USE_AS_WMEMCHR 1 + +#include "memchr-evex.S" diff --git a/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S b/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S new file mode 100644 index 000000000..31104d121 --- /dev/null +++ b/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S @@ -0,0 +1,4 @@ +#define MEMCMP __wmemcmp_avx2_movbe_rtm +#define USE_AS_WMEMCMP 1 + +#include "memcmp-avx2-movbe-rtm.S" diff --git a/sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S new file mode 100644 index 000000000..4726d74aa --- /dev/null +++ b/sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S @@ -0,0 +1,4 @@ +#define MEMCMP __wmemcmp_evex_movbe +#define USE_AS_WMEMCMP 1 + +#include "memcmp-evex-movbe.S" diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S index 2e226d0d5..8422c15cc 100644 --- a/sysdeps/x86_64/strlen.S +++ b/sysdeps/x86_64/strlen.S @@ -1,5 +1,5 @@ -/* SSE2 version of strlen/wcslen. - Copyright (C) 2012-2020 Free Software Foundation, Inc. +/* SSE2 version of strlen. + Copyright (C) 2021 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,243 +16,6 @@ License along with the GNU C Library; if not, see . */ -#include +#include "multiarch/strlen-vec.S" -#ifdef AS_WCSLEN -# define PMINU pminud -# define PCMPEQ pcmpeqd -# define SHIFT_RETURN shrq $2, %rax -#else -# define PMINU pminub -# define PCMPEQ pcmpeqb -# define SHIFT_RETURN -#endif - -/* Long lived register in strlen(s), strnlen(s, n) are: - - %xmm3 - zero - %rdi - s - %r10 (s+n) & (~(64-1)) - %r11 s+n -*/ - - -.text -ENTRY(strlen) - -/* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx. */ -#define FIND_ZERO \ - PCMPEQ (%rax), %xmm0; \ - PCMPEQ 16(%rax), %xmm1; \ - PCMPEQ 32(%rax), %xmm2; \ - PCMPEQ 48(%rax), %xmm3; \ - pmovmskb %xmm0, %esi; \ - pmovmskb %xmm1, %edx; \ - pmovmskb %xmm2, %r8d; \ - pmovmskb %xmm3, %ecx; \ - salq $16, %rdx; \ - salq $16, %rcx; \ - orq %rsi, %rdx; \ - orq %r8, %rcx; \ - salq $32, %rcx; \ - orq %rcx, %rdx; - -#ifdef AS_STRNLEN -/* Do not read anything when n==0. */ - test %RSI_LP, %RSI_LP - jne L(n_nonzero) - xor %rax, %rax - ret -L(n_nonzero): -# ifdef AS_WCSLEN - shl $2, %RSI_LP -# endif - -/* Initialize long lived registers. */ - - add %RDI_LP, %RSI_LP - mov %RSI_LP, %R10_LP - and $-64, %R10_LP - mov %RSI_LP, %R11_LP -#endif - - pxor %xmm0, %xmm0 - pxor %xmm1, %xmm1 - pxor %xmm2, %xmm2 - pxor %xmm3, %xmm3 - movq %rdi, %rax - movq %rdi, %rcx - andq $4095, %rcx -/* Offsets 4032-4047 will be aligned into 4032 thus fit into page. */ - cmpq $4047, %rcx -/* We cannot unify this branching as it would be ~6 cycles slower. */ - ja L(cross_page) - -#ifdef AS_STRNLEN -/* Test if end is among first 64 bytes. */ -# define STRNLEN_PROLOG \ - mov %r11, %rsi; \ - subq %rax, %rsi; \ - andq $-64, %rax; \ - testq $-64, %rsi; \ - je L(strnlen_ret) -#else -# define STRNLEN_PROLOG andq $-64, %rax; -#endif - -/* Ignore bits in mask that come before start of string. */ -#define PROLOG(lab) \ - movq %rdi, %rcx; \ - xorq %rax, %rcx; \ - STRNLEN_PROLOG; \ - sarq %cl, %rdx; \ - test %rdx, %rdx; \ - je L(lab); \ - bsfq %rdx, %rax; \ - SHIFT_RETURN; \ - ret - -#ifdef AS_STRNLEN - andq $-16, %rax - FIND_ZERO -#else - /* Test first 16 bytes unaligned. */ - movdqu (%rax), %xmm4 - PCMPEQ %xmm0, %xmm4 - pmovmskb %xmm4, %edx - test %edx, %edx - je L(next48_bytes) - bsf %edx, %eax /* If eax is zeroed 16bit bsf can be used. */ - SHIFT_RETURN - ret - -L(next48_bytes): -/* Same as FIND_ZERO except we do not check first 16 bytes. */ - andq $-16, %rax - PCMPEQ 16(%rax), %xmm1 - PCMPEQ 32(%rax), %xmm2 - PCMPEQ 48(%rax), %xmm3 - pmovmskb %xmm1, %edx - pmovmskb %xmm2, %r8d - pmovmskb %xmm3, %ecx - salq $16, %rdx - salq $16, %rcx - orq %r8, %rcx - salq $32, %rcx - orq %rcx, %rdx -#endif - - /* When no zero byte is found xmm1-3 are zero so we do not have to - zero them. */ - PROLOG(loop) - - .p2align 4 -L(cross_page): - andq $-64, %rax - FIND_ZERO - PROLOG(loop_init) - -#ifdef AS_STRNLEN -/* We must do this check to correctly handle strnlen (s, -1). */ -L(strnlen_ret): - bts %rsi, %rdx - sarq %cl, %rdx - test %rdx, %rdx - je L(loop_init) - bsfq %rdx, %rax - SHIFT_RETURN - ret -#endif - .p2align 4 -L(loop_init): - pxor %xmm1, %xmm1 - pxor %xmm2, %xmm2 - pxor %xmm3, %xmm3 -#ifdef AS_STRNLEN - .p2align 4 -L(loop): - - addq $64, %rax - cmpq %rax, %r10 - je L(exit_end) - - movdqa (%rax), %xmm0 - PMINU 16(%rax), %xmm0 - PMINU 32(%rax), %xmm0 - PMINU 48(%rax), %xmm0 - PCMPEQ %xmm3, %xmm0 - pmovmskb %xmm0, %edx - testl %edx, %edx - jne L(exit) - jmp L(loop) - - .p2align 4 -L(exit_end): - cmp %rax, %r11 - je L(first) /* Do not read when end is at page boundary. */ - pxor %xmm0, %xmm0 - FIND_ZERO - -L(first): - bts %r11, %rdx - bsfq %rdx, %rdx - addq %rdx, %rax - subq %rdi, %rax - SHIFT_RETURN - ret - - .p2align 4 -L(exit): - pxor %xmm0, %xmm0 - FIND_ZERO - - bsfq %rdx, %rdx - addq %rdx, %rax - subq %rdi, %rax - SHIFT_RETURN - ret - -#else - - /* Main loop. Unrolled twice to improve L2 cache performance on core2. */ - .p2align 4 -L(loop): - - movdqa 64(%rax), %xmm0 - PMINU 80(%rax), %xmm0 - PMINU 96(%rax), %xmm0 - PMINU 112(%rax), %xmm0 - PCMPEQ %xmm3, %xmm0 - pmovmskb %xmm0, %edx - testl %edx, %edx - jne L(exit64) - - subq $-128, %rax - - movdqa (%rax), %xmm0 - PMINU 16(%rax), %xmm0 - PMINU 32(%rax), %xmm0 - PMINU 48(%rax), %xmm0 - PCMPEQ %xmm3, %xmm0 - pmovmskb %xmm0, %edx - testl %edx, %edx - jne L(exit0) - jmp L(loop) - - .p2align 4 -L(exit64): - addq $64, %rax -L(exit0): - pxor %xmm0, %xmm0 - FIND_ZERO - - bsfq %rdx, %rdx - addq %rdx, %rax - subq %rdi, %rax - SHIFT_RETURN - ret - -#endif - -END(strlen) libc_hidden_builtin_def (strlen) diff --git a/sysdeps/x86_64/sysdep.h b/sysdeps/x86_64/sysdep.h index 0b73674f6..c8ad778fe 100644 --- a/sysdeps/x86_64/sysdep.h +++ b/sysdeps/x86_64/sysdep.h @@ -95,6 +95,28 @@ lose: \ #define R14_LP r14 #define R15_LP r15 +/* Zero upper vector registers and return with xtest. NB: Use VZEROALL + to avoid RTM abort triggered by VZEROUPPER inside transactionally. */ +#define ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST \ + xtest; \ + jz 1f; \ + vzeroall; \ + ret; \ +1: \ + vzeroupper; \ + ret + +/* Zero upper vector registers and return. */ +#ifndef ZERO_UPPER_VEC_REGISTERS_RETURN +# define ZERO_UPPER_VEC_REGISTERS_RETURN \ + VZEROUPPER; \ + ret +#endif + +#ifndef VZEROUPPER_RETURN +# define VZEROUPPER_RETURN VZEROUPPER; ret +#endif + #else /* __ASSEMBLER__ */ /* Long and pointer size in bytes. */ diff --git a/sysdeps/x86_64/tst-rsi-strlen.c b/sysdeps/x86_64/tst-rsi-strlen.c new file mode 100644 index 000000000..a80c4f85c --- /dev/null +++ b/sysdeps/x86_64/tst-rsi-strlen.c @@ -0,0 +1,81 @@ +/* Test strlen with 0 in the RSI register. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifdef WIDE +# define TEST_NAME "wcslen" +#else +# define TEST_NAME "strlen" +#endif /* WIDE */ + +#define TEST_MAIN +#include + +#ifdef WIDE +# include +# define STRLEN wcslen +# define CHAR wchar_t +#else +# define STRLEN strlen +# define CHAR char +#endif /* WIDE */ + +IMPL (STRLEN, 1) + +typedef size_t (*proto_t) (const CHAR *); + +typedef struct +{ + void (*fn) (void); +} parameter_t; + +size_t +__attribute__ ((weak, noinline, noclone)) +do_strlen (parameter_t *a, int zero, const CHAR *str) +{ + return CALL (a, str); +} + +static int +test_main (void) +{ + test_init (); + + size_t size = page_size / sizeof (CHAR) - 1; + CHAR *buf = (CHAR *) buf2; + buf[size] = 0; + + parameter_t a; + + int ret = 0; + FOR_EACH_IMPL (impl, 0) + { + a.fn = impl->fn; + /* NB: Pass 0 in RSI. */ + size_t res = do_strlen (&a, 0, buf); + if (res != size) + { + error (0, 0, "Wrong result in function %s: %zu != %zu", + impl->name, res, size); + ret = 1; + } + } + + return ret ? EXIT_FAILURE : EXIT_SUCCESS; +} + +#include diff --git a/sysdeps/x86_64/tst-rsi-wcslen.c b/sysdeps/x86_64/tst-rsi-wcslen.c new file mode 100644 index 000000000..f45a7dfb5 --- /dev/null +++ b/sysdeps/x86_64/tst-rsi-wcslen.c @@ -0,0 +1,20 @@ +/* Test wcslen with 0 in the RSI register. + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#define WIDE 1 +#include "tst-rsi-strlen.c" -- 2.30.2