GIT update of https://sourceware.org/git/glibc.git/release/2.34/master from glibc-2.34
GIT update of https://sourceware.org/git/glibc.git/release/2.34/master from glibc-2.34
Gbp-Pq: Name git-updates.diff
library will still be usable, but functionality may be lost--for
example, you can't build a shared libc with old binutils.
+'--with-default-link'
+ With '--with-default-link', the build system does not use a custom
+ linker script for linking shared objects. The default is
+ '--without-default-link', because the custom linker script is
+ needed for full RELRO protection.
+
'--with-nonshared-cflags=CFLAGS'
Use additional compiler flags CFLAGS to build the parts of the
library which are always statically linked into applications and
# Command for linking PIE programs with the C library.
ifndef +link-pie
-+link-pie-before-libc = $(if $($(@F)-no-pie),$(no-pie-ldflag),-pie) \
- -Wl,-O1 -nostdlib -nostartfiles -o $@ \
++link-pie-before-inputs = $(if $($(@F)-no-pie),$(no-pie-ldflag),-pie) \
+ -Wl,-O1 -nostdlib -nostartfiles \
$(sysdep-LDFLAGS) $(LDFLAGS) $(LDFLAGS-$(@F)) \
$(combreloc-LDFLAGS) $(relro-LDFLAGS) $(hashstyle-LDFLAGS) \
$(firstword $(CRT-$(@F)) $(csu-objpfx)S$(start-installed-name)) \
- $(+preinit) $(+prectorS) \
+ $(+preinit) $(+prectorS)
++link-pie-before-libc = -o $@ $(+link-pie-before-inputs) \
$(filter-out $(addprefix $(csu-objpfx),start.o \
S$(start-installed-name))\
$(+preinit) $(link-extra-libs) \
endif
# Command for statically linking programs with the C library.
ifndef +link-static
-+link-static-before-libc = -nostdlib -nostartfiles -static -o $@ \
++link-static-before-inputs = -nostdlib -nostartfiles -static \
$(if $($(@F)-no-pie),$(no-pie-ldflag),$(default-pie-ldflag)) \
$(sysdep-LDFLAGS) $(LDFLAGS) $(LDFLAGS-$(@F)) \
$(firstword $(CRT-$(@F)) $(csu-objpfx)$(real-static-start-installed-name)) \
- $(+preinit) $(+prectorT) \
+ $(+preinit) $(+prectorT)
++link-static-before-libc = -o $@ $(+link-static-before-inputs) \
$(filter-out $(addprefix $(csu-objpfx),start.o \
$(start-installed-name))\
$(+preinit) $(link-extra-libs-static) \
ifeq (yes,$(build-pie-default))
+link = $(+link-pie)
+link-tests = $(+link-pie-tests)
++link-tests-before-inputs = $(+link-pie-before-inputs) $(rtld-tests-LDFLAGS)
++link-tests-after-inputs = $(link-libc-tests) $(+link-pie-after-libc)
+link-printers-tests = $(+link-pie-printers-tests)
else # not build-pie-default
-+link-before-libc = -nostdlib -nostartfiles -o $@ \
++link-before-inputs = -nostdlib -nostartfiles \
$(sysdep-LDFLAGS) $(LDFLAGS) $(LDFLAGS-$(@F)) \
$(combreloc-LDFLAGS) $(relro-LDFLAGS) $(hashstyle-LDFLAGS) \
$(firstword $(CRT-$(@F)) $(csu-objpfx)$(start-installed-name)) \
- $(+preinit) $(+prector) \
+ $(+preinit) $(+prector)
++link-before-libc = -o $@ $(+link-before-inputs) \
$(filter-out $(addprefix $(csu-objpfx),start.o \
$(start-installed-name))\
$(+preinit) $(link-extra-libs) \
$(link-extra-flags) $(link-libc) $(+link-after-libc)
$(call after-link,$@)
endef
++link-tests-before-inputs = $(+link-before-inputs) $(rtld-tests-LDFLAGS)
++link-tests-after-inputs = $(link-libc-tests) $(+link-after-libc)
define +link-tests
$(CC) $(+link-before-libc) $(rtld-tests-LDFLAGS) $(link-libc-tests) \
$(+link-after-libc)
else # build-static
+link = $(+link-static)
+link-tests = $(+link-static-tests)
++link-tests-before-inputs = $(+link-static-before-inputs)
++link-tests-after-inputs = $(link-libc-static-tests) $(+link-static-after-libc)
+link-printers-tests = $(+link-static-tests)
endif # build-shared
endif # +link
endef
object-suffixes-left := $(all-object-suffixes)
include $(o-iterator)
+
+define o-iterator-doit
+$(objpfx)%$o: $(objpfx)%.cc $(before-compile); $$(compile-command.cc)
+endef
+object-suffixes-left := $(all-object-suffixes)
+include $(o-iterator)
endif
# Generate .dT files as we compile.
Please send GNU C library bug reports via <https://sourceware.org/bugzilla/>
using `glibc' in the "product" field.
+\f
+Version 2.34.1
+
+Major new features:
+
+* Audit libraries avoid unnecessary slowdown when they do not require PLT
+ tracking (by not implementing the la_pltenter or la_pltexit callbacks).
+
+Changes to build and runtime requirements:
+
+* The audit module interface version LAV_CURRENT is increased to enable
+ proper bind-now support. The loader now advertises via the la_symbind
+ flags that PLT trace is not possible.
+
+* The audit interface on aarch64 is extended to support both the indirect
+ result location register (x8) and NEON Q register. Old audit modules are
+ rejected by the loader.
+
+Security related changes:
+
+ CVE-2022-23219: Passing an overlong file name to the clnt_create
+ legacy function could result in a stack-based buffer overflow when
+ using the "unix" protocol. Reported by Martin Sebor.
+
+ CVE-2022-23218: Passing an overlong file name to the svcunix_create
+ legacy function could result in a stack-based buffer overflow.
+
+ CVE-2021-3998: Passing a path longer than PATH_MAX to the realpath
+ function could result in a memory leak and potential access of
+ uninitialized memory. Reported by Qualys.
+
+ CVE-2021-3999: Passing a buffer of size exactly 1 byte to the getcwd
+ function may result in an off-by-one buffer underflow and overflow
+ when the current working directory is longer than PATH_MAX and also
+ corresponds to the / directory through an unprivileged mount
+ namespace. Reported by Qualys.
+
+The following bugs are resolved with this release:
+
+ [12889] nptl: Fix race between pthread_kill and thread exit
+ [15533] dynamic-link: LD_AUDIT introduces an avoidable performance
+ degradation
+ [19193] nptl: pthread_kill, pthread_cancel should not fail after exit
+ [22542] CVE-2022-23219: Buffer overflow in sunrpc clnt_create for "unix"
+ [23734] la_symbind*() doesn't get called when LD_BIND_NOW is set
+ [25812] Libio vtable protection is sometimes only partially enforced
+ [26643] register x8 and quad sized NEON registers are not properly
+ saved when using ld_audit on aarch64
+ [28036] Incorrect types for pthread_mutexattr_set/getrobust_np
+ [28061] dynamic-link: A failing dlmopen called by an auditor crashed
+ [28062] dynamic-link: Suppress audit calls when a (new) namespace is
+ empty
+ [28096] elf: audit calls that use static TLS might fail
+ [28182] _TIME_BITS=64 in C++ has issues with fcntl, ioctl, prctl
+ [28223] mips: clone does not align stack
+ [28310] Do not use affinity mask for sysconf (_SC_NPROCESSORS_CONF)
+ [28338] undefined behavior in __tzfile_compute with oddball TZif file
+ [28340] ld.so crashes while loading a DSO with a read-only dynamic section
+ [28349] libc: Segfault for ping -R on qemux86 caused by recvmsg()
+ [28350] libc: ping receives SIGABRT on lib32-qemux86-64 caused by
+ recvmsg()
+ [28353] Race condition in __opensock
+ [28357] deadlock between pthread_create and ELF constructors
+ [28361] nptl: Avoid setxid deadlock with blocked signals in thread exit
+ [28407] pthread_kill assumes that kill and tgkill are equivalent
+ [28524] Conversion from ISO-2022-JP-3 with iconv may emit spurious NULs
+ [28532] powerpc64[le]: CFI for assembly templated syscalls is incorrect
+ [28607] Masked signals are delivered on thread exit
+ [28678] nptl/tst-create1 hangs sporadically
+ [28700] "dns [!UNAVAIL=return] files" NSS default for hosts is not useful
+ [28702] RISC-V: clone does not align stack
+ [28703] RISC-V: _dl_init might be called with unaligned stack
+ [28704] elf/tst-cpu-features-cpuinfo fails for KVM guests on some AMD systems
+ [28707] assert in tzfile.c __tzfile_read striking with truncated timezones
+ [28744] A64FX string functions are selected without SVE HWCAP
+ [28755] overflow bug in wcsncmp_avx2 and wcsncmp_evex
+ [28771] %ebx optimization macros are incompatible with .altmacro
+ [28768] CVE-2022-23218: Buffer overflow in sunrpc svcunix_create
+ [28769] CVE-2021-3999: Off-by-one buffer overflow/underflow in getcwd()
+ [28770] CVE-2021-3998: Unexpected return value from realpath() for too long results
+ [28784] x86: crash in 32bit memset-sse2.s when the cache size can not be determined
+ [28846] CMSG_NXTHDR may trigger -Wstrict-overflow warning
+ [28850] linux: __get_nprocs_sched reads uninitialized memory from the stack
+ [28857] FAIL: elf/tst-audit24a
+ [28860] build: --enable-kernel=5.1.0 build fails because of missing
+ __convert_scm_timestamps
+ [28865] linux: _SC_NPROCESSORS_CONF and _SC_NPROCESSORS_ONLN are inaccurate
+ without /sys and /proc
+ [28896] strncmp-avx2-rtm and wcsncmp-avx2-rtm fallback on non-rtm
+ variants when avoiding overflow
+ [28953] nss: Protect against errno changes in function lookup
+ [29029] nptl: poll() spuriously returns EINTR during thread
+ cancellation and with cancellation disabled
+ [29069] libc: fstatat64_time64_statx wrapper broken on MIPS N32 with
+ -D_FILE_OFFSET_BITS=64 and -D_TIME_BITS=64
+ [29078] <dlfcn.h> functions unusable during early auditing
+ [29097] time: fchmodat does not handle 64 bit time_t for
+ AT_SYMLINK_NOFOLLOW
+ [29165] libc: [Regression] broken argv adjustment
+ [29187] dynamic-link: [regression] broken argv adjustment for nios2
+ [29203] libc: daemon is not y2038 aware
+ [29204] libc: getusershell is not y2038 aware
+ [29207] libc: posix_fallocate fallback implementation is not y2038 aware
+ [29208] libc: fpathconf(_PC_ASYNC_IO) is not y2038 aware
+ [29209] libc: isfdtype is not y2038 aware
+ [29210] network: ruserpass is not y2038 aware
+ [29211] libc: __open_catalog is not y2038 aware
+ [29213] libc: gconv_parseconfdir is not y2038 aware
+ [29214] nptl: pthread_setcanceltype fails to set type
+ [29446] _dlopen now ignores dl_caller argument in static mode
+ [29490] alpha: New __brk_call implementation is broken
+
\f
Version 2.34
--- /dev/null
+/* Data structure for communication from the run-time dynamic linker for
+ loaded ELF shared objects. LAV_CURRENT definition.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _LINK_H
+# error "Never include <bits/link_lavcurrent.h> directly; use <link.h> instead."
+#endif
+
+/* Version numbers for la_version handshake interface. */
+#define LAV_CURRENT 2
+ CMSG_ALIGN (sizeof (struct cmsghdr)))
#define CMSG_LEN(len) (CMSG_ALIGN (sizeof (struct cmsghdr)) + (len))
+/* Given a length, return the additional padding necessary such that
+ len + __CMSG_PADDING(len) == CMSG_ALIGN (len). */
+#define __CMSG_PADDING(len) ((sizeof (size_t) \
+ - ((len) & (sizeof (size_t) - 1))) \
+ & (sizeof (size_t) - 1))
+
extern struct cmsghdr *__cmsg_nxthdr (struct msghdr *__mhdr,
struct cmsghdr *__cmsg) __THROW;
#ifdef __USE_EXTERN_INLINES
_EXTERN_INLINE struct cmsghdr *
__NTH (__cmsg_nxthdr (struct msghdr *__mhdr, struct cmsghdr *__cmsg))
{
+ /* We may safely assume that __cmsg lies between __mhdr->msg_control and
+ __mhdr->msg_controllen because the user is required to obtain the first
+ cmsg via CMSG_FIRSTHDR, set its length, then obtain subsequent cmsgs
+ via CMSG_NXTHDR, setting lengths along the way. However, we don't yet
+ trust the value of __cmsg->cmsg_len and therefore do not use it in any
+ pointer arithmetic until we check its value. */
+
+ unsigned char * __msg_control_ptr = (unsigned char *) __mhdr->msg_control;
+ unsigned char * __cmsg_ptr = (unsigned char *) __cmsg;
+
+ size_t __size_needed = sizeof (struct cmsghdr)
+ + __CMSG_PADDING (__cmsg->cmsg_len);
+
+ /* The current header is malformed, too small to be a full header. */
if ((size_t) __cmsg->cmsg_len < sizeof (struct cmsghdr))
- /* The kernel header does this so there may be a reason. */
return (struct cmsghdr *) 0;
+ /* There isn't enough space between __cmsg and the end of the buffer to
+ hold the current cmsg *and* the next one. */
+ if (((size_t)
+ (__msg_control_ptr + __mhdr->msg_controllen - __cmsg_ptr)
+ < __size_needed)
+ || ((size_t)
+ (__msg_control_ptr + __mhdr->msg_controllen - __cmsg_ptr
+ - __size_needed)
+ < __cmsg->cmsg_len))
+
+ return (struct cmsghdr *) 0;
+
+ /* Now, we trust cmsg_len and can use it to find the next header. */
__cmsg = (struct cmsghdr *) ((unsigned char *) __cmsg
+ CMSG_ALIGN (__cmsg->cmsg_len));
- if ((unsigned char *) (__cmsg + 1) > ((unsigned char *) __mhdr->msg_control
- + __mhdr->msg_controllen)
- || ((unsigned char *) __cmsg + CMSG_ALIGN (__cmsg->cmsg_len)
- > ((unsigned char *) __mhdr->msg_control + __mhdr->msg_controllen)))
- /* No more entries. */
- return (struct cmsghdr *) 0;
return __cmsg;
}
#endif /* Use `extern inline'. */
while (__l < __u)
{
__idx = (__l + __u) / 2;
- __p = (void *) (((const char *) __base) + (__idx * __size));
+ __p = (const void *) (((const char *) __base) + (__idx * __size));
__comparison = (*__compar) (__key, __p);
if (__comparison < 0)
__u = __idx;
else if (__comparison > 0)
__l = __idx + 1;
else
- return (void *) __p;
+ {
+#if __GNUC_PREREQ(4, 6)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+ return (void *) __p;
+#if __GNUC_PREREQ(4, 6)
+# pragma GCC diagnostic pop
+#endif
+ }
}
return NULL;
__nl_catd catalog)
{
int fd = -1;
- struct stat64 st;
+ struct __stat64_t64 st;
int swapping;
size_t cnt;
size_t max_offset;
return -1;
}
- if (__builtin_expect (__fstat64 (fd, &st), 0) < 0)
+ if (__glibc_unlikely (__fstat64_time64 (fd, &st) < 0))
goto close_unlock_return;
if (__builtin_expect (!S_ISREG (st.st_mode), 0)
/* Define if static PIE is enabled. */
#define ENABLE_STATIC_PIE 0
-/* Some compiler options may now allow to use ebp in __asm__ (used mainly
- in i386 6 argument syscall issue). */
-#define CAN_USE_REGISTER_ASM_EBP 0
-
/* The default value of x86 CET control. */
#define DEFAULT_DL_X86_CET_CONTROL cet_elf_property
if test "${with_default_link+set}" = set; then :
withval=$with_default_link; use_default_link=$withval
else
- use_default_link=default
+ use_default_link=no
fi
$as_echo "$libc_cv_hashstyle" >&6; }
-# The linker's default -shared behavior is good enough if it
-# does these things that our custom linker scripts ensure that
-# all allocated NOTE sections come first.
-if test "$use_default_link" = default; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for sufficient default -shared layout" >&5
-$as_echo_n "checking for sufficient default -shared layout... " >&6; }
-if ${libc_cv_use_default_link+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- libc_cv_use_default_link=no
- cat > conftest.s <<\EOF
- .section .note.a,"a",%note
- .balign 4
- .long 4,4,9
- .string "GNU"
- .string "foo"
- .section .note.b,"a",%note
- .balign 4
- .long 4,4,9
- .string "GNU"
- .string "bar"
-EOF
- if { ac_try=' ${CC-cc} $ASFLAGS -shared -o conftest.so conftest.s 1>&5'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; } &&
- ac_try=`$READELF -S conftest.so | sed -n \
- '${x;p;}
- s/^ *\[ *[1-9][0-9]*\] *\([^ ][^ ]*\) *\([^ ][^ ]*\) .*$/\2 \1/
- t a
- b
- : a
- H'`
- then
- libc_seen_a=no libc_seen_b=no
- set -- $ac_try
- while test $# -ge 2 -a "$1" = NOTE; do
- case "$2" in
- .note.a) libc_seen_a=yes ;;
- .note.b) libc_seen_b=yes ;;
- esac
- shift 2
- done
- case "$libc_seen_a$libc_seen_b" in
- yesyes)
- libc_cv_use_default_link=yes
- ;;
- *)
- echo >&5 "\
-$libc_seen_a$libc_seen_b from:
-$ac_try"
- ;;
- esac
- fi
- rm -f conftest*
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_use_default_link" >&5
-$as_echo "$libc_cv_use_default_link" >&6; }
- use_default_link=$libc_cv_use_default_link
-fi
-
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for GLOB_DAT reloc" >&5
$as_echo_n "checking for GLOB_DAT reloc... " >&6; }
if ${libc_cv_has_glob_dat+:} false; then :
AS_HELP_STRING([--with-default-link],
[do not use explicit linker scripts]),
[use_default_link=$withval],
- [use_default_link=default])
+ [use_default_link=no])
dnl Additional build flags injection.
AC_ARG_WITH([nonshared-cflags],
rm -f conftest*])
AC_SUBST(libc_cv_hashstyle)
-# The linker's default -shared behavior is good enough if it
-# does these things that our custom linker scripts ensure that
-# all allocated NOTE sections come first.
-if test "$use_default_link" = default; then
- AC_CACHE_CHECK([for sufficient default -shared layout],
- libc_cv_use_default_link, [dnl
- libc_cv_use_default_link=no
- cat > conftest.s <<\EOF
- .section .note.a,"a",%note
- .balign 4
- .long 4,4,9
- .string "GNU"
- .string "foo"
- .section .note.b,"a",%note
- .balign 4
- .long 4,4,9
- .string "GNU"
- .string "bar"
-EOF
- if AC_TRY_COMMAND([dnl
- ${CC-cc} $ASFLAGS -shared -o conftest.so conftest.s 1>&AS_MESSAGE_LOG_FD]) &&
- ac_try=`$READELF -S conftest.so | sed -n \
- ['${x;p;}
- s/^ *\[ *[1-9][0-9]*\] *\([^ ][^ ]*\) *\([^ ][^ ]*\) .*$/\2 \1/
- t a
- b
- : a
- H']`
- then
- libc_seen_a=no libc_seen_b=no
- set -- $ac_try
- while test $# -ge 2 -a "$1" = NOTE; do
- case "$2" in
- .note.a) libc_seen_a=yes ;;
- .note.b) libc_seen_b=yes ;;
- esac
- shift 2
- done
- case "$libc_seen_a$libc_seen_b" in
- yesyes)
- libc_cv_use_default_link=yes
- ;;
- *)
- echo >&AS_MESSAGE_LOG_FD "\
-$libc_seen_a$libc_seen_b from:
-$ac_try"
- ;;
- esac
- fi
- rm -f conftest*])
- use_default_link=$libc_cv_use_default_link
-fi
-
AC_CACHE_CHECK(for GLOB_DAT reloc,
libc_cv_has_glob_dat, [dnl
cat > conftest.c <<EOF
conformtest.py $(conformtest-headers-data)
(set -e; std_hdr=$*; std=$${std_hdr%%/*}; hdr=$${std_hdr#*/}; \
mkdir -p $(@D); \
- $(PYTHON) $< --cc='$(CC)' --flags='$(conformtest-cc-flags)' \
+ $(PYTHON) $< --cc='$(CC) $(pie-default)' \
+ --flags='$(conformtest-cc-flags)' \
+ --ldflags='$(+link-tests-before-inputs)' \
+ --libs='$(+link-tests-after-inputs)' \
+ --run-program-prefix='$(run-program-prefix)' \
--standard=$$std --header=$$hdr $(conformtest-xfail) \
$(conformtest-cross) \
> $@ 2>&1); \
class HeaderTests(object):
"""The set of tests run for a header."""
- def __init__(self, header, standard, cc, flags, cross, xfail):
+ def __init__(self, header, standard, cc, flags, ldflags, libs,
+ run_program_prefix, cross, xfail):
"""Initialize a HeaderTests object."""
self.header = header
self.standard = standard
self.cc = cc
self.flags = flags
+ self.ldflags = ldflags
+ self.libs = libs
+ self.run_program_prefix = run_program_prefix
self.cross = cross
self.xfail_str = xfail
self.cflags_namespace = ('%s -fno-builtin %s -D_ISOMAC'
exe_file = os.path.join(self.temp_dir, 'test')
with open(c_file, 'w') as c_file_out:
c_file_out.write('#include <%s>\n%s' % (self.header, text))
- cmd = ('%s %s %s -o %s' % (self.cc, self.cflags, c_file, exe_file))
+ cmd = ('%s %s %s %s %s -o %s' % (self.cc, self.cflags, self.ldflags,
+ c_file, self.libs, exe_file))
try:
subprocess.check_call(cmd, shell=True)
except subprocess.CalledProcessError:
self.note_skip(name)
return
try:
- subprocess.check_call(exe_file, shell=True)
+ subprocess.check_call('%s %s' % (self.run_program_prefix,
+ exe_file),
+ shell=True)
except subprocess.CalledProcessError:
self.note_error(name, self.group_xfail)
return
help='C compiler to use')
parser.add_argument('--flags', metavar='CFLAGS',
help='Compiler flags to use with CC')
+ parser.add_argument('--ldflags', metavar='LDFLAGS',
+ help='Compiler arguments for linking before inputs')
+ parser.add_argument('--libs', metavar='LIBS',
+ help='Compiler arguments for linking after inputs')
+ parser.add_argument('--run-program-prefix', metavar='RUN-PROGRAM-PREFIX',
+ help='Wrapper for running newly built program')
parser.add_argument('--cross', action='store_true',
help='Do not run compiled test programs')
parser.add_argument('--xfail', metavar='COND',
help='Name of condition for XFAILs')
args = parser.parse_args()
tests = HeaderTests(args.header, args.standard, args.cc, args.flags,
+ args.ldflags, args.libs, args.run_program_prefix,
args.cross, args.xfail)
tests.run()
}
}
- /* Initialize very early so that tunables can use it. */
- __libc_init_secure ();
-
__tunables_init (__environ);
ARCH_INIT_CPU_FEATURES ();
/* This is a current program. Use the dynamic segment to find
constructors. */
call_init (argc, argv, __environ);
-#else /* !SHARED */
- call_init (argc, argv, __environ);
-#endif /* SHARED */
-#ifdef SHARED
/* Auditing checkpoint: we have a new object. */
- if (__glibc_unlikely (GLRO(dl_naudit) > 0))
- {
- struct audit_ifaces *afct = GLRO(dl_audit);
- struct link_map *head = GL(dl_ns)[LM_ID_BASE]._ns_loaded;
- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
- {
- if (afct->preinit != NULL)
- afct->preinit (&link_map_audit_state (head, cnt)->cookie);
-
- afct = afct->next;
- }
- }
-#endif
+ _dl_audit_preinit (GL(dl_ns)[LM_ID_BASE]._ns_loaded);
-#ifdef SHARED
if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_IMPCALLS))
GLRO(dl_debug_printf) ("\ntransferring control: %s\n\n", argv[0]);
-#endif
+#else /* !SHARED */
+ call_init (argc, argv, __environ);
-#ifndef SHARED
_dl_debug_initialize (0, LM_ID_BASE);
#endif
_dl_allocate_tls_storage (in elf/dl-tls.c) does using __libc_memalign
and dl_tls_static_align. */
tcb_offset = roundup (memsz + GLRO(dl_tls_static_surplus), max_align);
- tlsblock = __sbrk (tcb_offset + TLS_INIT_TCB_SIZE + max_align);
+ tlsblock = _dl_early_allocate (tcb_offset + TLS_INIT_TCB_SIZE + max_align);
+ if (tlsblock == NULL)
+ _startup_fatal ("Fatal glibc error: Cannot allocate TLS block\n");
#elif TLS_DTV_AT_TP
tcb_offset = roundup (TLS_INIT_TCB_SIZE, align ?: 1);
- tlsblock = __sbrk (tcb_offset + memsz + max_align
- + TLS_PRE_TCB_SIZE + GLRO(dl_tls_static_surplus));
+ tlsblock = _dl_early_allocate (tcb_offset + memsz + max_align
+ + TLS_PRE_TCB_SIZE
+ + GLRO(dl_tls_static_surplus));
+ if (tlsblock == NULL)
+ _startup_fatal ("Fatal glibc error: Cannot allocate TLS block\n");
tlsblock += TLS_PRE_TCB_SIZE;
#else
/* In case a model with a different layout for the TCB and DTV
-# Copyright (C) 1998-2021 Free Software Foundation, Inc.
+# Copyright (C) 1998-2022 Free Software Foundation, Inc.
+# Copyright The GNU Toolchain Authors.
# This file is part of the GNU C Library.
# The GNU C Library is free software; you can redistribute it and/or
CPPFLAGS-tst-longjmp_chk2.c += -D_FORTIFY_SOURCE=1
CFLAGS-tst-longjmp_chk3.c += -fexceptions -fasynchronous-unwind-tables
CPPFLAGS-tst-longjmp_chk3.c += -D_FORTIFY_SOURCE=1
+CPPFLAGS-tst-realpath-chk.c += -D_FORTIFY_SOURCE=2
+
+# _FORTIFY_SOURCE tests.
+# Auto-generate tests for _FORTIFY_SOURCE for different levels, compilers and
+# preprocessor conditions based on tst-fortify.c.
+#
+# To add a new test condition, define a cflags-$(cond) make variable to set
+# CFLAGS for the file.
+
+tests-all-chk = tst-fortify
+tests-c-chk =
+tests-cc-chk =
+
+CFLAGS-tst-fortify.c += -Wno-format -Wno-deprecated-declarations -Wno-error
+
+# No additional flags for the default tests.
+define cflags-default
+endef
+
+define cflags-lfs
+CFLAGS-tst-fortify-$(1)-lfs-$(2).$(1) += -D_FILE_OFFSET_BITS=64
+endef
+
+define cflags-nongnu
+CFLAGS-tst-fortify-$(1)-nongnu-$(2).$(1) += -D_LARGEFILE64_SOURCE=1
+endef
+
+src-chk-nongnu = \#undef _GNU_SOURCE
# We know these tests have problems with format strings, this is what
# we are testing. Disable that warning. They are also testing
# deprecated functions (notably gets) so disable that warning as well.
# And they also generate warnings from warning attributes, which
# cannot be disabled via pragmas, so require -Wno-error to be used.
-CFLAGS-tst-chk1.c += -Wno-format -Wno-deprecated-declarations -Wno-error
-CFLAGS-tst-chk2.c += -Wno-format -Wno-deprecated-declarations -Wno-error
-CFLAGS-tst-chk3.c += -Wno-format -Wno-deprecated-declarations -Wno-error
-CFLAGS-tst-chk4.cc += -Wno-format -Wno-deprecated-declarations -Wno-error
-CFLAGS-tst-chk5.cc += -Wno-format -Wno-deprecated-declarations -Wno-error
-CFLAGS-tst-chk6.cc += -Wno-format -Wno-deprecated-declarations -Wno-error
-CFLAGS-tst-lfschk1.c += -Wno-format -Wno-deprecated-declarations -Wno-error
-CFLAGS-tst-lfschk2.c += -Wno-format -Wno-deprecated-declarations -Wno-error
-CFLAGS-tst-lfschk3.c += -Wno-format -Wno-deprecated-declarations -Wno-error
-CFLAGS-tst-lfschk4.cc += -Wno-format -Wno-deprecated-declarations -Wno-error
-CFLAGS-tst-lfschk5.cc += -Wno-format -Wno-deprecated-declarations -Wno-error
-CFLAGS-tst-lfschk6.cc += -Wno-format -Wno-deprecated-declarations -Wno-error
-LDLIBS-tst-chk4 = -lstdc++
-LDLIBS-tst-chk5 = -lstdc++
-LDLIBS-tst-chk6 = -lstdc++
-LDLIBS-tst-lfschk4 = -lstdc++
-LDLIBS-tst-lfschk5 = -lstdc++
-LDLIBS-tst-lfschk6 = -lstdc++
+define gen-chk-test
+tests-$(1)-chk += tst-fortify-$(1)-$(2)-$(3)
+CFLAGS-tst-fortify-$(1)-$(2)-$(3).$(1) += -D_FORTIFY_SOURCE=$(3) -Wno-format \
+ -Wno-deprecated-declarations \
+ -Wno-error
+$(eval $(call cflags-$(2),$(1),$(3)))
+$(objpfx)tst-fortify-$(1)-$(2)-$(3).$(1): tst-fortify.c Makefile
+ ( echo "/* Autogenerated from Makefile. */"; \
+ echo "$(src-chk-$(2))"; \
+ echo "#include \"tst-fortify.c\"" ) > $$@.tmp
+ mv $$@.tmp $$@
+endef
+
+chk-extensions = c cc
+chk-types = default lfs nongnu
+chk-levels = 1 2 3
+
+$(foreach e,$(chk-extensions), \
+ $(foreach t,$(chk-types), \
+ $(foreach l,$(chk-levels), \
+ $(eval $(call gen-chk-test,$(e),$(t),$(l))))))
+
+tests-all-chk += $(tests-c-chk) $(tests-cc-chk)
+
+define link-cc
+LDLIBS-$(1) = -lstdc++
+endef
+$(foreach t,$(tests-cc-chk), $(eval $(call link-cc,$(t))))
# backtrace_symbols only works if we link with -rdynamic. backtrace
# requires unwind tables on most architectures.
CFLAGS-tst-ssp-1.c += -fstack-protector-all
-tests = backtrace-tst tst-longjmp_chk tst-chk1 tst-chk2 tst-chk3 \
- tst-lfschk1 tst-lfschk2 tst-lfschk3 test-strcpy_chk test-stpcpy_chk \
- tst-chk4 tst-chk5 tst-chk6 tst-lfschk4 tst-lfschk5 tst-lfschk6 \
- tst-longjmp_chk2 tst-backtrace2 tst-backtrace3 tst-backtrace4 \
- tst-backtrace5 tst-backtrace6
+tests = backtrace-tst \
+ tst-longjmp_chk \
+ test-strcpy_chk \
+ test-stpcpy_chk \
+ tst-longjmp_chk2 \
+ tst-backtrace2 \
+ tst-backtrace3 \
+ tst-backtrace4 \
+ tst-backtrace5 \
+ tst-backtrace6 \
+ tst-realpath-chk \
+ $(tests-all-chk)
ifeq ($(have-ssp),yes)
tests += tst-ssp-1
endif
ifeq (,$(CXX))
-tests-unsupported = tst-chk4 tst-chk5 tst-chk6 \
- tst-lfschk4 tst-lfschk5 tst-lfschk6
+tests-unsupported = $(tests-cc-chk)
endif
extra-libs = libSegFault libpcprofile
LOCALES := de_DE.UTF-8
include ../gen-locales.mk
-$(objpfx)tst-chk1.out: $(gen-locales)
-$(objpfx)tst-chk2.out: $(gen-locales)
-$(objpfx)tst-chk3.out: $(gen-locales)
-$(objpfx)tst-chk4.out: $(gen-locales)
-$(objpfx)tst-chk5.out: $(gen-locales)
-$(objpfx)tst-chk6.out: $(gen-locales)
-$(objpfx)tst-lfschk1.out: $(gen-locales)
-$(objpfx)tst-lfschk2.out: $(gen-locales)
-$(objpfx)tst-lfschk3.out: $(gen-locales)
-$(objpfx)tst-lfschk4.out: $(gen-locales)
-$(objpfx)tst-lfschk5.out: $(gen-locales)
-$(objpfx)tst-lfschk6.out: $(gen-locales)
+define chk-gen-locales
+$(objpfx)$(1).out: $(gen-locales)
+endef
+$(foreach t, $(tests-all-chk), $(eval $(call chk-gen-locales,$(t))))
endif
sLIBdir := $(shell echo $(slibdir) | sed 's,lib\(\|64\)$$,\\\\$$LIB,')
+++ /dev/null
-/* Copyright (C) 2004-2021 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Jakub Jelinek <jakub@redhat.com>, 2004.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-/* This file tests gets. Force it to be declared. */
-#include <features.h>
-#undef __GLIBC_USE_DEPRECATED_GETS
-#define __GLIBC_USE_DEPRECATED_GETS 1
-
-#include <assert.h>
-#include <fcntl.h>
-#include <locale.h>
-#include <obstack.h>
-#include <setjmp.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <wchar.h>
-#include <sys/poll.h>
-#include <sys/select.h>
-#include <sys/socket.h>
-#include <sys/un.h>
-
-
-#define obstack_chunk_alloc malloc
-#define obstack_chunk_free free
-
-char *temp_filename;
-static void do_prepare (void);
-static int do_test (void);
-#define PREPARE(argc, argv) do_prepare ()
-#define TEST_FUNCTION do_test ()
-#include "../test-skeleton.c"
-
-static void
-do_prepare (void)
-{
- int temp_fd = create_temp_file ("tst-chk1.", &temp_filename);
- if (temp_fd == -1)
- {
- printf ("cannot create temporary file: %m\n");
- exit (1);
- }
-
- const char *strs = "abcdefgh\nABCDEFGHI\nabcdefghij\nABCDEFGHIJ";
- if ((size_t) write (temp_fd, strs, strlen (strs)) != strlen (strs))
- {
- puts ("could not write test strings into file");
- unlink (temp_filename);
- exit (1);
- }
-}
-
-volatile int chk_fail_ok;
-volatile int ret;
-jmp_buf chk_fail_buf;
-
-static void
-handler (int sig)
-{
- if (chk_fail_ok)
- {
- chk_fail_ok = 0;
- longjmp (chk_fail_buf, 1);
- }
- else
- _exit (127);
-}
-
-char buf[10];
-wchar_t wbuf[10];
-volatile size_t l0;
-volatile char *p;
-volatile wchar_t *wp;
-const char *str1 = "JIHGFEDCBA";
-const char *str2 = "F";
-const char *str3 = "%s%n%s%n";
-const char *str4 = "Hello, ";
-const char *str5 = "World!\n";
-const wchar_t *wstr1 = L"JIHGFEDCBA";
-const wchar_t *wstr2 = L"F";
-const wchar_t *wstr3 = L"%s%n%s%n";
-const wchar_t *wstr4 = L"Hello, ";
-const wchar_t *wstr5 = L"World!\n";
-char buf2[10] = "%s";
-int num1 = 67;
-int num2 = 987654;
-
-#define FAIL() \
- do { printf ("Failure on line %d\n", __LINE__); ret = 1; } while (0)
-#define CHK_FAIL_START \
- chk_fail_ok = 1; \
- if (! setjmp (chk_fail_buf)) \
- {
-#define CHK_FAIL_END \
- chk_fail_ok = 0; \
- FAIL (); \
- }
-#if __USE_FORTIFY_LEVEL >= 2 && (!defined __cplusplus || defined __va_arg_pack)
-# define CHK_FAIL2_START CHK_FAIL_START
-# define CHK_FAIL2_END CHK_FAIL_END
-#else
-# define CHK_FAIL2_START
-# define CHK_FAIL2_END
-#endif
-
-static int
-do_test (void)
-{
- set_fortify_handler (handler);
-
- struct A { char buf1[9]; char buf2[1]; } a;
- struct wA { wchar_t buf1[9]; wchar_t buf2[1]; } wa;
-
- printf ("Test checking routines at fortify level %d\n",
-#ifdef __USE_FORTIFY_LEVEL
- (int) __USE_FORTIFY_LEVEL
-#else
- 0
-#endif
- );
-
-#if defined __USE_FORTIFY_LEVEL && !defined __fortify_function
- printf ("Test skipped");
- if (l0 == 0)
- return 0;
-#endif
-
- /* These ops can be done without runtime checking of object size. */
- memcpy (buf, "abcdefghij", 10);
- memmove (buf + 1, buf, 9);
- if (memcmp (buf, "aabcdefghi", 10))
- FAIL ();
-
- memcpy (buf, "abcdefghij", 10);
- bcopy (buf, buf + 1, 9);
- if (memcmp (buf, "aabcdefghi", 10))
- FAIL ();
-
- if (mempcpy (buf + 5, "abcde", 5) != buf + 10
- || memcmp (buf, "aabcdabcde", 10))
- FAIL ();
-
- memset (buf + 8, 'j', 2);
- if (memcmp (buf, "aabcdabcjj", 10))
- FAIL ();
-
- bzero (buf + 8, 2);
- if (memcmp (buf, "aabcdabc\0\0", 10))
- FAIL ();
-
- explicit_bzero (buf + 6, 4);
- if (memcmp (buf, "aabcda\0\0\0\0", 10))
- FAIL ();
-
- strcpy (buf + 4, "EDCBA");
- if (memcmp (buf, "aabcEDCBA", 10))
- FAIL ();
-
- if (stpcpy (buf + 8, "F") != buf + 9 || memcmp (buf, "aabcEDCBF", 10))
- FAIL ();
-
- strncpy (buf + 6, "X", 4);
- if (memcmp (buf, "aabcEDX\0\0", 10))
- FAIL ();
-
- if (sprintf (buf + 7, "%s", "67") != 2 || memcmp (buf, "aabcEDX67", 10))
- FAIL ();
-
- if (snprintf (buf + 7, 3, "%s", "987654") != 6
- || memcmp (buf, "aabcEDX98", 10))
- FAIL ();
-
- /* These ops need runtime checking, but shouldn't __chk_fail. */
- memcpy (buf, "abcdefghij", l0 + 10);
- memmove (buf + 1, buf, l0 + 9);
- if (memcmp (buf, "aabcdefghi", 10))
- FAIL ();
-
- memcpy (buf, "abcdefghij", l0 + 10);
- bcopy (buf, buf + 1, l0 + 9);
- if (memcmp (buf, "aabcdefghi", 10))
- FAIL ();
-
- if (mempcpy (buf + 5, "abcde", l0 + 5) != buf + 10
- || memcmp (buf, "aabcdabcde", 10))
- FAIL ();
-
- memset (buf + 8, 'j', l0 + 2);
- if (memcmp (buf, "aabcdabcjj", 10))
- FAIL ();
-
- bzero (buf + 8, l0 + 2);
- if (memcmp (buf, "aabcdabc\0\0", 10))
- FAIL ();
-
- explicit_bzero (buf + 6, l0 + 4);
- if (memcmp (buf, "aabcda\0\0\0\0", 10))
- FAIL ();
-
- strcpy (buf + 4, str1 + 5);
- if (memcmp (buf, "aabcEDCBA", 10))
- FAIL ();
-
- if (stpcpy (buf + 8, str2) != buf + 9 || memcmp (buf, "aabcEDCBF", 10))
- FAIL ();
-
- strncpy (buf + 6, "X", l0 + 4);
- if (memcmp (buf, "aabcEDX\0\0", 10))
- FAIL ();
-
- if (stpncpy (buf + 5, "cd", l0 + 5) != buf + 7
- || memcmp (buf, "aabcEcd\0\0", 10))
- FAIL ();
-
- if (sprintf (buf + 7, "%d", num1) != 2 || memcmp (buf, "aabcEcd67", 10))
- FAIL ();
-
- if (snprintf (buf + 7, 3, "%d", num2) != 6 || memcmp (buf, "aabcEcd98", 10))
- FAIL ();
-
- buf[l0 + 8] = '\0';
- strcat (buf, "A");
- if (memcmp (buf, "aabcEcd9A", 10))
- FAIL ();
-
- buf[l0 + 7] = '\0';
- strncat (buf, "ZYXWV", l0 + 2);
- if (memcmp (buf, "aabcEcdZY", 10))
- FAIL ();
-
- /* The following tests are supposed to succeed at all fortify
- levels, even though they overflow a.buf1 into a.buf2. */
- memcpy (a.buf1, "abcdefghij", l0 + 10);
- memmove (a.buf1 + 1, a.buf1, l0 + 9);
- if (memcmp (a.buf1, "aabcdefghi", 10))
- FAIL ();
-
- memcpy (a.buf1, "abcdefghij", l0 + 10);
- bcopy (a.buf1, a.buf1 + 1, l0 + 9);
- if (memcmp (a.buf1, "aabcdefghi", 10))
- FAIL ();
-
- if (mempcpy (a.buf1 + 5, "abcde", l0 + 5) != a.buf1 + 10
- || memcmp (a.buf1, "aabcdabcde", 10))
- FAIL ();
-
- memset (a.buf1 + 8, 'j', l0 + 2);
- if (memcmp (a.buf1, "aabcdabcjj", 10))
- FAIL ();
-
- bzero (a.buf1 + 8, l0 + 2);
- if (memcmp (a.buf1, "aabcdabc\0\0", 10))
- FAIL ();
-
- explicit_bzero (a.buf1 + 6, l0 + 4);
- if (memcmp (a.buf1, "aabcda\0\0\0\0", 10))
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL < 2
- /* The following tests are supposed to crash with -D_FORTIFY_SOURCE=2
- and sufficient GCC support, as the string operations overflow
- from a.buf1 into a.buf2. */
- strcpy (a.buf1 + 4, str1 + 5);
- if (memcmp (a.buf1, "aabcEDCBA", 10))
- FAIL ();
-
- if (stpcpy (a.buf1 + 8, str2) != a.buf1 + 9
- || memcmp (a.buf1, "aabcEDCBF", 10))
- FAIL ();
-
- strncpy (a.buf1 + 6, "X", l0 + 4);
- if (memcmp (a.buf1, "aabcEDX\0\0", 10))
- FAIL ();
-
- if (sprintf (a.buf1 + 7, "%d", num1) != 2
- || memcmp (a.buf1, "aabcEDX67", 10))
- FAIL ();
-
- if (snprintf (a.buf1 + 7, 3, "%d", num2) != 6
- || memcmp (a.buf1, "aabcEDX98", 10))
- FAIL ();
-
- a.buf1[l0 + 8] = '\0';
- strcat (a.buf1, "A");
- if (memcmp (a.buf1, "aabcEDX9A", 10))
- FAIL ();
-
- a.buf1[l0 + 7] = '\0';
- strncat (a.buf1, "ZYXWV", l0 + 2);
- if (memcmp (a.buf1, "aabcEDXZY", 10))
- FAIL ();
-
-#endif
-
-#if __USE_FORTIFY_LEVEL >= 1
- /* Now check if all buffer overflows are caught at runtime.
- N.B. All tests involving a length parameter need to be done
- twice: once with the length a compile-time constant, once without. */
-
- CHK_FAIL_START
- memcpy (buf + 1, "abcdefghij", 10);
- CHK_FAIL_END
-
- CHK_FAIL_START
- memcpy (buf + 1, "abcdefghij", l0 + 10);
- CHK_FAIL_END
-
- CHK_FAIL_START
- memmove (buf + 2, buf + 1, 9);
- CHK_FAIL_END
-
- CHK_FAIL_START
- memmove (buf + 2, buf + 1, l0 + 9);
- CHK_FAIL_END
-
- CHK_FAIL_START
- bcopy (buf + 1, buf + 2, 9);
- CHK_FAIL_END
-
- CHK_FAIL_START
- bcopy (buf + 1, buf + 2, l0 + 9);
- CHK_FAIL_END
-
- CHK_FAIL_START
- p = (char *) mempcpy (buf + 6, "abcde", 5);
- CHK_FAIL_END
-
- CHK_FAIL_START
- p = (char *) mempcpy (buf + 6, "abcde", l0 + 5);
- CHK_FAIL_END
-
- CHK_FAIL_START
- memset (buf + 9, 'j', 2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- memset (buf + 9, 'j', l0 + 2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- bzero (buf + 9, 2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- bzero (buf + 9, l0 + 2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- explicit_bzero (buf + 9, 2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- explicit_bzero (buf + 9, l0 + 2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- strcpy (buf + 5, str1 + 5);
- CHK_FAIL_END
-
- CHK_FAIL_START
- p = stpcpy (buf + 9, str2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- strncpy (buf + 7, "X", 4);
- CHK_FAIL_END
-
- CHK_FAIL_START
- strncpy (buf + 7, "X", l0 + 4);
- CHK_FAIL_END
-
- CHK_FAIL_START
- stpncpy (buf + 6, "cd", 5);
- CHK_FAIL_END
-
- CHK_FAIL_START
- stpncpy (buf + 6, "cd", l0 + 5);
- CHK_FAIL_END
-
-# if !defined __cplusplus || defined __va_arg_pack
- CHK_FAIL_START
- sprintf (buf + 8, "%d", num1);
- CHK_FAIL_END
-
- CHK_FAIL_START
- snprintf (buf + 8, 3, "%d", num2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- snprintf (buf + 8, l0 + 3, "%d", num2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- swprintf (wbuf + 8, 3, L"%d", num1);
- CHK_FAIL_END
-
- CHK_FAIL_START
- swprintf (wbuf + 8, l0 + 3, L"%d", num1);
- CHK_FAIL_END
-# endif
-
- memcpy (buf, str1 + 2, 9);
- CHK_FAIL_START
- strcat (buf, "AB");
- CHK_FAIL_END
-
- memcpy (buf, str1 + 3, 8);
- CHK_FAIL_START
- strncat (buf, "ZYXWV", 3);
- CHK_FAIL_END
-
- memcpy (buf, str1 + 3, 8);
- CHK_FAIL_START
- strncat (buf, "ZYXWV", l0 + 3);
- CHK_FAIL_END
-
- CHK_FAIL_START
- memcpy (a.buf1 + 1, "abcdefghij", 10);
- CHK_FAIL_END
-
- CHK_FAIL_START
- memcpy (a.buf1 + 1, "abcdefghij", l0 + 10);
- CHK_FAIL_END
-
- CHK_FAIL_START
- memmove (a.buf1 + 2, a.buf1 + 1, 9);
- CHK_FAIL_END
-
- CHK_FAIL_START
- memmove (a.buf1 + 2, a.buf1 + 1, l0 + 9);
- CHK_FAIL_END
-
- CHK_FAIL_START
- bcopy (a.buf1 + 1, a.buf1 + 2, 9);
- CHK_FAIL_END
-
- CHK_FAIL_START
- bcopy (a.buf1 + 1, a.buf1 + 2, l0 + 9);
- CHK_FAIL_END
-
- CHK_FAIL_START
- p = (char *) mempcpy (a.buf1 + 6, "abcde", 5);
- CHK_FAIL_END
-
- CHK_FAIL_START
- p = (char *) mempcpy (a.buf1 + 6, "abcde", l0 + 5);
- CHK_FAIL_END
-
- CHK_FAIL_START
- memset (a.buf1 + 9, 'j', 2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- memset (a.buf1 + 9, 'j', l0 + 2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- bzero (a.buf1 + 9, 2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- bzero (a.buf1 + 9, l0 + 2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- explicit_bzero (a.buf1 + 9, 2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- explicit_bzero (a.buf1 + 9, l0 + 2);
- CHK_FAIL_END
-
-# if __USE_FORTIFY_LEVEL >= 2
-# define O 0
-# else
-# define O 1
-# endif
-
- CHK_FAIL_START
- strcpy (a.buf1 + (O + 4), str1 + 5);
- CHK_FAIL_END
-
- CHK_FAIL_START
- p = stpcpy (a.buf1 + (O + 8), str2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- strncpy (a.buf1 + (O + 6), "X", 4);
- CHK_FAIL_END
-
- CHK_FAIL_START
- strncpy (a.buf1 + (O + 6), "X", l0 + 4);
- CHK_FAIL_END
-
-# if !defined __cplusplus || defined __va_arg_pack
- CHK_FAIL_START
- sprintf (a.buf1 + (O + 7), "%d", num1);
- CHK_FAIL_END
-
- CHK_FAIL_START
- snprintf (a.buf1 + (O + 7), 3, "%d", num2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- snprintf (a.buf1 + (O + 7), l0 + 3, "%d", num2);
- CHK_FAIL_END
-# endif
-
- memcpy (a.buf1, str1 + (3 - O), 8 + O);
- CHK_FAIL_START
- strcat (a.buf1, "AB");
- CHK_FAIL_END
-
- memcpy (a.buf1, str1 + (4 - O), 7 + O);
- CHK_FAIL_START
- strncat (a.buf1, "ZYXWV", l0 + 3);
- CHK_FAIL_END
-#endif
-
-
- /* These ops can be done without runtime checking of object size. */
- wmemcpy (wbuf, L"abcdefghij", 10);
- wmemmove (wbuf + 1, wbuf, 9);
- if (wmemcmp (wbuf, L"aabcdefghi", 10))
- FAIL ();
-
- if (wmempcpy (wbuf + 5, L"abcde", 5) != wbuf + 10
- || wmemcmp (wbuf, L"aabcdabcde", 10))
- FAIL ();
-
- wmemset (wbuf + 8, L'j', 2);
- if (wmemcmp (wbuf, L"aabcdabcjj", 10))
- FAIL ();
-
- wcscpy (wbuf + 4, L"EDCBA");
- if (wmemcmp (wbuf, L"aabcEDCBA", 10))
- FAIL ();
-
- if (wcpcpy (wbuf + 8, L"F") != wbuf + 9 || wmemcmp (wbuf, L"aabcEDCBF", 10))
- FAIL ();
-
- wcsncpy (wbuf + 6, L"X", 4);
- if (wmemcmp (wbuf, L"aabcEDX\0\0", 10))
- FAIL ();
-
- if (swprintf (wbuf + 7, 3, L"%ls", L"987654") >= 0
- || wmemcmp (wbuf, L"aabcEDX98", 10))
- FAIL ();
-
- if (swprintf (wbuf + 7, 3, L"64") != 2
- || wmemcmp (wbuf, L"aabcEDX64", 10))
- FAIL ();
-
- /* These ops need runtime checking, but shouldn't __chk_fail. */
- wmemcpy (wbuf, L"abcdefghij", l0 + 10);
- wmemmove (wbuf + 1, wbuf, l0 + 9);
- if (wmemcmp (wbuf, L"aabcdefghi", 10))
- FAIL ();
-
- if (wmempcpy (wbuf + 5, L"abcde", l0 + 5) != wbuf + 10
- || wmemcmp (wbuf, L"aabcdabcde", 10))
- FAIL ();
-
- wmemset (wbuf + 8, L'j', l0 + 2);
- if (wmemcmp (wbuf, L"aabcdabcjj", 10))
- FAIL ();
-
- wcscpy (wbuf + 4, wstr1 + 5);
- if (wmemcmp (wbuf, L"aabcEDCBA", 10))
- FAIL ();
-
- if (wcpcpy (wbuf + 8, wstr2) != wbuf + 9 || wmemcmp (wbuf, L"aabcEDCBF", 10))
- FAIL ();
-
- wcsncpy (wbuf + 6, L"X", l0 + 4);
- if (wmemcmp (wbuf, L"aabcEDX\0\0", 10))
- FAIL ();
-
- if (wcpncpy (wbuf + 5, L"cd", l0 + 5) != wbuf + 7
- || wmemcmp (wbuf, L"aabcEcd\0\0", 10))
- FAIL ();
-
- if (swprintf (wbuf + 7, 3, L"%d", num2) >= 0
- || wmemcmp (wbuf, L"aabcEcd98", 10))
- FAIL ();
-
- wbuf[l0 + 8] = L'\0';
- wcscat (wbuf, L"A");
- if (wmemcmp (wbuf, L"aabcEcd9A", 10))
- FAIL ();
-
- wbuf[l0 + 7] = L'\0';
- wcsncat (wbuf, L"ZYXWV", l0 + 2);
- if (wmemcmp (wbuf, L"aabcEcdZY", 10))
- FAIL ();
-
- wmemcpy (wa.buf1, L"abcdefghij", l0 + 10);
- wmemmove (wa.buf1 + 1, wa.buf1, l0 + 9);
- if (wmemcmp (wa.buf1, L"aabcdefghi", 10))
- FAIL ();
-
- if (wmempcpy (wa.buf1 + 5, L"abcde", l0 + 5) != wa.buf1 + 10
- || wmemcmp (wa.buf1, L"aabcdabcde", 10))
- FAIL ();
-
- wmemset (wa.buf1 + 8, L'j', l0 + 2);
- if (wmemcmp (wa.buf1, L"aabcdabcjj", 10))
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL < 2
- /* The following tests are supposed to crash with -D_FORTIFY_SOURCE=2
- and sufficient GCC support, as the string operations overflow
- from a.buf1 into a.buf2. */
- wcscpy (wa.buf1 + 4, wstr1 + 5);
- if (wmemcmp (wa.buf1, L"aabcEDCBA", 10))
- FAIL ();
-
- if (wcpcpy (wa.buf1 + 8, wstr2) != wa.buf1 + 9
- || wmemcmp (wa.buf1, L"aabcEDCBF", 10))
- FAIL ();
-
- wcsncpy (wa.buf1 + 6, L"X", l0 + 4);
- if (wmemcmp (wa.buf1, L"aabcEDX\0\0", 10))
- FAIL ();
-
- if (swprintf (wa.buf1 + 7, 3, L"%d", num2) >= 0
- || wmemcmp (wa.buf1, L"aabcEDX98", 10))
- FAIL ();
-
- wa.buf1[l0 + 8] = L'\0';
- wcscat (wa.buf1, L"A");
- if (wmemcmp (wa.buf1, L"aabcEDX9A", 10))
- FAIL ();
-
- wa.buf1[l0 + 7] = L'\0';
- wcsncat (wa.buf1, L"ZYXWV", l0 + 2);
- if (wmemcmp (wa.buf1, L"aabcEDXZY", 10))
- FAIL ();
-
-#endif
-
-#if __USE_FORTIFY_LEVEL >= 1
- /* Now check if all buffer overflows are caught at runtime.
- N.B. All tests involving a length parameter need to be done
- twice: once with the length a compile-time constant, once without. */
-
- CHK_FAIL_START
- wmemcpy (wbuf + 1, L"abcdefghij", 10);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wmemcpy (wbuf + 1, L"abcdefghij", l0 + 10);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wmemcpy (wbuf + 9, L"abcdefghij", 10);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wmemcpy (wbuf + 9, L"abcdefghij", l0 + 10);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wmemmove (wbuf + 2, wbuf + 1, 9);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wmemmove (wbuf + 2, wbuf + 1, l0 + 9);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wp = wmempcpy (wbuf + 6, L"abcde", 5);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wp = wmempcpy (wbuf + 6, L"abcde", l0 + 5);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wmemset (wbuf + 9, L'j', 2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wmemset (wbuf + 9, L'j', l0 + 2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wcscpy (wbuf + 5, wstr1 + 5);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wp = wcpcpy (wbuf + 9, wstr2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wcsncpy (wbuf + 7, L"X", 4);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wcsncpy (wbuf + 7, L"X", l0 + 4);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wcsncpy (wbuf + 9, L"XABCDEFGH", 8);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wcpncpy (wbuf + 9, L"XABCDEFGH", 8);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wcpncpy (wbuf + 6, L"cd", 5);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wcpncpy (wbuf + 6, L"cd", l0 + 5);
- CHK_FAIL_END
-
- wmemcpy (wbuf, wstr1 + 2, 9);
- CHK_FAIL_START
- wcscat (wbuf, L"AB");
- CHK_FAIL_END
-
- wmemcpy (wbuf, wstr1 + 3, 8);
- CHK_FAIL_START
- wcsncat (wbuf, L"ZYXWV", l0 + 3);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wmemcpy (wa.buf1 + 1, L"abcdefghij", 10);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wmemcpy (wa.buf1 + 1, L"abcdefghij", l0 + 10);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wmemmove (wa.buf1 + 2, wa.buf1 + 1, 9);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wmemmove (wa.buf1 + 2, wa.buf1 + 1, l0 + 9);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wp = wmempcpy (wa.buf1 + 6, L"abcde", 5);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wp = wmempcpy (wa.buf1 + 6, L"abcde", l0 + 5);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wmemset (wa.buf1 + 9, L'j', 2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wmemset (wa.buf1 + 9, L'j', l0 + 2);
- CHK_FAIL_END
-
-#if __USE_FORTIFY_LEVEL >= 2
-# define O 0
-#else
-# define O 1
-#endif
-
- CHK_FAIL_START
- wcscpy (wa.buf1 + (O + 4), wstr1 + 5);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wp = wcpcpy (wa.buf1 + (O + 8), wstr2);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wcsncpy (wa.buf1 + (O + 6), L"X", 4);
- CHK_FAIL_END
-
- CHK_FAIL_START
- wcsncpy (wa.buf1 + (O + 6), L"X", l0 + 4);
- CHK_FAIL_END
-
- wmemcpy (wa.buf1, wstr1 + (3 - O), 8 + O);
- CHK_FAIL_START
- wcscat (wa.buf1, L"AB");
- CHK_FAIL_END
-
- wmemcpy (wa.buf1, wstr1 + (4 - O), 7 + O);
- CHK_FAIL_START
- wcsncat (wa.buf1, L"ZYXWV", l0 + 3);
- CHK_FAIL_END
-#endif
-
-
- /* Now checks for %n protection. */
-
- /* Constant literals passed directly are always ok
- (even with warnings about possible bugs from GCC). */
- int n1, n2;
- if (sprintf (buf, "%s%n%s%n", str2, &n1, str2, &n2) != 2
- || n1 != 1 || n2 != 2)
- FAIL ();
-
- /* In this case the format string is not known at compile time,
- but resides in read-only memory, so is ok. */
- if (snprintf (buf, 4, str3, str2, &n1, str2, &n2) != 2
- || n1 != 1 || n2 != 2)
- FAIL ();
-
- strcpy (buf2 + 2, "%n%s%n");
- /* When the format string is writable and contains %n,
- with -D_FORTIFY_SOURCE=2 it causes __chk_fail. */
- CHK_FAIL2_START
- if (sprintf (buf, buf2, str2, &n1, str2, &n1) != 2)
- FAIL ();
- CHK_FAIL2_END
-
- CHK_FAIL2_START
- if (snprintf (buf, 3, buf2, str2, &n1, str2, &n1) != 2)
- FAIL ();
- CHK_FAIL2_END
-
- /* But if there is no %n, even writable format string
- should work. */
- buf2[6] = '\0';
- if (sprintf (buf, buf2 + 4, str2) != 1)
- FAIL ();
-
- /* Constant literals passed directly are always ok
- (even with warnings about possible bugs from GCC). */
- if (printf ("%s%n%s%n", str4, &n1, str5, &n2) != 14
- || n1 != 7 || n2 != 14)
- FAIL ();
-
- /* In this case the format string is not known at compile time,
- but resides in read-only memory, so is ok. */
- if (printf (str3, str4, &n1, str5, &n2) != 14
- || n1 != 7 || n2 != 14)
- FAIL ();
-
- strcpy (buf2 + 2, "%n%s%n");
- /* When the format string is writable and contains %n,
- with -D_FORTIFY_SOURCE=2 it causes __chk_fail. */
- CHK_FAIL2_START
- if (printf (buf2, str4, &n1, str5, &n1) != 14)
- FAIL ();
- CHK_FAIL2_END
-
- /* But if there is no %n, even writable format string
- should work. */
- buf2[6] = '\0';
- if (printf (buf2 + 4, str5) != 7)
- FAIL ();
-
- FILE *fp = stdout;
-
- /* Constant literals passed directly are always ok
- (even with warnings about possible bugs from GCC). */
- if (fprintf (fp, "%s%n%s%n", str4, &n1, str5, &n2) != 14
- || n1 != 7 || n2 != 14)
- FAIL ();
-
- /* In this case the format string is not known at compile time,
- but resides in read-only memory, so is ok. */
- if (fprintf (fp, str3, str4, &n1, str5, &n2) != 14
- || n1 != 7 || n2 != 14)
- FAIL ();
-
- strcpy (buf2 + 2, "%n%s%n");
- /* When the format string is writable and contains %n,
- with -D_FORTIFY_SOURCE=2 it causes __chk_fail. */
- CHK_FAIL2_START
- if (fprintf (fp, buf2, str4, &n1, str5, &n1) != 14)
- FAIL ();
- CHK_FAIL2_END
-
- /* But if there is no %n, even writable format string
- should work. */
- buf2[6] = '\0';
- if (fprintf (fp, buf2 + 4, str5) != 7)
- FAIL ();
-
- char *my_ptr = NULL;
- strcpy (buf2 + 2, "%n%s%n");
- /* When the format string is writable and contains %n,
- with -D_FORTIFY_SOURCE=2 it causes __chk_fail. */
- CHK_FAIL2_START
- if (asprintf (&my_ptr, buf2, str4, &n1, str5, &n1) != 14)
- FAIL ();
- else
- free (my_ptr);
- CHK_FAIL2_END
-
- struct obstack obs;
- obstack_init (&obs);
- CHK_FAIL2_START
- if (obstack_printf (&obs, buf2, str4, &n1, str5, &n1) != 14)
- FAIL ();
- CHK_FAIL2_END
- obstack_free (&obs, NULL);
-
- my_ptr = NULL;
- if (asprintf (&my_ptr, "%s%n%s%n", str4, &n1, str5, &n1) != 14)
- FAIL ();
- else
- free (my_ptr);
-
- obstack_init (&obs);
- if (obstack_printf (&obs, "%s%n%s%n", str4, &n1, str5, &n1) != 14)
- FAIL ();
- obstack_free (&obs, NULL);
-
- if (freopen (temp_filename, "r", stdin) == NULL)
- {
- puts ("could not open temporary file");
- exit (1);
- }
-
- if (gets (buf) != buf || memcmp (buf, "abcdefgh", 9))
- FAIL ();
- if (gets (buf) != buf || memcmp (buf, "ABCDEFGHI", 10))
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- if (gets (buf) != buf)
- FAIL ();
- CHK_FAIL_END
-#endif
-
- rewind (stdin);
-
- if (fgets (buf, sizeof (buf), stdin) != buf
- || memcmp (buf, "abcdefgh\n", 10))
- FAIL ();
- if (fgets (buf, sizeof (buf), stdin) != buf || memcmp (buf, "ABCDEFGHI", 10))
- FAIL ();
-
- rewind (stdin);
-
- if (fgets (buf, l0 + sizeof (buf), stdin) != buf
- || memcmp (buf, "abcdefgh\n", 10))
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- if (fgets (buf, sizeof (buf) + 1, stdin) != buf)
- FAIL ();
- CHK_FAIL_END
-
- CHK_FAIL_START
- if (fgets (buf, l0 + sizeof (buf) + 1, stdin) != buf)
- FAIL ();
- CHK_FAIL_END
-#endif
-
- rewind (stdin);
-
- if (fgets_unlocked (buf, sizeof (buf), stdin) != buf
- || memcmp (buf, "abcdefgh\n", 10))
- FAIL ();
- if (fgets_unlocked (buf, sizeof (buf), stdin) != buf
- || memcmp (buf, "ABCDEFGHI", 10))
- FAIL ();
-
- rewind (stdin);
-
- if (fgets_unlocked (buf, l0 + sizeof (buf), stdin) != buf
- || memcmp (buf, "abcdefgh\n", 10))
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- if (fgets_unlocked (buf, sizeof (buf) + 1, stdin) != buf)
- FAIL ();
- CHK_FAIL_END
-
- CHK_FAIL_START
- if (fgets_unlocked (buf, l0 + sizeof (buf) + 1, stdin) != buf)
- FAIL ();
- CHK_FAIL_END
-#endif
-
- rewind (stdin);
-
- if (fread (buf, 1, sizeof (buf), stdin) != sizeof (buf)
- || memcmp (buf, "abcdefgh\nA", 10))
- FAIL ();
- if (fread (buf, sizeof (buf), 1, stdin) != 1
- || memcmp (buf, "BCDEFGHI\na", 10))
- FAIL ();
-
- rewind (stdin);
-
- if (fread (buf, l0 + 1, sizeof (buf), stdin) != sizeof (buf)
- || memcmp (buf, "abcdefgh\nA", 10))
- FAIL ();
- if (fread (buf, sizeof (buf), l0 + 1, stdin) != 1
- || memcmp (buf, "BCDEFGHI\na", 10))
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- if (fread (buf, 1, sizeof (buf) + 1, stdin) != sizeof (buf) + 1)
- FAIL ();
- CHK_FAIL_END
-
- CHK_FAIL_START
- if (fread (buf, sizeof (buf) + 1, l0 + 1, stdin) != 1)
- FAIL ();
- CHK_FAIL_END
-#endif
-
- rewind (stdin);
-
- if (fread_unlocked (buf, 1, sizeof (buf), stdin) != sizeof (buf)
- || memcmp (buf, "abcdefgh\nA", 10))
- FAIL ();
- if (fread_unlocked (buf, sizeof (buf), 1, stdin) != 1
- || memcmp (buf, "BCDEFGHI\na", 10))
- FAIL ();
-
- rewind (stdin);
-
- if (fread_unlocked (buf, 1, 4, stdin) != 4
- || memcmp (buf, "abcdFGHI\na", 10))
- FAIL ();
- if (fread_unlocked (buf, 4, 1, stdin) != 1
- || memcmp (buf, "efghFGHI\na", 10))
- FAIL ();
-
- rewind (stdin);
-
- if (fread_unlocked (buf, l0 + 1, sizeof (buf), stdin) != sizeof (buf)
- || memcmp (buf, "abcdefgh\nA", 10))
- FAIL ();
- if (fread_unlocked (buf, sizeof (buf), l0 + 1, stdin) != 1
- || memcmp (buf, "BCDEFGHI\na", 10))
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- if (fread_unlocked (buf, 1, sizeof (buf) + 1, stdin) != sizeof (buf) + 1)
- FAIL ();
- CHK_FAIL_END
-
- CHK_FAIL_START
- if (fread_unlocked (buf, sizeof (buf) + 1, l0 + 1, stdin) != 1)
- FAIL ();
- CHK_FAIL_END
-#endif
-
- lseek (fileno (stdin), 0, SEEK_SET);
-
- if (read (fileno (stdin), buf, sizeof (buf) - 1) != sizeof (buf) - 1
- || memcmp (buf, "abcdefgh\n", 9))
- FAIL ();
- if (read (fileno (stdin), buf, sizeof (buf) - 1) != sizeof (buf) - 1
- || memcmp (buf, "ABCDEFGHI", 9))
- FAIL ();
-
- lseek (fileno (stdin), 0, SEEK_SET);
-
- if (read (fileno (stdin), buf, l0 + sizeof (buf) - 1) != sizeof (buf) - 1
- || memcmp (buf, "abcdefgh\n", 9))
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- if (read (fileno (stdin), buf, sizeof (buf) + 1) != sizeof (buf) + 1)
- FAIL ();
- CHK_FAIL_END
-
- CHK_FAIL_START
- if (read (fileno (stdin), buf, l0 + sizeof (buf) + 1) != sizeof (buf) + 1)
- FAIL ();
- CHK_FAIL_END
-#endif
-
- if (pread (fileno (stdin), buf, sizeof (buf) - 1, sizeof (buf) - 2)
- != sizeof (buf) - 1
- || memcmp (buf, "\nABCDEFGH", 9))
- FAIL ();
- if (pread (fileno (stdin), buf, sizeof (buf) - 1, 0) != sizeof (buf) - 1
- || memcmp (buf, "abcdefgh\n", 9))
- FAIL ();
- if (pread (fileno (stdin), buf, l0 + sizeof (buf) - 1, sizeof (buf) - 3)
- != sizeof (buf) - 1
- || memcmp (buf, "h\nABCDEFG", 9))
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- if (pread (fileno (stdin), buf, sizeof (buf) + 1, 2 * sizeof (buf))
- != sizeof (buf) + 1)
- FAIL ();
- CHK_FAIL_END
-
- CHK_FAIL_START
- if (pread (fileno (stdin), buf, l0 + sizeof (buf) + 1, 2 * sizeof (buf))
- != sizeof (buf) + 1)
- FAIL ();
- CHK_FAIL_END
-#endif
-
- if (pread64 (fileno (stdin), buf, sizeof (buf) - 1, sizeof (buf) - 2)
- != sizeof (buf) - 1
- || memcmp (buf, "\nABCDEFGH", 9))
- FAIL ();
- if (pread64 (fileno (stdin), buf, sizeof (buf) - 1, 0) != sizeof (buf) - 1
- || memcmp (buf, "abcdefgh\n", 9))
- FAIL ();
- if (pread64 (fileno (stdin), buf, l0 + sizeof (buf) - 1, sizeof (buf) - 3)
- != sizeof (buf) - 1
- || memcmp (buf, "h\nABCDEFG", 9))
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- if (pread64 (fileno (stdin), buf, sizeof (buf) + 1, 2 * sizeof (buf))
- != sizeof (buf) + 1)
- FAIL ();
- CHK_FAIL_END
-
- CHK_FAIL_START
- if (pread64 (fileno (stdin), buf, l0 + sizeof (buf) + 1, 2 * sizeof (buf))
- != sizeof (buf) + 1)
- FAIL ();
- CHK_FAIL_END
-#endif
-
- if (freopen (temp_filename, "r", stdin) == NULL)
- {
- puts ("could not open temporary file");
- exit (1);
- }
-
- if (fseek (stdin, 9 + 10 + 11, SEEK_SET))
- {
- puts ("could not seek in test file");
- exit (1);
- }
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- if (gets (buf) != buf)
- FAIL ();
- CHK_FAIL_END
-#endif
-
- /* Check whether missing N$ formats are detected. */
- CHK_FAIL2_START
- printf ("%3$d\n", 1, 2, 3, 4);
- CHK_FAIL2_END
-
- CHK_FAIL2_START
- fprintf (stdout, "%3$d\n", 1, 2, 3, 4);
- CHK_FAIL2_END
-
- CHK_FAIL2_START
- sprintf (buf, "%3$d\n", 1, 2, 3, 4);
- CHK_FAIL2_END
-
- CHK_FAIL2_START
- snprintf (buf, sizeof (buf), "%3$d\n", 1, 2, 3, 4);
- CHK_FAIL2_END
-
- int sp[2];
- if (socketpair (PF_UNIX, SOCK_STREAM, 0, sp))
- FAIL ();
- else
- {
- const char *sendstr = "abcdefgh\nABCDEFGH\n0123456789\n";
- if ((size_t) send (sp[0], sendstr, strlen (sendstr), 0)
- != strlen (sendstr))
- FAIL ();
-
- char recvbuf[12];
- if (recv (sp[1], recvbuf, sizeof recvbuf, MSG_PEEK)
- != sizeof recvbuf
- || memcmp (recvbuf, sendstr, sizeof recvbuf) != 0)
- FAIL ();
-
- if (recv (sp[1], recvbuf + 6, l0 + sizeof recvbuf - 7, MSG_PEEK)
- != sizeof recvbuf - 7
- || memcmp (recvbuf + 6, sendstr, sizeof recvbuf - 7) != 0)
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- if (recv (sp[1], recvbuf + 1, sizeof recvbuf, MSG_PEEK)
- != sizeof recvbuf)
- FAIL ();
- CHK_FAIL_END
-
- CHK_FAIL_START
- if (recv (sp[1], recvbuf + 4, l0 + sizeof recvbuf - 3, MSG_PEEK)
- != sizeof recvbuf - 3)
- FAIL ();
- CHK_FAIL_END
-#endif
-
- socklen_t sl;
- struct sockaddr_un sa_un;
-
- sl = sizeof (sa_un);
- if (recvfrom (sp[1], recvbuf, sizeof recvbuf, MSG_PEEK,
- (struct sockaddr *) &sa_un, &sl)
- != sizeof recvbuf
- || memcmp (recvbuf, sendstr, sizeof recvbuf) != 0)
- FAIL ();
-
- sl = sizeof (sa_un);
- if (recvfrom (sp[1], recvbuf + 6, l0 + sizeof recvbuf - 7, MSG_PEEK,
- (struct sockaddr *) &sa_un, &sl) != sizeof recvbuf - 7
- || memcmp (recvbuf + 6, sendstr, sizeof recvbuf - 7) != 0)
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- sl = sizeof (sa_un);
- if (recvfrom (sp[1], recvbuf + 1, sizeof recvbuf, MSG_PEEK,
- (struct sockaddr *) &sa_un, &sl) != sizeof recvbuf)
- FAIL ();
- CHK_FAIL_END
-
- CHK_FAIL_START
- sl = sizeof (sa_un);
- if (recvfrom (sp[1], recvbuf + 4, l0 + sizeof recvbuf - 3, MSG_PEEK,
- (struct sockaddr *) &sa_un, &sl) != sizeof recvbuf - 3)
- FAIL ();
- CHK_FAIL_END
-#endif
-
- close (sp[0]);
- close (sp[1]);
- }
-
- char fname[] = "/tmp/tst-chk1-dir-XXXXXX\0foo";
- char *enddir = strchr (fname, '\0');
- if (mkdtemp (fname) == NULL)
- {
- printf ("mkdtemp failed: %m\n");
- return 1;
- }
- *enddir = '/';
- if (symlink ("bar", fname) != 0)
- FAIL ();
-
- char readlinkbuf[4];
- if (readlink (fname, readlinkbuf, 4) != 3
- || memcmp (readlinkbuf, "bar", 3) != 0)
- FAIL ();
- if (readlink (fname, readlinkbuf + 1, l0 + 3) != 3
- || memcmp (readlinkbuf, "bbar", 4) != 0)
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- if (readlink (fname, readlinkbuf + 2, l0 + 3) != 3)
- FAIL ();
- CHK_FAIL_END
-
- CHK_FAIL_START
- if (readlink (fname, readlinkbuf + 3, 4) != 3)
- FAIL ();
- CHK_FAIL_END
-#endif
-
- int tmpfd = open ("/tmp", O_RDONLY | O_DIRECTORY);
- if (tmpfd < 0)
- FAIL ();
-
- if (readlinkat (tmpfd, fname + sizeof ("/tmp/") - 1, readlinkbuf, 4) != 3
- || memcmp (readlinkbuf, "bar", 3) != 0)
- FAIL ();
- if (readlinkat (tmpfd, fname + sizeof ("/tmp/") - 1, readlinkbuf + 1,
- l0 + 3) != 3
- || memcmp (readlinkbuf, "bbar", 4) != 0)
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- if (readlinkat (tmpfd, fname + sizeof ("/tmp/") - 1, readlinkbuf + 2,
- l0 + 3) != 3)
- FAIL ();
- CHK_FAIL_END
-
- CHK_FAIL_START
- if (readlinkat (tmpfd, fname + sizeof ("/tmp/") - 1, readlinkbuf + 3,
- 4) != 3)
- FAIL ();
- CHK_FAIL_END
-#endif
-
- close (tmpfd);
-
- char *cwd1 = getcwd (NULL, 0);
- if (cwd1 == NULL)
- FAIL ();
-
- char *cwd2 = getcwd (NULL, 250);
- if (cwd2 == NULL)
- FAIL ();
-
- if (cwd1 && cwd2)
- {
- if (strcmp (cwd1, cwd2) != 0)
- FAIL ();
-
- *enddir = '\0';
- if (chdir (fname))
- FAIL ();
-
- char *cwd3 = getcwd (NULL, 0);
- if (cwd3 == NULL)
- FAIL ();
- if (strcmp (fname, cwd3) != 0)
- printf ("getcwd after chdir is '%s' != '%s',"
- "get{c,}wd tests skipped\n", cwd3, fname);
- else
- {
- char getcwdbuf[sizeof fname - 3];
-
- char *cwd4 = getcwd (getcwdbuf, sizeof getcwdbuf);
- if (cwd4 != getcwdbuf
- || strcmp (getcwdbuf, fname) != 0)
- FAIL ();
-
- cwd4 = getcwd (getcwdbuf + 1, l0 + sizeof getcwdbuf - 1);
- if (cwd4 != getcwdbuf + 1
- || getcwdbuf[0] != fname[0]
- || strcmp (getcwdbuf + 1, fname) != 0)
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- if (getcwd (getcwdbuf + 2, l0 + sizeof getcwdbuf)
- != getcwdbuf + 2)
- FAIL ();
- CHK_FAIL_END
-
- CHK_FAIL_START
- if (getcwd (getcwdbuf + 2, sizeof getcwdbuf)
- != getcwdbuf + 2)
- FAIL ();
- CHK_FAIL_END
-#endif
-
- if (getwd (getcwdbuf) != getcwdbuf
- || strcmp (getcwdbuf, fname) != 0)
- FAIL ();
-
- if (getwd (getcwdbuf + 1) != getcwdbuf + 1
- || strcmp (getcwdbuf + 1, fname) != 0)
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- if (getwd (getcwdbuf + 2) != getcwdbuf + 2)
- FAIL ();
- CHK_FAIL_END
-#endif
- }
-
- if (chdir (cwd1) != 0)
- FAIL ();
- free (cwd3);
- }
-
- free (cwd1);
- free (cwd2);
- *enddir = '/';
- if (unlink (fname) != 0)
- FAIL ();
-
- *enddir = '\0';
- if (rmdir (fname) != 0)
- FAIL ();
-
-
-#if PATH_MAX > 0
- char largebuf[PATH_MAX];
- char *realres = realpath (".", largebuf);
- if (realres != largebuf)
- FAIL ();
-
-# if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- char realbuf[1];
- realres = realpath (".", realbuf);
- if (realres != realbuf)
- FAIL ();
- CHK_FAIL_END
-# endif
-#endif
-
- if (setlocale (LC_ALL, "de_DE.UTF-8") != NULL)
- {
- assert (MB_CUR_MAX <= 10);
-
- /* First a simple test. */
- char enough[10];
- if (wctomb (enough, L'A') != 1)
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- /* We know the wchar_t encoding is ISO 10646. So pick a
- character which has a multibyte representation which does not
- fit. */
- CHK_FAIL_START
- char smallbuf[2];
- if (wctomb (smallbuf, L'\x100') != 2)
- FAIL ();
- CHK_FAIL_END
-#endif
-
- mbstate_t s;
- memset (&s, '\0', sizeof (s));
- if (wcrtomb (enough, L'D', &s) != 1 || enough[0] != 'D')
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- /* We know the wchar_t encoding is ISO 10646. So pick a
- character which has a multibyte representation which does not
- fit. */
- CHK_FAIL_START
- char smallbuf[2];
- if (wcrtomb (smallbuf, L'\x100', &s) != 2)
- FAIL ();
- CHK_FAIL_END
-#endif
-
- wchar_t wenough[10];
- memset (&s, '\0', sizeof (s));
- const char *cp = "A";
- if (mbsrtowcs (wenough, &cp, 10, &s) != 1
- || wcscmp (wenough, L"A") != 0)
- FAIL ();
-
- cp = "BC";
- if (mbsrtowcs (wenough, &cp, l0 + 10, &s) != 2
- || wcscmp (wenough, L"BC") != 0)
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- wchar_t wsmallbuf[2];
- cp = "ABC";
- mbsrtowcs (wsmallbuf, &cp, 10, &s);
- CHK_FAIL_END
-#endif
-
- cp = "A";
- if (mbstowcs (wenough, cp, 10) != 1
- || wcscmp (wenough, L"A") != 0)
- FAIL ();
-
- cp = "DEF";
- if (mbstowcs (wenough, cp, l0 + 10) != 3
- || wcscmp (wenough, L"DEF") != 0)
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- wchar_t wsmallbuf[2];
- cp = "ABC";
- mbstowcs (wsmallbuf, cp, 10);
- CHK_FAIL_END
-#endif
-
- memset (&s, '\0', sizeof (s));
- cp = "ABC";
- wcscpy (wenough, L"DEF");
- if (mbsnrtowcs (wenough, &cp, 1, 10, &s) != 1
- || wcscmp (wenough, L"AEF") != 0)
- FAIL ();
-
- cp = "IJ";
- if (mbsnrtowcs (wenough, &cp, 1, l0 + 10, &s) != 1
- || wcscmp (wenough, L"IEF") != 0)
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- wchar_t wsmallbuf[2];
- cp = "ABC";
- mbsnrtowcs (wsmallbuf, &cp, 3, 10, &s);
- CHK_FAIL_END
-#endif
-
- memset (&s, '\0', sizeof (s));
- const wchar_t *wcp = L"A";
- if (wcsrtombs (enough, &wcp, 10, &s) != 1
- || strcmp (enough, "A") != 0)
- FAIL ();
-
- wcp = L"BC";
- if (wcsrtombs (enough, &wcp, l0 + 10, &s) != 2
- || strcmp (enough, "BC") != 0)
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- char smallbuf[2];
- wcp = L"ABC";
- wcsrtombs (smallbuf, &wcp, 10, &s);
- CHK_FAIL_END
-#endif
-
- memset (enough, 'Z', sizeof (enough));
- wcp = L"EF";
- if (wcstombs (enough, wcp, 10) != 2
- || strcmp (enough, "EF") != 0)
- FAIL ();
-
- wcp = L"G";
- if (wcstombs (enough, wcp, l0 + 10) != 1
- || strcmp (enough, "G") != 0)
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- char smallbuf[2];
- wcp = L"ABC";
- wcstombs (smallbuf, wcp, 10);
- CHK_FAIL_END
-#endif
-
- memset (&s, '\0', sizeof (s));
- wcp = L"AB";
- if (wcsnrtombs (enough, &wcp, 1, 10, &s) != 1
- || strcmp (enough, "A") != 0)
- FAIL ();
-
- wcp = L"BCD";
- if (wcsnrtombs (enough, &wcp, 1, l0 + 10, &s) != 1
- || strcmp (enough, "B") != 0)
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- char smallbuf[2];
- wcp = L"ABC";
- wcsnrtombs (smallbuf, &wcp, 3, 10, &s);
- CHK_FAIL_END
-#endif
- }
- else
- {
- puts ("cannot set locale");
- ret = 1;
- }
-
- int fd = posix_openpt (O_RDWR);
- if (fd != -1)
- {
- char enough[1000];
- if (ptsname_r (fd, enough, sizeof (enough)) != 0)
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- char smallbuf[2];
- if (ptsname_r (fd, smallbuf, sizeof (smallbuf) + 1) == 0)
- FAIL ();
- CHK_FAIL_END
-#endif
- close (fd);
- }
-
-#if PATH_MAX > 0
- confstr (_CS_GNU_LIBC_VERSION, largebuf, sizeof (largebuf));
-# if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- char smallbuf[1];
- confstr (_CS_GNU_LIBC_VERSION, smallbuf, sizeof (largebuf));
- CHK_FAIL_END
-# endif
-#endif
-
- gid_t grpslarge[5];
- int ngr = getgroups (5, grpslarge);
- asm volatile ("" : : "r" (ngr));
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- char smallbuf[1];
- ngr = getgroups (5, (gid_t *) smallbuf);
- asm volatile ("" : : "r" (ngr));
- CHK_FAIL_END
-#endif
-
- fd = open (_PATH_TTY, O_RDONLY);
- if (fd != -1)
- {
- char enough[1000];
- if (ttyname_r (fd, enough, sizeof (enough)) != 0)
- FAIL ();
-
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- char smallbuf[2];
- if (ttyname_r (fd, smallbuf, sizeof (smallbuf) + 1) == 0)
- FAIL ();
- CHK_FAIL_END
-#endif
- close (fd);
- }
-
- char hostnamelarge[1000];
- gethostname (hostnamelarge, sizeof (hostnamelarge));
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- char smallbuf[1];
- gethostname (smallbuf, sizeof (hostnamelarge));
- CHK_FAIL_END
-#endif
-
- char loginlarge[1000];
- getlogin_r (loginlarge, sizeof (hostnamelarge));
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- char smallbuf[1];
- getlogin_r (smallbuf, sizeof (loginlarge));
- CHK_FAIL_END
-#endif
-
- char domainnamelarge[1000];
- int res = getdomainname (domainnamelarge, sizeof (domainnamelarge));
- asm volatile ("" : : "r" (res));
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- char smallbuf[1];
- res = getdomainname (smallbuf, sizeof (domainnamelarge));
- asm volatile ("" : : "r" (res));
- CHK_FAIL_END
-#endif
-
- fd_set s;
- FD_ZERO (&s);
-
- FD_SET (FD_SETSIZE - 1, &s);
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- FD_SET (FD_SETSIZE, &s);
- CHK_FAIL_END
-
- CHK_FAIL_START
- FD_SET (l0 + FD_SETSIZE, &s);
- CHK_FAIL_END
-#endif
-
- FD_CLR (FD_SETSIZE - 1, &s);
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- FD_CLR (FD_SETSIZE, &s);
- CHK_FAIL_END
-
- CHK_FAIL_START
- FD_SET (l0 + FD_SETSIZE, &s);
- CHK_FAIL_END
-#endif
-
- FD_ISSET (FD_SETSIZE - 1, &s);
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- FD_ISSET (FD_SETSIZE, &s);
- CHK_FAIL_END
-
- CHK_FAIL_START
- FD_ISSET (l0 + FD_SETSIZE, &s);
- CHK_FAIL_END
-#endif
-
- struct pollfd fds[1];
- fds[0].fd = STDOUT_FILENO;
- fds[0].events = POLLOUT;
- poll (fds, 1, 0);
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- poll (fds, 2, 0);
- CHK_FAIL_END
-
- CHK_FAIL_START
- poll (fds, l0 + 2, 0);
- CHK_FAIL_END
-#endif
- ppoll (fds, 1, NULL, NULL);
-#if __USE_FORTIFY_LEVEL >= 1
- CHK_FAIL_START
- ppoll (fds, 2, NULL, NULL);
- CHK_FAIL_END
-
- CHK_FAIL_START
- ppoll (fds, l0 + 2, NULL, NULL);
- CHK_FAIL_END
-#endif
-
- return ret;
-}
+++ /dev/null
-#define _FORTIFY_SOURCE 1
-#include "tst-chk1.c"
+++ /dev/null
-#define _FORTIFY_SOURCE 2
-#include "tst-chk1.c"
+++ /dev/null
-#include "tst-chk1.c"
+++ /dev/null
-#define _FORTIFY_SOURCE 1
-#include "tst-chk1.c"
+++ /dev/null
-#define _FORTIFY_SOURCE 2
-#include "tst-chk1.c"
--- /dev/null
+/* Copyright (C) 2004-2022 Free Software Foundation, Inc.
+ Copyright The GNU Toolchain Authors.
+ This file is part of the GNU C Library.
+ Contributed by Jakub Jelinek <jakub@redhat.com>, 2004.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* This file tests gets. Force it to be declared. */
+#include <features.h>
+#undef __GLIBC_USE_DEPRECATED_GETS
+#define __GLIBC_USE_DEPRECATED_GETS 1
+
+#include <assert.h>
+#include <fcntl.h>
+#include <locale.h>
+#include <obstack.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+#include <sys/poll.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+/* Select the copy routine under test.  Without _GNU_SOURCE the plain
+   memcpy/wmemcpy are used and the *_RET macros are 0; with _GNU_SOURCE
+   mempcpy/wmempcpy are used and the *_RET macros give the length of X.
+   The tests compare the call's result against DEST + *_RET (X).  */
+#ifndef _GNU_SOURCE
+# define MEMPCPY memcpy
+# define WMEMPCPY wmemcpy
+# define MEMPCPY_RET(x) 0
+# define WMEMPCPY_RET(x) 0
+#else
+# define MEMPCPY mempcpy
+# define WMEMPCPY wmempcpy
+# define MEMPCPY_RET(x) __builtin_strlen (x)
+# define WMEMPCPY_RET(x) wcslen (x)
+#endif
+
+/* Allocation hooks for the obstack used by the obstack_printf checks in
+   do_test (presumably picked up by obstack_init from <obstack.h> --
+   confirm).  */
+#define obstack_chunk_alloc malloc
+#define obstack_chunk_free free
+
+/* Name of the temporary input file; filled in by create_temp_file in
+   do_prepare and later reopened as stdin by do_test.  */
+char *temp_filename;
+static void do_prepare (void);
+static int do_test (void);
+/* Hooks for the included test skeleton.  NOTE(review): presumably the
+   skeleton runs PREPARE before TEST_FUNCTION -- confirm against
+   test-skeleton.c.  */
+#define PREPARE(argc, argv) do_prepare ()
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
+
+/* Create the temporary file that do_test later reopens as stdin and fill
+   it with the test lines.  Exits with status 1 if the file cannot be
+   created or if the write comes up short.  */
+static void
+do_prepare (void)
+{
+ int temp_fd = create_temp_file ("tst-chk1.", &temp_filename);
+ if (temp_fd == -1)
+ {
+ printf ("cannot create temporary file: %m\n");
+ exit (1);
+ }
+
+ /* Four lines of 8, 9, 10 and 10 characters; the last one has no
+    trailing newline.  */
+ const char *strs = "abcdefgh\nABCDEFGHI\nabcdefghij\nABCDEFGHIJ";
+ if ((size_t) write (temp_fd, strs, strlen (strs)) != strlen (strs))
+ {
+ puts ("could not write test strings into file");
+ unlink (temp_filename);
+ exit (1);
+ }
+ /* NOTE(review): temp_fd is not closed before returning; presumably
+    harmless for a short-lived test process -- confirm.  */
+}
+
+/* Nonzero while a CHK_FAIL_START/CHK_FAIL_END region is active, i.e.
+   while the handler may longjmp instead of exiting.  */
+volatile int chk_fail_ok;
+/* Overall test result; FAIL () sets it to 1.  */
+volatile int ret;
+/* Jump target armed by CHK_FAIL_START and used by the handler.  */
+jmp_buf chk_fail_buf;
+
+/* Handler registered through set_fortify_handler in do_test.  If a
+   failure is currently expected (chk_fail_ok is set), clear the flag and
+   jump back to the setjmp point established by CHK_FAIL_START;
+   otherwise terminate the process immediately with status 127.  SIG is
+   not used.  */
+static void
+handler (int sig)
+{
+ if (chk_fail_ok)
+ {
+ chk_fail_ok = 0;
+ longjmp (chk_fail_buf, 1);
+ }
+ else
+ _exit (127);
+}
+
+/* Destination buffers for the checks.  At fortify level 3 only the size
+   is defined here (as a volatile object) and buf/wbuf are allocated
+   with malloc at the start of do_test; at other levels they are fixed
+   10-element arrays and buf_size is sizeof (buf).  */
+#if __USE_FORTIFY_LEVEL == 3
+volatile size_t buf_size = 10;
+#else
+char buf[10];
+wchar_t wbuf[10];
+#define buf_size sizeof (buf)
+#endif
+
+/* Zero-initialized and not assigned in the reviewed part of the file;
+   because it is volatile, `l0 + N' yields a length that is not a
+   compile-time constant.  */
+volatile size_t l0;
+/* Sinks for the return values of calls such as mempcpy and stpcpy.  */
+volatile char *p;
+volatile wchar_t *wp;
+/* Source and format strings reached through pointers rather than used
+   as literals at the call site.  */
+const char *str1 = "JIHGFEDCBA";
+const char *str2 = "F";
+const char *str3 = "%s%n%s%n";
+const char *str4 = "Hello, ";
+const char *str5 = "World!\n";
+const wchar_t *wstr1 = L"JIHGFEDCBA";
+const wchar_t *wstr2 = L"F";
+const wchar_t *wstr3 = L"%s%n%s%n";
+const wchar_t *wstr4 = L"Hello, ";
+const wchar_t *wstr5 = L"World!\n";
+/* Writable format string used by the %n protection checks.  */
+char buf2[10] = "%s";
+/* Integer arguments for the "%d" formatting checks.  */
+int num1 = 67;
+int num2 = 987654;
+
+/* Record a failure: print the current source line and set ret to 1.  */
+#define FAIL() \
+ do { printf ("Failure on line %d\n", __LINE__); ret = 1; } while (0)
+/* CHK_FAIL_START and CHK_FAIL_END bracket statements that are expected
+   to end up in the handler.  START sets chk_fail_ok and opens an
+   `if (! setjmp (chk_fail_buf))' block.  */
+#define CHK_FAIL_START \
+ chk_fail_ok = 1; \
+ if (! setjmp (chk_fail_buf)) \
+ {
+/* END is reached only when the bracketed statements returned normally;
+   it clears chk_fail_ok, records a failure and closes the block opened
+   by CHK_FAIL_START.  */
+#define CHK_FAIL_END \
+ chk_fail_ok = 0; \
+ FAIL (); \
+ }
+/* Variants that expect a failure only at fortify level 2 or higher (and,
+   when compiled as C++, only if __va_arg_pack is defined).  Otherwise
+   they expand to nothing and the bracketed statements run as ordinary
+   code.  */
+#if __USE_FORTIFY_LEVEL >= 2 && (!defined __cplusplus || defined __va_arg_pack)
+# define CHK_FAIL2_START CHK_FAIL_START
+# define CHK_FAIL2_END CHK_FAIL_END
+#else
+# define CHK_FAIL2_START
+# define CHK_FAIL2_END
+#endif
+
+static int
+do_test (void)
+{
+#if __USE_FORTIFY_LEVEL == 3
+ char *buf = (char *) malloc (buf_size);
+ wchar_t *wbuf = (wchar_t *) malloc (buf_size * sizeof (wchar_t));
+#endif
+ set_fortify_handler (handler);
+
+ struct A { char buf1[9]; char buf2[1]; } a;
+ struct wA { wchar_t buf1[9]; wchar_t buf2[1]; } wa;
+
+ printf ("Test checking routines at fortify level %d\n",
+#ifdef __USE_FORTIFY_LEVEL
+ (int) __USE_FORTIFY_LEVEL
+#else
+ 0
+#endif
+ );
+
+#if defined __USE_FORTIFY_LEVEL && !defined __fortify_function
+ printf ("Test skipped");
+ if (l0 == 0)
+ return 0;
+#endif
+
+ /* These ops can be done without runtime checking of object size. */
+ memcpy (buf, "abcdefghij", 10);
+ memmove (buf + 1, buf, 9);
+ if (memcmp (buf, "aabcdefghi", 10))
+ FAIL ();
+
+ memcpy (buf, "abcdefghij", 10);
+ bcopy (buf, buf + 1, 9);
+ if (memcmp (buf, "aabcdefghi", 10))
+ FAIL ();
+
+ if (MEMPCPY (buf + 5, "abcde", 5) != buf + 5 + MEMPCPY_RET ("abcde")
+ || memcmp (buf, "aabcdabcde", 10))
+ FAIL ();
+
+ memset (buf + 8, 'j', 2);
+ if (memcmp (buf, "aabcdabcjj", 10))
+ FAIL ();
+
+ bzero (buf + 8, 2);
+ if (memcmp (buf, "aabcdabc\0\0", 10))
+ FAIL ();
+
+ explicit_bzero (buf + 6, 4);
+ if (memcmp (buf, "aabcda\0\0\0\0", 10))
+ FAIL ();
+
+ strcpy (buf + 4, "EDCBA");
+ if (memcmp (buf, "aabcEDCBA", 10))
+ FAIL ();
+
+ if (stpcpy (buf + 8, "F") != buf + 9 || memcmp (buf, "aabcEDCBF", 10))
+ FAIL ();
+
+ strncpy (buf + 6, "X", 4);
+ if (memcmp (buf, "aabcEDX\0\0", 10))
+ FAIL ();
+
+ if (sprintf (buf + 7, "%s", "67") != 2 || memcmp (buf, "aabcEDX67", 10))
+ FAIL ();
+
+ if (snprintf (buf + 7, 3, "%s", "987654") != 6
+ || memcmp (buf, "aabcEDX98", 10))
+ FAIL ();
+
+ /* These ops need runtime checking, but shouldn't __chk_fail. */
+ memcpy (buf, "abcdefghij", l0 + 10);
+ memmove (buf + 1, buf, l0 + 9);
+ if (memcmp (buf, "aabcdefghi", 10))
+ FAIL ();
+
+ memcpy (buf, "abcdefghij", l0 + 10);
+ bcopy (buf, buf + 1, l0 + 9);
+ if (memcmp (buf, "aabcdefghi", 10))
+ FAIL ();
+
+ if (MEMPCPY (buf + 5, "abcde", l0 + 5) != buf + 5 + MEMPCPY_RET ("abcde")
+ || memcmp (buf, "aabcdabcde", 10))
+ FAIL ();
+
+ memset (buf + 8, 'j', l0 + 2);
+ if (memcmp (buf, "aabcdabcjj", 10))
+ FAIL ();
+
+ bzero (buf + 8, l0 + 2);
+ if (memcmp (buf, "aabcdabc\0\0", 10))
+ FAIL ();
+
+ explicit_bzero (buf + 6, l0 + 4);
+ if (memcmp (buf, "aabcda\0\0\0\0", 10))
+ FAIL ();
+
+ strcpy (buf + 4, str1 + 5);
+ if (memcmp (buf, "aabcEDCBA", 10))
+ FAIL ();
+
+ if (stpcpy (buf + 8, str2) != buf + 9 || memcmp (buf, "aabcEDCBF", 10))
+ FAIL ();
+
+ strncpy (buf + 6, "X", l0 + 4);
+ if (memcmp (buf, "aabcEDX\0\0", 10))
+ FAIL ();
+
+ if (stpncpy (buf + 5, "cd", l0 + 5) != buf + 7
+ || memcmp (buf, "aabcEcd\0\0", 10))
+ FAIL ();
+
+ if (sprintf (buf + 7, "%d", num1) != 2 || memcmp (buf, "aabcEcd67", 10))
+ FAIL ();
+
+ if (snprintf (buf + 7, 3, "%d", num2) != 6 || memcmp (buf, "aabcEcd98", 10))
+ FAIL ();
+
+ buf[l0 + 8] = '\0';
+ strcat (buf, "A");
+ if (memcmp (buf, "aabcEcd9A", 10))
+ FAIL ();
+
+ buf[l0 + 7] = '\0';
+ strncat (buf, "ZYXWV", l0 + 2);
+ if (memcmp (buf, "aabcEcdZY", 10))
+ FAIL ();
+
+ /* The following tests are supposed to succeed at all fortify
+ levels, even though they overflow a.buf1 into a.buf2. */
+ memcpy (a.buf1, "abcdefghij", l0 + 10);
+ memmove (a.buf1 + 1, a.buf1, l0 + 9);
+ if (memcmp (a.buf1, "aabcdefghi", 10))
+ FAIL ();
+
+ memcpy (a.buf1, "abcdefghij", l0 + 10);
+ bcopy (a.buf1, a.buf1 + 1, l0 + 9);
+ if (memcmp (a.buf1, "aabcdefghi", 10))
+ FAIL ();
+
+ if (MEMPCPY (a.buf1 + 5, "abcde", l0 + 5)
+ != a.buf1 + 5 + MEMPCPY_RET ("abcde")
+ || memcmp (a.buf1, "aabcdabcde", 10))
+ FAIL ();
+
+ memset (a.buf1 + 8, 'j', l0 + 2);
+ if (memcmp (a.buf1, "aabcdabcjj", 10))
+ FAIL ();
+
+ bzero (a.buf1 + 8, l0 + 2);
+ if (memcmp (a.buf1, "aabcdabc\0\0", 10))
+ FAIL ();
+
+ explicit_bzero (a.buf1 + 6, l0 + 4);
+ if (memcmp (a.buf1, "aabcda\0\0\0\0", 10))
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL < 2
+ /* The following tests are supposed to crash with -D_FORTIFY_SOURCE=2
+ and sufficient GCC support, as the string operations overflow
+ from a.buf1 into a.buf2. */
+ strcpy (a.buf1 + 4, str1 + 5);
+ if (memcmp (a.buf1, "aabcEDCBA", 10))
+ FAIL ();
+
+ if (stpcpy (a.buf1 + 8, str2) != a.buf1 + 9
+ || memcmp (a.buf1, "aabcEDCBF", 10))
+ FAIL ();
+
+ strncpy (a.buf1 + 6, "X", l0 + 4);
+ if (memcmp (a.buf1, "aabcEDX\0\0", 10))
+ FAIL ();
+
+ if (sprintf (a.buf1 + 7, "%d", num1) != 2
+ || memcmp (a.buf1, "aabcEDX67", 10))
+ FAIL ();
+
+ if (snprintf (a.buf1 + 7, 3, "%d", num2) != 6
+ || memcmp (a.buf1, "aabcEDX98", 10))
+ FAIL ();
+
+ a.buf1[l0 + 8] = '\0';
+ strcat (a.buf1, "A");
+ if (memcmp (a.buf1, "aabcEDX9A", 10))
+ FAIL ();
+
+ a.buf1[l0 + 7] = '\0';
+ strncat (a.buf1, "ZYXWV", l0 + 2);
+ if (memcmp (a.buf1, "aabcEDXZY", 10))
+ FAIL ();
+
+#endif
+
+#if __USE_FORTIFY_LEVEL >= 1
+ /* Now check if all buffer overflows are caught at runtime.
+ N.B. All tests involving a length parameter need to be done
+ twice: once with the length a compile-time constant, once without. */
+
+ CHK_FAIL_START
+ memcpy (buf + 1, "abcdefghij", 10);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ memcpy (buf + 1, "abcdefghij", l0 + 10);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ memmove (buf + 2, buf + 1, 9);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ memmove (buf + 2, buf + 1, l0 + 9);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ bcopy (buf + 1, buf + 2, 9);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ bcopy (buf + 1, buf + 2, l0 + 9);
+ CHK_FAIL_END
+
+#ifdef _GNU_SOURCE
+ CHK_FAIL_START
+ p = (char *) mempcpy (buf + 6, "abcde", 5);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ p = (char *) mempcpy (buf + 6, "abcde", l0 + 5);
+ CHK_FAIL_END
+#endif
+
+ CHK_FAIL_START
+ memset (buf + 9, 'j', 2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ memset (buf + 9, 'j', l0 + 2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ bzero (buf + 9, 2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ bzero (buf + 9, l0 + 2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ explicit_bzero (buf + 9, 2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ explicit_bzero (buf + 9, l0 + 2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ strcpy (buf + 5, str1 + 5);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ p = stpcpy (buf + 9, str2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ strncpy (buf + 7, "X", 4);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ strncpy (buf + 7, "X", l0 + 4);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ stpncpy (buf + 6, "cd", 5);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ stpncpy (buf + 6, "cd", l0 + 5);
+ CHK_FAIL_END
+
+# if !defined __cplusplus || defined __va_arg_pack
+ CHK_FAIL_START
+ sprintf (buf + 8, "%d", num1);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ snprintf (buf + 8, 3, "%d", num2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ snprintf (buf + 8, l0 + 3, "%d", num2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ swprintf (wbuf + 8, 3, L"%d", num1);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ swprintf (wbuf + 8, l0 + 3, L"%d", num1);
+ CHK_FAIL_END
+# endif
+
+ memcpy (buf, str1 + 2, 9);
+ CHK_FAIL_START
+ strcat (buf, "AB");
+ CHK_FAIL_END
+
+ memcpy (buf, str1 + 3, 8);
+ CHK_FAIL_START
+ strncat (buf, "ZYXWV", 3);
+ CHK_FAIL_END
+
+ memcpy (buf, str1 + 3, 8);
+ CHK_FAIL_START
+ strncat (buf, "ZYXWV", l0 + 3);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ memcpy (a.buf1 + 1, "abcdefghij", 10);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ memcpy (a.buf1 + 1, "abcdefghij", l0 + 10);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ memmove (a.buf1 + 2, a.buf1 + 1, 9);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ memmove (a.buf1 + 2, a.buf1 + 1, l0 + 9);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ bcopy (a.buf1 + 1, a.buf1 + 2, 9);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ bcopy (a.buf1 + 1, a.buf1 + 2, l0 + 9);
+ CHK_FAIL_END
+
+#ifdef _GNU_SOURCE
+ CHK_FAIL_START
+ p = (char *) mempcpy (a.buf1 + 6, "abcde", 5);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ p = (char *) mempcpy (a.buf1 + 6, "abcde", l0 + 5);
+ CHK_FAIL_END
+#endif
+
+ CHK_FAIL_START
+ memset (a.buf1 + 9, 'j', 2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ memset (a.buf1 + 9, 'j', l0 + 2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ bzero (a.buf1 + 9, 2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ bzero (a.buf1 + 9, l0 + 2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ explicit_bzero (a.buf1 + 9, 2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ explicit_bzero (a.buf1 + 9, l0 + 2);
+ CHK_FAIL_END
+
+# if __USE_FORTIFY_LEVEL >= 2
+# define O 0
+# else
+# define O 1
+# endif
+
+ CHK_FAIL_START
+ strcpy (a.buf1 + (O + 4), str1 + 5);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ p = stpcpy (a.buf1 + (O + 8), str2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ strncpy (a.buf1 + (O + 6), "X", 4);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ strncpy (a.buf1 + (O + 6), "X", l0 + 4);
+ CHK_FAIL_END
+
+# if !defined __cplusplus || defined __va_arg_pack
+ CHK_FAIL_START
+ sprintf (a.buf1 + (O + 7), "%d", num1);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ snprintf (a.buf1 + (O + 7), 3, "%d", num2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ snprintf (a.buf1 + (O + 7), l0 + 3, "%d", num2);
+ CHK_FAIL_END
+# endif
+
+ memcpy (a.buf1, str1 + (3 - O), 8 + O);
+ CHK_FAIL_START
+ strcat (a.buf1, "AB");
+ CHK_FAIL_END
+
+ memcpy (a.buf1, str1 + (4 - O), 7 + O);
+ CHK_FAIL_START
+ strncat (a.buf1, "ZYXWV", l0 + 3);
+ CHK_FAIL_END
+#endif
+
+
+ /* These ops can be done without runtime checking of object size. */
+ wmemcpy (wbuf, L"abcdefghij", 10);
+ wmemmove (wbuf + 1, wbuf, 9);
+ if (wmemcmp (wbuf, L"aabcdefghi", 10))
+ FAIL ();
+
+ if (WMEMPCPY (wbuf + 5, L"abcde", 5) != wbuf + 5 + WMEMPCPY_RET (L"abcde")
+ || wmemcmp (wbuf, L"aabcdabcde", 10))
+ FAIL ();
+
+ wmemset (wbuf + 8, L'j', 2);
+ if (wmemcmp (wbuf, L"aabcdabcjj", 10))
+ FAIL ();
+
+ wcscpy (wbuf + 4, L"EDCBA");
+ if (wmemcmp (wbuf, L"aabcEDCBA", 10))
+ FAIL ();
+
+ if (wcpcpy (wbuf + 8, L"F") != wbuf + 9 || wmemcmp (wbuf, L"aabcEDCBF", 10))
+ FAIL ();
+
+ wcsncpy (wbuf + 6, L"X", 4);
+ if (wmemcmp (wbuf, L"aabcEDX\0\0", 10))
+ FAIL ();
+
+ if (swprintf (wbuf + 7, 3, L"%ls", L"987654") >= 0
+ || wmemcmp (wbuf, L"aabcEDX98", 10))
+ FAIL ();
+
+ if (swprintf (wbuf + 7, 3, L"64") != 2
+ || wmemcmp (wbuf, L"aabcEDX64", 10))
+ FAIL ();
+
+ /* These ops need runtime checking, but shouldn't __chk_fail. */
+ wmemcpy (wbuf, L"abcdefghij", l0 + 10);
+ wmemmove (wbuf + 1, wbuf, l0 + 9);
+ if (wmemcmp (wbuf, L"aabcdefghi", 10))
+ FAIL ();
+
+ if (WMEMPCPY (wbuf + 5, L"abcde", l0 + 5)
+ != wbuf + 5 + WMEMPCPY_RET (L"abcde")
+ || wmemcmp (wbuf, L"aabcdabcde", 10))
+ FAIL ();
+
+ wmemset (wbuf + 8, L'j', l0 + 2);
+ if (wmemcmp (wbuf, L"aabcdabcjj", 10))
+ FAIL ();
+
+ wcscpy (wbuf + 4, wstr1 + 5);
+ if (wmemcmp (wbuf, L"aabcEDCBA", 10))
+ FAIL ();
+
+ if (wcpcpy (wbuf + 8, wstr2) != wbuf + 9 || wmemcmp (wbuf, L"aabcEDCBF", 10))
+ FAIL ();
+
+ wcsncpy (wbuf + 6, L"X", l0 + 4);
+ if (wmemcmp (wbuf, L"aabcEDX\0\0", 10))
+ FAIL ();
+
+ if (wcpncpy (wbuf + 5, L"cd", l0 + 5) != wbuf + 7
+ || wmemcmp (wbuf, L"aabcEcd\0\0", 10))
+ FAIL ();
+
+ if (swprintf (wbuf + 7, 3, L"%d", num2) >= 0
+ || wmemcmp (wbuf, L"aabcEcd98", 10))
+ FAIL ();
+
+ wbuf[l0 + 8] = L'\0';
+ wcscat (wbuf, L"A");
+ if (wmemcmp (wbuf, L"aabcEcd9A", 10))
+ FAIL ();
+
+ wbuf[l0 + 7] = L'\0';
+ wcsncat (wbuf, L"ZYXWV", l0 + 2);
+ if (wmemcmp (wbuf, L"aabcEcdZY", 10))
+ FAIL ();
+
+ wmemcpy (wa.buf1, L"abcdefghij", l0 + 10);
+ wmemmove (wa.buf1 + 1, wa.buf1, l0 + 9);
+ if (wmemcmp (wa.buf1, L"aabcdefghi", 10))
+ FAIL ();
+
+ if (WMEMPCPY (wa.buf1 + 5, L"abcde", l0 + 5)
+ != wa.buf1 + 5 + WMEMPCPY_RET (L"abcde")
+ || wmemcmp (wa.buf1, L"aabcdabcde", 10))
+ FAIL ();
+
+ wmemset (wa.buf1 + 8, L'j', l0 + 2);
+ if (wmemcmp (wa.buf1, L"aabcdabcjj", 10))
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL < 2
+ /* The following tests are supposed to crash with -D_FORTIFY_SOURCE=2
+ and sufficient GCC support, as the string operations overflow
+ from wa.buf1 into wa.buf2. */
+ wcscpy (wa.buf1 + 4, wstr1 + 5);
+ if (wmemcmp (wa.buf1, L"aabcEDCBA", 10))
+ FAIL ();
+
+ if (wcpcpy (wa.buf1 + 8, wstr2) != wa.buf1 + 9
+ || wmemcmp (wa.buf1, L"aabcEDCBF", 10))
+ FAIL ();
+
+ wcsncpy (wa.buf1 + 6, L"X", l0 + 4);
+ if (wmemcmp (wa.buf1, L"aabcEDX\0\0", 10))
+ FAIL ();
+
+ if (swprintf (wa.buf1 + 7, 3, L"%d", num2) >= 0
+ || wmemcmp (wa.buf1, L"aabcEDX98", 10))
+ FAIL ();
+
+ wa.buf1[l0 + 8] = L'\0';
+ wcscat (wa.buf1, L"A");
+ if (wmemcmp (wa.buf1, L"aabcEDX9A", 10))
+ FAIL ();
+
+ wa.buf1[l0 + 7] = L'\0';
+ wcsncat (wa.buf1, L"ZYXWV", l0 + 2);
+ if (wmemcmp (wa.buf1, L"aabcEDXZY", 10))
+ FAIL ();
+
+#endif
+
+#if __USE_FORTIFY_LEVEL >= 1
+ /* Now check if all buffer overflows are caught at runtime.
+ N.B. All tests involving a length parameter need to be done
+ twice: once with the length a compile-time constant, once without. */
+
+ CHK_FAIL_START
+ wmemcpy (wbuf + 1, L"abcdefghij", 10);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wmemcpy (wbuf + 1, L"abcdefghij", l0 + 10);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wmemcpy (wbuf + 9, L"abcdefghij", 10);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wmemcpy (wbuf + 9, L"abcdefghij", l0 + 10);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wmemmove (wbuf + 2, wbuf + 1, 9);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wmemmove (wbuf + 2, wbuf + 1, l0 + 9);
+ CHK_FAIL_END
+
+#ifdef _GNU_SOURCE
+ CHK_FAIL_START
+ wp = wmempcpy (wbuf + 6, L"abcde", 5);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wp = wmempcpy (wbuf + 6, L"abcde", l0 + 5);
+ CHK_FAIL_END
+#endif
+
+ CHK_FAIL_START
+ wmemset (wbuf + 9, L'j', 2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wmemset (wbuf + 9, L'j', l0 + 2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wcscpy (wbuf + 5, wstr1 + 5);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wp = wcpcpy (wbuf + 9, wstr2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wcsncpy (wbuf + 7, L"X", 4);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wcsncpy (wbuf + 7, L"X", l0 + 4);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wcsncpy (wbuf + 9, L"XABCDEFGH", 8);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wcpncpy (wbuf + 9, L"XABCDEFGH", 8);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wcpncpy (wbuf + 6, L"cd", 5);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wcpncpy (wbuf + 6, L"cd", l0 + 5);
+ CHK_FAIL_END
+
+ wmemcpy (wbuf, wstr1 + 2, 9);
+ CHK_FAIL_START
+ wcscat (wbuf, L"AB");
+ CHK_FAIL_END
+
+ wmemcpy (wbuf, wstr1 + 3, 8);
+ CHK_FAIL_START
+ wcsncat (wbuf, L"ZYXWV", l0 + 3);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wmemcpy (wa.buf1 + 1, L"abcdefghij", 10);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wmemcpy (wa.buf1 + 1, L"abcdefghij", l0 + 10);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wmemmove (wa.buf1 + 2, wa.buf1 + 1, 9);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wmemmove (wa.buf1 + 2, wa.buf1 + 1, l0 + 9);
+ CHK_FAIL_END
+
+#ifdef _GNU_SOURCE
+ CHK_FAIL_START
+ wp = wmempcpy (wa.buf1 + 6, L"abcde", 5);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wp = wmempcpy (wa.buf1 + 6, L"abcde", l0 + 5);
+ CHK_FAIL_END
+#endif
+
+ CHK_FAIL_START
+ wmemset (wa.buf1 + 9, L'j', 2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wmemset (wa.buf1 + 9, L'j', l0 + 2);
+ CHK_FAIL_END
+
+#if __USE_FORTIFY_LEVEL >= 2
+# define O 0
+#else
+# define O 1
+#endif
+
+ CHK_FAIL_START
+ wcscpy (wa.buf1 + (O + 4), wstr1 + 5);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wp = wcpcpy (wa.buf1 + (O + 8), wstr2);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wcsncpy (wa.buf1 + (O + 6), L"X", 4);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ wcsncpy (wa.buf1 + (O + 6), L"X", l0 + 4);
+ CHK_FAIL_END
+
+ wmemcpy (wa.buf1, wstr1 + (3 - O), 8 + O);
+ CHK_FAIL_START
+ wcscat (wa.buf1, L"AB");
+ CHK_FAIL_END
+
+ wmemcpy (wa.buf1, wstr1 + (4 - O), 7 + O);
+ CHK_FAIL_START
+ wcsncat (wa.buf1, L"ZYXWV", l0 + 3);
+ CHK_FAIL_END
+#endif
+
+
+ /* Now checks for %n protection. */
+
+ /* Constant literals passed directly are always ok
+ (even with warnings about possible bugs from GCC). */
+ int n1, n2;
+ if (sprintf (buf, "%s%n%s%n", str2, &n1, str2, &n2) != 2
+ || n1 != 1 || n2 != 2)
+ FAIL ();
+
+ /* In this case the format string is not known at compile time,
+ but resides in read-only memory, so is ok. */
+ if (snprintf (buf, 4, str3, str2, &n1, str2, &n2) != 2
+ || n1 != 1 || n2 != 2)
+ FAIL ();
+
+ strcpy (buf2 + 2, "%n%s%n");
+ /* When the format string is writable and contains %n,
+ with -D_FORTIFY_SOURCE=2 it causes __chk_fail. */
+ CHK_FAIL2_START
+ if (sprintf (buf, buf2, str2, &n1, str2, &n1) != 2)
+ FAIL ();
+ CHK_FAIL2_END
+
+ CHK_FAIL2_START
+ if (snprintf (buf, 3, buf2, str2, &n1, str2, &n1) != 2)
+ FAIL ();
+ CHK_FAIL2_END
+
+ /* But if there is no %n, even writable format string
+ should work. */
+ buf2[6] = '\0';
+ if (sprintf (buf, buf2 + 4, str2) != 1)
+ FAIL ();
+
+ /* Constant literals passed directly are always ok
+ (even with warnings about possible bugs from GCC). */
+ if (printf ("%s%n%s%n", str4, &n1, str5, &n2) != 14
+ || n1 != 7 || n2 != 14)
+ FAIL ();
+
+ /* In this case the format string is not known at compile time,
+ but resides in read-only memory, so is ok. */
+ if (printf (str3, str4, &n1, str5, &n2) != 14
+ || n1 != 7 || n2 != 14)
+ FAIL ();
+
+ strcpy (buf2 + 2, "%n%s%n");
+ /* When the format string is writable and contains %n,
+ with -D_FORTIFY_SOURCE=2 it causes __chk_fail. */
+ CHK_FAIL2_START
+ if (printf (buf2, str4, &n1, str5, &n1) != 14)
+ FAIL ();
+ CHK_FAIL2_END
+
+ /* But if there is no %n, even writable format string
+ should work. */
+ buf2[6] = '\0';
+ if (printf (buf2 + 4, str5) != 7)
+ FAIL ();
+
+ FILE *fp = stdout;
+
+ /* Constant literals passed directly are always ok
+ (even with warnings about possible bugs from GCC). */
+ if (fprintf (fp, "%s%n%s%n", str4, &n1, str5, &n2) != 14
+ || n1 != 7 || n2 != 14)
+ FAIL ();
+
+ /* In this case the format string is not known at compile time,
+ but resides in read-only memory, so is ok. */
+ if (fprintf (fp, str3, str4, &n1, str5, &n2) != 14
+ || n1 != 7 || n2 != 14)
+ FAIL ();
+
+ strcpy (buf2 + 2, "%n%s%n");
+ /* When the format string is writable and contains %n,
+ with -D_FORTIFY_SOURCE=2 it causes __chk_fail. */
+ CHK_FAIL2_START
+ if (fprintf (fp, buf2, str4, &n1, str5, &n1) != 14)
+ FAIL ();
+ CHK_FAIL2_END
+
+ /* But if there is no %n, even writable format string
+ should work. */
+ buf2[6] = '\0';
+ if (fprintf (fp, buf2 + 4, str5) != 7)
+ FAIL ();
+
+#ifdef _GNU_SOURCE
+ char *my_ptr = NULL;
+ strcpy (buf2 + 2, "%n%s%n");
+ /* When the format string is writable and contains %n,
+ with -D_FORTIFY_SOURCE=2 it causes __chk_fail. */
+ CHK_FAIL2_START
+ if (asprintf (&my_ptr, buf2, str4, &n1, str5, &n1) != 14)
+ FAIL ();
+ else
+ free (my_ptr);
+ CHK_FAIL2_END
+
+ struct obstack obs;
+ obstack_init (&obs);
+ CHK_FAIL2_START
+ if (obstack_printf (&obs, buf2, str4, &n1, str5, &n1) != 14)
+ FAIL ();
+ CHK_FAIL2_END
+ obstack_free (&obs, NULL);
+
+ my_ptr = NULL;
+ if (asprintf (&my_ptr, "%s%n%s%n", str4, &n1, str5, &n1) != 14)
+ FAIL ();
+ else
+ free (my_ptr);
+
+ obstack_init (&obs);
+ if (obstack_printf (&obs, "%s%n%s%n", str4, &n1, str5, &n1) != 14)
+ FAIL ();
+ obstack_free (&obs, NULL);
+#endif
+
+ if (freopen (temp_filename, "r", stdin) == NULL)
+ {
+ puts ("could not open temporary file");
+ exit (1);
+ }
+
+ if (gets (buf) != buf || memcmp (buf, "abcdefgh", 9))
+ FAIL ();
+ if (gets (buf) != buf || memcmp (buf, "ABCDEFGHI", 10))
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ if (gets (buf) != buf)
+ FAIL ();
+ CHK_FAIL_END
+#endif
+
+ rewind (stdin);
+
+ if (fgets (buf, buf_size, stdin) != buf
+ || memcmp (buf, "abcdefgh\n", 10))
+ FAIL ();
+ if (fgets (buf, buf_size, stdin) != buf || memcmp (buf, "ABCDEFGHI", 10))
+ FAIL ();
+
+ rewind (stdin);
+
+ if (fgets (buf, l0 + buf_size, stdin) != buf
+ || memcmp (buf, "abcdefgh\n", 10))
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ if (fgets (buf, buf_size + 1, stdin) != buf)
+ FAIL ();
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ if (fgets (buf, l0 + buf_size + 1, stdin) != buf)
+ FAIL ();
+ CHK_FAIL_END
+#endif
+
+ rewind (stdin);
+
+#ifdef _GNU_SOURCE
+ if (fgets_unlocked (buf, buf_size, stdin) != buf
+ || memcmp (buf, "abcdefgh\n", 10))
+ FAIL ();
+ if (fgets_unlocked (buf, buf_size, stdin) != buf
+ || memcmp (buf, "ABCDEFGHI", 10))
+ FAIL ();
+
+ rewind (stdin);
+
+ if (fgets_unlocked (buf, l0 + buf_size, stdin) != buf
+ || memcmp (buf, "abcdefgh\n", 10))
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ if (fgets_unlocked (buf, buf_size + 1, stdin) != buf)
+ FAIL ();
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ if (fgets_unlocked (buf, l0 + buf_size + 1, stdin) != buf)
+ FAIL ();
+ CHK_FAIL_END
+#endif
+
+ rewind (stdin);
+#endif
+
+ if (fread (buf, 1, buf_size, stdin) != buf_size
+ || memcmp (buf, "abcdefgh\nA", 10))
+ FAIL ();
+ if (fread (buf, buf_size, 1, stdin) != 1
+ || memcmp (buf, "BCDEFGHI\na", 10))
+ FAIL ();
+
+ rewind (stdin);
+
+ if (fread (buf, l0 + 1, buf_size, stdin) != buf_size
+ || memcmp (buf, "abcdefgh\nA", 10))
+ FAIL ();
+ if (fread (buf, buf_size, l0 + 1, stdin) != 1
+ || memcmp (buf, "BCDEFGHI\na", 10))
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ if (fread (buf, 1, buf_size + 1, stdin) != buf_size + 1)
+ FAIL ();
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ if (fread (buf, buf_size + 1, l0 + 1, stdin) != 1)
+ FAIL ();
+ CHK_FAIL_END
+#endif
+
+ rewind (stdin);
+
+ if (fread_unlocked (buf, 1, buf_size, stdin) != buf_size
+ || memcmp (buf, "abcdefgh\nA", 10))
+ FAIL ();
+ if (fread_unlocked (buf, buf_size, 1, stdin) != 1
+ || memcmp (buf, "BCDEFGHI\na", 10))
+ FAIL ();
+
+ rewind (stdin);
+
+ if (fread_unlocked (buf, 1, 4, stdin) != 4
+ || memcmp (buf, "abcdFGHI\na", 10))
+ FAIL ();
+ if (fread_unlocked (buf, 4, 1, stdin) != 1
+ || memcmp (buf, "efghFGHI\na", 10))
+ FAIL ();
+
+ rewind (stdin);
+
+ if (fread_unlocked (buf, l0 + 1, buf_size, stdin) != buf_size
+ || memcmp (buf, "abcdefgh\nA", 10))
+ FAIL ();
+ if (fread_unlocked (buf, buf_size, l0 + 1, stdin) != 1
+ || memcmp (buf, "BCDEFGHI\na", 10))
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ if (fread_unlocked (buf, 1, buf_size + 1, stdin) != buf_size + 1)
+ FAIL ();
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ if (fread_unlocked (buf, buf_size + 1, l0 + 1, stdin) != 1)
+ FAIL ();
+ CHK_FAIL_END
+#endif
+
+ lseek (fileno (stdin), 0, SEEK_SET);
+
+ if (read (fileno (stdin), buf, buf_size - 1) != buf_size - 1
+ || memcmp (buf, "abcdefgh\n", 9))
+ FAIL ();
+ if (read (fileno (stdin), buf, buf_size - 1) != buf_size - 1
+ || memcmp (buf, "ABCDEFGHI", 9))
+ FAIL ();
+
+ lseek (fileno (stdin), 0, SEEK_SET);
+
+ if (read (fileno (stdin), buf, l0 + buf_size - 1) != buf_size - 1
+ || memcmp (buf, "abcdefgh\n", 9))
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ if (read (fileno (stdin), buf, buf_size + 1) != buf_size + 1)
+ FAIL ();
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ if (read (fileno (stdin), buf, l0 + buf_size + 1) != buf_size + 1)
+ FAIL ();
+ CHK_FAIL_END
+#endif
+
+ if (pread (fileno (stdin), buf, buf_size - 1, buf_size - 2)
+ != buf_size - 1
+ || memcmp (buf, "\nABCDEFGH", 9))
+ FAIL ();
+ if (pread (fileno (stdin), buf, buf_size - 1, 0) != buf_size - 1
+ || memcmp (buf, "abcdefgh\n", 9))
+ FAIL ();
+ if (pread (fileno (stdin), buf, l0 + buf_size - 1, buf_size - 3)
+ != buf_size - 1
+ || memcmp (buf, "h\nABCDEFG", 9))
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ if (pread (fileno (stdin), buf, buf_size + 1, 2 * buf_size)
+ != buf_size + 1)
+ FAIL ();
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ if (pread (fileno (stdin), buf, l0 + buf_size + 1, 2 * buf_size)
+ != buf_size + 1)
+ FAIL ();
+ CHK_FAIL_END
+#endif
+
+ if (pread64 (fileno (stdin), buf, buf_size - 1, buf_size - 2)
+ != buf_size - 1
+ || memcmp (buf, "\nABCDEFGH", 9))
+ FAIL ();
+ if (pread64 (fileno (stdin), buf, buf_size - 1, 0) != buf_size - 1
+ || memcmp (buf, "abcdefgh\n", 9))
+ FAIL ();
+ if (pread64 (fileno (stdin), buf, l0 + buf_size - 1, buf_size - 3)
+ != buf_size - 1
+ || memcmp (buf, "h\nABCDEFG", 9))
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ if (pread64 (fileno (stdin), buf, buf_size + 1, 2 * buf_size)
+ != buf_size + 1)
+ FAIL ();
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ if (pread64 (fileno (stdin), buf, l0 + buf_size + 1, 2 * buf_size)
+ != buf_size + 1)
+ FAIL ();
+ CHK_FAIL_END
+#endif
+
+ if (freopen (temp_filename, "r", stdin) == NULL)
+ {
+ puts ("could not open temporary file");
+ exit (1);
+ }
+
+ if (fseek (stdin, 9 + 10 + 11, SEEK_SET))
+ {
+ puts ("could not seek in test file");
+ exit (1);
+ }
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ if (gets (buf) != buf)
+ FAIL ();
+ CHK_FAIL_END
+#endif
+
+ /* Check whether missing N$ formats are detected. */
+ CHK_FAIL2_START
+ printf ("%3$d\n", 1, 2, 3, 4);
+ CHK_FAIL2_END
+
+ CHK_FAIL2_START
+ fprintf (stdout, "%3$d\n", 1, 2, 3, 4);
+ CHK_FAIL2_END
+
+ CHK_FAIL2_START
+ sprintf (buf, "%3$d\n", 1, 2, 3, 4);
+ CHK_FAIL2_END
+
+ CHK_FAIL2_START
+ snprintf (buf, buf_size, "%3$d\n", 1, 2, 3, 4);
+ CHK_FAIL2_END
+
+ int sp[2];
+ if (socketpair (PF_UNIX, SOCK_STREAM, 0, sp))
+ FAIL ();
+ else
+ {
+ const char *sendstr = "abcdefgh\nABCDEFGH\n0123456789\n";
+ if ((size_t) send (sp[0], sendstr, strlen (sendstr), 0)
+ != strlen (sendstr))
+ FAIL ();
+
+ char recvbuf[12];
+ if (recv (sp[1], recvbuf, sizeof recvbuf, MSG_PEEK)
+ != sizeof recvbuf
+ || memcmp (recvbuf, sendstr, sizeof recvbuf) != 0)
+ FAIL ();
+
+ if (recv (sp[1], recvbuf + 6, l0 + sizeof recvbuf - 7, MSG_PEEK)
+ != sizeof recvbuf - 7
+ || memcmp (recvbuf + 6, sendstr, sizeof recvbuf - 7) != 0)
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ if (recv (sp[1], recvbuf + 1, sizeof recvbuf, MSG_PEEK)
+ != sizeof recvbuf)
+ FAIL ();
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ if (recv (sp[1], recvbuf + 4, l0 + sizeof recvbuf - 3, MSG_PEEK)
+ != sizeof recvbuf - 3)
+ FAIL ();
+ CHK_FAIL_END
+#endif
+
+ socklen_t sl;
+ struct sockaddr_un sa_un;
+
+ sl = sizeof (sa_un);
+ if (recvfrom (sp[1], recvbuf, sizeof recvbuf, MSG_PEEK,
+ (struct sockaddr *) &sa_un, &sl)
+ != sizeof recvbuf
+ || memcmp (recvbuf, sendstr, sizeof recvbuf) != 0)
+ FAIL ();
+
+ sl = sizeof (sa_un);
+ if (recvfrom (sp[1], recvbuf + 6, l0 + sizeof recvbuf - 7, MSG_PEEK,
+ (struct sockaddr *) &sa_un, &sl) != sizeof recvbuf - 7
+ || memcmp (recvbuf + 6, sendstr, sizeof recvbuf - 7) != 0)
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ sl = sizeof (sa_un);
+ if (recvfrom (sp[1], recvbuf + 1, sizeof recvbuf, MSG_PEEK,
+ (struct sockaddr *) &sa_un, &sl) != sizeof recvbuf)
+ FAIL ();
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ sl = sizeof (sa_un);
+ if (recvfrom (sp[1], recvbuf + 4, l0 + sizeof recvbuf - 3, MSG_PEEK,
+ (struct sockaddr *) &sa_un, &sl) != sizeof recvbuf - 3)
+ FAIL ();
+ CHK_FAIL_END
+#endif
+
+ close (sp[0]);
+ close (sp[1]);
+ }
+
+ char fname[] = "/tmp/tst-chk1-dir-XXXXXX\0foo";
+ char *enddir = strchr (fname, '\0');
+ if (mkdtemp (fname) == NULL)
+ {
+ printf ("mkdtemp failed: %m\n");
+ return 1;
+ }
+ *enddir = '/';
+ if (symlink ("bar", fname) != 0)
+ FAIL ();
+
+ char readlinkbuf[4];
+ if (readlink (fname, readlinkbuf, 4) != 3
+ || memcmp (readlinkbuf, "bar", 3) != 0)
+ FAIL ();
+ if (readlink (fname, readlinkbuf + 1, l0 + 3) != 3
+ || memcmp (readlinkbuf, "bbar", 4) != 0)
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ if (readlink (fname, readlinkbuf + 2, l0 + 3) != 3)
+ FAIL ();
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ if (readlink (fname, readlinkbuf + 3, 4) != 3)
+ FAIL ();
+ CHK_FAIL_END
+#endif
+
+ int tmpfd = open ("/tmp", O_RDONLY | O_DIRECTORY);
+ if (tmpfd < 0)
+ FAIL ();
+
+ if (readlinkat (tmpfd, fname + sizeof ("/tmp/") - 1, readlinkbuf, 4) != 3
+ || memcmp (readlinkbuf, "bar", 3) != 0)
+ FAIL ();
+ if (readlinkat (tmpfd, fname + sizeof ("/tmp/") - 1, readlinkbuf + 1,
+ l0 + 3) != 3
+ || memcmp (readlinkbuf, "bbar", 4) != 0)
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ if (readlinkat (tmpfd, fname + sizeof ("/tmp/") - 1, readlinkbuf + 2,
+ l0 + 3) != 3)
+ FAIL ();
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ if (readlinkat (tmpfd, fname + sizeof ("/tmp/") - 1, readlinkbuf + 3,
+ 4) != 3)
+ FAIL ();
+ CHK_FAIL_END
+#endif
+
+ close (tmpfd);
+
+ char *cwd1 = getcwd (NULL, 0);
+ if (cwd1 == NULL)
+ FAIL ();
+
+ char *cwd2 = getcwd (NULL, 250);
+ if (cwd2 == NULL)
+ FAIL ();
+
+ if (cwd1 && cwd2)
+ {
+ if (strcmp (cwd1, cwd2) != 0)
+ FAIL ();
+
+ *enddir = '\0';
+ if (chdir (fname))
+ FAIL ();
+
+ char *cwd3 = getcwd (NULL, 0);
+ if (cwd3 == NULL)
+ FAIL ();
+ if (strcmp (fname, cwd3) != 0)
+ printf ("getcwd after chdir is '%s' != '%s',"
+ "get{c,}wd tests skipped\n", cwd3, fname);
+ else
+ {
+ char getcwdbuf[sizeof fname - 3];
+
+ char *cwd4 = getcwd (getcwdbuf, sizeof getcwdbuf);
+ if (cwd4 != getcwdbuf
+ || strcmp (getcwdbuf, fname) != 0)
+ FAIL ();
+
+ cwd4 = getcwd (getcwdbuf + 1, l0 + sizeof getcwdbuf - 1);
+ if (cwd4 != getcwdbuf + 1
+ || getcwdbuf[0] != fname[0]
+ || strcmp (getcwdbuf + 1, fname) != 0)
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ if (getcwd (getcwdbuf + 2, l0 + sizeof getcwdbuf)
+ != getcwdbuf + 2)
+ FAIL ();
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ if (getcwd (getcwdbuf + 2, sizeof getcwdbuf)
+ != getcwdbuf + 2)
+ FAIL ();
+ CHK_FAIL_END
+#endif
+
+ if (getwd (getcwdbuf) != getcwdbuf
+ || strcmp (getcwdbuf, fname) != 0)
+ FAIL ();
+
+ if (getwd (getcwdbuf + 1) != getcwdbuf + 1
+ || strcmp (getcwdbuf + 1, fname) != 0)
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ if (getwd (getcwdbuf + 2) != getcwdbuf + 2)
+ FAIL ();
+ CHK_FAIL_END
+#endif
+ }
+
+ if (chdir (cwd1) != 0)
+ FAIL ();
+ free (cwd3);
+ }
+
+ free (cwd1);
+ free (cwd2);
+ *enddir = '/';
+ if (unlink (fname) != 0)
+ FAIL ();
+
+ *enddir = '\0';
+ if (rmdir (fname) != 0)
+ FAIL ();
+
+
+#if PATH_MAX > 0
+ char largebuf[PATH_MAX];
+ char *realres = realpath (".", largebuf);
+ if (realres != largebuf)
+ FAIL ();
+
+# if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ char realbuf[1];
+ realres = realpath (".", realbuf);
+ if (realres != realbuf)
+ FAIL ();
+ CHK_FAIL_END
+# endif
+#endif
+
+ if (setlocale (LC_ALL, "de_DE.UTF-8") != NULL)
+ {
+ assert (MB_CUR_MAX <= 10);
+
+ /* First a simple test. */
+ char enough[10];
+ if (wctomb (enough, L'A') != 1)
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ /* We know the wchar_t encoding is ISO 10646. So pick a
+ character which has a multibyte representation which does not
+ fit. */
+ CHK_FAIL_START
+ char smallbuf[2];
+ if (wctomb (smallbuf, L'\x100') != 2)
+ FAIL ();
+ CHK_FAIL_END
+#endif
+
+ mbstate_t s;
+ memset (&s, '\0', sizeof (s));
+ if (wcrtomb (enough, L'D', &s) != 1 || enough[0] != 'D')
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ /* We know the wchar_t encoding is ISO 10646. So pick a
+ character which has a multibyte representation which does not
+ fit. */
+ CHK_FAIL_START
+ char smallbuf[2];
+ if (wcrtomb (smallbuf, L'\x100', &s) != 2)
+ FAIL ();
+ CHK_FAIL_END
+#endif
+
+ wchar_t wenough[10];
+ memset (&s, '\0', sizeof (s));
+ const char *cp = "A";
+ if (mbsrtowcs (wenough, &cp, 10, &s) != 1
+ || wcscmp (wenough, L"A") != 0)
+ FAIL ();
+
+ cp = "BC";
+ if (mbsrtowcs (wenough, &cp, l0 + 10, &s) != 2
+ || wcscmp (wenough, L"BC") != 0)
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ wchar_t wsmallbuf[2];
+ cp = "ABC";
+ mbsrtowcs (wsmallbuf, &cp, 10, &s);
+ CHK_FAIL_END
+#endif
+
+ /* Bug 29030 regression check */
+ cp = "HelloWorld";
+ if (mbsrtowcs (NULL, &cp, (size_t)-1, &s) != 10)
+ FAIL ();
+
+ cp = "A";
+ if (mbstowcs (wenough, cp, 10) != 1
+ || wcscmp (wenough, L"A") != 0)
+ FAIL ();
+
+ cp = "DEF";
+ if (mbstowcs (wenough, cp, l0 + 10) != 3
+ || wcscmp (wenough, L"DEF") != 0)
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ wchar_t wsmallbuf[2];
+ cp = "ABC";
+ mbstowcs (wsmallbuf, cp, 10);
+ CHK_FAIL_END
+#endif
+
+ memset (&s, '\0', sizeof (s));
+ cp = "ABC";
+ wcscpy (wenough, L"DEF");
+ if (mbsnrtowcs (wenough, &cp, 1, 10, &s) != 1
+ || wcscmp (wenough, L"AEF") != 0)
+ FAIL ();
+
+ cp = "IJ";
+ if (mbsnrtowcs (wenough, &cp, 1, l0 + 10, &s) != 1
+ || wcscmp (wenough, L"IEF") != 0)
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ wchar_t wsmallbuf[2];
+ cp = "ABC";
+ mbsnrtowcs (wsmallbuf, &cp, 3, 10, &s);
+ CHK_FAIL_END
+#endif
+
+ memset (&s, '\0', sizeof (s));
+ const wchar_t *wcp = L"A";
+ if (wcsrtombs (enough, &wcp, 10, &s) != 1
+ || strcmp (enough, "A") != 0)
+ FAIL ();
+
+ wcp = L"BC";
+ if (wcsrtombs (enough, &wcp, l0 + 10, &s) != 2
+ || strcmp (enough, "BC") != 0)
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ char smallbuf[2];
+ wcp = L"ABC";
+ wcsrtombs (smallbuf, &wcp, 10, &s);
+ CHK_FAIL_END
+#endif
+
+ memset (enough, 'Z', sizeof (enough));
+ wcp = L"EF";
+ if (wcstombs (enough, wcp, 10) != 2
+ || strcmp (enough, "EF") != 0)
+ FAIL ();
+
+ wcp = L"G";
+ if (wcstombs (enough, wcp, l0 + 10) != 1
+ || strcmp (enough, "G") != 0)
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ char smallbuf[2];
+ wcp = L"ABC";
+ wcstombs (smallbuf, wcp, 10);
+ CHK_FAIL_END
+#endif
+
+ memset (&s, '\0', sizeof (s));
+ wcp = L"AB";
+ if (wcsnrtombs (enough, &wcp, 1, 10, &s) != 1
+ || strcmp (enough, "A") != 0)
+ FAIL ();
+
+ wcp = L"BCD";
+ if (wcsnrtombs (enough, &wcp, 1, l0 + 10, &s) != 1
+ || strcmp (enough, "B") != 0)
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ char smallbuf[2];
+ wcp = L"ABC";
+ wcsnrtombs (smallbuf, &wcp, 3, 10, &s);
+ CHK_FAIL_END
+#endif
+ }
+ else
+ {
+ puts ("cannot set locale");
+ ret = 1;
+ }
+
+ int fd;
+
+#ifdef _GNU_SOURCE
+ fd = posix_openpt (O_RDWR);
+ if (fd != -1)
+ {
+ char enough[1000];
+ if (ptsname_r (fd, enough, sizeof (enough)) != 0)
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ char smallbuf[2];
+ if (ptsname_r (fd, smallbuf, sizeof (smallbuf) + 1) == 0)
+ FAIL ();
+ CHK_FAIL_END
+#endif
+ close (fd);
+ }
+#endif
+
+#if PATH_MAX > 0
+ confstr (_CS_GNU_LIBC_VERSION, largebuf, sizeof (largebuf));
+# if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ char smallbuf[1];
+ confstr (_CS_GNU_LIBC_VERSION, smallbuf, sizeof (largebuf));
+ CHK_FAIL_END
+# endif
+#endif
+
+ gid_t grpslarge[5];
+ int ngr = getgroups (5, grpslarge);
+ asm volatile ("" : : "r" (ngr));
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ char smallbuf[1];
+ ngr = getgroups (5, (gid_t *) smallbuf);
+ asm volatile ("" : : "r" (ngr));
+ CHK_FAIL_END
+#endif
+
+ fd = open (_PATH_TTY, O_RDONLY);
+ if (fd != -1)
+ {
+ char enough[1000];
+ if (ttyname_r (fd, enough, sizeof (enough)) != 0)
+ FAIL ();
+
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ char smallbuf[2];
+ if (ttyname_r (fd, smallbuf, sizeof (smallbuf) + 1) == 0)
+ FAIL ();
+ CHK_FAIL_END
+#endif
+ close (fd);
+ }
+
+ char hostnamelarge[1000];
+ gethostname (hostnamelarge, sizeof (hostnamelarge));
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ char smallbuf[1];
+ gethostname (smallbuf, sizeof (hostnamelarge));
+ CHK_FAIL_END
+#endif
+
+ char loginlarge[1000];
+ getlogin_r (loginlarge, sizeof (hostnamelarge));
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ char smallbuf[1];
+ getlogin_r (smallbuf, sizeof (loginlarge));
+ CHK_FAIL_END
+#endif
+
+ char domainnamelarge[1000];
+ int res = getdomainname (domainnamelarge, sizeof (domainnamelarge));
+ asm volatile ("" : : "r" (res));
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ char smallbuf[1];
+ res = getdomainname (smallbuf, sizeof (domainnamelarge));
+ asm volatile ("" : : "r" (res));
+ CHK_FAIL_END
+#endif
+
+ fd_set s;
+ FD_ZERO (&s);
+
+ FD_SET (FD_SETSIZE - 1, &s);
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ FD_SET (FD_SETSIZE, &s);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ FD_SET (l0 + FD_SETSIZE, &s);
+ CHK_FAIL_END
+#endif
+
+ FD_CLR (FD_SETSIZE - 1, &s);
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ FD_CLR (FD_SETSIZE, &s);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ FD_SET (l0 + FD_SETSIZE, &s);
+ CHK_FAIL_END
+#endif
+
+ FD_ISSET (FD_SETSIZE - 1, &s);
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ FD_ISSET (FD_SETSIZE, &s);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ FD_ISSET (l0 + FD_SETSIZE, &s);
+ CHK_FAIL_END
+#endif
+
+ struct pollfd fds[1];
+ fds[0].fd = STDOUT_FILENO;
+ fds[0].events = POLLOUT;
+ poll (fds, 1, 0);
+#if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ poll (fds, 2, 0);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ poll (fds, l0 + 2, 0);
+ CHK_FAIL_END
+#endif
+#ifdef _GNU_SOURCE
+ ppoll (fds, 1, NULL, NULL);
+# if __USE_FORTIFY_LEVEL >= 1
+ CHK_FAIL_START
+ ppoll (fds, 2, NULL, NULL);
+ CHK_FAIL_END
+
+ CHK_FAIL_START
+ ppoll (fds, l0 + 2, NULL, NULL);
+ CHK_FAIL_END
+# endif
+#endif
+
+ return ret;
+}
+++ /dev/null
-#define _FILE_OFFSET_BITS 64
-#include "tst-chk1.c"
+++ /dev/null
-#define _FILE_OFFSET_BITS 64
-#include "tst-chk2.c"
+++ /dev/null
-#define _FILE_OFFSET_BITS 64
-#include "tst-chk3.c"
+++ /dev/null
-#define _FILE_OFFSET_BITS 64
-#include "tst-chk1.c"
+++ /dev/null
-#define _FILE_OFFSET_BITS 64
-#include "tst-chk2.c"
+++ /dev/null
-#define _FILE_OFFSET_BITS 64
-#include "tst-chk3.c"
--- /dev/null
+/* Smoke test to verify that realpath does not cause spurious warnings.
+ Copyright The GNU Toolchain Authors.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <limits.h>
+#include <stdlib.h>
+
+#include <support/check.h>
+#include <support/support.h>
+
+/* Call realpath on "." with a destination buffer of PATH_MAX + 1
+ bytes and check that the returned pointer is that buffer. When
+ PATH_MAX is not defined, nothing is checked. Always returns 0. */
+static int
+do_test (void)
+{
+#ifdef PATH_MAX
+ /* Deliberately one byte larger than PATH_MAX. */
+ char buf[PATH_MAX + 1];
+ char *res = realpath (".", buf);
+ TEST_VERIFY (res == buf);
+#endif
+
+ return 0;
+}
+
+#include <support/test-driver.c>
tststatic4-ENV = $(tststatic-ENV)
tststatic5-ENV = $(tststatic-ENV)
+tests-internal += \
+ tst-dlinfo-phdr \
+ # tests-internal
+
ifneq (,$(CXX))
modules-names += bug-atexit3-lib
else
__dladdr (const void *address, Dl_info *info)
{
#ifdef SHARED
- if (!rtld_active ())
+ if (GLRO (dl_dlfcn_hook) != NULL)
return GLRO (dl_dlfcn_hook)->dladdr (address, info);
#endif
return _dl_addr (address, info, NULL, NULL);
__dladdr1 (const void *address, Dl_info *info, void **extra, int flags)
{
#ifdef SHARED
- if (!rtld_active ())
+ if (GLRO (dl_dlfcn_hook) != NULL)
return GLRO (dl_dlfcn_hook)->dladdr1 (address, info, extra, flags);
#endif
__dlclose (void *handle)
{
#ifdef SHARED
- if (!rtld_active ())
+ if (GLRO (dl_dlfcn_hook) != NULL)
return GLRO (dl_dlfcn_hook)->dlclose (handle);
#endif
__dlerror (void)
{
# ifdef SHARED
- if (!rtld_active ())
+ if (GLRO (dl_dlfcn_hook) != NULL)
return GLRO (dl_dlfcn_hook)->dlerror ();
# endif
segment, or if the calling thread has not allocated a block for it. */
RTLD_DI_TLS_DATA = 10,
- RTLD_DI_MAX = 10
+ /* Treat ARG as const ElfW(Phdr) **, and store the address of the
+ program header array at that location. The dlinfo call returns
+ the number of program headers in the array. */
+ RTLD_DI_PHDR = 11,
+
+ RTLD_DI_MAX = 11
};
void *handle;
int request;
void *arg;
+
+ /* This is the value that is returned from dlinfo if no error is
+ signaled. */
+ int result;
};
static void
{
case RTLD_DI_CONFIGADDR:
default:
+ args->result = -1;
_dl_signal_error (0, NULL, NULL, N_("unsupported dlinfo request"));
break;
*(void **) args->arg = data;
break;
}
+
+ case RTLD_DI_PHDR:
+ *(const ElfW(Phdr) **) args->arg = l->l_phdr;
+ args->result = l->l_phnum;
+ break;
}
}
dlinfo_implementation (void *handle, int request, void *arg)
{
struct dlinfo_args args = { handle, request, arg };
- return _dlerror_run (&dlinfo_doit, &args) ? -1 : 0;
+ _dlerror_run (&dlinfo_doit, &args);
+ return args.result;
}
#ifdef SHARED
int
___dlinfo (void *handle, int request, void *arg)
{
- if (!rtld_active ())
+ if (GLRO (dl_dlfcn_hook) != NULL)
return GLRO (dl_dlfcn_hook)->dlinfo (handle, request, arg);
else
return dlinfo_implementation (handle, request, arg);
void *
___dlmopen (Lmid_t nsid, const char *file, int mode)
{
- if (!rtld_active ())
+ if (GLRO (dl_dlfcn_hook) != NULL)
return GLRO (dl_dlfcn_hook)->dlmopen (nsid, file, mode, RETURN_ADDRESS (0));
else
return dlmopen_implementation (nsid, file, mode, RETURN_ADDRESS (0));
void *
___dlopen (const char *file, int mode)
{
- if (!rtld_active ())
+ if (GLRO (dl_dlfcn_hook) != NULL)
return GLRO (dl_dlfcn_hook)->dlopen (file, mode, RETURN_ADDRESS (0));
else
return dlopen_implementation (file, mode, RETURN_ADDRESS (0));
void *
__dlopen (const char *file, int mode, void *dl_caller)
{
- return dlopen_implementation (file, mode, RETURN_ADDRESS (0));
+ return dlopen_implementation (file, mode, dl_caller);
}
void *
mode |= RTLD_LAZY;
args.mode = mode;
- if (!rtld_active ())
+ if (GLRO (dl_dlfcn_hook) != NULL)
return GLRO (dl_dlfcn_hook)->dlopen (file, mode, RETURN_ADDRESS (0));
return _dlerror_run (dlopen_doit, &args) ? NULL : args.new;
void *
___dlsym (void *handle, const char *name)
{
- if (!rtld_active ())
+ if (GLRO (dl_dlfcn_hook) != NULL)
return GLRO (dl_dlfcn_hook)->dlsym (handle, name, RETURN_ADDRESS (0));
else
return dlsym_implementation (handle, name, RETURN_ADDRESS (0));
void *
___dlvsym (void *handle, const char *name, const char *version)
{
- if (!rtld_active ())
+ if (GLRO (dl_dlfcn_hook) != NULL)
return GLRO (dl_dlfcn_hook)->dlvsym (handle, name, version,
RETURN_ADDRESS (0));
else
--- /dev/null
+/* Test for dlinfo (RTLD_DI_PHDR).
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <dlfcn.h>
+#include <link.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/auxv.h>
+
+#include <support/check.h>
+#include <support/xdlfcn.h>
+
+/* Used to verify that the program header array appears as expected
+ among the dl_iterate_phdr callback invocations. do_test fills in
+ L, PHDR and PHNUM and clears FOUND before calling dl_iterate_phdr;
+ dlip_callback sets FOUND when it sees an object whose load address
+ equals l->l_addr. */
+
+struct dlip_callback_args
+{
+ struct link_map *l; /* l->l_addr is used to find the object. */
+ const ElfW(Phdr) *phdr; /* Expected program header pointer. */
+ int phnum; /* Expected program header count. */
+ bool found; /* True if l->l_addr has been found. */
+};
+
+/* Callback for dl_iterate_phdr.  CLOSURE points to a struct
+   dlip_callback_args.  SIZE is compared against the size of *DLPI
+   for every object.  For the object whose load address equals
+   args->l->l_addr, verify that it has not been seen before and that
+   its program header pointer and count match the expected values.
+   Returns 0 unconditionally.  */
+static int
+dlip_callback (struct dl_phdr_info *dlpi, size_t size, void *closure)
+{
+  struct dlip_callback_args *args = closure;
+
+  TEST_COMPARE (sizeof (*dlpi), size);
+
+  /* A different load address means this is not the object being
+     checked, so there is nothing further to compare.  */
+  if (dlpi->dlpi_addr != args->l->l_addr)
+    return 0;
+
+  TEST_VERIFY (!args->found);
+  args->found = true;
+  TEST_VERIFY (args->phdr == dlpi->dlpi_phdr);
+  TEST_COMPARE (args->phnum, dlpi->dlpi_phnum);
+  return 0;
+}
+
+/* Walk the list of link maps starting at _r_debug.r_map. For each
+ one, call dlinfo (RTLD_DI_PHDR) and check that the returned program
+ header pointer and count agree with the link map's own l_phdr and
+ l_phnum, with what dl_iterate_phdr reports for the same load
+ address, and (for the map with no predecessor) with getauxval.
+ Always returns 0; the individual checks use the TEST_* macros. */
+static int
+do_test (void)
+{
+ /* Avoid a copy relocation. */
+ struct r_debug *debug = xdlsym (RTLD_DEFAULT, "_r_debug");
+ struct link_map *l = (struct link_map *) debug->r_map;
+ TEST_VERIFY_EXIT (l != NULL);
+
+ do
+ {
+ printf ("info: checking link map %p (%p) for \"%s\"\n",
+ l, l->l_phdr, l->l_name);
+
+ /* Cause dlerror () to return an error message. */
+ dlsym (RTLD_DEFAULT, "does-not-exist");
+
+ /* Use the extension that link maps are valid dlopen handles. */
+ const ElfW(Phdr) *phdr;
+ int phnum = dlinfo (l, RTLD_DI_PHDR, &phdr);
+ /* The return value is used as the program header count below, so
+ it must not be negative. */
+ TEST_VERIFY (phnum >= 0);
+ /* Verify that the error message has been cleared. */
+ TEST_COMPARE_STRING (dlerror (), NULL);
+
+ /* The dlinfo result must match the link map fields directly. */
+ TEST_VERIFY (phdr == l->l_phdr);
+ TEST_COMPARE (phnum, l->l_phnum);
+
+ /* Check that we can find PT_DYNAMIC among the array. */
+ {
+ bool dynamic_found = false;
+ for (int i = 0; i < phnum; ++i)
+ if (phdr[i].p_type == PT_DYNAMIC)
+ {
+ dynamic_found = true;
+ /* l_ld must equal the load address plus the segment's
+ p_vaddr. */
+ TEST_COMPARE ((ElfW(Addr)) l->l_ld, l->l_addr + phdr[i].p_vaddr);
+ }
+ TEST_VERIFY (dynamic_found);
+ }
+
+ /* Check that dl_iterate_phdr finds the link map with the same
+ program headers. */
+ {
+ struct dlip_callback_args args =
+ {
+ .l = l,
+ .phdr = phdr,
+ .phnum = phnum,
+ .found = false,
+ };
+ TEST_COMPARE (dl_iterate_phdr (dlip_callback, &args), 0);
+ /* dlip_callback sets found when it sees a matching l_addr. */
+ TEST_VERIFY (args.found);
+ }
+
+ if (l->l_prev == NULL)
+ {
+ /* This is the executable, so the information is also
+ available via getauxval. */
+ TEST_COMPARE_STRING (l->l_name, "");
+ TEST_VERIFY (phdr == (const ElfW(Phdr) *) getauxval (AT_PHDR));
+ TEST_COMPARE (phnum, getauxval (AT_PHNUM));
+ }
+
+ l = l->l_next;
+ }
+ while (l != NULL);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
include ../Makeconfig
-headers = elf.h bits/elfclass.h link.h bits/link.h
-routines = $(all-dl-routines) dl-support dl-iteratephdr \
- dl-addr dl-addr-obj enbl-secure dl-profstub \
- dl-origin dl-libc dl-sym dl-sysdep dl-error \
- dl-reloc-static-pie libc_early_init rtld_static_init
+headers = \
+ bits/elfclass.h \
+ bits/link.h \
+ bits/link_lavcurrent.h \
+ elf.h \
+ link.h \
+ # headers
+
+routines = \
+ $(all-dl-routines) \
+ dl-addr \
+ dl-addr-obj \
+ dl-early_allocate \
+ dl-error \
+ dl-iteratephdr \
+ dl-libc \
+ dl-origin \
+ dl-profstub \
+ dl-reloc-static-pie \
+ dl-support \
+ dl-sym \
+ dl-sysdep \
+ enbl-secure \
+ libc_early_init \
+ rtld_static_init \
+ # routines
# The core dynamic linking functions are in libc for the static and
# profiled libraries.
-dl-routines = $(addprefix dl-,load lookup object reloc deps \
- runtime init fini debug misc \
- version profile tls origin scope \
- execstack open close trampoline \
- exception sort-maps lookup-direct \
- call-libc-early-init write \
- thread_gscope_wait tls_init_tp)
+dl-routines = \
+ dl-call-libc-early-init \
+ dl-close \
+ dl-debug \
+ dl-deps \
+ dl-exception \
+ dl-execstack \
+ dl-fini \
+ dl-init \
+ dl-load \
+ dl-lookup \
+ dl-lookup-direct \
+ dl-misc \
+ dl-object \
+ dl-open \
+ dl-origin \
+ dl-profile \
+ dl-reloc \
+ dl-runtime \
+ dl-scope \
+ dl-sort-maps \
+ dl-thread_gscope_wait \
+ dl-tls \
+ dl-tls_init_tp \
+ dl-trampoline \
+ dl-version \
+ dl-write \
+ # dl-routines
+
ifeq (yes,$(use-ldconfig))
dl-routines += dl-cache
endif
all-dl-routines = $(dl-routines) $(sysdep-dl-routines)
# But they are absent from the shared libc, because that code is in ld.so.
-elide-routines.os = $(all-dl-routines) dl-support enbl-secure dl-origin \
- dl-sysdep dl-exception dl-reloc-static-pie \
- thread_gscope_wait rtld_static_init
+elide-routines.os = \
+ $(all-dl-routines) \
+ dl-early_allocate \
+ dl-exception \
+ dl-origin \
+ dl-reloc-static-pie \
+ dl-support \
+ dl-sysdep \
+ enbl-secure \
+ rtld_static_init \
+ thread_gscope_wait \
+ # elide-routines.os
# ld.so uses those routines, plus some special stuff for being the program
# interpreter and operating independent of libc.
-rtld-routines = rtld $(all-dl-routines) dl-sysdep dl-environ dl-minimal \
- dl-error-minimal dl-conflict dl-hwcaps dl-hwcaps_split dl-hwcaps-subdirs \
- dl-usage dl-diagnostics dl-diagnostics-kernel dl-diagnostics-cpu \
- dl-mutex
+rtld-routines = \
+ $(all-dl-routines) \
+ dl-audit \
+ dl-compat \
+ dl-conflict \
+ dl-diagnostics \
+ dl-diagnostics-cpu \
+ dl-diagnostics-kernel \
+ dl-environ \
+ dl-error-minimal \
+ dl-hwcaps \
+ dl-hwcaps-subdirs \
+ dl-hwcaps_split \
+ dl-minimal \
+ dl-mutex \
+ dl-sysdep \
+ dl-usage \
+ rtld \
+ # rtld-routines
+
all-rtld-routines = $(rtld-routines) $(sysdep-rtld-routines)
CFLAGS-dl-runtime.c += -fexceptions -fasynchronous-unwind-tables
CFLAGS-rtld.c += -fno-tree-loop-distribute-patterns
endif
+ifeq (yes,$(have-loop-to-function))
+# Likewise, during static library startup, memset is not yet available.
+CFLAGS-dl-support.c = -fno-tree-loop-distribute-patterns
+endif
+
# Compile rtld itself without stack protection.
# Also compile all routines in the static library that are elided from
# the shared libc because they are in libc.a in the same way.
endif
ifeq (yes,$(build-shared))
-extra-objs = $(all-rtld-routines:%=%.os) sofini.os interp.os
-generated += librtld.os dl-allobjs.os ld.so ldd
+extra-objs = \
+ $(all-rtld-routines:%=%.os) \
+ sofini.os \
+ interp.os \
+ # extra-objs
+generated += \
+ dl-allobjs.os \
+ ld.so ldd \
+ librtld.os \
+ # generated
install-others = $(inst_rtlddir)/$(rtld-installed-name)
install-bin-script = ldd
endif
others += ldconfig
install-rootsbin += ldconfig
-ldconfig-modules := cache readlib xmalloc xstrdup chroot_canon static-stubs \
- stringtable
+ldconfig-modules := \
+ cache \
+ chroot_canon \
+ readlib \
+ static-stubs \
+ stringtable \
+ xmalloc \
+ xstrdup \
+ # ldconfig-modules
extra-objs += $(ldconfig-modules:=.o)
others-extras = $(ldconfig-modules)
endif
$(do-install-program)
endif
-tests-static-normal := tst-array1-static tst-array5-static \
- tst-dl-iter-static \
- tst-tlsalign-static tst-tlsalign-extern-static \
- tst-linkall-static tst-env-setuid tst-env-setuid-tunables \
- tst-single_threaded-static tst-single_threaded-pthread-static \
- tst-dst-static tst-getauxval-static
-
-tests-static-internal := tst-tls1-static tst-tls2-static \
- tst-ptrguard1-static tst-stackguard1-static \
- tst-tls1-static-non-pie
+tests-static-normal := \
+ tst-array1-static \
+ tst-array5-static \
+ tst-dl-iter-static \
+ tst-dst-static \
+ tst-env-setuid \
+ tst-env-setuid-tunables \
+ tst-getauxval-static \
+ tst-linkall-static \
+ tst-single_threaded-pthread-static \
+ tst-single_threaded-static \
+ tst-tls-allocation-failure-static \
+ tst-tlsalign-extern-static \
+ tst-tlsalign-static \
+ # tests-static-normal
+
+tests-static-internal := \
+ tst-ptrguard1-static \
+ tst-stackguard1-static \
+ tst-tls1-static \
+ tst-tls1-static-non-pie \
+ tst-tls2-static \
+ # tests-static-internal
CRT-tst-tls1-static-non-pie := $(csu-objpfx)crt1.o
tst-tls1-static-non-pie-no-pie = yes
tests-container := \
- tst-ldconfig-bad-aux-cache \
- tst-ldconfig-ld_so_conf-update
+ tst-ldconfig-bad-aux-cache \
+ tst-ldconfig-ld_so_conf-update \
+ # tests-container
ifeq (no,$(build-hardcoded-path-in-tests))
# This is an ld.so.cache test, and RPATH/RUNPATH in the executable
tests-container += tst-glibc-hwcaps-prepend-cache
endif
-tests := tst-tls9 tst-leaks1 \
- tst-array1 tst-array2 tst-array3 tst-array4 tst-array5 \
- tst-auxv tst-stringtable
-tests-internal := tst-tls1 tst-tls2 $(tests-static-internal)
+tests := \
+ tst-array1 \
+ tst-array2 \
+ tst-array3 \
+ tst-array4 \
+ tst-array5 \
+ tst-auxv \
+ tst-leaks1 \
+ tst-stringtable \
+ tst-tls9 \
+ # tests
+
+tests-internal := \
+ $(tests-static-internal) \
+ tst-tls1 \
+ tst-tls2 \
+ # tests-internal
+
tests-static := $(tests-static-normal) $(tests-static-internal)
ifeq (yes,$(build-shared))
-tests-static += tst-tls9-static tst-single_threaded-static-dlopen
+tests-static += \
+ tst-single_threaded-static-dlopen \
+ tst-tls9-static \
+ # tests-static
+
static-dlopen-environment = \
LD_LIBRARY_PATH=$(ld-library-path):$(common-objpfx)dlfcn
tst-tls9-static-ENV = $(static-dlopen-environment)
tst-single_threaded-static-dlopen-ENV = $(static-dlopen-environment)
-tests += restest1 preloadtest loadfail multiload origtest resolvfail \
- constload1 order noload filter \
- reldep reldep2 reldep3 reldep4 nodelete nodelete2 \
- nodlopen nodlopen2 lateglobal initfirst global \
- restest2 next dblload dblunload reldep5 reldep6 reldep7 reldep8 \
- tst-tls4 tst-tls5 \
- tst-tls10 tst-tls11 tst-tls12 tst-tls13 tst-tls14 tst-tls15 \
- tst-tls16 tst-tls17 tst-tls18 tst-tls19 tst-tls-dlinfo \
- tst-align tst-align2 \
- tst-dlmodcount tst-dlopenrpath tst-deep1 \
- tst-dlmopen1 tst-dlmopen3 \
- unload3 unload4 unload5 unload6 unload7 unload8 tst-global1 order2 \
- tst-audit1 tst-audit2 tst-audit8 tst-audit9 \
- tst-addr1 tst-thrlock \
- tst-unique1 tst-unique2 $(if $(CXX),tst-unique3 tst-unique4 \
- tst-nodelete tst-dlopen-nodelete-reloc) \
- tst-initorder tst-initorder2 tst-relsort1 tst-null-argv \
- tst-tlsalign tst-tlsalign-extern tst-nodelete-opened \
- tst-nodelete2 tst-audit11 tst-audit12 tst-dlsym-error tst-noload \
- tst-latepthread tst-tls-manydynamic tst-nodelete-dlclose \
- tst-debug1 tst-main1 tst-absolute-sym tst-absolute-zero tst-big-note \
- tst-unwind-ctor tst-unwind-main tst-audit13 \
- tst-sonamemove-link tst-sonamemove-dlopen tst-dlopen-tlsmodid \
- tst-dlopen-self tst-auditmany tst-initfinilazyfail tst-dlopenfail \
- tst-dlopenfail-2 \
- tst-filterobj tst-filterobj-dlopen tst-auxobj tst-auxobj-dlopen \
- tst-audit14 tst-audit15 tst-audit16 tst-audit17 \
- tst-single_threaded tst-single_threaded-pthread \
- tst-tls-ie tst-tls-ie-dlmopen argv0test \
- tst-glibc-hwcaps tst-glibc-hwcaps-prepend tst-glibc-hwcaps-mask \
- tst-tls20 tst-tls21 tst-dlmopen-dlerror tst-dlmopen-gethostbyname \
- tst-dl-is_dso
-# reldep9
-tests-internal += loadtest unload unload2 circleload1 \
- neededtest neededtest2 neededtest3 neededtest4 \
- tst-tls3 tst-tls6 tst-tls7 tst-tls8 tst-dlmopen2 \
- tst-ptrguard1 tst-stackguard1 \
- tst-create_format1 tst-tls-surplus tst-dl-hwcaps_split
-tests-container += tst-pldd tst-dlopen-tlsmodid-container \
- tst-dlopen-self-container tst-preload-pthread-libc
-test-srcs = tst-pathopt
+tests += \
+ argv0test \
+ constload1 \
+ dblload \
+ dblunload \
+ filter \
+ global \
+ initfirst \
+ lateglobal \
+ loadfail \
+ multiload \
+ next \
+ nodelete \
+ nodelete2 \
+ nodlopen \
+ nodlopen2 \
+ noload \
+ order \
+ order2 \
+ origtest \
+ preloadtest \
+ reldep \
+ reldep2 \
+ reldep3 \
+ reldep4 \
+ reldep5 \
+ reldep6 \
+ reldep7 \
+ reldep8 \
+ resolvfail \
+ restest1 \
+ restest2 \
+ tst-absolute-sym \
+ tst-absolute-zero \
+ tst-addr1 \
+ tst-align \
+ tst-align2 \
+ tst-audit1 \
+ tst-audit2 \
+ tst-audit8 \
+ tst-audit9 \
+ tst-audit11 \
+ tst-audit12 \
+ tst-audit13 \
+ tst-audit14 \
+ tst-audit15 \
+ tst-audit16 \
+ tst-audit17 \
+ tst-audit18 \
+ tst-audit19b \
+ tst-audit20 \
+ tst-audit21 \
+ tst-audit22 \
+ tst-audit23 \
+ tst-audit24a \
+ tst-audit24b \
+ tst-audit24c \
+ tst-audit24d \
+ tst-audit25a \
+ tst-audit25b \
+ tst-audit26 \
+ tst-auditmany \
+ tst-auxobj \
+ tst-auxobj-dlopen \
+ tst-big-note \
+ tst-debug1 \
+ tst-deep1 \
+ tst-dl-is_dso \
+ tst-dlmodcount \
+ tst-dlmopen1 \
+ tst-dlmopen3 \
+ tst-dlmopen-dlerror \
+ tst-dlmopen-gethostbyname \
+ tst-dlopenfail \
+ tst-dlopenfail-2 \
+ tst-dlopenrpath \
+ tst-dlopen-self \
+ tst-dlopen-tlsmodid \
+ tst-dlsym-error \
+ tst-filterobj \
+ tst-filterobj-dlopen \
+ tst-glibc-hwcaps \
+ tst-glibc-hwcaps-mask \
+ tst-glibc-hwcaps-prepend \
+ tst-global1 \
+ tst-initfinilazyfail \
+ tst-initorder \
+ tst-initorder2 \
+ tst-latepthread \
+ tst-main1 \
+ tst-nodelete2 \
+ tst-nodelete-dlclose \
+ tst-nodelete-opened \
+ tst-noload \
+ tst-null-argv \
+ tst-relsort1 \
+ tst-ro-dynamic \
+ tst-single_threaded \
+ tst-single_threaded-pthread \
+ tst-sonamemove-dlopen \
+ tst-sonamemove-link \
+ tst-thrlock \
+ tst-tls10 \
+ tst-tls11 \
+ tst-tls12 \
+ tst-tls13 \
+ tst-tls14 \
+ tst-tls15 \
+ tst-tls16 \
+ tst-tls17 \
+ tst-tls18 \
+ tst-tls19 \
+ tst-tls20 \
+ tst-tls21 \
+ tst-tls4 \
+ tst-tls5 \
+ tst-tlsalign \
+ tst-tlsalign-extern \
+ tst-tls-dlinfo \
+ tst-tls-ie \
+ tst-tls-ie-dlmopen \
+ tst-tls-manydynamic \
+ tst-unique1 \
+ tst-unique2 \
+ tst-unwind-ctor \
+ tst-unwind-main \
+ unload3 \
+ unload4 \
+ unload5 \
+ unload6 \
+ unload7 \
+ unload8 \
+ # tests
+tests-cxx = \
+ tst-dlopen-nodelete-reloc \
+ tst-nodelete \
+ tst-unique3 \
+ tst-unique4 \
+ # tests-cxx
+
+tests += $(if $(CXX),$(tests-cxx))
+tests-internal += \
+ circleload1 \
+ loadtest \
+ neededtest \
+ neededtest2 \
+ neededtest3 \
+ neededtest4 \
+ tst-audit19a \
+ tst-create_format1 \
+ tst-dl-hwcaps_split \
+ tst-dlmopen2 \
+ tst-ptrguard1 \
+ tst-stackguard1 \
+ tst-tls-surplus \
+ tst-tls3 \
+ tst-tls6 \
+ tst-tls7 \
+ tst-tls8 \
+ unload \
+ unload2 \
+ # tests-internal
+
+tests-container += \
+ tst-dlopen-self-container \
+ tst-dlopen-tlsmodid-container \
+ tst-pldd \
+ tst-preload-pthread-libc \
+ # tests-container
+
+test-srcs = \
+ tst-pathopt \
+ # test-srcs
+
selinux-enabled := $(shell cat /selinux/enforce 2> /dev/null)
+
ifneq ($(selinux-enabled),1)
-tests-execstack-yes = tst-execstack tst-execstack-needed tst-execstack-prog
+tests-execstack-yes = \
+ tst-execstack \
+ tst-execstack-needed \
+ tst-execstack-prog \
+ # tests-execstack-yes
endif
endif
+
+tests-special += $(objpfx)tst-relro-ldso.out $(objpfx)tst-relro-libc.out
+$(objpfx)tst-relro-ldso.out: tst-relro-symbols.py $(..)/scripts/glibcelf.py \
+ $(objpfx)ld.so
+ $(PYTHON) tst-relro-symbols.py $(objpfx)ld.so \
+ --required=_rtld_global_ro \
+ > $@ 2>&1; $(evaluate-test)
+# The optional symbols are present in libc only if the architecture has
+# the GLIBC_2.0 symbol set in libc.
+$(objpfx)tst-relro-libc.out: tst-relro-symbols.py $(..)/scripts/glibcelf.py \
+ $(common-objpfx)libc.so
+ $(PYTHON) tst-relro-symbols.py $(common-objpfx)libc.so \
+ --required=_IO_cookie_jumps \
+ --required=_IO_file_jumps \
+ --required=_IO_file_jumps_maybe_mmap \
+ --required=_IO_file_jumps_mmap \
+ --required=_IO_helper_jumps \
+ --required=_IO_mem_jumps \
+ --required=_IO_obstack_jumps \
+ --required=_IO_proc_jumps \
+ --required=_IO_str_chk_jumps \
+ --required=_IO_str_jumps \
+ --required=_IO_strn_jumps \
+ --required=_IO_wfile_jumps \
+ --required=_IO_wfile_jumps_maybe_mmap \
+ --required=_IO_wfile_jumps_mmap \
+ --required=_IO_wmem_jumps \
+ --required=_IO_wstr_jumps \
+ --required=_IO_wstrn_jumps \
+ --optional=_IO_old_cookie_jumps \
+ --optional=_IO_old_file_jumps \
+ --optional=_IO_old_proc_jumps \
+ > $@ 2>&1; $(evaluate-test)
+
tests += $(tests-execstack-$(have-z-execstack))
ifeq ($(run-built-tests),yes)
-tests-special += $(objpfx)tst-leaks1-mem.out \
- $(objpfx)noload-mem.out \
- $(objpfx)tst-ldconfig-X.out $(objpfx)tst-rtld-help.out
+tests-special += \
+ $(objpfx)noload-mem.out \
+ $(objpfx)tst-ldconfig-X.out \
+ $(objpfx)tst-leaks1-mem.out \
+ $(objpfx)tst-rtld-help.out \
+ # tests-special
endif
tlsmod17a-suffixes = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
tlsmod18a-suffixes = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
tst-tls-many-dynamic-modules-dep-bad-suffixes = 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
tst-tls-many-dynamic-modules-dep-bad = \
$(foreach n,$(tst-tls-many-dynamic-modules-dep-bad-suffixes),tst-tls-manydynamic$(n)mod-dep-bad)
-extra-test-objs += $(tlsmod17a-modules:=.os) $(tlsmod18a-modules:=.os) \
- tst-tlsalign-vars.o
-test-extras += tst-tlsmod17a tst-tlsmod18a tst-tlsalign-vars
-modules-names = testobj1 testobj2 testobj3 testobj4 testobj5 testobj6 \
- testobj1_1 failobj constload2 constload3 unloadmod \
- dep1 dep2 dep3 dep4 vismod1 vismod2 vismod3 \
- nodelmod1 nodelmod2 nodelmod3 nodelmod4 \
- nodel2mod1 nodel2mod2 nodel2mod3 \
- nodlopenmod nodlopenmod2 filtmod1 filtmod2 \
- reldepmod1 reldepmod2 reldepmod3 reldepmod4 nextmod1 nextmod2 \
- reldep4mod1 reldep4mod2 reldep4mod3 reldep4mod4 \
- neededobj1 neededobj2 neededobj3 neededobj4 \
- neededobj5 neededobj6 firstobj globalmod1 \
- unload2mod unload2dep ltglobmod1 ltglobmod2 pathoptobj \
- dblloadmod1 dblloadmod2 dblloadmod3 reldepmod5 reldepmod6 \
- reldep6mod0 reldep6mod1 reldep6mod2 reldep6mod3 reldep6mod4 \
- reldep7mod1 reldep7mod2 \
- tst-tlsmod1 tst-tlsmod2 tst-tlsmod3 tst-tlsmod4 \
- tst-tlsmod5 tst-tlsmod6 tst-tlsmod7 tst-tlsmod8 \
- tst-tlsmod9 tst-tlsmod10 tst-tlsmod11 tst-tlsmod12 \
- tst-tlsmod13 tst-tlsmod13a tst-tlsmod14a tst-tlsmod14b \
- tst-tlsmod15a tst-tlsmod15b tst-tlsmod16a tst-tlsmod16b \
- $(tlsmod17a-modules) tst-tlsmod17b $(tlsmod18a-modules) \
- tst-tls19mod1 tst-tls19mod2 tst-tls19mod3 \
- circlemod1 circlemod1a circlemod2 circlemod2a \
- circlemod3 circlemod3a \
- reldep8mod1 reldep8mod2 reldep8mod3 \
- reldep9mod1 reldep9mod2 reldep9mod3 \
- tst-alignmod tst-alignmod2 \
- $(modules-execstack-$(have-z-execstack)) \
- tst-dlopenrpathmod tst-deep1mod1 tst-deep1mod2 tst-deep1mod3 \
- tst-dlmopen1mod tst-auditmod1 \
- unload3mod1 unload3mod2 unload3mod3 unload3mod4 \
- unload4mod1 unload4mod2 unload4mod3 unload4mod4 \
- unload6mod1 unload6mod2 unload6mod3 \
- unload7mod1 unload7mod2 \
- unload8mod1 unload8mod1x unload8mod2 unload8mod3 \
- order2mod1 order2mod2 order2mod3 order2mod4 \
- tst-unique1mod1 tst-unique1mod2 \
- tst-unique2mod1 tst-unique2mod2 \
- tst-auditmod9a tst-auditmod9b \
- $(if $(CXX),tst-unique3lib tst-unique3lib2 tst-unique4lib \
- tst-nodelete-uniquemod tst-nodelete-rtldmod \
- tst-nodelete-zmod \
- tst-dlopen-nodelete-reloc-mod1 \
- tst-dlopen-nodelete-reloc-mod2 \
- tst-dlopen-nodelete-reloc-mod3 \
- tst-dlopen-nodelete-reloc-mod4 \
- tst-dlopen-nodelete-reloc-mod5 \
- tst-dlopen-nodelete-reloc-mod6 \
- tst-dlopen-nodelete-reloc-mod7 \
- tst-dlopen-nodelete-reloc-mod8 \
- tst-dlopen-nodelete-reloc-mod9 \
- tst-dlopen-nodelete-reloc-mod10 \
- tst-dlopen-nodelete-reloc-mod11 \
- tst-dlopen-nodelete-reloc-mod12 \
- tst-dlopen-nodelete-reloc-mod13 \
- tst-dlopen-nodelete-reloc-mod14 \
- tst-dlopen-nodelete-reloc-mod15 \
- tst-dlopen-nodelete-reloc-mod16 \
- tst-dlopen-nodelete-reloc-mod17) \
- tst-initordera1 tst-initorderb1 \
- tst-initordera2 tst-initorderb2 \
- tst-initordera3 tst-initordera4 \
- tst-initorder2a tst-initorder2b tst-initorder2c \
- tst-initorder2d \
- tst-relsort1mod1 tst-relsort1mod2 tst-array2dep \
- tst-array5dep tst-null-argv-lib \
- tst-tlsalign-lib tst-nodelete-opened-lib tst-nodelete2mod \
- tst-audit11mod1 tst-audit11mod2 tst-auditmod11 \
- tst-audit12mod1 tst-audit12mod2 tst-audit12mod3 tst-auditmod12 \
- tst-latepthreadmod $(tst-tls-many-dynamic-modules) \
- $(tst-tls-many-dynamic-modules-dep) \
- $(tst-tls-many-dynamic-modules-dep-bad) \
- tst-nodelete-dlclose-dso tst-nodelete-dlclose-plugin \
- tst-main1mod tst-absolute-sym-lib \
- tst-absolute-zero-lib tst-big-note-lib tst-unwind-ctor-lib \
- tst-audit13mod1 tst-sonamemove-linkmod1 \
- tst-sonamemove-runmod1 tst-sonamemove-runmod2 \
- tst-auditmanymod1 tst-auditmanymod2 tst-auditmanymod3 \
- tst-auditmanymod4 tst-auditmanymod5 tst-auditmanymod6 \
- tst-auditmanymod7 tst-auditmanymod8 tst-auditmanymod9 \
- tst-initlazyfailmod tst-finilazyfailmod \
- tst-dlopenfailmod1 tst-dlopenfaillinkmod tst-dlopenfailmod2 \
- tst-dlopenfailmod3 tst-dlopenfailnodelmod tst-ldconfig-ld-mod \
- tst-filterobj-flt tst-filterobj-aux tst-filterobj-filtee \
- tst-auditlogmod-1 tst-auditlogmod-2 tst-auditlogmod-3 \
- tst-single_threaded-mod1 tst-single_threaded-mod2 \
- tst-single_threaded-mod3 tst-single_threaded-mod4 \
- tst-tls-ie-mod0 tst-tls-ie-mod1 tst-tls-ie-mod2 \
- tst-tls-ie-mod3 tst-tls-ie-mod4 tst-tls-ie-mod5 \
- tst-tls-ie-mod6 libmarkermod1-1 libmarkermod1-2 libmarkermod1-3 \
- libmarkermod2-1 libmarkermod2-2 \
- libmarkermod3-1 libmarkermod3-2 libmarkermod3-3 \
- libmarkermod4-1 libmarkermod4-2 libmarkermod4-3 libmarkermod4-4 \
- tst-tls20mod-bad tst-tls21mod tst-dlmopen-dlerror-mod \
- tst-auxvalmod \
- tst-dlmopen-gethostbyname-mod \
+extra-test-objs += \
+ $(tlsmod17a-modules:=.os) \
+ $(tlsmod18a-modules:=.os) \
+ tst-tlsalign-vars.o \
+ # extra-test-objs
+test-extras += \
+ tst-tlsalign-vars \
+ tst-tlsmod17a \
+ tst-tlsmod18a \
+ # test-extras
+modules-names = \
+ circlemod1 \
+ circlemod1a \
+ circlemod2 \
+ circlemod2a \
+ circlemod3 \
+ circlemod3a \
+ constload2 \
+ constload3 \
+ dblloadmod1 \
+ dblloadmod2 \
+ dblloadmod3 \
+ dep1 \
+ dep2 \
+ dep3 \
+ dep4 \
+ failobj \
+ filtmod1 \
+ filtmod2 \
+ firstobj \
+ globalmod1 \
+ libmarkermod1-1 \
+ libmarkermod1-2 \
+ libmarkermod1-3 \
+ libmarkermod2-1 \
+ libmarkermod2-2 \
+ libmarkermod3-1 \
+ libmarkermod3-2 \
+ libmarkermod3-3 \
+ libmarkermod4-1 \
+ libmarkermod4-2 \
+ libmarkermod4-3 \
+ libmarkermod4-4 \
+ libmarkermod5-1 \
+ libmarkermod5-2 \
+ libmarkermod5-3 \
+ libmarkermod5-4 \
+ libmarkermod5-5 \
+ ltglobmod1 \
+ ltglobmod2 \
+ neededobj1 \
+ neededobj2 \
+ neededobj3 \
+ neededobj4 \
+ neededobj5 \
+ neededobj6 \
+ nextmod1 \
+ nextmod2 \
+ nodel2mod1 \
+ nodel2mod2 \
+ nodel2mod3 \
+ nodelmod1 \
+ nodelmod2 \
+ nodelmod3 \
+ nodelmod4 \
+ nodlopenmod \
+ nodlopenmod2 \
+ order2mod1 \
+ order2mod2 \
+ order2mod3 \
+ order2mod4 \
+ pathoptobj \
+ reldep4mod1 \
+ reldep4mod2 \
+ reldep4mod3 \
+ reldep4mod4 \
+ reldep6mod0 \
+ reldep6mod1 \
+ reldep6mod2 \
+ reldep6mod3 \
+ reldep6mod4 \
+ reldep7mod1 \
+ reldep7mod2 \
+ reldep8mod1 \
+ reldep8mod2 \
+ reldep8mod3 \
+ reldep9mod1 \
+ reldep9mod2 \
+ reldep9mod3 \
+ reldepmod1 \
+ reldepmod2 \
+ reldepmod3 \
+ reldepmod4 \
+ reldepmod5 \
+ reldepmod6 \
+ testobj1 \
+ testobj1_1 \
+ testobj2 \
+ testobj3 \
+ testobj4 \
+ testobj5 \
+ testobj6 \
+ tst-absolute-sym-lib \
+ tst-absolute-zero-lib \
+ tst-alignmod \
+ tst-alignmod2 \
+ tst-array2dep \
+ tst-array5dep \
+ tst-audit11mod1 \
+ tst-audit11mod2 \
+ tst-audit12mod1 \
+ tst-audit12mod2 \
+ tst-audit12mod3 \
+ tst-audit13mod1 \
+ tst-audit18mod \
+ tst-audit19bmod \
+ tst-audit23mod \
+ tst-audit24amod1 \
+ tst-audit24amod2 \
+ tst-audit24bmod1 \
+ tst-audit24bmod2 \
+ tst-audit24dmod1 \
+ tst-audit24dmod2 \
+ tst-audit24dmod3 \
+ tst-audit24dmod4 \
+ tst-audit25mod1 \
+ tst-audit25mod2 \
+ tst-audit25mod3 \
+ tst-audit25mod4 \
+ tst-auditlogmod-1 \
+ tst-auditlogmod-2 \
+ tst-auditlogmod-3 \
+ tst-auditmanymod1 \
+ tst-auditmanymod2 \
+ tst-auditmanymod3 \
+ tst-auditmanymod4 \
+ tst-auditmanymod5 \
+ tst-auditmanymod6 \
+ tst-auditmanymod7 \
+ tst-auditmanymod8 \
+ tst-auditmanymod9 \
+ tst-auditmod1 \
+ tst-auditmod9a \
+ tst-auditmod9b \
+ tst-auditmod11 \
+ tst-auditmod12 \
+ tst-auditmod18 \
+ tst-auditmod19a \
+ tst-auditmod19b \
+ tst-auditmod20 \
+ tst-auditmod21a \
+ tst-auditmod21b \
+ tst-auditmod22 \
+ tst-auditmod23 \
+ tst-auditmod24a \
+ tst-auditmod24b \
+ tst-auditmod24c \
+ tst-auditmod24d \
+ tst-auditmod25 \
+ tst-auditmod26 \
+ tst-auxvalmod \
+ tst-big-note-lib \
+ tst-deep1mod1 \
+ tst-deep1mod2 \
+ tst-deep1mod3 \
+ tst-dlmopen1mod \
+ tst-dlmopen-dlerror-mod \
+ tst-dlmopen-gethostbyname-mod \
+ tst-dlopenfaillinkmod \
+ tst-dlopenfailmod1 \
+ tst-dlopenfailmod2 \
+ tst-dlopenfailmod3 \
+ tst-dlopenfailnodelmod \
+ tst-dlopenrpathmod \
+ tst-filterobj-aux \
+ tst-filterobj-filtee \
+ tst-filterobj-flt \
+ tst-finilazyfailmod \
+ tst-initlazyfailmod \
+ tst-initorder2a \
+ tst-initorder2b \
+ tst-initorder2c \
+ tst-initorder2d \
+ tst-initordera1 \
+ tst-initordera2 \
+ tst-initordera3 \
+ tst-initordera4 \
+ tst-initorderb1 \
+ tst-initorderb2 \
+ tst-latepthreadmod \
+ tst-ldconfig-ld-mod \
+ tst-main1mod \
+ tst-nodelete2mod \
+ tst-nodelete-dlclose-dso \
+ tst-nodelete-dlclose-plugin \
+ tst-nodelete-opened-lib \
+ tst-null-argv-lib \
+ tst-relsort1mod1 \
+ tst-relsort1mod2 \
+ tst-ro-dynamic-mod \
+ tst-single_threaded-mod1 \
+ tst-single_threaded-mod2 \
+ tst-single_threaded-mod3 \
+ tst-single_threaded-mod4 \
+ tst-sonamemove-linkmod1 \
+ tst-sonamemove-runmod1 \
+ tst-sonamemove-runmod2 \
+ tst-tls19mod1 \
+ tst-tls19mod2 \
+ tst-tls19mod3 \
+ tst-tls20mod-bad \
+ tst-tls21mod \
+ tst-tlsalign-lib \
+ tst-tls-ie-mod0 \
+ tst-tls-ie-mod1 \
+ tst-tls-ie-mod2 \
+ tst-tls-ie-mod3 \
+ tst-tls-ie-mod4 \
+ tst-tls-ie-mod5 \
+ tst-tls-ie-mod6 \
+ tst-tlsmod1 \
+ tst-tlsmod10 \
+ tst-tlsmod11 \
+ tst-tlsmod12 \
+ tst-tlsmod13 \
+ tst-tlsmod13a \
+ tst-tlsmod14a \
+ tst-tlsmod14b \
+ tst-tlsmod15a \
+ tst-tlsmod15b \
+ tst-tlsmod16a \
+ tst-tlsmod16b \
+ tst-tlsmod17b \
+ tst-tlsmod2 \
+ tst-tlsmod3 \
+ tst-tlsmod4 \
+ tst-tlsmod5 \
+ tst-tlsmod6 \
+ tst-tlsmod7 \
+ tst-tlsmod8 \
+ tst-tlsmod9 \
+ tst-unique1mod1 \
+ tst-unique1mod2 \
+ tst-unique2mod1 \
+ tst-unique2mod2 \
+ tst-unwind-ctor-lib \
+ unload2dep \
+ unload2mod \
+ unload3mod1 \
+ unload3mod2 \
+ unload3mod3 \
+ unload3mod4 \
+ unload4mod1 \
+ unload4mod2 \
+ unload4mod3 \
+ unload4mod4 \
+ unload6mod1 \
+ unload6mod2 \
+ unload6mod3 \
+ unload7mod1 \
+ unload7mod2 \
+ unload8mod1 \
+ unload8mod1x \
+ unload8mod2 \
+ unload8mod3 \
+ unloadmod \
+ vismod1 \
+ vismod2 \
+ vismod3 \
+# modules-names
+
+modules-names-cxx = \
+ tst-dlopen-nodelete-reloc-mod1 \
+ tst-dlopen-nodelete-reloc-mod10 \
+ tst-dlopen-nodelete-reloc-mod11 \
+ tst-dlopen-nodelete-reloc-mod12 \
+ tst-dlopen-nodelete-reloc-mod13 \
+ tst-dlopen-nodelete-reloc-mod14 \
+ tst-dlopen-nodelete-reloc-mod15 \
+ tst-dlopen-nodelete-reloc-mod16 \
+ tst-dlopen-nodelete-reloc-mod17 \
+ tst-dlopen-nodelete-reloc-mod2 \
+ tst-dlopen-nodelete-reloc-mod3 \
+ tst-dlopen-nodelete-reloc-mod4 \
+ tst-dlopen-nodelete-reloc-mod5 \
+ tst-dlopen-nodelete-reloc-mod6 \
+ tst-dlopen-nodelete-reloc-mod7 \
+ tst-dlopen-nodelete-reloc-mod8 \
+ tst-dlopen-nodelete-reloc-mod9 \
+ tst-nodelete-rtldmod \
+ tst-nodelete-uniquemod \
+ tst-nodelete-zmod \
+ tst-unique3lib \
+ tst-unique3lib2 \
+ tst-unique4lib \
+ # modules-names-cxx
+
+modules-names += \
+ $(if $(CXX),$(modules-names-cxx)) \
+ $(modules-execstack-$(have-z-execstack)) \
+ $(tlsmod17a-modules) \
+ $(tlsmod18a-modules) \
+ $(tst-tls-many-dynamic-modules) \
+ $(tst-tls-many-dynamic-modules-dep) \
+ $(tst-tls-many-dynamic-modules-dep-bad) \
+ # modules-names
# Most modules build with _ISOMAC defined, but those filtered out
# depend on internal headers.
$(objpfx)tst-gnu2-tls1: $(objpfx)tst-gnu2-tls1mod.so
tst-gnu2-tls1mod.so-no-z-defs = yes
CFLAGS-tst-gnu2-tls1mod.c += -mtls-dialect=gnu2
+
+tests += tst-audit-tlsdesc tst-audit-tlsdesc-dlopen
+modules-names += tst-audit-tlsdesc-mod1 tst-audit-tlsdesc-mod2 tst-auditmod-tlsdesc
+$(objpfx)tst-audit-tlsdesc: $(objpfx)tst-audit-tlsdesc-mod1.so \
+ $(objpfx)tst-audit-tlsdesc-mod2.so \
+ $(shared-thread-library)
+CFLAGS-tst-audit-tlsdesc-mod1.c += -mtls-dialect=gnu2
+CFLAGS-tst-audit-tlsdesc-mod2.c += -mtls-dialect=gnu2
+$(objpfx)tst-audit-tlsdesc-dlopen: $(shared-thread-library)
+$(objpfx)tst-audit-tlsdesc-dlopen.out: $(objpfx)tst-audit-tlsdesc-mod1.so \
+ $(objpfx)tst-audit-tlsdesc-mod2.so
+$(objpfx)tst-audit-tlsdesc-mod1.so: $(objpfx)tst-audit-tlsdesc-mod2.so
+$(objpfx)tst-audit-tlsdesc.out: $(objpfx)tst-auditmod-tlsdesc.so
+tst-audit-tlsdesc-ENV = LD_AUDIT=$(objpfx)tst-auditmod-tlsdesc.so
+$(objpfx)tst-audit-tlsdesc-dlopen.out: $(objpfx)tst-auditmod-tlsdesc.so
+tst-audit-tlsdesc-dlopen-ENV = LD_AUDIT=$(objpfx)tst-auditmod-tlsdesc.so
endif
ifeq (yes,$(have-protected-data))
modules-names += tst-protected1moda tst-protected1modb
modules-execstack-yes = tst-execstack-mod
extra-test-objs += $(addsuffix .os,$(strip $(modules-names)))
-# filtmod1.so, tst-big-note-lib.so have special rules.
-modules-names-nobuild := filtmod1 tst-big-note-lib
+# filtmod1.so, tst-big-note-lib.so, tst-ro-dynamic-mod.so have special
+# rules.
+modules-names-nobuild := filtmod1 tst-big-note-lib tst-ro-dynamic-mod \
+ tst-audit24bmod1 tst-audit24bmod2
tests += $(tests-static)
ifeq (yes,$(have-ifunc))
-tests-ifuncstatic := ifuncmain1static ifuncmain1picstatic \
- ifuncmain2static ifuncmain2picstatic \
- ifuncmain4static ifuncmain4picstatic \
- ifuncmain5static ifuncmain5picstatic \
- ifuncmain7static ifuncmain7picstatic
+tests-ifuncstatic := \
+ ifuncmain1static \
+ ifuncmain1picstatic \
+ ifuncmain2static \
+ ifuncmain2picstatic \
+ ifuncmain4static \
+ ifuncmain4picstatic \
+ ifuncmain5static \
+ ifuncmain5picstatic \
+ ifuncmain7static \
+ ifuncmain7picstatic \
+ # tests-ifuncstatic
ifeq (yes,$(have-gcc-ifunc))
tests-ifuncstatic += ifuncmain9static ifuncmain9picstatic
endif
tests-static += $(tests-ifuncstatic)
tests-internal += $(tests-ifuncstatic)
ifeq (yes,$(build-shared))
-tests += tst-ifunc-fault-lazy tst-ifunc-fault-bindnow
+tests += \
+ tst-ifunc-fault-bindnow \
+ tst-ifunc-fault-lazy \
+ # tests
# Note: sysdeps/x86_64/ifuncmain8.c uses ifuncmain8.
tests-internal += \
- ifuncmain1 ifuncmain1pic ifuncmain1vis ifuncmain1vispic \
- ifuncmain1staticpic \
- ifuncmain2 ifuncmain2pic ifuncmain3 ifuncmain4 \
- ifuncmain5 ifuncmain5pic ifuncmain5staticpic \
- ifuncmain7 ifuncmain7pic
+ ifuncmain1 \
+ ifuncmain1pic \
+ ifuncmain1staticpic \
+ ifuncmain1vis \
+ ifuncmain1vispic \
+ ifuncmain2 \
+ ifuncmain2pic \
+ ifuncmain3 \
+ ifuncmain4 \
+ ifuncmain5 \
+ ifuncmain5pic \
+ ifuncmain5staticpic \
+ ifuncmain7 \
+ ifuncmain7pic \
+ # tests-internal
ifeq (yes,$(have-gcc-ifunc))
-tests-internal += ifuncmain9 ifuncmain9pic
+tests-internal += \
+ ifuncmain9 \
+ ifuncmain9pic \
+ # tests-internal
endif
-ifunc-test-modules = ifuncdep1 ifuncdep1pic ifuncdep2 ifuncdep2pic \
- ifuncdep5 ifuncdep5pic
+ifunc-test-modules = \
+ ifuncdep1 \
+ ifuncdep1pic \
+ ifuncdep2 \
+ ifuncdep2pic \
+ ifuncdep5 \
+ ifuncdep5pic \
+ # ifunc-test-modules
extra-test-objs += $(ifunc-test-modules:=.o)
test-internal-extras += $(ifunc-test-modules)
ifeq (yes,$(have-fpie))
-ifunc-pie-tests = ifuncmain1pie ifuncmain1vispie ifuncmain1staticpie \
- ifuncmain5pie ifuncmain6pie ifuncmain7pie
+ifunc-pie-tests = \
+ ifuncmain1pie \
+ ifuncmain1staticpie \
+ ifuncmain1vispie \
+ ifuncmain5pie \
+ ifuncmain6pie \
+ ifuncmain7pie \
+ # ifunc-pie-tests
ifeq (yes,$(have-gcc-ifunc))
ifunc-pie-tests += ifuncmain9pie
endif
tests-internal += $(ifunc-pie-tests)
tests-pie += $(ifunc-pie-tests)
endif
-modules-names += ifuncmod1 ifuncmod3 ifuncmod5 ifuncmod6
+modules-names += \
+ ifuncmod1 \
+ ifuncmod3 \
+ ifuncmod5 \
+ ifuncmod6 \
+ # modules-names
endif
endif
ifeq (yes,$(build-shared))
ifeq ($(run-built-tests),yes)
-tests-special += $(objpfx)tst-pathopt.out $(objpfx)tst-rtld-load-self.out \
- $(objpfx)tst-rtld-preload.out $(objpfx)argv0test.out \
- $(objpfx)tst-rtld-help.out
+tests-special += \
+ $(objpfx)argv0test.out \
+ $(objpfx)tst-pathopt.out \
+ $(objpfx)tst-rtld-help.out \
+ $(objpfx)tst-rtld-load-self.out \
+ $(objpfx)tst-rtld-preload.out \
+ # tests-special
endif
-tests-special += $(objpfx)check-textrel.out $(objpfx)check-execstack.out \
- $(objpfx)check-wx-segment.out \
- $(objpfx)check-localplt.out $(objpfx)check-initfini.out
+tests-special += \
+ $(objpfx)check-execstack.out \
+ $(objpfx)check-initfini.out \
+ $(objpfx)check-localplt.out \
+ $(objpfx)check-textrel.out \
+ $(objpfx)check-wx-segment.out \
+ # tests-special
endif
ifeq ($(run-built-tests),yes)
-tests-special += $(objpfx)order-cmp.out $(objpfx)tst-array1-cmp.out \
- $(objpfx)tst-array1-static-cmp.out \
- $(objpfx)tst-array2-cmp.out $(objpfx)tst-array3-cmp.out \
- $(objpfx)tst-array4-cmp.out $(objpfx)tst-array5-cmp.out \
- $(objpfx)tst-array5-static-cmp.out $(objpfx)order2-cmp.out \
- $(objpfx)tst-initorder-cmp.out \
- $(objpfx)tst-initorder2-cmp.out $(objpfx)tst-unused-dep.out \
- $(objpfx)tst-unused-dep-cmp.out
+tests-special += \
+ $(objpfx)order-cmp.out \
+ $(objpfx)order2-cmp.out \
+ $(objpfx)tst-array1-cmp.out \
+ $(objpfx)tst-array1-static-cmp.out \
+ $(objpfx)tst-array2-cmp.out \
+ $(objpfx)tst-array3-cmp.out \
+ $(objpfx)tst-array4-cmp.out \
+ $(objpfx)tst-array5-cmp.out \
+ $(objpfx)tst-array5-static-cmp.out \
+ $(objpfx)tst-initorder-cmp.out \
+ $(objpfx)tst-initorder2-cmp.out \
+ $(objpfx)tst-unused-dep-cmp.out \
+ $(objpfx)tst-unused-dep.out \
+ # tests-special
endif
check-abi: $(objpfx)check-abi-ld.out
tst-prelink-no-pie = yes
endif
+tests-special += $(objpfx)tst-glibcelf.out
+$(objpfx)tst-glibcelf.out: tst-glibcelf.py elf.h $(..)/scripts/glibcelf.py \
+ $(..)/scripts/glibcextract.py
+ PYTHONPATH=$(..)scripts $(PYTHON) tst-glibcelf.py \
+ --cc="$(CC) $(patsubst -DMODULE_NAME=%,-DMODULE_NAME=testsuite,$(CPPFLAGS))" \
+ < /dev/null > $@ 2>&1; $(evaluate-test)
+
+ifeq ($(run-built-tests),yes)
+tests-special += $(objpfx)tst-tls-allocation-failure-static-patched.out
+endif
+
# The test requires shared _and_ PIE because the executable
# unit test driver must be able to link with the shared object
# that is going to eventually go into an installed DSO.
free \
malloc \
realloc \
+ # rtld-stubbed-symbols
ifeq ($(have-ssp),yes)
# rtld is not built with the stack protector, so these references will
CFLAGS-.os += $(call elide-stack-protector,.os,tst-auditmod17)
tst-audit17-ENV = LD_AUDIT=$(objpfx)tst-auditmod17.so
+$(objpfx)tst-audit18.out: $(objpfx)tst-auditmod18.so \
+ $(objpfx)tst-audit18mod.so
+tst-audit18-ARGS = -- $(host-test-program-cmd)
+
+$(objpfx)tst-audit19a.out: $(objpfx)tst-auditmod19a.so
+tst-audit19a-ENV = LD_AUDIT=$(objpfx)tst-auditmod19a.so
+
+$(objpfx)tst-audit19b.out: $(objpfx)tst-auditmod19b.so
+$(objpfx)tst-audit19b: $(objpfx)tst-audit19bmod.so
+tst-audit19b-ARGS = -- $(host-test-program-cmd)
+
+$(objpfx)tst-audit20.out: $(objpfx)tst-auditmod20.so
+tst-audit20-ENV = LD_AUDIT=$(objpfx)tst-auditmod20.so
+
+$(objpfx)tst-audit21: $(shared-thread-library)
+$(objpfx)tst-audit21.out: $(objpfx)tst-auditmod21a.so
+$(objpfx)tst-auditmod21a.so: $(objpfx)tst-auditmod21b.so
+tst-audit21-ENV = LD_AUDIT=$(objpfx)tst-auditmod21a.so
+
+$(objpfx)tst-audit22.out: $(objpfx)tst-auditmod22.so
+tst-audit22-ARGS = -- $(host-test-program-cmd)
+
+$(objpfx)tst-audit23.out: $(objpfx)tst-auditmod23.so \
+ $(objpfx)tst-audit23mod.so
+tst-audit23-ARGS = -- $(host-test-program-cmd)
+
+$(objpfx)tst-audit24a.out: $(objpfx)tst-auditmod24a.so
+$(objpfx)tst-audit24a: $(objpfx)tst-audit24amod1.so \
+ $(objpfx)tst-audit24amod2.so
+tst-audit24a-ENV = LD_AUDIT=$(objpfx)tst-auditmod24a.so
+LDFLAGS-tst-audit24a = -Wl,-z,now
+
+$(objpfx)tst-audit24b.out: $(objpfx)tst-auditmod24b.so
+$(objpfx)tst-audit24b: $(objpfx)tst-audit24bmod1.so \
+ $(objpfx)tst-audit24bmod2.so
+$(objpfx)tst-audit24bmod1: $(objpfx)tst-audit24bmod2.so
+# The test checks if a library without .gnu.version correctly calls the
+# audit callbacks. So it uses an explicit link rule to avoid linking
+# against libc.so.
+$(objpfx)tst-audit24bmod1.so: $(objpfx)tst-audit24bmod1.os
+ $(CC) -nostdlib -nostartfiles -shared -o $@.new $(objpfx)tst-audit24bmod1.os \
+ -Wl,-z,now
+ $(call after-link,$@.new)
+ mv -f $@.new $@
+CFLAGS-.os += $(call elide-stack-protector,.os,tst-audit24bmod1)
+$(objpfx)tst-audit24bmod2.so: $(objpfx)tst-audit24bmod2.os
+ $(CC) -nostdlib -nostartfiles -shared -o $@.new $(objpfx)tst-audit24bmod2.os
+ $(call after-link,$@.new)
+ mv -f $@.new $@
+CFLAGS-.os += $(call elide-stack-protector,.os,tst-audit24bmod2)
+tst-audit24b-ENV = LD_AUDIT=$(objpfx)tst-auditmod24b.so
+LDFLAGS-tst-audit24b = -Wl,-z,now
+
+# Same as tst-audit24a, but tests LD_BIND_NOW
+$(objpfx)tst-audit24c.out: $(objpfx)tst-auditmod24c.so
+$(objpfx)tst-audit24c: $(objpfx)tst-audit24amod1.so \
+ $(objpfx)tst-audit24amod2.so
+tst-audit24c-ENV = LD_BIND_NOW=1 LD_AUDIT=$(objpfx)tst-auditmod24c.so
+LDFLAGS-tst-audit24c = -Wl,-z,lazy
+
+$(objpfx)tst-audit24d.out: $(objpfx)tst-auditmod24d.so
+$(objpfx)tst-audit24d: $(objpfx)tst-audit24dmod1.so \
+ $(objpfx)tst-audit24dmod2.so
+$(objpfx)tst-audit24dmod1.so: $(objpfx)tst-audit24dmod3.so
+LDFLAGS-tst-audit24dmod1.so = -Wl,-z,now
+$(objpfx)tst-audit24dmod2.so: $(objpfx)tst-audit24dmod4.so
+LDFLAGS-tst-audit24dmod2.so = -Wl,-z,lazy
+tst-audit24d-ENV = LD_AUDIT=$(objpfx)tst-auditmod24d.so
+LDFLAGS-tst-audit24d = -Wl,-z,lazy
+
+$(objpfx)tst-audit25a.out: $(objpfx)tst-auditmod25.so
+$(objpfx)tst-audit25a: $(objpfx)tst-audit25mod1.so \
+ $(objpfx)tst-audit25mod2.so \
+ $(objpfx)tst-audit25mod3.so \
+ $(objpfx)tst-audit25mod4.so
+LDFLAGS-tst-audit25a = -Wl,-z,lazy
+$(objpfx)tst-audit25mod1.so: $(objpfx)tst-audit25mod3.so
+LDFLAGS-tst-audit25mod1.so = -Wl,-z,now
+$(objpfx)tst-audit25mod2.so: $(objpfx)tst-audit25mod4.so
+LDFLAGS-tst-audit25mod2.so = -Wl,-z,lazy
+tst-audit25a-ARGS = -- $(host-test-program-cmd)
+
+$(objpfx)tst-audit25b.out: $(objpfx)tst-auditmod25.so
+$(objpfx)tst-audit25b: $(objpfx)tst-audit25mod1.so \
+ $(objpfx)tst-audit25mod2.so \
+ $(objpfx)tst-audit25mod3.so \
+ $(objpfx)tst-audit25mod4.so
+LDFLAGS-tst-audit25b = -Wl,-z,now
+tst-audit25b-ARGS = -- $(host-test-program-cmd)
+
+$(objpfx)tst-audit26.out: $(objpfx)tst-auditmod26.so
+$(objpfx)tst-auditmod26.so: $(libsupport)
+tst-audit26-ENV = LD_AUDIT=$(objpfx)tst-auditmod26.so
+
# tst-sonamemove links against an older implementation of the library.
LDFLAGS-tst-sonamemove-linkmod1.so = \
-Wl,--version-script=tst-sonamemove-linkmod1.map \
LDFLAGS-libmarkermod2-1.so += -Wl,-soname,libmarkermod2.so
LDFLAGS-libmarkermod3-1.so += -Wl,-soname,libmarkermod3.so
LDFLAGS-libmarkermod4-1.so += -Wl,-soname,libmarkermod4.so
+LDFLAGS-libmarkermod5-1.so += -Wl,-soname,libmarkermod5.so
$(objpfx)libmarkermod%.os : markermodMARKER-VALUE.c
$(compile-command.c) \
-DMARKER=marker$(firstword $(subst -, ,$*)) \
cp $< $@
$(objpfx)libmarkermod4.so: $(objpfx)libmarkermod4-1.so
cp $< $@
+$(objpfx)libmarkermod5.so: $(objpfx)libmarkermod5-1.so
+ cp $< $@
# tst-glibc-hwcaps-prepend checks that --glibc-hwcaps-prepend is
# preferred over auto-detected subdirectories.
tst-getauxval-static-ENV = LD_LIBRARY_PATH=$(objpfx):$(common-objpfx)
$(objpfx)tst-dlmopen-gethostbyname.out: $(objpfx)tst-dlmopen-gethostbyname-mod.so
+
+$(objpfx)tst-ro-dynamic: $(objpfx)tst-ro-dynamic-mod.so
+# tst-ro-dynamic-mod is listed in modules-names-nobuild, so it is linked
+# by this explicit rule: a shared object built without the standard
+# libraries or startup files, using the linker script
+# tst-ro-dynamic-mod.map.
+$(objpfx)tst-ro-dynamic-mod.so: $(objpfx)tst-ro-dynamic-mod.os \
+ tst-ro-dynamic-mod.map
+ $(LINK.o) -nostdlib -nostartfiles -shared -o $@ \
+ -Wl,--script=tst-ro-dynamic-mod.map \
+ $(objpfx)tst-ro-dynamic-mod.os
+
+# Produce the patched test binary: copy the static test program and then
+# rewrite the copy in place with tst-elf-edit.py --maximize-tls-size.
+$(objpfx)tst-tls-allocation-failure-static-patched: \
+ $(objpfx)tst-tls-allocation-failure-static $(..)scripts/tst-elf-edit.py
+ cp $< $@
+ $(PYTHON) $(..)scripts/tst-elf-edit.py --maximize-tls-size $@
+
+# Run the patched binary, capturing its output and appending its exit
+# status as a "status: N" line.  The test passes only if the output
+# contains the "Cannot allocate TLS block" fatal error line and the
+# recorded status is 127.
+$(objpfx)tst-tls-allocation-failure-static-patched.out: \
+ $(objpfx)tst-tls-allocation-failure-static-patched
+ $< > $@ 2>&1; echo "status: $$?" >> $@
+ grep -q '^Fatal glibc error: Cannot allocate TLS block$$' $@ \
+ && grep -q '^status: 127$$' $@; \
+ $(evaluate-test)
# stack canary
__stack_chk_guard;
}
+ GLIBC_2.34 {
+ __rtld_version_placeholder;
+ }
GLIBC_PRIVATE {
# Those are in the dynamic linker, but used by libc.so.
__libc_enable_secure;
_dl_argv; _dl_find_dso_for_object; _dl_get_tls_static_info;
_dl_deallocate_tls; _dl_make_stack_executable;
_dl_rtld_di_serinfo; _dl_starting_up; _dl_fatal_printf;
+ _dl_audit_symbind_alt; _dl_audit_preinit;
_rtld_global; _rtld_global_ro;
# Only here for gdb while a better method is developed.
--- /dev/null
+/* Audit common functions.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <assert.h>
+#include <link.h>
+#include <ldsodefs.h>
+#include <dl-machine.h>
+#include <dl-runtime.h>
+#include <dl-fixup-attribute.h>
+
+/* Deliver ACTION to the activity callback of every registered auditor
+   that provides one, passing the audit cookie stored for link map L.  */
+void
+_dl_audit_activity_map (struct link_map *l, int action)
+{
+  struct audit_ifaces *iface = GLRO(dl_audit);
+
+  for (unsigned int idx = 0; idx < GLRO(dl_naudit);
+       ++idx, iface = iface->next)
+    {
+      if (iface->activity == NULL)
+        continue;
+
+      struct auditstate *state = link_map_audit_state (l, idx);
+      iface->activity (&state->cookie, action);
+    }
+}
+
+/* Deliver ACTION to the auditors on behalf of namespace NSID, which is
+   identified by its first loaded link map.  Nothing happens when there
+   are no auditors or when that first map is itself an audit object.  */
+void
+_dl_audit_activity_nsid (Lmid_t nsid, int action)
+{
+  if (__glibc_likely (GLRO(dl_naudit) == 0))
+    return;
+
+  /* A NULL first map means the namespace has become empty.  The audit
+     interface identifies a namespace by its first loaded module, so there
+     is no way to signal LA_ACT_CONSISTENT for it.  */
+  struct link_map *first = GL(dl_ns)[nsid]._ns_loaded;
+  if (first != NULL && !first->l_auditing)
+    _dl_audit_activity_map (first, action);
+}
+
+/* Pass NAME through the objsearch callback of each auditor in turn, on
+   behalf of link map L and with search code CODE.  Each callback receives
+   the name returned by the previous one.  Returns the final name, or NULL
+   as soon as a callback returns NULL.  NAME is returned untouched when L
+   is NULL, L is an audit object, or CODE is zero.  */
+const char *
+_dl_audit_objsearch (const char *name, struct link_map *l, unsigned int code)
+{
+  if (l == NULL || l->l_auditing || code == 0)
+    return name;
+
+  struct audit_ifaces *iface = GLRO(dl_audit);
+  for (unsigned int idx = 0; idx < GLRO(dl_naudit);
+       ++idx, iface = iface->next)
+    {
+      if (iface->objsearch == NULL)
+        continue;
+
+      struct auditstate *st = link_map_audit_state (l, idx);
+      name = iface->objsearch (name, &st->cookie, code);
+      if (name == NULL)
+        break;
+    }
+
+  return name;
+}
+
+/* Call the objopen callback of every auditor for link map L in namespace
+   NSID.  The value each callback returns is stored as that auditor's bind
+   flags for L, and L is marked in l_audit_any_plt when any auditor
+   returned non-zero flags.  */
+void
+_dl_audit_objopen (struct link_map *l, Lmid_t nsid)
+{
+  if (__glibc_likely (GLRO(dl_naudit) == 0))
+    return;
+
+  struct audit_ifaces *iface = GLRO(dl_audit);
+  for (unsigned int idx = 0; idx < GLRO(dl_naudit);
+       ++idx, iface = iface->next)
+    {
+      if (iface->objopen == NULL)
+        continue;
+
+      struct auditstate *st = link_map_audit_state (l, idx);
+      st->bindflags = iface->objopen (l, nsid, &st->cookie);
+      if (st->bindflags != 0)
+        l->l_audit_any_plt |= 1;
+    }
+}
+
+/* Call the objclose callback of every auditor for link map L.  Skipped
+   when there are no auditors or when the first map of L's namespace is an
+   audit object.  */
+void
+_dl_audit_objclose (struct link_map *l)
+{
+  if (__glibc_likely (GLRO(dl_naudit) == 0))
+    return;
+  if (GL(dl_ns)[l->l_ns]._ns_loaded->l_auditing)
+    return;
+
+  struct audit_ifaces *iface = GLRO(dl_audit);
+  for (unsigned int idx = 0; idx < GLRO(dl_naudit);
+       ++idx, iface = iface->next)
+    {
+      if (iface->objclose == NULL)
+        continue;
+
+      struct auditstate *st = link_map_audit_state (l, idx);
+      /* The callback's return value is deliberately not used.  */
+      iface->objclose (&st->cookie);
+    }
+}
+
+/* Call the preinit callback of every auditor that provides one, passing
+   the audit cookie stored for link map L.  */
+void
+_dl_audit_preinit (struct link_map *l)
+{
+  if (__glibc_likely (GLRO(dl_naudit) == 0))
+    return;
+
+  struct audit_ifaces *iface = GLRO(dl_audit);
+  for (unsigned int idx = 0; idx < GLRO(dl_naudit);
+       ++idx, iface = iface->next)
+    {
+      if (iface->preinit == NULL)
+        continue;
+
+      struct auditstate *st = link_map_audit_state (l, idx);
+      iface->preinit (&st->cookie);
+    }
+}
+
+/* Report a symbol binding to the auditors with LA_SYMB_DLSYM set in the
+   flags.  L is the link map the reference comes from, REF the symbol
+   table entry of the definition inside RESULT, and *VALUE the resolved
+   address.  An auditor's symbind callback is invoked when L requested
+   LA_FLG_BINDFROM or RESULT requested LA_FLG_BINDTO for that auditor.
+   On return *VALUE holds the address after all callbacks have run; once
+   a callback has changed it, later callbacks also see LA_SYMB_ALTVALUE.
+   Nothing is done if neither map has l_audit_any_plt set.  */
+void
+_dl_audit_symbind_alt (struct link_map *l, const ElfW(Sym) *ref, void **value,
+                       lookup_t result)
+{
+  if ((l->l_audit_any_plt | result->l_audit_any_plt) == 0)
+    return;
+
+  const char *strtab = (const char *) D_PTR (result, l_info[DT_STRTAB]);
+  /* Compute index of the symbol entry in the symbol table of the DSO with
+     the definition.  */
+  unsigned int ndx = (ref - (ElfW(Sym) *) D_PTR (result, l_info[DT_SYMTAB]));
+
+  unsigned int altvalue = 0;
+  /* Synthesize a symbol record where the st_value field is the result.  */
+  ElfW(Sym) sym = *ref;
+  sym.st_value = (ElfW(Addr)) *value;
+
+  struct audit_ifaces *afct = GLRO(dl_audit);
+  for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
+    {
+      struct auditstate *match_audit = link_map_audit_state (l, cnt);
+      struct auditstate *result_audit = link_map_audit_state (result, cnt);
+      if (afct->symbind != NULL
+          && ((match_audit->bindflags & LA_FLG_BINDFROM) != 0
+              || (result_audit->bindflags & LA_FLG_BINDTO) != 0))
+        {
+          unsigned int flags = altvalue | LA_SYMB_DLSYM;
+          uintptr_t new_value = afct->symbind (&sym, ndx,
+                                               &match_audit->cookie,
+                                               &result_audit->cookie,
+                                               &flags, strtab + ref->st_name);
+          if (new_value != (uintptr_t) sym.st_value)
+            {
+              altvalue = LA_SYMB_ALTVALUE;
+              sym.st_value = new_value;
+            }
+        }
+
+      /* Advance on every iteration, not only when the callback ran.
+         Otherwise AFCT falls out of step with CNT as soon as one auditor
+         is skipped, and later auditors are called through the wrong
+         interface with another auditor's cookies.  */
+      afct = afct->next;
+    }
+
+  *value = (void *) sym.st_value;
+}
+rtld_hidden_def (_dl_audit_symbind_alt)
+
+/* Run the symbind callback of the auditors for a symbol that link map L
+ binds to a definition in RESULT. DEFSYM is the defining symbol table
+ entry and *VALUE the resolved value. An auditor is consulted only if
+ L requested LA_FLG_BINDFROM and RESULT requested LA_FLG_BINDTO for it.
+
+ RELOC_RESULT may be NULL (the for_jmp_slot case). Then nothing is
+ recorded, and LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT are pre-set in the
+ flags handed to the callbacks. Otherwise the bound map, symbol index,
+ per-auditor enter/exit bits and final flags are stored in
+ *RELOC_RESULT.
+
+ If any callback returned a different address, *VALUE is updated
+ through DL_FIXUP_BINDNOW_RELOC. */
+void
+_dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result,
+ const ElfW(Sym) *defsym, DL_FIXUP_VALUE_TYPE *value,
+ lookup_t result)
+{
+ bool for_jmp_slot = reloc_result == NULL;
+
+ /* Compute index of the symbol entry in the symbol table of the DSO
+ with the definition. */
+ unsigned int boundndx = defsym - (ElfW(Sym) *) D_PTR (result,
+ l_info[DT_SYMTAB]);
+ if (!for_jmp_slot)
+ {
+ reloc_result->bound = result;
+ reloc_result->boundndx = boundndx;
+ }
+
+ if ((l->l_audit_any_plt | result->l_audit_any_plt) == 0)
+ {
+ /* Set all bits since this symbol binding is not interesting. */
+ if (!for_jmp_slot)
+ reloc_result->enterexit = (1u << DL_NNS) - 1;
+ return;
+ }
+
+ /* Synthesize a symbol record where the st_value field is the result. */
+ ElfW(Sym) sym = *defsym;
+ sym.st_value = DL_FIXUP_VALUE_ADDR (*value);
+
+ /* Keep track whether there is any interest in tracing the call in the lower
+ two bits. */
+ /* sizeof does not evaluate its operand, so the first assert is safe even
+ when reloc_result is NULL. */
+ assert (DL_NNS * 2 <= sizeof (reloc_result->flags) * 8);
+ assert ((LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) == 3);
+ uint32_t enterexit = LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT;
+
+ const char *strtab2 = (const void *) D_PTR (result, l_info[DT_STRTAB]);
+
+ /* FLAGS is shared across auditors, so bits set by one callback are
+ visible to the next one. */
+ unsigned int flags = 0;
+ struct audit_ifaces *afct = GLRO(dl_audit);
+ uintptr_t new_value = (uintptr_t) sym.st_value;
+ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
+ {
+ /* XXX Check whether both DSOs must request action or only one */
+ struct auditstate *l_state = link_map_audit_state (l, cnt);
+ struct auditstate *result_state = link_map_audit_state (result, cnt);
+ if ((l_state->bindflags & LA_FLG_BINDFROM) != 0
+ && (result_state->bindflags & LA_FLG_BINDTO) != 0)
+ {
+ if (afct->symbind != NULL)
+ {
+ flags |= for_jmp_slot ? LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT
+ : 0;
+ new_value = afct->symbind (&sym, boundndx,
+ &l_state->cookie,
+ &result_state->cookie, &flags,
+ strtab2 + defsym->st_name);
+ if (new_value != (uintptr_t) sym.st_value)
+ {
+ flags |= LA_SYMB_ALTVALUE;
+ sym.st_value = for_jmp_slot
+ ? DL_FIXUP_BINDNOW_ADDR_VALUE (new_value) : new_value;
+ }
+ }
+
+ /* Remember the results for every audit library and store a summary
+ in the first two bits. */
+ /* The summary keeps a bit set only while every auditor seen so far
+ has it set; auditor CNT's own bits go at offset (cnt + 1) * 2. */
+ enterexit &= flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT);
+ enterexit |= ((flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT))
+ << ((cnt + 1) * 2));
+ }
+ else
+ /* If the bind flags say this auditor is not interested, set the bits
+ manually. */
+ enterexit |= ((LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT)
+ << ((cnt + 1) * 2));
+ afct = afct->next;
+ }
+
+ if (!for_jmp_slot)
+ {
+ reloc_result->enterexit = enterexit;
+ reloc_result->flags = flags;
+ }
+
+ if (flags & LA_SYMB_ALTVALUE)
+ DL_FIXUP_BINDNOW_RELOC (value, new_value, sym.st_value);
+}
+
+/* Call the ARCH_LA_PLTENTER hook of every auditor that has not opted out
+ (per the bits in RELOC_RESULT->enterexit) for a call from link map L to
+ the symbol recorded in RELOC_RESULT. *VALUE is the current call target
+ and is replaced by the address left after all hooks have run. REGS is
+ passed through to the hooks. *FRAMESIZE receives the largest frame
+ size reported by a hook whose auditor has not set LA_SYMB_NOPLTEXIT,
+ unless it is already -2, in which case it is left alone. */
+void
+_dl_audit_pltenter (struct link_map *l, struct reloc_result *reloc_result,
+ DL_FIXUP_VALUE_TYPE *value, void *regs, long int *framesize)
+{
+ /* Don't do anything if no auditor wants to intercept this call. */
+ if (GLRO(dl_naudit) == 0
+ || (reloc_result->enterexit & LA_SYMB_NOPLTENTER))
+ return;
+
+ /* Sanity check: DL_FIXUP_VALUE_CODE_ADDR (value) should have been
+ initialized before this function is called, either by the caller or
+ in another thread. */
+ assert (DL_FIXUP_VALUE_CODE_ADDR (*value) != 0);
+ ElfW(Sym) *defsym = ((ElfW(Sym) *) D_PTR (reloc_result->bound,
+ l_info[DT_SYMTAB])
+ + reloc_result->boundndx);
+
+ /* Set up the sym parameter. */
+ ElfW(Sym) sym = *defsym;
+ sym.st_value = DL_FIXUP_VALUE_ADDR (*value);
+
+ /* Get the symbol name. */
+ const char *strtab = (const void *) D_PTR (reloc_result->bound,
+ l_info[DT_STRTAB]);
+ const char *symname = strtab + sym.st_name;
+
+ /* Keep track of overwritten addresses. */
+ unsigned int flags = reloc_result->flags;
+
+ struct audit_ifaces *afct = GLRO(dl_audit);
+ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
+ {
+ /* Auditor CNT's opt-out bits live at offset 2 * (cnt + 1) of
+ enterexit. */
+ if (afct->ARCH_LA_PLTENTER != NULL
+ && (reloc_result->enterexit
+ & (LA_SYMB_NOPLTENTER << (2 * (cnt + 1)))) == 0)
+ {
+ /* -1 means the hook did not report a frame size. */
+ long int new_framesize = -1;
+ struct auditstate *l_state = link_map_audit_state (l, cnt);
+ struct auditstate *bound_state
+ = link_map_audit_state (reloc_result->bound, cnt);
+ uintptr_t new_value
+ = afct->ARCH_LA_PLTENTER (&sym, reloc_result->boundndx,
+ &l_state->cookie, &bound_state->cookie,
+ regs, &flags, symname, &new_framesize);
+ if (new_value != (uintptr_t) sym.st_value)
+ {
+ flags |= LA_SYMB_ALTVALUE;
+ sym.st_value = new_value;
+ }
+
+ /* Remember the results for every audit library and store a summary
+ in the first two bits. */
+ reloc_result->enterexit |= ((flags & (LA_SYMB_NOPLTENTER
+ | LA_SYMB_NOPLTEXIT))
+ << (2 * (cnt + 1)));
+
+ if ((reloc_result->enterexit & (LA_SYMB_NOPLTEXIT
+ << (2 * (cnt + 1))))
+ == 0 && new_framesize != -1 && *framesize != -2)
+ {
+ /* If this is the first call providing information, use it. */
+ if (*framesize == -1)
+ *framesize = new_framesize;
+ /* If two pltenter calls provide conflicting information, use
+ the larger value. */
+ else if (new_framesize != *framesize)
+ *framesize = MAX (new_framesize, *framesize);
+ }
+ }
+
+ afct = afct->next;
+ }
+
+ *value = DL_FIXUP_ADDR_VALUE (sym.st_value);
+}
+
+/* Call the ARCH_LA_PLTEXIT hook of every auditor that has not set
+   LA_SYMB_NOPLTEXIT for the PLT slot RELOC_ARG of link map L.  INREGS
+   and OUTREGS are passed through to the hooks.  The bound object and
+   symbol are taken from the reloc_result entry recorded for this
+   slot.  */
+void
+DL_ARCH_FIXUP_ATTRIBUTE
+_dl_audit_pltexit (struct link_map *l, ElfW(Word) reloc_arg,
+                   const void *inregs, void *outregs)
+{
+  const uintptr_t pltgot = (uintptr_t) D_PTR (l, l_info[DT_PLTGOT]);
+
+  /* This is the address in the array where we store the result of previous
+     relocations.  */
+  // XXX Maybe the bound information must be stored on the stack since
+  // XXX with bind_not a new value could have been stored in the meantime.
+  struct reloc_result *reloc_result =
+    &l->l_reloc_result[reloc_index (pltgot, reloc_arg, sizeof (PLTREL))];
+  ElfW(Sym) *defsym = ((ElfW(Sym) *) D_PTR (reloc_result->bound,
+                                            l_info[DT_SYMTAB])
+                       + reloc_result->boundndx);
+
+  /* Set up the sym parameter.  */
+  ElfW(Sym) sym = *defsym;
+  sym.st_value = DL_FIXUP_VALUE_ADDR (reloc_result->addr);
+
+  /* Get the symbol name.  */
+  const char *strtab = (const void *) D_PTR (reloc_result->bound,
+                                             l_info[DT_STRTAB]);
+  const char *symname = strtab + sym.st_name;
+
+  struct audit_ifaces *afct = GLRO(dl_audit);
+  for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
+    {
+      /* Test auditor CNT's own opt-out bit.  _dl_audit_symbind and
+         _dl_audit_pltenter store the per-auditor bits at offset
+         2 * (cnt + 1); the lowest two bits are only a summary.  Shifting
+         right by 2 * cnt would read the summary for the first auditor
+         and a zero mask for all others, so they could never opt out.  */
+      if (afct->ARCH_LA_PLTEXIT != NULL
+          && (reloc_result->enterexit
+              & (LA_SYMB_NOPLTEXIT << (2 * (cnt + 1)))) == 0)
+        {
+          struct auditstate *l_state = link_map_audit_state (l, cnt);
+          struct auditstate *bound_state
+            = link_map_audit_state (reloc_result->bound, cnt);
+          afct->ARCH_LA_PLTEXIT (&sym, reloc_result->boundndx,
+                                 &l_state->cookie, &bound_state->cookie,
+                                 inregs, outregs, symname);
+        }
+
+      afct = afct->next;
+    }
+}
used + (nsid == LM_ID_BASE), true);
/* Call all termination functions at once. */
-#ifdef SHARED
- bool do_audit = GLRO(dl_naudit) > 0 && !ns->_ns_loaded->l_auditing;
-#endif
bool unload_any = false;
bool scope_mem_left = false;
unsigned int unload_global = 0;
#ifdef SHARED
/* Auditing checkpoint: we remove an object. */
- if (__glibc_unlikely (do_audit))
- {
- struct audit_ifaces *afct = GLRO(dl_audit);
- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
- {
- if (afct->objclose != NULL)
- {
- struct auditstate *state
- = link_map_audit_state (imap, cnt);
- /* Return value is ignored. */
- (void) afct->objclose (&state->cookie);
- }
-
- afct = afct->next;
- }
- }
+ _dl_audit_objclose (imap);
#endif
/* This object must not be used anymore. */
#ifdef SHARED
/* Auditing checkpoint: we will start deleting objects. */
- if (__glibc_unlikely (do_audit))
- {
- struct link_map *head = ns->_ns_loaded;
- struct audit_ifaces *afct = GLRO(dl_audit);
- /* Do not call the functions for any auditing object. */
- if (head->l_auditing == 0)
- {
- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
- {
- if (afct->activity != NULL)
- {
- struct auditstate *state = link_map_audit_state (head, cnt);
- afct->activity (&state->cookie, LA_ACT_DELETE);
- }
-
- afct = afct->next;
- }
- }
- }
+ _dl_audit_activity_nsid (nsid, LA_ACT_DELETE);
#endif
/* Notify the debugger we are about to remove some loaded objects. */
size_t tls_free_end;
tls_free_start = tls_free_end = NO_TLS_OFFSET;
+ /* Protects global and module specific TLS state. */
+ __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
+
/* We modify the list of loaded objects. */
__rtld_lock_lock_recursive (GL(dl_load_write_lock));
GL(dl_tls_static_used) = tls_free_start;
}
-#ifdef SHARED
- /* Auditing checkpoint: we have deleted all objects. */
- if (__glibc_unlikely (do_audit))
- {
- struct link_map *head = ns->_ns_loaded;
- /* If head is NULL, the namespace has become empty, and the
- audit interface does not give us a way to signal
- LA_ACT_CONSISTENT for it because the first loaded module is
- used to identify the namespace.
-
- Furthermore, do not notify auditors of the cleanup of a
- failed audit module loading attempt. */
- if (head != NULL && head->l_auditing == 0)
- {
- struct audit_ifaces *afct = GLRO(dl_audit);
- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
- {
- if (afct->activity != NULL)
- {
- struct auditstate *state = link_map_audit_state (head, cnt);
- afct->activity (&state->cookie, LA_ACT_CONSISTENT);
- }
+ /* TLS is cleaned up for the unloaded modules. */
+ __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
- afct = afct->next;
- }
- }
- }
+#ifdef SHARED
+ /* Auditing checkpoint: we have deleted all objects. Also, do not notify
+ auditors of the cleanup of a failed audit module loading attempt. */
+ _dl_audit_activity_nsid (nsid, LA_ACT_CONSISTENT);
#endif
if (__builtin_expect (ns->_ns_loaded == NULL, 0)
--- /dev/null
+/* Placeholder compatibility symbols.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <shlib-compat.h>
+#include <sys/cdefs.h>
+
+/* GLIBC_2.34 placeholder for future symbol moves. */
+
+void
+attribute_compat_text_section
+__attribute_used__
+__rtld_version_placeholder_1 (void)
+{
+}
+
+compat_symbol (ld, __rtld_version_placeholder_1,
+ __rtld_version_placeholder, GLIBC_2_34);
License along with the GNU C Library; see the file COPYING.LIB. If
not, see <https://www.gnu.org/licenses/>. */
+#include <assert.h>
#include <errno.h>
#include <libintl.h>
#include <stdlib.h>
#include <sys/types.h>
#include "dynamic-link.h"
-void
-_dl_resolve_conflicts (struct link_map *l, ElfW(Rela) *conflict,
- ElfW(Rela) *conflictend)
-{
-#if ! ELF_MACHINE_NO_RELA
- if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_RELOC))
- _dl_debug_printf ("\nconflict processing: %s\n", DSO_FILENAME (l->l_name));
-
- {
- /* Do the conflict relocation of the object and library GOT and other
- data. */
+/* Used at loading time solely for prelinked executables. It is not called
+ concurrently, so it is safe to define it as static. */
+static struct link_map *resolve_conflict_map __attribute__ ((__unused__));
/* This macro is used as a callback from the ELF_DYNAMIC_RELOCATE code. */
-#define RESOLVE_MAP(ref, version, flags) (*ref = NULL, NULL)
+#define RESOLVE_MAP(map, scope, ref, version, flags) (*ref = NULL, NULL)
#define RESOLVE(ref, version, flags) (*ref = NULL, 0)
#define RESOLVE_CONFLICT_FIND_MAP(map, r_offset) \
do { \
(map) = resolve_conflict_map; \
} while (0)
+#include "dynamic-link.h"
+
+void
+_dl_resolve_conflicts (struct link_map *l, ElfW(Rela) *conflict,
+ ElfW(Rela) *conflictend)
+{
+#if ! ELF_MACHINE_NO_RELA
+ if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_RELOC))
+ _dl_debug_printf ("\nconflict processing: %s\n", DSO_FILENAME (l->l_name));
+
+ {
+ /* Do the conflict relocation of the object and library GOT and other
+ data. */
+
/* Prelinking makes no sense for anything but the main namespace. */
assert (l->l_ns == LM_ID_BASE);
- struct link_map *resolve_conflict_map __attribute__ ((__unused__))
- = GL(dl_ns)[LM_ID_BASE]._ns_loaded;
-
-#include "dynamic-link.h"
+ resolve_conflict_map = GL(dl_ns)[LM_ID_BASE]._ns_loaded;
/* Override these, defined in dynamic-link.h. */
#undef CHECK_STATIC_TLS
GL(dl_num_cache_relocations) += conflictend - conflict;
for (; conflict < conflictend; ++conflict)
- elf_machine_rela (l, conflict, NULL, NULL, (void *) conflict->r_offset,
- 0);
+ elf_machine_rela (l, NULL, conflict, NULL, NULL,
+ (void *) conflict->r_offset, 0);
}
#endif
}
--- /dev/null
+/* Early memory allocation for the dynamic loader. Generic version.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <stddef.h>
+#include <unistd.h>
+
+void *
+_dl_early_allocate (size_t size)
+{
+ void *result = __sbrk (size);
+ if (result == (void *) -1)
+ result = NULL;
+ return result;
+}
__rtld_lock_unlock_recursive (GL(dl_load_lock));
else
{
+#ifdef SHARED
+ _dl_audit_activity_nsid (ns, LA_ACT_DELETE);
+#endif
+
/* Now we can allocate an array to hold all the pointers and
copy the pointers in. */
struct link_map *maps[nloaded];
#ifdef SHARED
/* Auditing checkpoint: another object closed. */
- if (!do_audit && __builtin_expect (GLRO(dl_naudit) > 0, 0))
- {
- struct audit_ifaces *afct = GLRO(dl_audit);
- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
- {
- if (afct->objclose != NULL)
- {
- struct auditstate *state
- = link_map_audit_state (l, cnt);
- /* Return value is ignored. */
- (void) afct->objclose (&state->cookie);
- }
- afct = afct->next;
- }
- }
+ _dl_audit_objclose (l);
#endif
}
/* Correct the previous increment. */
--l->l_direct_opencount;
}
+
+#ifdef SHARED
+ _dl_audit_activity_nsid (ns, LA_ACT_CONSISTENT);
+#endif
}
}
args.caller_dlopen = RETURN_ADDRESS (0);
#ifdef SHARED
- if (!rtld_active ())
+ if (GLRO (dl_dlfcn_hook) != NULL)
return GLRO (dl_dlfcn_hook)->libc_dlopen_mode (name, mode);
#endif
return dlerror_run (do_dlopen, &args) ? NULL : (void *) args.map;
args.name = name;
#ifdef SHARED
- if (!rtld_active ())
+ if (GLRO (dl_dlfcn_hook) != NULL)
return GLRO (dl_dlfcn_hook)->libc_dlsym (map, name);
#endif
return (dlerror_run (do_dlsym, &args) ? NULL
__libc_dlvsym (void *map, const char *name, const char *version)
{
#ifdef SHARED
- if (!rtld_active ())
+ if (GLRO (dl_dlfcn_hook) != NULL)
return GLRO (dl_dlfcn_hook)->libc_dlvsym (map, name, version);
#endif
__libc_dlclose (void *map)
{
#ifdef SHARED
- if (!rtld_active ())
+ if (GLRO (dl_dlfcn_hook) != NULL)
return GLRO (dl_dlfcn_hook)->libc_dlclose (map);
#endif
return dlerror_run (do_dlclose, map);
};
#include "dynamic-link.h"
+#include "get-dynamic-info.h"
#include <abi-tag.h>
#include <stackinfo.h>
#include <sysdep.h>
/* This is the ELF header. We read it in `open_verify'. */
header = (void *) fbp->buf;
- /* Signal that we are going to add new objects. */
- if (r->r_state == RT_CONSISTENT)
- {
-#ifdef SHARED
- /* Auditing checkpoint: we are going to add new objects. */
- if ((mode & __RTLD_AUDIT) == 0
- && __glibc_unlikely (GLRO(dl_naudit) > 0))
- {
- struct link_map *head = GL(dl_ns)[nsid]._ns_loaded;
- /* Do not call the functions for any auditing object. */
- if (head->l_auditing == 0)
- {
- struct audit_ifaces *afct = GLRO(dl_audit);
- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
- {
- if (afct->activity != NULL)
- afct->activity (&link_map_audit_state (head, cnt)->cookie,
- LA_ACT_ADD);
-
- afct = afct->next;
- }
- }
- }
-#endif
-
- /* Notify the debugger we have added some objects. We need to
- call _dl_debug_initialize in a static program in case dynamic
- linking has not been used before. */
- r->r_state = RT_ADD;
- _dl_debug_state ();
- LIBC_PROBE (map_start, 2, nsid, r);
- make_consistent = true;
- }
- else
- assert (r->r_state == RT_ADD);
-
/* Enter the new object in the list of loaded objects. */
l = _dl_new_object (realname, name, l_type, loader, mode, nsid);
if (__glibc_unlikely (l == NULL))
struct loadcmd loadcmds[l->l_phnum];
size_t nloadcmds = 0;
bool has_holes = false;
+ bool empty_dynamic = false;
/* The struct is initialized to zero so this is not necessary:
l->l_ld = 0;
segments are mapped in. We record the addresses it says
verbatim, and later correct for the run-time load address. */
case PT_DYNAMIC:
- if (ph->p_filesz)
+ if (ph->p_filesz == 0)
+ empty_dynamic = true; /* Usually separate debuginfo. */
+ else
{
/* Debuginfo only files from "objcopy --only-keep-debug"
contain a PT_DYNAMIC segment with p_filesz == 0. Skip
such a segment to avoid a crash later. */
l->l_ld = (void *) ph->p_vaddr;
l->l_ldnum = ph->p_memsz / sizeof (ElfW(Dyn));
+ l->l_ld_readonly = (ph->p_flags & PF_W) == 0;
}
break;
goto lose;
}
+ /* This check recognizes most separate debuginfo files. */
+ if (__glibc_unlikely ((l->l_ld == 0 && type == ET_DYN) || empty_dynamic))
+ {
+ errstring = N_("object file has no dynamic section");
+ goto lose;
+ }
+
/* Length of the sections to be loaded. */
maplength = loadcmds[nloadcmds - 1].allocend - loadcmds[0].mapstart;
}
}
- if (l->l_ld == 0)
- {
- if (__glibc_unlikely (type == ET_DYN))
- {
- errstring = N_("object file has no dynamic section");
- goto lose;
- }
- }
- else
+ if (l->l_ld != 0)
l->l_ld = (ElfW(Dyn) *) ((ElfW(Addr)) l->l_ld + l->l_addr);
- elf_get_dynamic_info (l, NULL);
+ elf_get_dynamic_info (l, false, false);
/* Make sure we are not dlopen'ing an object that has the
DF_1_NOOPEN flag set, or a PIE object. */
/* Now that the object is fully initialized add it to the object list. */
_dl_add_to_namespace_list (l, nsid);
-#ifdef SHARED
- /* Auditing checkpoint: we have a new object. */
- if (__glibc_unlikely (GLRO(dl_naudit) > 0)
- && !GL(dl_ns)[l->l_ns]._ns_loaded->l_auditing)
+ /* Signal that we are going to add new objects. */
+ if (r->r_state == RT_CONSISTENT)
{
- struct audit_ifaces *afct = GLRO(dl_audit);
- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
- {
- if (afct->objopen != NULL)
- {
- struct auditstate *state = link_map_audit_state (l, cnt);
- state->bindflags = afct->objopen (l, nsid, &state->cookie);
- l->l_audit_any_plt |= state->bindflags != 0;
- }
+#ifdef SHARED
+ /* Auditing checkpoint: we are going to add new objects. Since this
+ is called after _dl_add_to_namespace_list the namespace is guaranteed
+ to not be empty. */
+ if ((mode & __RTLD_AUDIT) == 0)
+ _dl_audit_activity_nsid (nsid, LA_ACT_ADD);
+#endif
- afct = afct->next;
- }
+ /* Notify the debugger we have added some objects. We need to
+ call _dl_debug_initialize in a static program in case dynamic
+ linking has not been used before. */
+ r->r_state = RT_ADD;
+ _dl_debug_state ();
+ LIBC_PROBE (map_start, 2, nsid, r);
+ make_consistent = true;
}
+ else
+ assert (r->r_state == RT_ADD);
+
+#ifdef SHARED
+ /* Auditing checkpoint: we have a new object. */
+ if (!GL(dl_ns)[l->l_ns]._ns_loaded->l_auditing)
+ _dl_audit_objopen (l, nsid);
#endif
return l;
#ifdef SHARED
/* Give the auditing libraries a chance. */
- if (__glibc_unlikely (GLRO(dl_naudit) > 0) && whatcode != 0
- && loader->l_auditing == 0)
+ if (__glibc_unlikely (GLRO(dl_naudit) > 0))
{
const char *original_name = name;
- struct audit_ifaces *afct = GLRO(dl_audit);
- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
- {
- if (afct->objsearch != NULL)
- {
- struct auditstate *state = link_map_audit_state (loader, cnt);
- name = afct->objsearch (name, &state->cookie, whatcode);
- if (name == NULL)
- /* Ignore the path. */
- return -1;
- }
-
- afct = afct->next;
- }
+ name = _dl_audit_objsearch (name, loader, whatcode);
+ if (name == NULL)
+ return -1;
if (fd != -1 && name != original_name && strcmp (name, original_name))
- {
- /* An audit library changed what we're supposed to open,
- so FD no longer matches it. */
- __close_nocancel (fd);
- fd = -1;
- }
+ {
+ /* An audit library changed what we're supposed to open,
+ so FD no longer matches it. */
+ __close_nocancel (fd);
+ fd = -1;
+ }
}
#endif
#ifdef SHARED
/* Give the auditing libraries a chance to change the name before we
try anything. */
- if (__glibc_unlikely (GLRO(dl_naudit) > 0)
- && (loader == NULL || loader->l_auditing == 0))
+ if (__glibc_unlikely (GLRO(dl_naudit) > 0))
{
- struct audit_ifaces *afct = GLRO(dl_audit);
- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
+ const char *before = name;
+ name = _dl_audit_objsearch (name, loader, LA_SER_ORIG);
+ if (name == NULL)
{
- if (afct->objsearch != NULL)
- {
- const char *before = name;
- struct auditstate *state = link_map_audit_state (loader, cnt);
- name = afct->objsearch (name, &state->cookie, LA_SER_ORIG);
- if (name == NULL)
- {
- /* Do not try anything further. */
- fd = -1;
- goto no_file;
- }
- if (before != name && strcmp (before, name) != 0)
- {
- if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
- _dl_debug_printf ("audit changed filename %s -> %s\n",
- before, name);
-
- if (origname == NULL)
- origname = before;
- }
- }
-
- afct = afct->next;
+ fd = -1;
+ goto no_file;
}
+ if (before != name && strcmp (before, name) != 0)
+ origname = before;
}
#endif
{
#ifdef SHARED
unsigned int naudit;
- if (__glibc_unlikely ((mode & __RTLD_OPENEXEC) != 0))
+ if (__glibc_unlikely ((mode & (__RTLD_OPENEXEC | __RTLD_VDSO)) != 0))
{
- assert (type == lt_executable);
- assert (nsid == LM_ID_BASE);
+ if (mode & __RTLD_OPENEXEC)
+ {
+ assert (type == lt_executable);
+ assert (nsid == LM_ID_BASE);
- /* Ignore the specified libname for the main executable. It is
- only known with an explicit loader invocation. */
- libname = "";
+ /* Ignore the specified libname for the main executable. It is
+ only known with an explicit loader invocation. */
+ libname = "";
+ }
- /* We create the map for the executable before we know whether
+ /* We create the map for the executable and vDSO before we know whether
we have auditing libraries and if yes, how many. Assume the
worst. */
naudit = DL_NNS;
libc_map value in the namespace in case of a dlopen failure. */
bool libc_already_loaded;
+ /* Set to true if the end of dl_open_worker_begin was reached. */
+ bool worker_continue;
+
/* Original parameters to the program and the current environment. */
int argc;
char **argv;
}
static void
-dl_open_worker (void *a)
+dl_open_worker_begin (void *a)
{
struct dl_open_args *args = a;
const char *file = args->file;
#ifdef SHARED
/* Auditing checkpoint: we have added all objects. */
- if (__glibc_unlikely (GLRO(dl_naudit) > 0))
- {
- struct link_map *head = GL(dl_ns)[new->l_ns]._ns_loaded;
- /* Do not call the functions for any auditing object. */
- if (head->l_auditing == 0)
- {
- struct audit_ifaces *afct = GLRO(dl_audit);
- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
- {
- if (afct->activity != NULL)
- {
- struct auditstate *state = link_map_audit_state (head, cnt);
- afct->activity (&state->cookie, LA_ACT_CONSISTENT);
- }
-
- afct = afct->next;
- }
- }
- }
+ _dl_audit_activity_nsid (new->l_ns, LA_ACT_CONSISTENT);
#endif
/* Notify the debugger all new objects are now ready to go. */
_dl_call_libc_early_init (libc_map, false);
}
+ args->worker_continue = true;
+}
+
+static void
+dl_open_worker (void *a)
+{
+ struct dl_open_args *args = a;
+
+ args->worker_continue = false;
+
+ {
+ /* Protects global and module specific TLS state. */
+ __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
+
+ struct dl_exception ex;
+ int err = _dl_catch_exception (&ex, dl_open_worker_begin, args);
+
+ __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
+
+ if (__glibc_unlikely (ex.errstring != NULL))
+ /* Reraise the error. */
+ _dl_signal_exception (err, &ex, NULL);
+ }
+
+ if (!args->worker_continue)
+ return;
+
+ int mode = args->mode;
+ struct link_map *new = args->map;
+
/* Run the initializer functions of new objects. Temporarily
disable the exception handler, so that lazy binding failures are
fatal. */
/* Avoid keeping around a dangling reference to the libc.so link
map in case it has been cached in libc_map. */
if (!args.libc_already_loaded)
- GL(dl_ns)[nsid].libc_map = NULL;
+ GL(dl_ns)[args.nsid].libc_map = NULL;
/* Remove the object from memory. It may be in an inconsistent
state if relocation failed, for example. */
the flag here. */
}
- assert (_dl_debug_initialize (0, args.nsid)->r_state == RT_CONSISTENT);
-
/* Release the lock. */
__rtld_lock_unlock_recursive (GL(dl_load_lock));
#if ENABLE_STATIC_PIE
/* Mark symbols hidden in static PIE for early self relocation to work. */
# pragma GCC visibility push(hidden)
+#include <assert.h>
#include <unistd.h>
#include <ldsodefs.h>
+
+#include <dl-machine.h>
+
+#define RESOLVE_MAP(map, scope, sym, version, flags) map
#include "dynamic-link.h"
+#include "get-dynamic-info.h"
/* Relocate static executable with PIE. */
{
struct link_map *main_map = _dl_get_dl_main_map ();
-# define STATIC_PIE_BOOTSTRAP
-# define BOOTSTRAP_MAP (main_map)
-# define RESOLVE_MAP(sym, version, flags) BOOTSTRAP_MAP
-# include "dynamic-link.h"
-
/* Figure out the run-time load address of static PIE. */
main_map->l_addr = elf_machine_load_address ();
/* Read our own dynamic section and fill in the info array. */
main_map->l_ld = ((void *) main_map->l_addr + elf_machine_dynamic ());
- elf_get_dynamic_info (main_map, NULL);
+
+ const ElfW(Phdr) *ph, *phdr = GL(dl_phdr);
+ size_t phnum = GL(dl_phnum);
+ for (ph = phdr; ph < &phdr[phnum]; ++ph)
+ if (ph->p_type == PT_DYNAMIC)
+ {
+ main_map->l_ld_readonly = (ph->p_flags & PF_W) == 0;
+ break;
+ }
+
+ elf_get_dynamic_info (main_map, false, true);
# ifdef ELF_MACHINE_BEFORE_RTLD_RELOC
- ELF_MACHINE_BEFORE_RTLD_RELOC (main_map->l_info);
+ ELF_MACHINE_BEFORE_RTLD_RELOC (main_map, main_map->l_info);
# endif
/* Relocate ourselves so we can do normal function calls and
data access using the global offset table. */
- ELF_DYNAMIC_RELOCATE (main_map, 0, 0, 0);
+ ELF_DYNAMIC_RELOCATE (main_map, NULL, 0, 0, 0);
main_map->l_relocated = 1;
/* Initialize _r_debug. */
}
#endif /* !THREAD_GSCOPE_IN_TCB */
+/* This macro is used as a callback from the ELF_DYNAMIC_RELOCATE code. */
+#define RESOLVE_MAP(l, scope, ref, version, r_type) \
+ ((ELFW(ST_BIND) ((*ref)->st_info) != STB_LOCAL \
+ && __glibc_likely (!dl_symbol_visibility_binds_local_p (*ref))) \
+ ? ((__glibc_unlikely ((*ref) == l->l_lookup_cache.sym) \
+ && elf_machine_type_class (r_type) == l->l_lookup_cache.type_class) \
+ ? (bump_num_cache_relocations (), \
+ (*ref) = l->l_lookup_cache.ret, \
+ l->l_lookup_cache.value) \
+ : ({ lookup_t _lr; \
+ int _tc = elf_machine_type_class (r_type); \
+ l->l_lookup_cache.type_class = _tc; \
+ l->l_lookup_cache.sym = (*ref); \
+ const struct r_found_version *v = NULL; \
+ if ((version) != NULL && (version)->hash != 0) \
+ v = (version); \
+ _lr = _dl_lookup_symbol_x ((const char *) D_PTR (l, l_info[DT_STRTAB]) + (*ref)->st_name, \
+ l, (ref), scope, v, _tc, \
+ DL_LOOKUP_ADD_DEPENDENCY \
+ | DL_LOOKUP_FOR_RELOCATE, NULL); \
+ l->l_lookup_cache.ret = (*ref); \
+ l->l_lookup_cache.value = _lr; })) \
+ : l)
+
+#include "dynamic-link.h"
+
void
_dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[],
int reloc_mode, int consider_profiling)
int skip_ifunc = reloc_mode & __RTLD_NOIFUNC;
#ifdef SHARED
+ bool consider_symbind = false;
/* If we are auditing, install the same handlers we need for profiling. */
if ((reloc_mode & __RTLD_AUDIT) == 0)
- consider_profiling |= GLRO(dl_audit) != NULL;
+ {
+ struct audit_ifaces *afct = GLRO(dl_audit);
+ for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
+ {
+ /* Profiling is needed only if PLT hooks are provided. */
+ if (afct->ARCH_LA_PLTENTER != NULL
+ || afct->ARCH_LA_PLTEXIT != NULL)
+ consider_profiling = 1;
+ if (afct->symbind != NULL)
+ consider_symbind = true;
+
+ afct = afct->next;
+ }
+ }
#elif defined PROF
/* Never use dynamic linker profiling for gprof profiling code. */
# define consider_profiling 0
+#else
+# define consider_symbind 0
#endif
if (l->l_relocated)
{
/* Do the actual relocation of the object's GOT and other data. */
- /* String table object symbols. */
- const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]);
-
- /* This macro is used as a callback from the ELF_DYNAMIC_RELOCATE code. */
-#define RESOLVE_MAP(ref, version, r_type) \
- ((ELFW(ST_BIND) ((*ref)->st_info) != STB_LOCAL \
- && __glibc_likely (!dl_symbol_visibility_binds_local_p (*ref))) \
- ? ((__builtin_expect ((*ref) == l->l_lookup_cache.sym, 0) \
- && elf_machine_type_class (r_type) == l->l_lookup_cache.type_class) \
- ? (bump_num_cache_relocations (), \
- (*ref) = l->l_lookup_cache.ret, \
- l->l_lookup_cache.value) \
- : ({ lookup_t _lr; \
- int _tc = elf_machine_type_class (r_type); \
- l->l_lookup_cache.type_class = _tc; \
- l->l_lookup_cache.sym = (*ref); \
- const struct r_found_version *v = NULL; \
- if ((version) != NULL && (version)->hash != 0) \
- v = (version); \
- _lr = _dl_lookup_symbol_x (strtab + (*ref)->st_name, l, (ref), \
- scope, v, _tc, \
- DL_LOOKUP_ADD_DEPENDENCY \
- | DL_LOOKUP_FOR_RELOCATE, NULL); \
- l->l_lookup_cache.ret = (*ref); \
- l->l_lookup_cache.value = _lr; })) \
- : l)
-
-#include "dynamic-link.h"
-
- ELF_DYNAMIC_RELOCATE (l, lazy, consider_profiling, skip_ifunc);
+ ELF_DYNAMIC_RELOCATE (l, scope, lazy, consider_profiling, skip_ifunc);
#ifndef PROF
- if (__glibc_unlikely (consider_profiling)
+ if ((consider_profiling || consider_symbind)
&& l->l_info[DT_PLTRELSZ] != NULL)
{
/* Allocate the array which will contain the already found
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#define IN_DL_RUNTIME 1 /* This can be tested in dl-machine.h. */
-
#include <alloca.h>
+#include <assert.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/param.h>
#include <dl-runtime.h>
-#if (!ELF_MACHINE_NO_RELA && !defined ELF_MACHINE_PLT_REL) \
- || ELF_MACHINE_NO_REL
-# define PLTREL ElfW(Rela)
-#else
-# define PLTREL ElfW(Rel)
-#endif
-
-/* The fixup functions might have need special attributes. If none
- are provided define the macro as empty. */
-#ifndef ARCH_FIXUP_ATTRIBUTE
-# define ARCH_FIXUP_ATTRIBUTE
-#endif
-
/* This function is called through a special trampoline from the PLT the
first time each PLT entry is called. We must perform the relocation
specified in the PLT of the given shared object, and return the resolved
function. */
DL_FIXUP_VALUE_TYPE
-attribute_hidden __attribute ((noinline)) ARCH_FIXUP_ATTRIBUTE
+attribute_hidden __attribute ((noinline)) DL_ARCH_FIXUP_ATTRIBUTE
_dl_fixup (
# ifdef ELF_MACHINE_RUNTIME_FIXUP_ARGS
ELF_MACHINE_RUNTIME_FIXUP_ARGS,
&& __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0))
value = elf_ifunc_invoke (DL_FIXUP_VALUE_ADDR (value));
+#ifdef SHARED
+ /* Auditing checkpoint: we have a new binding. Provide the auditing
+ libraries the possibility to change the value and tell us whether further
+ auditing is wanted.
+ The l_reloc_result is only allocated if there is an audit module which
+ provides a la_symbind. */
+ if (l->l_reloc_result != NULL)
+ {
+ /* This is the address in the array where we store the result of previous
+ relocations. */
+ struct reloc_result *reloc_result
+ = &l->l_reloc_result[reloc_index (pltgot, reloc_arg, sizeof (PLTREL))];
+ unsigned int init = atomic_load_acquire (&reloc_result->init);
+ if (init == 0)
+ {
+ _dl_audit_symbind (l, reloc_result, sym, &value, result);
+
+ /* Store the result for later runs. */
+ if (__glibc_likely (! GLRO(dl_bind_not)))
+ {
+ reloc_result->addr = value;
+ /* Guarantee all previous writes complete before init is
+ updated. See CONCURRENCY NOTES below. */
+ atomic_store_release (&reloc_result->init, 1);
+ }
+ }
+ else
+ value = reloc_result->addr;
+ }
+#endif
+
/* Finally, fix up the plt itself. */
if (__glibc_unlikely (GLRO(dl_bind_not)))
return value;
#ifndef PROF
DL_FIXUP_VALUE_TYPE
-__attribute ((noinline)) ARCH_FIXUP_ATTRIBUTE
+__attribute ((noinline))
+DL_ARCH_FIXUP_ATTRIBUTE
_dl_profile_fixup (
#ifdef ELF_MACHINE_RUNTIME_FIXUP_ARGS
ELF_MACHINE_RUNTIME_FIXUP_ARGS,
auditing libraries the possibility to change the value and
tell us whether further auditing is wanted. */
if (defsym != NULL && GLRO(dl_naudit) > 0)
- {
- reloc_result->bound = result;
- /* Compute index of the symbol entry in the symbol table of
- the DSO with the definition. */
- reloc_result->boundndx = (defsym
- - (ElfW(Sym) *) D_PTR (result,
- l_info[DT_SYMTAB]));
-
- /* Determine whether any of the two participating DSOs is
- interested in auditing. */
- if ((l->l_audit_any_plt | result->l_audit_any_plt) != 0)
- {
- unsigned int flags = 0;
- struct audit_ifaces *afct = GLRO(dl_audit);
- /* Synthesize a symbol record where the st_value field is
- the result. */
- ElfW(Sym) sym = *defsym;
- sym.st_value = DL_FIXUP_VALUE_ADDR (value);
-
- /* Keep track whether there is any interest in tracing
- the call in the lower two bits. */
- assert (DL_NNS * 2 <= sizeof (reloc_result->flags) * 8);
- assert ((LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) == 3);
- reloc_result->enterexit = LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT;
-
- const char *strtab2 = (const void *) D_PTR (result,
- l_info[DT_STRTAB]);
-
- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
- {
- /* XXX Check whether both DSOs must request action or
- only one */
- struct auditstate *l_state = link_map_audit_state (l, cnt);
- struct auditstate *result_state
- = link_map_audit_state (result, cnt);
- if ((l_state->bindflags & LA_FLG_BINDFROM) != 0
- && (result_state->bindflags & LA_FLG_BINDTO) != 0)
- {
- if (afct->symbind != NULL)
- {
- uintptr_t new_value
- = afct->symbind (&sym, reloc_result->boundndx,
- &l_state->cookie,
- &result_state->cookie,
- &flags,
- strtab2 + defsym->st_name);
- if (new_value != (uintptr_t) sym.st_value)
- {
- flags |= LA_SYMB_ALTVALUE;
- sym.st_value = new_value;
- }
- }
-
- /* Remember the results for every audit library and
- store a summary in the first two bits. */
- reloc_result->enterexit
- &= flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT);
- reloc_result->enterexit
- |= ((flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT))
- << ((cnt + 1) * 2));
- }
- else
- /* If the bind flags say this auditor is not interested,
- set the bits manually. */
- reloc_result->enterexit
- |= ((LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT)
- << ((cnt + 1) * 2));
-
- afct = afct->next;
- }
-
- reloc_result->flags = flags;
- value = DL_FIXUP_ADDR_VALUE (sym.st_value);
- }
- else
- /* Set all bits since this symbol binding is not interesting. */
- reloc_result->enterexit = (1u << DL_NNS) - 1;
- }
+ _dl_audit_symbind (l, reloc_result, defsym, &value, result);
#endif
/* Store the result for later runs. */
#ifdef SHARED
/* Auditing checkpoint: report the PLT entering and allow the
auditors to change the value. */
- if (GLRO(dl_naudit) > 0
- /* Don't do anything if no auditor wants to intercept this call. */
- && (reloc_result->enterexit & LA_SYMB_NOPLTENTER) == 0)
- {
- /* Sanity check: DL_FIXUP_VALUE_CODE_ADDR (value) should have been
- initialized earlier in this function or in another thread. */
- assert (DL_FIXUP_VALUE_CODE_ADDR (value) != 0);
- ElfW(Sym) *defsym = ((ElfW(Sym) *) D_PTR (reloc_result->bound,
- l_info[DT_SYMTAB])
- + reloc_result->boundndx);
-
- /* Set up the sym parameter. */
- ElfW(Sym) sym = *defsym;
- sym.st_value = DL_FIXUP_VALUE_ADDR (value);
-
- /* Get the symbol name. */
- const char *strtab = (const void *) D_PTR (reloc_result->bound,
- l_info[DT_STRTAB]);
- const char *symname = strtab + sym.st_name;
-
- /* Keep track of overwritten addresses. */
- unsigned int flags = reloc_result->flags;
-
- struct audit_ifaces *afct = GLRO(dl_audit);
- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
- {
- if (afct->ARCH_LA_PLTENTER != NULL
- && (reloc_result->enterexit
- & (LA_SYMB_NOPLTENTER << (2 * (cnt + 1)))) == 0)
- {
- long int new_framesize = -1;
- struct auditstate *l_state = link_map_audit_state (l, cnt);
- struct auditstate *bound_state
- = link_map_audit_state (reloc_result->bound, cnt);
- uintptr_t new_value
- = afct->ARCH_LA_PLTENTER (&sym, reloc_result->boundndx,
- &l_state->cookie,
- &bound_state->cookie,
- regs, &flags, symname,
- &new_framesize);
- if (new_value != (uintptr_t) sym.st_value)
- {
- flags |= LA_SYMB_ALTVALUE;
- sym.st_value = new_value;
- }
-
- /* Remember the results for every audit library and
- store a summary in the first two bits. */
- reloc_result->enterexit
- |= ((flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT))
- << (2 * (cnt + 1)));
-
- if ((reloc_result->enterexit & (LA_SYMB_NOPLTEXIT
- << (2 * (cnt + 1))))
- == 0 && new_framesize != -1 && framesize != -2)
- {
- /* If this is the first call providing information,
- use it. */
- if (framesize == -1)
- framesize = new_framesize;
- /* If two pltenter calls provide conflicting information,
- use the larger value. */
- else if (new_framesize != framesize)
- framesize = MAX (new_framesize, framesize);
- }
- }
-
- afct = afct->next;
- }
-
- value = DL_FIXUP_ADDR_VALUE (sym.st_value);
- }
+ _dl_audit_pltenter (l, reloc_result, &value, regs, &framesize);
#endif
/* Store the frame size information. */
}
#endif /* PROF */
-
-
-#include <stdio.h>
-void
-ARCH_FIXUP_ATTRIBUTE
-_dl_call_pltexit (struct link_map *l, ElfW(Word) reloc_arg,
- const void *inregs, void *outregs)
-{
-#ifdef SHARED
- const uintptr_t pltgot = (uintptr_t) D_PTR (l, l_info[DT_PLTGOT]);
-
- /* This is the address in the array where we store the result of previous
- relocations. */
- // XXX Maybe the bound information must be stored on the stack since
- // XXX with bind_not a new value could have been stored in the meantime.
- struct reloc_result *reloc_result =
- &l->l_reloc_result[reloc_index (pltgot, reloc_arg, sizeof (PLTREL))];
- ElfW(Sym) *defsym = ((ElfW(Sym) *) D_PTR (reloc_result->bound,
- l_info[DT_SYMTAB])
- + reloc_result->boundndx);
-
- /* Set up the sym parameter. */
- ElfW(Sym) sym = *defsym;
- sym.st_value = DL_FIXUP_VALUE_ADDR (reloc_result->addr);
-
- /* Get the symbol name. */
- const char *strtab = (const void *) D_PTR (reloc_result->bound,
- l_info[DT_STRTAB]);
- const char *symname = strtab + sym.st_name;
-
- struct audit_ifaces *afct = GLRO(dl_audit);
- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
- {
- if (afct->ARCH_LA_PLTEXIT != NULL
- && (reloc_result->enterexit
- & (LA_SYMB_NOPLTEXIT >> (2 * cnt))) == 0)
- {
- struct auditstate *l_state = link_map_audit_state (l, cnt);
- struct auditstate *bound_state
- = link_map_audit_state (reloc_result->bound, cnt);
- afct->ARCH_LA_PLTEXIT (&sym, reloc_result->boundndx,
- &l_state->cookie, &bound_state->cookie,
- inregs, outregs, symname);
- }
-
- afct = afct->next;
- }
-#endif
-}
--- /dev/null
+/* Macros for checking static TLS availability during relocation.
+ Copyright (C) 1995-2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _DL_STATIC_TLS_H
+#define _DL_STATIC_TLS_H
+
+/* This macro is used as a callback from elf_machine_rel{a,} when a
+ static TLS reloc is about to be performed. Since (in dl-load.c) we
+ permit dynamic loading of objects that might use such relocs, we
+ have to check whether each use is actually doable. If the object
+ whose TLS segment the reference resolves to was allocated space in
+ the static TLS block at startup, then it's ok. Otherwise, we make
+ an attempt to allocate it in surplus space on the fly. If that
+ can't be done, we fall back to the error that DF_STATIC_TLS is
+ intended to produce. */
+#define HAVE_STATIC_TLS(map, sym_map) \
+ (__builtin_expect ((sym_map)->l_tls_offset != NO_TLS_OFFSET \
+ && ((sym_map)->l_tls_offset \
+ != FORCED_DYNAMIC_TLS_OFFSET), 1))
+
+#define CHECK_STATIC_TLS(map, sym_map) \
+ do { \
+ if (!HAVE_STATIC_TLS (map, sym_map)) \
+ _dl_allocate_static_tls (sym_map); \
+ } while (0)
+
+#define TRY_STATIC_TLS(map, sym_map) \
+ (__builtin_expect ((sym_map)->l_tls_offset \
+ != FORCED_DYNAMIC_TLS_OFFSET, 1) \
+ && (__builtin_expect ((sym_map)->l_tls_offset != NO_TLS_OFFSET, 1) \
+ || _dl_try_allocate_static_tls (sym_map, true) == 0))
+
+int _dl_try_allocate_static_tls (struct link_map *map, bool optional)
+ attribute_hidden;
+
+#endif
#include <dl-vdso.h>
#include <dl-vdso-setup.h>
#include <dl-auxv.h>
+#include <array_length.h>
extern char *__progname;
char **_dl_argv = &__progname; /* This is checked for some error messages. */
list of loaded objects while an object is added to or removed from
that list. */
__rtld_lock_define_initialized_recursive (, _dl_load_write_lock)
+ /* This lock protects global and module specific TLS related data.
+ E.g. it is held in dlopen and dlclose when GL(dl_tls_generation),
+ GL(dl_tls_max_dtv_idx) or GL(dl_tls_dtv_slotinfo_list) are
+ accessed and when TLS related relocations are processed for a
+ module. It was introduced to keep pthread_create from accessing TLS
+ state that is being set up. */
+__rtld_lock_define_initialized_recursive (, _dl_load_tls_lock)
#ifdef HAVE_AUX_VECTOR
+#include <dl-parse_auxv.h>
+
int _dl_clktck;
void
_dl_aux_init (ElfW(auxv_t) *av)
{
- int seen = 0;
- uid_t uid = 0;
- gid_t gid = 0;
-
#ifdef NEED_DL_SYSINFO
/* NB: Avoid RELATIVE relocation in static PIE. */
GL(dl_sysinfo) = DL_SYSINFO_DEFAULT;
#endif
_dl_auxv = av;
- for (; av->a_type != AT_NULL; ++av)
- switch (av->a_type)
- {
- case AT_PAGESZ:
- if (av->a_un.a_val != 0)
- GLRO(dl_pagesize) = av->a_un.a_val;
- break;
- case AT_CLKTCK:
- GLRO(dl_clktck) = av->a_un.a_val;
- break;
- case AT_PHDR:
- GL(dl_phdr) = (const void *) av->a_un.a_val;
- break;
- case AT_PHNUM:
- GL(dl_phnum) = av->a_un.a_val;
- break;
- case AT_PLATFORM:
- GLRO(dl_platform) = (void *) av->a_un.a_val;
- break;
- case AT_HWCAP:
- GLRO(dl_hwcap) = (unsigned long int) av->a_un.a_val;
- break;
- case AT_HWCAP2:
- GLRO(dl_hwcap2) = (unsigned long int) av->a_un.a_val;
- break;
- case AT_FPUCW:
- GLRO(dl_fpu_control) = av->a_un.a_val;
- break;
-#ifdef NEED_DL_SYSINFO
- case AT_SYSINFO:
- GL(dl_sysinfo) = av->a_un.a_val;
- break;
-#endif
-#ifdef NEED_DL_SYSINFO_DSO
- case AT_SYSINFO_EHDR:
- GL(dl_sysinfo_dso) = (void *) av->a_un.a_val;
- break;
-#endif
- case AT_UID:
- uid ^= av->a_un.a_val;
- seen |= 1;
- break;
- case AT_EUID:
- uid ^= av->a_un.a_val;
- seen |= 2;
- break;
- case AT_GID:
- gid ^= av->a_un.a_val;
- seen |= 4;
- break;
- case AT_EGID:
- gid ^= av->a_un.a_val;
- seen |= 8;
- break;
- case AT_SECURE:
- seen = -1;
- __libc_enable_secure = av->a_un.a_val;
- __libc_enable_secure_decided = 1;
- break;
- case AT_RANDOM:
- _dl_random = (void *) av->a_un.a_val;
- break;
- case AT_MINSIGSTKSZ:
- _dl_minsigstacksize = av->a_un.a_val;
- break;
- DL_PLATFORM_AUXV
- }
- if (seen == 0xf)
- {
- __libc_enable_secure = uid != 0 || gid != 0;
- __libc_enable_secure_decided = 1;
- }
+ dl_parse_auxv_t auxv_values;
+ /* Use an explicit initialization loop here because memset may not
+ be available yet. */
+ for (int i = 0; i < array_length (auxv_values); ++i)
+ auxv_values[i] = 0;
+ _dl_parse_auxv (av, auxv_values);
}
#endif
return &_dl_main_map;
}
#endif
+
+/* Empty stub. This is used by _dl_runtime_profile, not used in static code. */
+void
+DL_ARCH_FIXUP_ATTRIBUTE
+_dl_audit_pltexit (struct link_map *l, ElfW(Word) reloc_arg,
+ const void *inregs, void *outregs)
+{
+}
tell us whether further auditing is wanted. */
if (__glibc_unlikely (GLRO(dl_naudit) > 0))
{
- const char *strtab = (const char *) D_PTR (result,
- l_info[DT_STRTAB]);
- /* Compute index of the symbol entry in the symbol table of
- the DSO with the definition. */
- unsigned int ndx = (ref - (ElfW(Sym) *) D_PTR (result,
- l_info[DT_SYMTAB]));
-
if (match == NULL)
match = _dl_sym_find_caller_link_map (caller);
-
- if ((match->l_audit_any_plt | result->l_audit_any_plt) != 0)
- {
- unsigned int altvalue = 0;
- struct audit_ifaces *afct = GLRO(dl_audit);
- /* Synthesize a symbol record where the st_value field is
- the result. */
- ElfW(Sym) sym = *ref;
- sym.st_value = (ElfW(Addr)) value;
-
- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
- {
- struct auditstate *match_audit
- = link_map_audit_state (match, cnt);
- struct auditstate *result_audit
- = link_map_audit_state (result, cnt);
- if (afct->symbind != NULL
- && ((match_audit->bindflags & LA_FLG_BINDFROM) != 0
- || ((result_audit->bindflags & LA_FLG_BINDTO)
- != 0)))
- {
- unsigned int flags = altvalue | LA_SYMB_DLSYM;
- uintptr_t new_value
- = afct->symbind (&sym, ndx,
- &match_audit->cookie,
- &result_audit->cookie,
- &flags, strtab + ref->st_name);
- if (new_value != (uintptr_t) sym.st_value)
- {
- altvalue = LA_SYMB_ALTVALUE;
- sym.st_value = new_value;
- }
- }
-
- afct = afct->next;
- }
-
- value = (void *) sym.st_value;
- }
+ _dl_audit_symbind_alt (match, ref, &value, result);
}
#endif
return value;
-/* Operating system support for run-time dynamic linker. Generic Unix version.
- Copyright (C) 1995-2021 Free Software Foundation, Inc.
+/* Operating system support for run-time dynamic linker. Stub version.
+ Copyright (C) 1995-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-/* We conditionalize the whole of this file rather than simply eliding it
- from the static build, because other sysdeps/ versions of this file
- might define things needed by a static build. */
-
-#ifdef SHARED
-
-#include <assert.h>
-#include <elf.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <libintl.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <ldsodefs.h>
-#include <_itoa.h>
-#include <fpu_control.h>
-
-#include <entry.h>
-#include <dl-machine.h>
-#include <dl-procinfo.h>
-#include <dl-osinfo.h>
-#include <libc-internal.h>
-#include <tls.h>
-
-#include <dl-tunables.h>
-#include <dl-auxv.h>
-#include <dl-hwcap-check.h>
-
-extern char **_environ attribute_hidden;
-extern char _end[] attribute_hidden;
-
-/* Protect SUID program against misuse of file descriptors. */
-extern void __libc_check_standard_fds (void);
-
-#ifdef NEED_DL_BASE_ADDR
-ElfW(Addr) _dl_base_addr;
-#endif
-int __libc_enable_secure attribute_relro = 0;
-rtld_hidden_data_def (__libc_enable_secure)
-/* This variable contains the lowest stack address ever used. */
-void *__libc_stack_end attribute_relro = NULL;
-rtld_hidden_data_def(__libc_stack_end)
-void *_dl_random attribute_relro = NULL;
-
-#ifndef DL_FIND_ARG_COMPONENTS
-# define DL_FIND_ARG_COMPONENTS(cookie, argc, argv, envp, auxp) \
- do { \
- void **_tmp; \
- (argc) = *(long int *) cookie; \
- (argv) = (char **) ((long int *) cookie + 1); \
- (envp) = (argv) + (argc) + 1; \
- for (_tmp = (void **) (envp); *_tmp; ++_tmp) \
- continue; \
- (auxp) = (void *) ++_tmp; \
- } while (0)
-#endif
-
-#ifndef DL_STACK_END
-# define DL_STACK_END(cookie) ((void *) (cookie))
-#endif
-
-ElfW(Addr)
-_dl_sysdep_start (void **start_argptr,
- void (*dl_main) (const ElfW(Phdr) *phdr, ElfW(Word) phnum,
- ElfW(Addr) *user_entry, ElfW(auxv_t) *auxv))
-{
- const ElfW(Phdr) *phdr = NULL;
- ElfW(Word) phnum = 0;
- ElfW(Addr) user_entry;
- ElfW(auxv_t) *av;
-#ifdef HAVE_AUX_SECURE
-# define set_seen(tag) (tag) /* Evaluate for the side effects. */
-# define set_seen_secure() ((void) 0)
-#else
- uid_t uid = 0;
- gid_t gid = 0;
- unsigned int seen = 0;
-# define set_seen_secure() (seen = -1)
-# ifdef HAVE_AUX_XID
-# define set_seen(tag) (tag) /* Evaluate for the side effects. */
-# else
-# define M(type) (1 << (type))
-# define set_seen(tag) seen |= M ((tag)->a_type)
-# endif
-#endif
-#ifdef NEED_DL_SYSINFO
- uintptr_t new_sysinfo = 0;
-#endif
-
- __libc_stack_end = DL_STACK_END (start_argptr);
- DL_FIND_ARG_COMPONENTS (start_argptr, _dl_argc, _dl_argv, _environ,
- GLRO(dl_auxv));
-
- user_entry = (ElfW(Addr)) ENTRY_POINT;
- GLRO(dl_platform) = NULL; /* Default to nothing known about the platform. */
-
- /* NB: Default to a constant CONSTANT_MINSIGSTKSZ. */
- _Static_assert (__builtin_constant_p (CONSTANT_MINSIGSTKSZ),
- "CONSTANT_MINSIGSTKSZ is constant");
- GLRO(dl_minsigstacksize) = CONSTANT_MINSIGSTKSZ;
-
- for (av = GLRO(dl_auxv); av->a_type != AT_NULL; set_seen (av++))
- switch (av->a_type)
- {
- case AT_PHDR:
- phdr = (void *) av->a_un.a_val;
- break;
- case AT_PHNUM:
- phnum = av->a_un.a_val;
- break;
- case AT_PAGESZ:
- GLRO(dl_pagesize) = av->a_un.a_val;
- break;
- case AT_ENTRY:
- user_entry = av->a_un.a_val;
- break;
-#ifdef NEED_DL_BASE_ADDR
- case AT_BASE:
- _dl_base_addr = av->a_un.a_val;
- break;
-#endif
-#ifndef HAVE_AUX_SECURE
- case AT_UID:
- case AT_EUID:
- uid ^= av->a_un.a_val;
- break;
- case AT_GID:
- case AT_EGID:
- gid ^= av->a_un.a_val;
- break;
-#endif
- case AT_SECURE:
-#ifndef HAVE_AUX_SECURE
- seen = -1;
-#endif
- __libc_enable_secure = av->a_un.a_val;
- break;
- case AT_PLATFORM:
- GLRO(dl_platform) = (void *) av->a_un.a_val;
- break;
- case AT_HWCAP:
- GLRO(dl_hwcap) = (unsigned long int) av->a_un.a_val;
- break;
- case AT_HWCAP2:
- GLRO(dl_hwcap2) = (unsigned long int) av->a_un.a_val;
- break;
- case AT_CLKTCK:
- GLRO(dl_clktck) = av->a_un.a_val;
- break;
- case AT_FPUCW:
- GLRO(dl_fpu_control) = av->a_un.a_val;
- break;
-#ifdef NEED_DL_SYSINFO
- case AT_SYSINFO:
- new_sysinfo = av->a_un.a_val;
- break;
-#endif
-#ifdef NEED_DL_SYSINFO_DSO
- case AT_SYSINFO_EHDR:
- GLRO(dl_sysinfo_dso) = (void *) av->a_un.a_val;
- break;
-#endif
- case AT_RANDOM:
- _dl_random = (void *) av->a_un.a_val;
- break;
- case AT_MINSIGSTKSZ:
- GLRO(dl_minsigstacksize) = av->a_un.a_val;
- break;
- DL_PLATFORM_AUXV
- }
-
- dl_hwcap_check ();
-
-#ifndef HAVE_AUX_SECURE
- if (seen != -1)
- {
- /* Fill in the values we have not gotten from the kernel through the
- auxiliary vector. */
-# ifndef HAVE_AUX_XID
-# define SEE(UID, var, uid) \
- if ((seen & M (AT_##UID)) == 0) var ^= __get##uid ()
- SEE (UID, uid, uid);
- SEE (EUID, uid, euid);
- SEE (GID, gid, gid);
- SEE (EGID, gid, egid);
-# endif
-
- /* If one of the two pairs of IDs does not match this is a setuid
- or setgid run. */
- __libc_enable_secure = uid | gid;
- }
-#endif
-
-#ifndef HAVE_AUX_PAGESIZE
- if (GLRO(dl_pagesize) == 0)
- GLRO(dl_pagesize) = __getpagesize ();
-#endif
-
-#ifdef NEED_DL_SYSINFO
- if (new_sysinfo != 0)
- {
-# ifdef NEED_DL_SYSINFO_DSO
- /* Only set the sysinfo value if we also have the vsyscall DSO. */
- if (GLRO(dl_sysinfo_dso) != 0)
-# endif
- GLRO(dl_sysinfo) = new_sysinfo;
- }
-#endif
-
- __tunables_init (_environ);
-
-#ifdef DL_SYSDEP_INIT
- DL_SYSDEP_INIT;
-#endif
-
-#ifdef DL_PLATFORM_INIT
- DL_PLATFORM_INIT;
-#endif
-
- /* Determine the length of the platform name. */
- if (GLRO(dl_platform) != NULL)
- GLRO(dl_platformlen) = strlen (GLRO(dl_platform));
-
- if (__sbrk (0) == _end)
- /* The dynamic linker was run as a program, and so the initial break
- starts just after our bss, at &_end. The malloc in dl-minimal.c
- will consume the rest of this page, so tell the kernel to move the
- break up that far. When the user program examines its break, it
- will see this new value and not clobber our data. */
- __sbrk (GLRO(dl_pagesize)
- - ((_end - (char *) 0) & (GLRO(dl_pagesize) - 1)));
-
- /* If this is a SUID program we make sure that FDs 0, 1, and 2 are
- allocated. If necessary we are doing it ourself. If it is not
- possible we stop the program. */
- if (__builtin_expect (__libc_enable_secure, 0))
- __libc_check_standard_fds ();
-
- (*dl_main) (phdr, phnum, &user_entry, GLRO(dl_auxv));
- return user_entry;
-}
-
-void
-_dl_sysdep_start_cleanup (void)
-{
-}
-
-void
-_dl_show_auxv (void)
-{
- char buf[64];
- ElfW(auxv_t) *av;
-
- /* Terminate string. */
- buf[63] = '\0';
-
- /* The following code assumes that the AT_* values are encoded
- starting from 0 with AT_NULL, 1 for AT_IGNORE, and all other values
- close by (otherwise the array will be too large). In case we have
- to support a platform where these requirements are not fulfilled
- some alternative implementation has to be used. */
- for (av = GLRO(dl_auxv); av->a_type != AT_NULL; ++av)
- {
- static const struct
- {
- const char label[22];
- enum { unknown = 0, dec, hex, str, ignore } form : 8;
- } auxvars[] =
- {
- [AT_EXECFD - 2] = { "EXECFD: ", dec },
- [AT_EXECFN - 2] = { "EXECFN: ", str },
- [AT_PHDR - 2] = { "PHDR: 0x", hex },
- [AT_PHENT - 2] = { "PHENT: ", dec },
- [AT_PHNUM - 2] = { "PHNUM: ", dec },
- [AT_PAGESZ - 2] = { "PAGESZ: ", dec },
- [AT_BASE - 2] = { "BASE: 0x", hex },
- [AT_FLAGS - 2] = { "FLAGS: 0x", hex },
- [AT_ENTRY - 2] = { "ENTRY: 0x", hex },
- [AT_NOTELF - 2] = { "NOTELF: ", hex },
- [AT_UID - 2] = { "UID: ", dec },
- [AT_EUID - 2] = { "EUID: ", dec },
- [AT_GID - 2] = { "GID: ", dec },
- [AT_EGID - 2] = { "EGID: ", dec },
- [AT_PLATFORM - 2] = { "PLATFORM: ", str },
- [AT_HWCAP - 2] = { "HWCAP: ", hex },
- [AT_CLKTCK - 2] = { "CLKTCK: ", dec },
- [AT_FPUCW - 2] = { "FPUCW: ", hex },
- [AT_DCACHEBSIZE - 2] = { "DCACHEBSIZE: 0x", hex },
- [AT_ICACHEBSIZE - 2] = { "ICACHEBSIZE: 0x", hex },
- [AT_UCACHEBSIZE - 2] = { "UCACHEBSIZE: 0x", hex },
- [AT_IGNOREPPC - 2] = { "IGNOREPPC", ignore },
- [AT_SECURE - 2] = { "SECURE: ", dec },
- [AT_BASE_PLATFORM - 2] = { "BASE_PLATFORM: ", str },
- [AT_SYSINFO - 2] = { "SYSINFO: 0x", hex },
- [AT_SYSINFO_EHDR - 2] = { "SYSINFO_EHDR: 0x", hex },
- [AT_RANDOM - 2] = { "RANDOM: 0x", hex },
- [AT_HWCAP2 - 2] = { "HWCAP2: 0x", hex },
- [AT_MINSIGSTKSZ - 2] = { "MINSIGSTKSZ ", dec },
- [AT_L1I_CACHESIZE - 2] = { "L1I_CACHESIZE: ", dec },
- [AT_L1I_CACHEGEOMETRY - 2] = { "L1I_CACHEGEOMETRY: 0x", hex },
- [AT_L1D_CACHESIZE - 2] = { "L1D_CACHESIZE: ", dec },
- [AT_L1D_CACHEGEOMETRY - 2] = { "L1D_CACHEGEOMETRY: 0x", hex },
- [AT_L2_CACHESIZE - 2] = { "L2_CACHESIZE: ", dec },
- [AT_L2_CACHEGEOMETRY - 2] = { "L2_CACHEGEOMETRY: 0x", hex },
- [AT_L3_CACHESIZE - 2] = { "L3_CACHESIZE: ", dec },
- [AT_L3_CACHEGEOMETRY - 2] = { "L3_CACHEGEOMETRY: 0x", hex },
- };
- unsigned int idx = (unsigned int) (av->a_type - 2);
-
- if ((unsigned int) av->a_type < 2u
- || (idx < sizeof (auxvars) / sizeof (auxvars[0])
- && auxvars[idx].form == ignore))
- continue;
-
- assert (AT_NULL == 0);
- assert (AT_IGNORE == 1);
-
- /* Some entries are handled in a special way per platform. */
- if (_dl_procinfo (av->a_type, av->a_un.a_val) == 0)
- continue;
-
- if (idx < sizeof (auxvars) / sizeof (auxvars[0])
- && auxvars[idx].form != unknown)
- {
- const char *val = (char *) av->a_un.a_val;
-
- if (__builtin_expect (auxvars[idx].form, dec) == dec)
- val = _itoa ((unsigned long int) av->a_un.a_val,
- buf + sizeof buf - 1, 10, 0);
- else if (__builtin_expect (auxvars[idx].form, hex) == hex)
- val = _itoa ((unsigned long int) av->a_un.a_val,
- buf + sizeof buf - 1, 16, 0);
-
- _dl_printf ("AT_%s%s\n", auxvars[idx].label, val);
-
- continue;
- }
-
- /* Unknown value: print a generic line. */
- char buf2[17];
- buf2[sizeof (buf2) - 1] = '\0';
- const char *val2 = _itoa ((unsigned long int) av->a_un.a_val,
- buf2 + sizeof buf2 - 1, 16, 0);
- const char *val = _itoa ((unsigned long int) av->a_type,
- buf + sizeof buf - 1, 16, 0);
- _dl_printf ("AT_??? (0x%s): 0x%s\n", val, val2);
- }
-}
-
-#endif
+#error dl-sysdep support missing.
}
+/* Allocate initial TLS. RESULT should be a non-NULL pointer to storage
+ for the TLS space. The DTV may be resized, and so this function may
+ call malloc to allocate that space. The loader's GL(dl_load_tls_lock)
+ is taken when manipulating global TLS-related data in the loader. */
void *
-_dl_allocate_tls_init (void *result)
+_dl_allocate_tls_init (void *result, bool init_tls)
{
if (result == NULL)
/* The memory allocation failed. */
size_t maxgen = 0;
/* Protects global dynamic TLS related state. */
- __rtld_lock_lock_recursive (GL(dl_load_lock));
+ __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
/* Check if the current dtv is big enough. */
if (dtv[-1].counter < GL(dl_tls_max_dtv_idx))
some platforms use in static programs requires it. */
dtv[map->l_tls_modid].pointer.val = dest;
- /* Copy the initialization image and clear the BSS part. */
+ /* Copy the initialization image and clear the BSS part. For
+ audit modules or dependencies with initial-exec TLS, we can not
+ set the initial TLS image on default loader initialization
+ because it would already be set by the audit setup. However,
+ subsequent thread creation would need to follow the default
+ behaviour. */
+ if (map->l_ns != LM_ID_BASE && !init_tls)
+ continue;
memset (__mempcpy (dest, map->l_tls_initimage,
map->l_tls_initimage_size), '\0',
map->l_tls_blocksize - map->l_tls_initimage_size);
listp = listp->next;
assert (listp != NULL);
}
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
+ __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
/* The DTV version is up-to-date now. */
dtv[0].counter = maxgen;
{
return _dl_allocate_tls_init (mem == NULL
? _dl_allocate_tls_storage ()
- : allocate_dtv (mem));
+ : allocate_dtv (mem), true);
}
rtld_hidden_def (_dl_allocate_tls)
Here the dtv needs to be updated to new_gen generation count.
- This code may be called during TLS access when GL(dl_load_lock)
+ This code may be called during TLS access when GL(dl_load_tls_lock)
is not held. In that case the user code has to synchronize with
dlopen and dlclose calls of relevant modules. A module m is
relevant if the generation of m <= new_gen and dlclose of m is
if (__glibc_unlikely (the_map->l_tls_offset
!= FORCED_DYNAMIC_TLS_OFFSET))
{
- __rtld_lock_lock_recursive (GL(dl_load_lock));
+ __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
if (__glibc_likely (the_map->l_tls_offset == NO_TLS_OFFSET))
{
the_map->l_tls_offset = FORCED_DYNAMIC_TLS_OFFSET;
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
+ __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
}
else if (__glibc_likely (the_map->l_tls_offset
!= FORCED_DYNAMIC_TLS_OFFSET))
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
+ __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
dtv[GET_ADDR_MODULE].pointer.to_free = NULL;
dtv[GET_ADDR_MODULE].pointer.val = p;
return (char *) p + GET_ADDR_OFFSET;
}
else
- __rtld_lock_unlock_recursive (GL(dl_load_lock));
+ __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
}
struct dtv_pointer result = allocate_and_init (the_map);
dtv[GET_ADDR_MODULE].pointer = result;
return NULL;
dtv_t *dtv = THREAD_DTV ();
- /* This may be called without holding the GL(dl_load_lock). Reading
+ /* This may be called without holding the GL(dl_load_tls_lock). Reading
arbitrary gen value is fine since this is best effort code. */
size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
if (__glibc_unlikely (dtv[0].counter != gen))
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
+#include <ldsodefs.h>
+
/* This file may be included twice, to define both
`elf_dynamic_do_rel' and `elf_dynamic_do_rela'. */
relocations; they should be set up to call _dl_runtime_resolve, rather
than fully resolved now. */
-auto inline void __attribute__ ((always_inline))
-elf_dynamic_do_Rel (struct link_map *map,
+static inline void __attribute__ ((always_inline))
+elf_dynamic_do_Rel (struct link_map *map, struct r_scope_elem *scope[],
ElfW(Addr) reladdr, ElfW(Addr) relsize,
__typeof (((ElfW(Dyn) *) 0)->d_un.d_val) nrelative,
int lazy, int skip_ifunc)
}
else
# endif
- elf_machine_lazy_rel (map, l_addr, r, skip_ifunc);
+ elf_machine_lazy_rel (map, scope, l_addr, r, skip_ifunc);
# ifdef ELF_MACHINE_IRELATIVE
if (r2 != NULL)
for (; r2 <= end2; ++r2)
if (ELFW(R_TYPE) (r2->r_info) == ELF_MACHINE_IRELATIVE)
- elf_machine_lazy_rel (map, l_addr, r2, skip_ifunc);
+ elf_machine_lazy_rel (map, scope, l_addr, r2, skip_ifunc);
# endif
}
else
for (; r < end; ++r)
{
+ ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff;
+ const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (r->r_info)];
+ void *const r_addr_arg = (void *) (l_addr + r->r_offset);
+ const struct r_found_version *rversion = &map->l_versions[ndx];
#if defined ELF_MACHINE_IRELATIVE && !defined RTLD_BOOTSTRAP
if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_IRELATIVE)
{
}
#endif
- ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff;
- elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)],
- &map->l_versions[ndx],
- (void *) (l_addr + r->r_offset), skip_ifunc);
+ elf_machine_rel (map, scope, r, sym, rversion, r_addr_arg,
+ skip_ifunc);
+#if defined SHARED && !defined RTLD_BOOTSTRAP
+ if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_JMP_SLOT
+ && GLRO(dl_naudit) > 0)
+ {
+ struct link_map *sym_map
+ = RESOLVE_MAP (map, scope, &sym, rversion,
+ ELF_MACHINE_JMP_SLOT);
+ if (sym != NULL)
+ _dl_audit_symbind (map, NULL, sym, r_addr_arg, sym_map);
+ }
+#endif
}
#if defined ELF_MACHINE_IRELATIVE && !defined RTLD_BOOTSTRAP
{
ElfW(Half) ndx
= version[ELFW(R_SYM) (r2->r_info)] & 0x7fff;
- elf_machine_rel (map, r2,
+ elf_machine_rel (map, scope, r2,
&symtab[ELFW(R_SYM) (r2->r_info)],
&map->l_versions[ndx],
(void *) (l_addr + r2->r_offset),
else
{
for (; r < end; ++r)
+ {
+ const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (r->r_info)];
+ void *const r_addr_arg = (void *) (l_addr + r->r_offset);
# ifdef ELF_MACHINE_IRELATIVE
- if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_IRELATIVE)
- {
- if (r2 == NULL)
- r2 = r;
- end2 = r;
- }
- else
+ if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_IRELATIVE)
+ {
+ if (r2 == NULL)
+ r2 = r;
+ end2 = r;
+ continue;
+ }
# endif
- elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL,
- (void *) (l_addr + r->r_offset), skip_ifunc);
+ elf_machine_rel (map, scope, r, sym, NULL, r_addr_arg,
+ skip_ifunc);
+# if defined SHARED && !defined RTLD_BOOTSTRAP
+ if (ELFW(R_TYPE) (r->r_info) == ELF_MACHINE_JMP_SLOT
+ && GLRO(dl_naudit) > 0)
+ {
+ struct link_map *sym_map
+ = RESOLVE_MAP (map, scope, &sym,
+ (struct r_found_version *) NULL,
+ ELF_MACHINE_JMP_SLOT);
+ if (sym != NULL)
+ _dl_audit_symbind (map, NULL , sym,r_addr_arg, sym_map);
+ }
+# endif
+ }
# ifdef ELF_MACHINE_IRELATIVE
if (r2 != NULL)
for (; r2 <= end2; ++r2)
if (ELFW(R_TYPE) (r2->r_info) == ELF_MACHINE_IRELATIVE)
- elf_machine_rel (map, r2, &symtab[ELFW(R_SYM) (r2->r_info)],
+ elf_machine_rel (map, scope, r2, &symtab[ELFW(R_SYM) (r2->r_info)],
NULL, (void *) (l_addr + r2->r_offset),
skip_ifunc);
# endif
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-/* This macro is used as a callback from elf_machine_rel{a,} when a
- static TLS reloc is about to be performed. Since (in dl-load.c) we
- permit dynamic loading of objects that might use such relocs, we
- have to check whether each use is actually doable. If the object
- whose TLS segment the reference resolves to was allocated space in
- the static TLS block at startup, then it's ok. Otherwise, we make
- an attempt to allocate it in surplus space on the fly. If that
- can't be done, we fall back to the error that DF_STATIC_TLS is
- intended to produce. */
-#define HAVE_STATIC_TLS(map, sym_map) \
- (__builtin_expect ((sym_map)->l_tls_offset != NO_TLS_OFFSET \
- && ((sym_map)->l_tls_offset \
- != FORCED_DYNAMIC_TLS_OFFSET), 1))
-
-#define CHECK_STATIC_TLS(map, sym_map) \
- do { \
- if (!HAVE_STATIC_TLS (map, sym_map)) \
- _dl_allocate_static_tls (sym_map); \
- } while (0)
-
-#define TRY_STATIC_TLS(map, sym_map) \
- (__builtin_expect ((sym_map)->l_tls_offset \
- != FORCED_DYNAMIC_TLS_OFFSET, 1) \
- && (__builtin_expect ((sym_map)->l_tls_offset != NO_TLS_OFFSET, 1) \
- || _dl_try_allocate_static_tls (sym_map, true) == 0))
-
-int _dl_try_allocate_static_tls (struct link_map *map, bool optional)
- attribute_hidden;
-
+#include <dl-machine.h>
#include <elf.h>
#ifdef RESOLVE_MAP
copying memory, breaking the very code written to handle the
unaligned cases. */
# if ! ELF_MACHINE_NO_REL
-auto inline void __attribute__((always_inline))
-elf_machine_rel (struct link_map *map, const ElfW(Rel) *reloc,
- const ElfW(Sym) *sym, const struct r_found_version *version,
+static inline void __attribute__((always_inline))
+elf_machine_rel (struct link_map *map, struct r_scope_elem *scope[],
+ const ElfW(Rel) *reloc, const ElfW(Sym) *sym,
+ const struct r_found_version *version,
void *const reloc_addr, int skip_ifunc);
-auto inline void __attribute__((always_inline))
+static inline void __attribute__((always_inline))
elf_machine_rel_relative (ElfW(Addr) l_addr, const ElfW(Rel) *reloc,
void *const reloc_addr);
# endif
# if ! ELF_MACHINE_NO_RELA
-auto inline void __attribute__((always_inline))
-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
- const ElfW(Sym) *sym, const struct r_found_version *version,
- void *const reloc_addr, int skip_ifunc);
-auto inline void __attribute__((always_inline))
+static inline void __attribute__((always_inline))
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
+ const ElfW(Rela) *reloc, const ElfW(Sym) *sym,
+ const struct r_found_version *version, void *const reloc_addr,
+ int skip_ifunc);
+static inline void __attribute__((always_inline))
elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc,
void *const reloc_addr);
# endif
# if ELF_MACHINE_NO_RELA || defined ELF_MACHINE_PLT_REL
-auto inline void __attribute__((always_inline))
-elf_machine_lazy_rel (struct link_map *map,
+static inline void __attribute__((always_inline))
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
ElfW(Addr) l_addr, const ElfW(Rel) *reloc,
int skip_ifunc);
# else
-auto inline void __attribute__((always_inline))
-elf_machine_lazy_rel (struct link_map *map,
+static inline void __attribute__((always_inline))
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
ElfW(Addr) l_addr, const ElfW(Rela) *reloc,
int skip_ifunc);
# endif
#endif
-#include <dl-machine.h>
-
-#include "get-dynamic-info.h"
-
#ifdef RESOLVE_MAP
# if defined RTLD_BOOTSTRAP || defined STATIC_PIE_BOOTSTRAP
consumes precisely the very end of the DT_REL*, or DT_JMPREL and DT_REL*
are completely separate and there is a gap between them. */
-# define _ELF_DYNAMIC_DO_RELOC(RELOC, reloc, map, do_lazy, skip_ifunc, test_rel) \
+# define _ELF_DYNAMIC_DO_RELOC(RELOC, reloc, map, scope, do_lazy, skip_ifunc, test_rel) \
do { \
struct { ElfW(Addr) start, size; \
__typeof (((ElfW(Dyn) *) 0)->d_un.d_val) nrelative; int lazy; } \
} \
\
if (ELF_DURING_STARTUP) \
- elf_dynamic_do_##reloc ((map), ranges[0].start, ranges[0].size, \
- ranges[0].nrelative, 0, skip_ifunc); \
+ elf_dynamic_do_##reloc ((map), scope, ranges[0].start, ranges[0].size, \
+ ranges[0].nrelative, 0, skip_ifunc); \
else \
{ \
int ranges_index; \
for (ranges_index = 0; ranges_index < 2; ++ranges_index) \
- elf_dynamic_do_##reloc ((map), \
+ elf_dynamic_do_##reloc ((map), scope, \
ranges[ranges_index].start, \
ranges[ranges_index].size, \
ranges[ranges_index].nrelative, \
ranges[ranges_index].lazy, \
- skip_ifunc); \
+ skip_ifunc); \
} \
} while (0)
# if ! ELF_MACHINE_NO_REL
# include "do-rel.h"
-# define ELF_DYNAMIC_DO_REL(map, lazy, skip_ifunc) \
- _ELF_DYNAMIC_DO_RELOC (REL, Rel, map, lazy, skip_ifunc, _ELF_CHECK_REL)
+# define ELF_DYNAMIC_DO_REL(map, scope, lazy, skip_ifunc) \
+ _ELF_DYNAMIC_DO_RELOC (REL, Rel, map, scope, lazy, skip_ifunc, _ELF_CHECK_REL)
# else
-# define ELF_DYNAMIC_DO_REL(map, lazy, skip_ifunc) /* Nothing to do. */
+# define ELF_DYNAMIC_DO_REL(map, scope, lazy, skip_ifunc) /* Nothing to do. */
# endif
# if ! ELF_MACHINE_NO_RELA
# define DO_RELA
# include "do-rel.h"
-# define ELF_DYNAMIC_DO_RELA(map, lazy, skip_ifunc) \
- _ELF_DYNAMIC_DO_RELOC (RELA, Rela, map, lazy, skip_ifunc, _ELF_CHECK_REL)
+# define ELF_DYNAMIC_DO_RELA(map, scope, lazy, skip_ifunc) \
+ _ELF_DYNAMIC_DO_RELOC (RELA, Rela, map, scope, lazy, skip_ifunc, _ELF_CHECK_REL)
# else
-# define ELF_DYNAMIC_DO_RELA(map, lazy, skip_ifunc) /* Nothing to do. */
+# define ELF_DYNAMIC_DO_RELA(map, scope, lazy, skip_ifunc) /* Nothing to do. */
# endif
/* This can't just be an inline function because GCC is too dumb
to inline functions containing inlines themselves. */
-# define ELF_DYNAMIC_RELOCATE(map, lazy, consider_profile, skip_ifunc) \
+# define ELF_DYNAMIC_RELOCATE(map, scope, lazy, consider_profile, skip_ifunc) \
do { \
- int edr_lazy = elf_machine_runtime_setup ((map), (lazy), \
+ int edr_lazy = elf_machine_runtime_setup ((map), (scope), (lazy), \
(consider_profile)); \
- ELF_DYNAMIC_DO_REL ((map), edr_lazy, skip_ifunc); \
- ELF_DYNAMIC_DO_RELA ((map), edr_lazy, skip_ifunc); \
+ ELF_DYNAMIC_DO_REL ((map), (scope), edr_lazy, skip_ifunc); \
+ ELF_DYNAMIC_DO_RELA ((map), (scope), edr_lazy, skip_ifunc); \
} while (0)
#endif
#include <startup.h>
#include <libc-internal.h>
-/* If nonzero __libc_enable_secure is already set. */
-int __libc_enable_secure_decided;
/* Safest assumption, if somehow the initializer isn't run. */
int __libc_enable_secure = 1;
-
-void
-__libc_init_secure (void)
-{
- if (__libc_enable_secure_decided == 0)
- __libc_enable_secure = (startup_geteuid () != startup_getuid ()
- || startup_getegid () != startup_getgid ());
-}
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-/* This file is included multiple times and therefore lacks a header
- file inclusion guard. */
+/* Populate dynamic tags in l_info. */
+
+#ifndef _GET_DYNAMIC_INFO_H
+#define _GET_DYNAMIC_INFO_H
#include <assert.h>
+#include <dl-machine-rel.h>
#include <libc-diag.h>
-#ifndef RESOLVE_MAP
-static
-#else
-auto
-#endif
-inline void __attribute__ ((unused, always_inline))
-elf_get_dynamic_info (struct link_map *l, ElfW(Dyn) *temp)
+static inline void __attribute__ ((unused, always_inline))
+elf_get_dynamic_info (struct link_map *l, bool bootstrap,
+ bool static_pie_bootstrap)
{
#if __ELF_NATIVE_CLASS == 32
typedef Elf32_Word d_tag_utype;
typedef Elf64_Xword d_tag_utype;
#endif
-#if !defined RTLD_BOOTSTRAP && !defined STATIC_PIE_BOOTSTRAP
- if (l->l_ld == NULL)
+#ifndef STATIC_PIE_BOOTSTRAP
+ if (!bootstrap && l->l_ld == NULL)
return;
#endif
info[i] = dyn;
}
-#define DL_RO_DYN_TEMP_CNT 8
-
-#ifndef DL_RO_DYN_SECTION
/* Don't adjust .dynamic unnecessarily. */
- if (l->l_addr != 0)
+ if (l->l_addr != 0 && dl_relocate_ld (l))
{
ElfW(Addr) l_addr = l->l_addr;
- int cnt = 0;
# define ADJUST_DYN_INFO(tag) \
do \
if (info[tag] != NULL) \
- { \
- if (temp) \
- { \
- temp[cnt].d_tag = info[tag]->d_tag; \
- temp[cnt].d_un.d_ptr = info[tag]->d_un.d_ptr + l_addr; \
- info[tag] = temp + cnt++; \
- } \
- else \
- info[tag]->d_un.d_ptr += l_addr; \
- } \
+ info[tag]->d_un.d_ptr += l_addr; \
while (0)
ADJUST_DYN_INFO (DT_HASH);
ADJUST_DYN_INFO (VERSYMIDX (DT_VERSYM));
ADJUST_DYN_INFO (ADDRIDX (DT_GNU_HASH));
# undef ADJUST_DYN_INFO
- assert (cnt <= DL_RO_DYN_TEMP_CNT);
}
-#endif
if (info[DT_PLTREL] != NULL)
{
#if ELF_MACHINE_NO_RELA
if (info[DT_REL] != NULL)
assert (info[DT_RELENT]->d_un.d_val == sizeof (ElfW(Rel)));
#endif
-#ifdef RTLD_BOOTSTRAP
- /* Only the bind now flags are allowed. */
- assert (info[VERSYMIDX (DT_FLAGS_1)] == NULL
- || (info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val & ~DF_1_NOW) == 0);
- /* Flags must not be set for ld.so. */
- assert (info[DT_FLAGS] == NULL
- || (info[DT_FLAGS]->d_un.d_val & ~DF_BIND_NOW) == 0);
-#endif
-#if defined RTLD_BOOTSTRAP || defined STATIC_PIE_BOOTSTRAP
- assert (info[DT_RUNPATH] == NULL);
- assert (info[DT_RPATH] == NULL);
-#else
- if (info[DT_FLAGS] != NULL)
+ if (bootstrap || static_pie_bootstrap)
{
- /* Flags are used. Translate to the old form where available.
- Since these l_info entries are only tested for NULL pointers it
- is ok if they point to the DT_FLAGS entry. */
- l->l_flags = info[DT_FLAGS]->d_un.d_val;
-
- if (l->l_flags & DF_SYMBOLIC)
- info[DT_SYMBOLIC] = info[DT_FLAGS];
- if (l->l_flags & DF_TEXTREL)
- info[DT_TEXTREL] = info[DT_FLAGS];
- if (l->l_flags & DF_BIND_NOW)
- info[DT_BIND_NOW] = info[DT_FLAGS];
+ assert (info[DT_RUNPATH] == NULL);
+ assert (info[DT_RPATH] == NULL);
}
- if (info[VERSYMIDX (DT_FLAGS_1)] != NULL)
+ if (bootstrap)
{
- l->l_flags_1 = info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val;
- if (l->l_flags_1 & DF_1_NODELETE)
- l->l_nodelete_pending = true;
-
- /* Only DT_1_SUPPORTED_MASK bits are supported, and we would like
- to assert this, but we can't. Users have been setting
- unsupported DF_1_* flags for a long time and glibc has ignored
- them. Therefore to avoid breaking existing applications the
- best we can do is add a warning during debugging with the
- intent of notifying the user of the problem. */
- if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FILES, 0)
- && l->l_flags_1 & ~DT_1_SUPPORTED_MASK)
- _dl_debug_printf ("\nWARNING: Unsupported flag value(s) of 0x%x in DT_FLAGS_1.\n",
- l->l_flags_1 & ~DT_1_SUPPORTED_MASK);
-
- if (l->l_flags_1 & DF_1_NOW)
- info[DT_BIND_NOW] = info[VERSYMIDX (DT_FLAGS_1)];
+ /* Only the bind now flags are allowed. */
+ assert (info[VERSYMIDX (DT_FLAGS_1)] == NULL
+ || (info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val & ~DF_1_NOW) == 0);
+ /* Flags must not be set for ld.so. */
+ assert (info[DT_FLAGS] == NULL
+ || (info[DT_FLAGS]->d_un.d_val & ~DF_BIND_NOW) == 0);
}
- if (info[DT_RUNPATH] != NULL)
- /* If both RUNPATH and RPATH are given, the latter is ignored. */
- info[DT_RPATH] = NULL;
-#endif
+ else
+ {
+ if (info[DT_FLAGS] != NULL)
+ {
+ /* Flags are used. Translate to the old form where available.
+ Since these l_info entries are only tested for NULL pointers it
+ is ok if they point to the DT_FLAGS entry. */
+ l->l_flags = info[DT_FLAGS]->d_un.d_val;
+
+ if (l->l_flags & DF_SYMBOLIC)
+ info[DT_SYMBOLIC] = info[DT_FLAGS];
+ if (l->l_flags & DF_TEXTREL)
+ info[DT_TEXTREL] = info[DT_FLAGS];
+ if (l->l_flags & DF_BIND_NOW)
+ info[DT_BIND_NOW] = info[DT_FLAGS];
+ }
+
+ if (info[VERSYMIDX (DT_FLAGS_1)] != NULL)
+ {
+ l->l_flags_1 = info[VERSYMIDX (DT_FLAGS_1)]->d_un.d_val;
+ if (l->l_flags_1 & DF_1_NODELETE)
+ l->l_nodelete_pending = true;
+
+ /* Only DT_1_SUPPORTED_MASK bits are supported, and we would like
+ to assert this, but we can't. Users have been setting
+ unsupported DF_1_* flags for a long time and glibc has ignored
+ them. Therefore to avoid breaking existing applications the
+ best we can do is add a warning during debugging with the
+ intent of notifying the user of the problem. */
+ if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FILES, 0)
+ && l->l_flags_1 & ~DT_1_SUPPORTED_MASK)
+ _dl_debug_printf ("\nWARNING: Unsupported flag value(s) of 0x%x "
+ "in DT_FLAGS_1.\n",
+ l->l_flags_1 & ~DT_1_SUPPORTED_MASK);
+
+ if (l->l_flags_1 & DF_1_NOW)
+ info[DT_BIND_NOW] = info[VERSYMIDX (DT_FLAGS_1)];
+ }
+
+ if (info[DT_RUNPATH] != NULL)
+ /* If both RUNPATH and RPATH are given, the latter is ignored. */
+ info[DT_RPATH] = NULL;
+ }
}
+
+#endif
entry->path[--i] = '\0';
if (i == 0)
- return;
+ {
+ free (entry->path);
+ free (entry);
+ return;
+ }
char *path = entry->path;
if (opt_chroot != NULL)
#ifdef __USE_GNU
/* Version numbers for la_version handshake interface. */
-#define LAV_CURRENT 1
+#include <bits/link_lavcurrent.h>
/* Activity types signaled through la_activity. */
enum
#include <fpu_control.h>
#include <hp-timing.h>
#include <libc-lock.h>
-#include "dynamic-link.h"
#include <dl-librecon.h>
#include <unsecvars.h>
#include <dl-cache.h>
#include <dl-main.h>
#include <gnu/lib-names.h>
#include <dl-tunables.h>
+#include <get-dynamic-info.h>
+#include <dl-audit-check.h>
#include <assert.h>
+/* This #define produces dynamic linking inline functions for
+ bootstrap relocation instead of general-purpose relocation.
+ Since ld.so must not have any undefined symbols the result
+ is trivial: always the map of ld.so itself. */
+#define RTLD_BOOTSTRAP
+#define RESOLVE_MAP(map, scope, sym, version, flags) map
+#include "dynamic-link.h"
+
/* Only enables rtld profiling for architectures which provides non generic
hp-timing support. The generic support requires either syscall
(clock_gettime), which will incur in extra overhead on loading time.
#ifdef _LIBC_REENTRANT
._dl_load_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER,
._dl_load_write_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER,
+ ._dl_load_tls_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER,
#endif
._dl_nns = 1,
._dl_ns =
#ifndef DONT_USE_BOOTSTRAP_MAP
GL(dl_rtld_map).l_addr = info->l.l_addr;
GL(dl_rtld_map).l_ld = info->l.l_ld;
+ GL(dl_rtld_map).l_ld_readonly = info->l.l_ld_readonly;
memcpy (GL(dl_rtld_map).l_info, info->l.l_info,
sizeof GL(dl_rtld_map).l_info);
GL(dl_rtld_map).l_mach = info->l.l_mach;
return start_addr;
}
-static ElfW(Addr) __attribute_used__
-_dl_start (void *arg)
-{
#ifdef DONT_USE_BOOTSTRAP_MAP
# define bootstrap_map GL(dl_rtld_map)
#else
- struct dl_start_final_info info;
# define bootstrap_map info.l
#endif
- /* This #define produces dynamic linking inline functions for
- bootstrap relocation instead of general-purpose relocation.
- Since ld.so must not have any undefined symbols the result
- is trivial: always the map of ld.so itself. */
-#define RTLD_BOOTSTRAP
-#define BOOTSTRAP_MAP (&bootstrap_map)
-#define RESOLVE_MAP(sym, version, flags) BOOTSTRAP_MAP
-#include "dynamic-link.h"
-
+static ElfW(Addr) __attribute_used__
+_dl_start (void *arg)
+{
#ifdef DONT_USE_BOOTSTRAP_MAP
rtld_timer_start (&start_time);
#else
+ struct dl_start_final_info info;
rtld_timer_start (&info.start_time);
#endif
/* Read our own dynamic section and fill in the info array. */
bootstrap_map.l_ld = (void *) bootstrap_map.l_addr + elf_machine_dynamic ();
- elf_get_dynamic_info (&bootstrap_map, NULL);
+ bootstrap_map.l_ld_readonly = DL_RO_DYN_SECTION;
+ elf_get_dynamic_info (&bootstrap_map, true, false);
#if NO_TLS_OFFSET != 0
bootstrap_map.l_tls_offset = NO_TLS_OFFSET;
#endif
#ifdef ELF_MACHINE_BEFORE_RTLD_RELOC
- ELF_MACHINE_BEFORE_RTLD_RELOC (bootstrap_map.l_info);
+ ELF_MACHINE_BEFORE_RTLD_RELOC (&bootstrap_map, bootstrap_map.l_info);
#endif
if (bootstrap_map.l_addr || ! bootstrap_map.l_info[VALIDX(DT_GNU_PRELINKED)])
/* Relocate ourselves so we can do normal function calls and
data access using the global offset table. */
- ELF_DYNAMIC_RELOCATE (&bootstrap_map, 0, 0, 0);
+ ELF_DYNAMIC_RELOCATE (&bootstrap_map, NULL, 0, 0, 0);
}
bootstrap_map.l_relocated = 1;
return;
}
- if (lav > LAV_CURRENT)
+ if (!_dl_audit_check_version (lav))
{
_dl_debug_printf ("\
ERROR: audit interface '%s' requires version %d (maximum supported version %d); ignored.\n",
"la_objsearch\0"
"la_objopen\0"
"la_preinit\0"
-#if __ELF_NATIVE_CLASS == 32
- "la_symbind32\0"
-#elif __ELF_NATIVE_CLASS == 64
- "la_symbind64\0"
-#else
-# error "__ELF_NATIVE_CLASS must be defined"
-#endif
+ LA_SYMBIND "\0"
#define STRING(s) __STRING (s)
"la_" STRING (ARCH_LA_PLTENTER) "\0"
"la_" STRING (ARCH_LA_PLTEXIT) "\0"
dlmargs.map->l_auditing = 1;
}
-/* Notify the the audit modules that the object MAP has already been
- loaded. */
-static void
-notify_audit_modules_of_loaded_object (struct link_map *map)
-{
- struct audit_ifaces *afct = GLRO(dl_audit);
- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
- {
- if (afct->objopen != NULL)
- {
- struct auditstate *state = link_map_audit_state (map, cnt);
- state->bindflags = afct->objopen (map, LM_ID_BASE, &state->cookie);
- map->l_audit_any_plt |= state->bindflags != 0;
- }
-
- afct = afct->next;
- }
-}
-
/* Load all audit modules. */
static void
load_audit_modules (struct link_map *main_map, struct audit_list *audit_list)
program and the dynamic linker itself). */
if (GLRO(dl_naudit) > 0)
{
- notify_audit_modules_of_loaded_object (main_map);
- notify_audit_modules_of_loaded_object (&GL(dl_rtld_map));
+ _dl_audit_objopen (main_map, LM_ID_BASE);
+ _dl_audit_objopen (&GL(dl_rtld_map), LM_ID_BASE);
+ }
+}
+
+/* Adjusts the contents of the stack and related globals for the user
+ entry point. The ld.so processed skip_args arguments and bumped
+ _dl_argv and _dl_argc accordingly. Those arguments are removed from
+ argv here.
+
+ SKIP_ARGS is the number of leading argv slots to drop. The argv and
+ envp arrays (and the auxiliary vector, if HAVE_AUX_VECTOR is defined)
+ are each moved down by SKIP_ARGS pointer-sized slots, and _dl_argv,
+ _environ and GLRO(dl_auxv) are updated to point at the moved data.
+ Nothing is done when SKIP_ARGS is zero. */
+static void
+_dl_start_args_adjust (int skip_args)
+{
+ /* SP is the slot just below the original argv, which holds the
+ original argc (verified by the assertion below). P runs SKIP_ARGS
+ slots ahead of SP and is the read cursor for the copies. */
+ void **sp = (void **) (_dl_argv - skip_args - 1);
+ void **p = sp + skip_args;
+
+ if (skip_args == 0)
+ return;
+
+ /* Sanity check. */
+ intptr_t argc = (intptr_t) sp[0] - skip_args;
+ assert (argc == _dl_argc);
+
+ /* Adjust argc on stack. */
+ sp[0] = (void *) (intptr_t) _dl_argc;
+
+ /* Update globals in rtld. */
+ _dl_argv -= skip_args;
+ _environ -= skip_args;
+
+ /* Shuffle argv down. */
+ /* The loop copies up to and including the terminating NULL. */
+ do
+ *++sp = *++p;
+ while (*p != NULL);
+
+ assert (_environ == (char **) (sp + 1));
+
+ /* Shuffle envp down. */
+ /* As above, the terminating NULL is copied too. */
+ do
+ *++sp = *++p;
+ while (*p != NULL);
+
+#ifdef HAVE_AUX_VECTOR
+ void **auxv = (void **) GLRO(dl_auxv) - skip_args;
+ GLRO(dl_auxv) = (ElfW(auxv_t) *) auxv; /* Aliasing violation. */
+ assert (auxv == sp + 1);
+
+ /* Shuffle auxv down. */
+ /* Each entry is copied through the temporary AX with memcpy; the
+ source and destination ranges can overlap when SKIP_ARGS is small.
+ The AT_NULL terminator entry is copied as well. */
+ ElfW(auxv_t) ax;
+ char *oldp = (char *) (p + 1);
+ char *newp = (char *) (sp + 1);
+ do
+ {
+ memcpy (&ax, oldp, sizeof (ax));
+ memcpy (newp, &ax, sizeof (ax));
+ oldp += sizeof (ax);
+ newp += sizeof (ax);
}
+ while (ax.a_type != AT_NULL);
+#endif
}
static void
rtld_is_main = true;
char *argv0 = NULL;
+ char **orig_argv = _dl_argv;
/* Note the place where the dynamic linker actually came from. */
GL(dl_rtld_map).l_name = rtld_progname;
GLRO(dl_lazy) = -1;
}
- ++_dl_skip_args;
--_dl_argc;
++_dl_argv;
}
if (state.mode != rtld_mode_help)
state.mode = rtld_mode_verify;
- ++_dl_skip_args;
--_dl_argc;
++_dl_argv;
}
else if (! strcmp (_dl_argv[1], "--inhibit-cache"))
{
GLRO(dl_inhibit_cache) = 1;
- ++_dl_skip_args;
--_dl_argc;
++_dl_argv;
}
state.library_path = _dl_argv[2];
state.library_path_source = "--library-path";
- _dl_skip_args += 2;
_dl_argc -= 2;
_dl_argv += 2;
}
{
GLRO(dl_inhibit_rpath) = _dl_argv[2];
- _dl_skip_args += 2;
_dl_argc -= 2;
_dl_argv += 2;
}
{
audit_list_add_string (&state.audit_list, _dl_argv[2]);
- _dl_skip_args += 2;
_dl_argc -= 2;
_dl_argv += 2;
}
else if (! strcmp (_dl_argv[1], "--preload") && _dl_argc > 2)
{
state.preloadarg = _dl_argv[2];
- _dl_skip_args += 2;
_dl_argc -= 2;
_dl_argv += 2;
}
{
argv0 = _dl_argv[2];
- _dl_skip_args += 2;
_dl_argc -= 2;
_dl_argv += 2;
}
&& _dl_argc > 2)
{
state.glibc_hwcaps_prepend = _dl_argv[2];
- _dl_skip_args += 2;
_dl_argc -= 2;
_dl_argv += 2;
}
&& _dl_argc > 2)
{
state.glibc_hwcaps_mask = _dl_argv[2];
- _dl_skip_args += 2;
_dl_argc -= 2;
_dl_argv += 2;
}
{
state.mode = rtld_mode_list_tunables;
- ++_dl_skip_args;
--_dl_argc;
++_dl_argv;
}
{
state.mode = rtld_mode_list_diagnostics;
- ++_dl_skip_args;
--_dl_argc;
++_dl_argv;
}
_dl_usage (ld_so_name, NULL);
}
- ++_dl_skip_args;
--_dl_argc;
++_dl_argv;
/* Set the argv[0] string now that we've processed the executable. */
if (argv0 != NULL)
_dl_argv[0] = argv0;
+
+ /* Adjust arguments for the application entry point. */
+ _dl_start_args_adjust (_dl_argv - orig_argv);
}
else
{
/* This tells us where to find the dynamic section,
which tells us everything we need to do. */
main_map->l_ld = (void *) main_map->l_addr + ph->p_vaddr;
+ main_map->l_ld_readonly = (ph->p_flags & PF_W) == 0;
break;
case PT_INTERP:
/* This "interpreter segment" was used by the program loader to
if (! rtld_is_main)
{
/* Extract the contents of the dynamic section for easy access. */
- elf_get_dynamic_info (main_map, NULL);
+ elf_get_dynamic_info (main_map, false, false);
/* If the main map is libc.so, update the base namespace to
refer to this map. If libc.so is loaded later, this happens
/* Auditing checkpoint: we are ready to signal that the initial map
is being constructed. */
- if (__glibc_unlikely (GLRO(dl_naudit) > 0))
- {
- struct audit_ifaces *afct = GLRO(dl_audit);
- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
- {
- if (afct->activity != NULL)
- afct->activity (&link_map_audit_state (main_map, cnt)->cookie,
- LA_ACT_ADD);
-
- afct = afct->next;
- }
- }
+ _dl_audit_activity_map (main_map, LA_ACT_ADD);
/* We have two ways to specify objects to preload: via environment
variable and via the file /etc/ld.so.preload. The latter can also
assert (i == npreloads);
}
+#ifdef NEED_DL_SYSINFO_DSO
+ /* Now that the audit modules are opened, call la_objopen for the vDSO. */
+ if (GLRO(dl_sysinfo_map) != NULL)
+ _dl_audit_objopen (GLRO(dl_sysinfo_map), LM_ID_BASE);
+#endif
+
/* Load all the libraries specified by DT_NEEDED entries. If LD_PRELOAD
specified some libraries to load, these are inserted before the actual
dependencies in the executable's searchlist for symbol resolution. */
into the main thread's TLS area, which we allocated above.
Note: thread-local variables must only be accessed after completing
the next step. */
- _dl_allocate_tls_init (tcbp);
+ _dl_allocate_tls_init (tcbp, false);
/* And finally install it for the main thread. */
if (! tls_init_tp_called)
#ifdef SHARED
/* Auditing checkpoint: we have added all objects. */
- if (__glibc_unlikely (GLRO(dl_naudit) > 0))
- {
- struct link_map *head = GL(dl_ns)[LM_ID_BASE]._ns_loaded;
- /* Do not call the functions for any auditing object. */
- if (head->l_auditing == 0)
- {
- struct audit_ifaces *afct = GLRO(dl_audit);
- for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
- {
- if (afct->activity != NULL)
- afct->activity (&link_map_audit_state (head, cnt)->cookie,
- LA_ACT_CONSISTENT);
-
- afct = afct->next;
- }
- }
- }
+ _dl_audit_activity_nsid (LM_ID_BASE, LA_ACT_CONSISTENT);
#endif
/* Notify the debugger all new objects are now ready to go. We must re-get
We just want our data structures to describe it as if we had just
mapped and relocated it normally. */
struct link_map *l = _dl_new_object ((char *) "", "", lt_library, NULL,
- 0, LM_ID_BASE);
+ __RTLD_VDSO, LM_ID_BASE);
if (__glibc_likely (l != NULL))
{
- static ElfW(Dyn) dyn_temp[DL_RO_DYN_TEMP_CNT] attribute_relro;
-
l->l_phdr = ((const void *) GLRO(dl_sysinfo_dso)
+ GLRO(dl_sysinfo_dso)->e_phoff);
l->l_phnum = GLRO(dl_sysinfo_dso)->e_phnum;
{
l->l_ld = (void *) ph->p_vaddr;
l->l_ldnum = ph->p_memsz / sizeof (ElfW(Dyn));
+ l->l_ld_readonly = (ph->p_flags & PF_W) == 0;
}
else if (ph->p_type == PT_LOAD)
{
l->l_map_end += l->l_addr;
l->l_text_end += l->l_addr;
l->l_ld = (void *) ((ElfW(Addr)) l->l_ld + l->l_addr);
- elf_get_dynamic_info (l, dyn_temp);
+ elf_get_dynamic_info (l, false, false);
_dl_setup_hash (l);
l->l_relocated = 1;
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
+#include <err.h>
#include <error.h>
#include <fcntl.h>
#include <stdio.h>
la_symbind (Elf_Sym *sym, unsigned int ndx, uintptr_t *refcook,
uintptr_t *defcook, unsigned int *flags, const char *symname)
{
+ if (*flags & LA_SYMB_NOPLTENTER)
+ warnx ("cannot trace PLT enter (bind-now enabled)");
+
+ if (do_exit && *flags & LA_SYMB_NOPLTEXIT)
+ warnx ("cannot trace PLT exit (bind-now enabled)");
+
if (!do_exit)
*flags = LA_SYMB_NOPLTEXIT;
--- /dev/null
+/* DT_AUDIT with modules with TLSDESC.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <support/check.h>
+#include <support/xthread.h>
+#include <support/xdlfcn.h>
+
+/* Thread body. MOD is the handle passed to xdlsym to look up the
+ accessor functions. Each variable they expose must read as 0 in this
+ thread before it is modified here. Always returns NULL. */
+static void *
+thr_func (void *mod)
+{
+ int* (*get_global1)(void) = xdlsym (mod, "get_global1");
+ int* (*get_global2)(void) = xdlsym (mod, "get_global2");
+ void (*set_global2)(int) = xdlsym (mod, "set_global2");
+ int* (*get_local1)(void) = xdlsym (mod, "get_local1");
+ int* (*get_local2)(void) = xdlsym (mod, "get_local2");
+
+ int *global1 = get_global1 ();
+ TEST_COMPARE (*global1, 0);
+ ++*global1;
+
+ int *global2 = get_global2 ();
+ TEST_COMPARE (*global2, 0);
+ ++*global2;
+ TEST_COMPARE (*global2, 1);
+
+ /* A store made through set_global2 must be visible through the pointer
+ obtained from get_global2. */
+ set_global2 (10);
+ TEST_COMPARE (*global2, 10);
+
+ int *local1 = get_local1 ();
+ TEST_COMPARE (*local1, 0);
+ ++*local1;
+
+ int *local2 = get_local2 ();
+ TEST_COMPARE (*local2, 0);
+ ++*local2;
+
+ return 0;
+}
+
+/* Open tst-audit-tlsdesc-mod1.so with RTLD_LAZY, run thr_func on a new
+ thread with the resulting handle, and verify that the thread returned
+ NULL. Returns 0. */
+static int
+do_test (void)
+{
+ void *mod = xdlopen ("tst-audit-tlsdesc-mod1.so", RTLD_LAZY);
+
+ pthread_t thr = xpthread_create (NULL, thr_func, mod);
+ void *r = xpthread_join (thr);
+ TEST_VERIFY (r == NULL);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* DT_AUDIT with modules with TLSDESC.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Thread-local variable with external linkage, defined in this module. */
+__thread int global1;
+
+/* Return the address of the calling thread's copy of global1. */
+int *
+get_global1 (void)
+{
+ return &global1;
+}
+
+/* Thread-local variable private to this file. */
+static __thread int local1;
+
+/* Return the address of the calling thread's copy of local1. */
+void *
+get_local1 (void)
+{
+ return &local1;
+}
+
+/* Thread-local variable that is only declared here; its definition lives
+ outside this file. */
+extern __thread int global2;
+
+/* Store V into the calling thread's copy of global2. */
+void
+set_global2 (int v)
+{
+ global2 = v;
+}
--- /dev/null
+/* DT_AUDIT with modules with TLSDESC.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Thread-local variable with external linkage, defined in this module. */
+__thread int global2;
+
+/* Return the address of the calling thread's copy of global2. */
+int *
+get_global2 (void)
+{
+ return &global2;
+}
+
+/* Thread-local variable private to this file. */
+static __thread int local2;
+
+/* Return the address of the calling thread's copy of local2. */
+void *
+get_local2 (void)
+{
+ return &local2;
+}
--- /dev/null
+/* DT_AUDIT with modules with TLSDESC.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <support/check.h>
+#include <support/xthread.h>
+
+extern __thread int global1;
+extern __thread int global2;
+void *get_local1 (void);
+void set_global2 (int v);
+void *get_local2 (void);
+
+/* Thread body; the argument is unused. Checks that global1, global2 and
+ the variables reached through get_local1/get_local2 all read as 0 in
+ this thread before they are modified, and that a store made by
+ set_global2 is visible through direct access to global2. Always
+ returns NULL. */
+static void *
+thr_func (void *clousure)
+{
+ TEST_COMPARE (global1, 0);
+ ++global1;
+ TEST_COMPARE (global2, 0);
+ ++global2;
+ TEST_COMPARE (global2, 1);
+
+ set_global2 (10);
+ TEST_COMPARE (global2, 10);
+
+ int *local1 = get_local1 ();
+ TEST_COMPARE (*local1, 0);
+ ++*local1;
+
+ int *local2 = get_local2 ();
+ TEST_COMPARE (*local2, 0);
+ ++*local2;
+
+ return 0;
+}
+
+/* Run thr_func on a new thread and verify that it returned NULL.
+ Returns 0. */
+static int
+do_test (void)
+{
+ pthread_t thr = xpthread_create (NULL, thr_func, NULL);
+ void *r = xpthread_join (thr);
+ TEST_VERIFY (r == NULL);
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Check DT_AUDIT with dlmopen.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <array_length.h>
+#include <getopt.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <gnu/lib-names.h>
+#include <support/capture_subprocess.h>
+#include <support/check.h>
+#include <support/xdlfcn.h>
+#include <support/xstdio.h>
+#include <support/support.h>
+
+static int restart;
+#define CMDLINE_OPTIONS \
+ { "restart", no_argument, &restart, 1 },
+
+/* Body of the re-executed (--restart) process. Loads two objects, each
+ into a new namespace via LM_ID_NEWLM, calls one function from each,
+ checks the result and closes the handle again. Returns 0. */
+static int
+handle_restart (void)
+{
+ {
+ /* A second copy of libc: its getpid must agree with ours. */
+ void *h = xdlmopen (LM_ID_NEWLM, LIBC_SO, RTLD_NOW);
+
+ pid_t (*s) (void) = xdlsym (h, "getpid");
+ TEST_COMPARE (s (), getpid ());
+
+ xdlclose (h);
+ }
+
+ {
+ /* The test module: foo is expected to return 10. */
+ void *h = xdlmopen (LM_ID_NEWLM, "tst-audit18mod.so", RTLD_NOW);
+
+ int (*foo) (void) = xdlsym (h, "foo");
+ TEST_COMPARE (foo (), 10);
+
+ xdlclose (h);
+ }
+
+ return 0;
+}
+
+/* Test entry point.  With --restart, run handle_restart.  Otherwise
+   re-execute this program with LD_AUDIT set to tst-auditmod18.so,
+   capture its stderr, and verify that every audit interface name listed
+   in AUDIT_IFACE starts at least one output line.  Returns 0.  */
+static int
+do_test (int argc, char *argv[])
+{
+  /* We must have either:
+     - One or four parameters left if called initially:
+       + path to ld.so         optional
+       + "--library-path"      optional
+       + the library path      optional
+       + the application name  */
+
+  if (restart)
+    return handle_restart ();
+
+  char *spargv[9];
+  /* The argc - 1 forwarded arguments, "--direct", "--restart" and the
+     terminating NULL must all fit in SPARGV.  */
+  TEST_VERIFY_EXIT ((size_t) argc + 2 <= array_length (spargv));
+  int i = 0;
+  for (; i < argc - 1; i++)
+    spargv[i] = argv[i + 1];
+  spargv[i++] = (char *) "--direct";
+  spargv[i++] = (char *) "--restart";
+  spargv[i] = NULL;
+
+  /* The final 0 means an LD_AUDIT value already present in the
+     environment is not overwritten.  */
+  setenv ("LD_AUDIT", "tst-auditmod18.so", 0);
+  struct support_capture_subprocess result
+    = support_capture_subprogram (spargv[0], spargv);
+  support_capture_subprocess_check (&result, "tst-audit18", 0, sc_allow_stderr);
+
+  struct
+  {
+    const char *name;
+    bool found;
+  } audit_iface[] =
+  {
+    { "la_version", false },
+    { "la_objsearch", false },
+    { "la_activity", false },
+    { "la_objopen", false },
+    { "la_objclose", false },
+    { "la_preinit", false },
+#if __WORDSIZE == 32
+    { "la_symbind32", false },
+#elif __WORDSIZE == 64
+    { "la_symbind64", false },
+#endif
+  };
+
+  /* Some hooks are called more than once but the test only checks that
+     each is called at least once.  */
+  FILE *out = fmemopen (result.err.buffer, result.err.length, "r");
+  /* Reading from a NULL stream below would crash; stop the test instead.  */
+  TEST_VERIFY_EXIT (out != NULL);
+  char *buffer = NULL;
+  size_t buffer_length = 0;
+  while (xgetline (&buffer, &buffer_length, out))
+    {
+      /* A hook counts as seen when its name is a prefix of the line.  */
+      for (int j = 0; j < array_length (audit_iface); j++)
+        if (strncmp (buffer, audit_iface[j].name,
+                     strlen (audit_iface[j].name)) == 0)
+          audit_iface[j].found = true;
+    }
+  free (buffer);
+  xfclose (out);
+
+  for (int j = 0; j < array_length (audit_iface); j++)
+    TEST_COMPARE (audit_iface[j].found, true);
+
+  support_capture_subprocess_free (&result);
+
+  return 0;
+}
+
+#define TEST_FUNCTION_ARGV do_test
+#include <support/test-driver.c>
--- /dev/null
+/* Check DT_AUDIT with dlmopen.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Exported test function; always returns 10. */
+int
+foo (void)
+{
+ return 10;
+}
--- /dev/null
+/* Check that a DT_AUDIT module without la_plt{enter,exit} symbols does not
+   incur profiling (BZ#15533).
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <link.h>
+#include <support/xdlfcn.h>
+#include <support/check.h>
+
+/* Load tst-auditmod19a.so with RTLD_NOW, fetch its link map through
+ dlinfo (RTLD_DI_LINKMAP) and verify that its l_reloc_result field is
+ NULL. Returns 0. */
+static int
+do_test (void)
+{
+ void *h = xdlopen ("tst-auditmod19a.so", RTLD_NOW);
+
+ /* The remaining check dereferences LMAP, so a dlinfo failure must end
+ the test immediately. */
+ struct link_map *lmap;
+ TEST_VERIFY_EXIT (dlinfo (h, RTLD_DI_LINKMAP, &lmap) == 0);
+
+ /* The internal array is only allocated if profiling is enabled. */
+ TEST_VERIFY (lmap->l_reloc_result == NULL);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Check that a DT_AUDIT module with la_plt{enter,exit} gets la_symbind
+   called for lazy resolution.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <getopt.h>
+#include <support/capture_subprocess.h>
+#include <support/check.h>
+#include <support/xstdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+
+static int restart;
+#define CMDLINE_OPTIONS \
+ { "restart", no_argument, &restart, 1 },
+
+int tst_audit18bmod1_func (void);
+
+/* Body of the re-executed (--restart) process: call
+ tst_audit18bmod1_func and check that it returns 10. Returns 0. */
+static int
+handle_restart (void)
+{
+ TEST_COMPARE (tst_audit18bmod1_func (), 10);
+ return 0;
+}
+
+/* Return true if the string STR begins with the string PRE, false
+   otherwise.  An empty PRE matches every STR.  */
+static inline bool
+startswith (const char *str, const char *pre)
+{
+  /* strncmp stops at the first difference or at STR's terminating NUL,
+     so a STR shorter than PRE can never compare equal.  */
+  return strncmp (str, pre, strlen (pre)) == 0;
+}
+
+/* Test entry point.  With --restart, run handle_restart.  Otherwise
+   re-execute this program with LD_AUDIT set to tst-auditmod18b.so,
+   capture its stderr, and verify that at least one output line starts
+   with "la_symbind: tst_audit18bmod1_func".  Returns 0.  */
+static int
+do_test (int argc, char *argv[])
+{
+  /* We must have either:
+     - One or four parameters left if called initially:
+       + path to ld.so         optional
+       + "--library-path"      optional
+       + the library path      optional
+       + the application name  */
+
+  if (restart)
+    return handle_restart ();
+
+  char *spargv[9];
+  /* The argc - 1 forwarded arguments, "--direct", "--restart" and the
+     terminating NULL must all fit in SPARGV.  */
+  TEST_VERIFY_EXIT ((size_t) argc + 2
+                    <= sizeof (spargv) / sizeof (spargv[0]));
+  int i = 0;
+  for (; i < argc - 1; i++)
+    spargv[i] = argv[i + 1];
+  spargv[i++] = (char *) "--direct";
+  spargv[i++] = (char *) "--restart";
+  spargv[i] = NULL;
+
+  /* The final 0 means an LD_AUDIT value already present in the
+     environment is not overwritten.  */
+  setenv ("LD_AUDIT", "tst-auditmod18b.so", 0);
+  struct support_capture_subprocess result
+    = support_capture_subprogram (spargv[0], spargv);
+  support_capture_subprocess_check (&result, "tst-audit18b", 0, sc_allow_stderr);
+
+  bool find_symbind = false;
+
+  FILE *out = fmemopen (result.err.buffer, result.err.length, "r");
+  /* Reading from a NULL stream below would crash; stop the test instead.  */
+  TEST_VERIFY_EXIT (out != NULL);
+  char *buffer = NULL;
+  size_t buffer_length = 0;
+  /* startswith returns true on a match.  Comparing its result against 0
+     would instead accept any line that does NOT match, letting the test
+     pass on unrelated output.  */
+  while (xgetline (&buffer, &buffer_length, out))
+    if (startswith (buffer, "la_symbind: tst_audit18bmod1_func"))
+      find_symbind = true;
+
+  TEST_COMPARE (find_symbind, true);
+
+  free (buffer);
+  xfclose (out);
+
+  /* Release the captured output buffers.  */
+  support_capture_subprocess_free (&result);
+
+  return 0;
+}
+
+#define TEST_FUNCTION_ARGV do_test
+#include <support/test-driver.c>
--- /dev/null
+/* Extra module for tst-audit18b.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Always returns 10. */
+int
+tst_audit18bmod1_func (void)
+{
+ return 10;
+}
--- /dev/null
+/* Check dlopen failure on audit modules.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* The test body is empty and always reports success.
+ NOTE(review): presumably the behaviour under test happens outside
+ do_test (e.g. while the program is loaded) -- confirm against the
+ build rules. */
+static int
+do_test (void)
+{
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Check LD_AUDIT with static TLS.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ctype.h>
+#include <support/xthread.h>
+#include <support/check.h>
+
+/* Thread-local variable using the initial-exec TLS model; main and tf each
+ expect their own copy to start at 0. */
+static volatile __thread int out __attribute__ ((tls_model ("initial-exec")));
+
+/* Thread entry point: check that this thread's copy of OUT still holds 0,
+ then overwrite it with the result of isspace (' '). ARG is unused;
+ always returns NULL. */
+static void *
+tf (void *arg)
+{
+ TEST_COMPARE (out, 0);
+ out = isspace (' ');
+ return NULL;
+}
+
+/* Check that the main thread's OUT starts at 0 and write to it, then run
+ tf in a second thread and wait for it to finish. ARGC and ARGV are
+ unused. Returns 0. */
+int main (int argc, char *argv[])
+{
+ TEST_COMPARE (out, 0);
+ out = isspace (' ');
+
+ pthread_t t = xpthread_create (NULL, tf, NULL);
+ xpthread_join (t);
+
+ return 0;
+}
--- /dev/null
+/* Check DTAUDIT and vDSO interaction.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <inttypes.h>
+#include <string.h>
+#include <stdlib.h>
+#include <support/capture_subprocess.h>
+#include <support/check.h>
+#include <support/xstdio.h>
+#include <support/support.h>
+#include <sys/auxv.h>
+
+/* Nonzero in the re-executed child: the "restart" long option below stores
+ 1 here, and do_test uses the value to dispatch to handle_restart. */
+static int restart;
+/* Extra long-option table entry; defined before <support/test-driver.c>
+ is included at the end of the file (presumably consumed there). */
+#define CMDLINE_OPTIONS \
+ { "restart", no_argument, &restart, 1 },
+
+/* Value of getauxval (AT_SYSINFO_EHDR); assigned at the start of do_test,
+ before it dispatches to handle_restart. */
+static uintptr_t vdso_addr;
+
+/* Child side of the test (run with "--restart"): print the address stored
+   in vdso_addr to stderr as "vdso: <pointer>".  Always returns 0.  */
+static int
+handle_restart (void)
+{
+  void *vdso = (void *) vdso_addr;
+
+  fprintf (stderr, "vdso: %p\n", vdso);
+  return 0;
+}
+
+/* Parse the pointer value in "%p" notation at the start of STR and return
+   it as an integer.  A failed conversion is recorded through TEST_COMPARE
+   and 0 is returned.  */
+static uintptr_t
+parse_address (const char *str)
+{
+  /* TEST_COMPARE does not stop the test, so initialize R to avoid returning
+     an indeterminate value when sscanf converts nothing.  */
+  void *r = NULL;
+  TEST_COMPARE (sscanf (str, "%p\n", &r), 1);
+  return (uintptr_t) r;
+}
+
+/* Return true if STR begins with the string PRE, false otherwise.  */
+static inline bool
+startswith (const char *str, const char *pre)
+{
+  /* strncmp stops at the first difference or at STR's terminating NUL, so
+     a STR shorter than PRE never compares equal.  */
+  return strncmp (str, pre, strlen (pre)) == 0;
+}
+
+/* Re-run this program under LD_AUDIT=tst-auditmod22.so with "--restart"
+   and inspect the child's stderr: a "vdso found: " line must be present,
+   and unless the address on it is 0 it must equal the address on the
+   child's own "vdso: " line.  ARGV[1..ARGC-1] is the command used to start
+   the child.  Returns 0; failures are recorded through the TEST_*
+   macros.  */
+static int
+do_test (int argc, char *argv[])
+{
+  vdso_addr = getauxval (AT_SYSINFO_EHDR);
+  if (vdso_addr == 0)
+    FAIL_UNSUPPORTED ("getauxval (AT_SYSINFO_EHDR) returned 0");
+
+  /* We must have either:
+     - One or four parameters left if called initially:
+       + path to ld.so         optional
+       + "--library-path"      optional
+       + the library path      optional
+       + the application name  */
+  if (restart)
+    return handle_restart ();
+
+  char *spargv[9];
+  /* The ARGC - 1 forwarded arguments plus "--direct", "--restart" and the
+     terminating NULL must fit in SPARGV, and SPARGV[0] must name the
+     program to run.  */
+  TEST_VERIFY_EXIT (argc >= 2
+                    && ((size_t) argc + 2
+                        <= sizeof (spargv) / sizeof (spargv[0])));
+  int i = 0;
+  for (; i < argc - 1; i++)
+    spargv[i] = argv[i + 1];
+  spargv[i++] = (char *) "--direct";
+  spargv[i++] = (char *) "--restart";
+  spargv[i] = NULL;
+
+  setenv ("LD_AUDIT", "tst-auditmod22.so", 0);
+  struct support_capture_subprocess result
+    = support_capture_subprogram (spargv[0], spargv);
+  support_capture_subprocess_check (&result, "tst-audit22", 0, sc_allow_stderr);
+
+  /* The respawned process should always print the vDSO address (otherwise
+     it fails as unsupported).  However, on some architectures the audit
+     module might see the vDSO with l_addr being 0, meaning a fixed mapping
+     (linux-gate.so).  In this case we don't check its value against the
+     AT_SYSINFO_EHDR one.  */
+  uintptr_t vdso_process = 0;
+  bool vdso_audit_found = false;
+  uintptr_t vdso_audit = 0;
+
+  FILE *out = fmemopen (result.err.buffer, result.err.length, "r");
+  /* The stream is read unconditionally below, so a failure is fatal.  */
+  TEST_VERIFY_EXIT (out != NULL);
+  char *buffer = NULL;
+  size_t buffer_length = 0;
+  while (xgetline (&buffer, &buffer_length, out))
+    {
+      if (startswith (buffer, "vdso: "))
+        vdso_process = parse_address (buffer + strlen ("vdso: "));
+      else if (startswith (buffer, "vdso found: "))
+        {
+          vdso_audit = parse_address (buffer + strlen ("vdso found: "));
+          vdso_audit_found = true;
+        }
+    }
+
+  TEST_COMPARE (vdso_audit_found, true);
+  if (vdso_audit != 0)
+    TEST_COMPARE (vdso_process, vdso_audit);
+
+  free (buffer);
+  xfclose (out);
+  /* OUT referenced the captured buffer, so release it only after the
+     stream has been closed.  */
+  support_capture_subprocess_free (&result);
+
+  return 0;
+}
+
+#define TEST_FUNCTION_ARGV do_test
+#include <support/test-driver.c>
--- /dev/null
+/* Check for expected la_objopen and la_objclose for all objects.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <array_length.h>
+#include <errno.h>
+#include <getopt.h>
+#include <link.h>
+#include <limits.h>
+#include <inttypes.h>
+#include <gnu/lib-names.h>
+#include <string.h>
+#include <stdlib.h>
+#include <support/capture_subprocess.h>
+#include <support/check.h>
+#include <support/xstdio.h>
+#include <support/xdlfcn.h>
+#include <support/support.h>
+
+/* Nonzero in the re-executed child: the "restart" long option below stores
+ 1 here, and do_test uses the value to dispatch to handle_restart. */
+static int restart;
+/* Extra long-option table entry; defined before <support/test-driver.c>
+ is included at the end of the file (presumably consumed there). */
+#define CMDLINE_OPTIONS \
+ { "restart", no_argument, &restart, 1 },
+
+/* Child side of the test (run with "--restart"): load tst-audit23mod.so
+ into the current namespace, then load LIBC_SO into a new namespace.
+ Keep the dlmopen call last: the la_activity bookkeeping in do_test
+ relies on that order. Neither handle is closed here. Returns 0. */
+static int
+handle_restart (void)
+{
+ xdlopen ("tst-audit23mod.so", RTLD_NOW);
+ xdlmopen (LM_ID_NEWLM, LIBC_SO, RTLD_NOW);
+
+ return 0;
+}
+
+/* Return true if STR begins with the string PRE, false otherwise.  */
+static inline bool
+startswith (const char *str, const char *pre)
+{
+  /* strncmp stops at the first difference or at STR's terminating NUL, so
+     a STR shorter than PRE never compares equal.  */
+  return strncmp (str, pre, strlen (pre)) == 0;
+}
+
+/* Return true if the object name STR starts with "linux-gate" or
+   "linux-vdso", the names under which the vDSO is recognized here.  */
+static inline bool
+is_vdso (const char *str)
+{
+  if (startswith (str, "linux-gate"))
+    return true;
+  return startswith (str, "linux-vdso");
+}
+
+/* Re-run this program under LD_AUDIT=tst-auditmod23.so with "--restart"
+   and check the audit trace captured from the child's stderr:
+   - every la_objopen line (other than for the vDSO) is matched by an
+     la_objclose line with the same name and namespace;
+   - la_activity lines are consistently ordered (LA_ACT_ADD or
+     LA_ACT_DELETE followed by LA_ACT_CONSISTENT, deletions in reverse
+     order of additions);
+   - la_objclose lines only follow la_activity (LA_ACT_DELETE).
+   ARGV[1..ARGC-1] is the command used to start the child.  Returns 0;
+   failures are recorded through the TEST_* macros.  */
+static int
+do_test (int argc, char *argv[])
+{
+  /* We must have either:
+     - One or four parameters left if called initially:
+       + path to ld.so         optional
+       + "--library-path"      optional
+       + the library path      optional
+       + the application name  */
+  if (restart)
+    return handle_restart ();
+
+  char *spargv[9];
+  TEST_VERIFY_EXIT (((argc - 1) + 3) < array_length (spargv));
+  int i = 0;
+  for (; i < argc - 1; i++)
+    spargv[i] = argv[i + 1];
+  spargv[i++] = (char *) "--direct";
+  spargv[i++] = (char *) "--restart";
+  spargv[i] = NULL;
+
+  setenv ("LD_AUDIT", "tst-auditmod23.so", 0);
+  struct support_capture_subprocess result
+    = support_capture_subprogram (spargv[0], spargv);
+  support_capture_subprocess_check (&result, "tst-audit23", 0, sc_allow_stderr);
+
+  /* The expected la_objopen/la_objclose:
+     1. executable
+     2. loader
+     3. libc.so
+     4. tst-audit23mod.so
+     5. libc.so (LM_ID_NEWLM).
+     6. vdso (optional and ignored).  */
+  enum { max_objs = 6 };
+  struct la_obj_t
+  {
+    char *lname;
+    uintptr_t laddr;
+    Lmid_t lmid;
+    bool closed;
+  } objs[max_objs] = { [0 ... max_objs-1] = { .closed = false } };
+  size_t nobjs = 0;
+
+  /* The expected namespaces are one for the audit module, one for the
+     application, and another for the dlmopen on handle_restart.  */
+  enum { max_ns = 3 };
+  uintptr_t acts[max_ns] = { 0 };
+  size_t nacts = 0;
+  int last_act = -1;
+  uintptr_t last_act_cookie = -1;
+  bool seen_first_objclose = false;
+
+  FILE *out = fmemopen (result.err.buffer, result.err.length, "r");
+  /* The stream is read unconditionally below, so a failure is fatal.  */
+  TEST_VERIFY_EXIT (out != NULL);
+  char *buffer = NULL;
+  size_t buffer_length = 0;
+  while (xgetline (&buffer, &buffer_length, out))
+    {
+      if (startswith (buffer, "la_activity: "))
+        {
+          uintptr_t cookie;
+          int this_act;
+          int r = sscanf (buffer, "la_activity: %d %"SCNxPTR"", &this_act,
+                          &cookie);
+          TEST_COMPARE (r, 2);
+          /* Do not interpret indeterminate values after a failed parse.  */
+          if (r != 2)
+            continue;
+
+          /* The cookie identifies the object at the head of the link map,
+             so we only add a new namespace if it changes from the previous
+             one.  This works since dlmopen is the last in the test body.  */
+          if (cookie != last_act_cookie && last_act_cookie != -1)
+            TEST_COMPARE (last_act, LA_ACT_CONSISTENT);
+
+          /* ACTS[NACTS] is read (and possibly written) for LA_ACT_ADD, so
+             it must be a valid slot.  */
+          TEST_VERIFY_EXIT (this_act != LA_ACT_ADD || nacts < max_ns);
+
+          if (this_act == LA_ACT_ADD && acts[nacts] != cookie)
+            {
+              acts[nacts++] = cookie;
+              last_act_cookie = cookie;
+            }
+          /* The LA_ACT_DELETE is called in the reverse order of LA_ACT_ADD
+             at program termination (if the tests adds a dlclose or a library
+             with extra dependencies this will need to be adapted).  */
+          else if (this_act == LA_ACT_DELETE)
+            {
+              /* A deletion without a recorded addition would index before
+                 the start of ACTS.  */
+              TEST_VERIFY_EXIT (nacts > 0);
+              last_act_cookie = acts[--nacts];
+              TEST_COMPARE (acts[nacts], cookie);
+              acts[nacts] = 0;
+            }
+          else if (this_act == LA_ACT_CONSISTENT)
+            {
+              TEST_COMPARE (cookie, last_act_cookie);
+
+              /* LA_ACT_DELETE must always be followed by an la_objclose.  */
+              if (last_act == LA_ACT_DELETE)
+                TEST_COMPARE (seen_first_objclose, true);
+              else
+                TEST_COMPARE (last_act, LA_ACT_ADD);
+            }
+
+          last_act = this_act;
+          seen_first_objclose = false;
+        }
+      else if (startswith (buffer, "la_objopen: "))
+        {
+          /* %ms makes sscanf allocate LNAME; it is either stored in OBJS
+             (and freed at the end) or freed right here.  */
+          char *lname = NULL;
+          uintptr_t laddr;
+          Lmid_t lmid;
+          uintptr_t cookie;
+          int r = sscanf (buffer, "la_objopen: %"SCNxPTR"  %ms %"SCNxPTR" %ld",
+                          &cookie, &lname, &laddr, &lmid);
+          TEST_COMPARE (r, 4);
+          if (r != 4)
+            {
+              free (lname);
+              continue;
+            }
+
+          /* la_objclose is not triggered by vDSO because glibc does not
+             unload it.  */
+          if (is_vdso (lname))
+            {
+              free (lname);
+              continue;
+            }
+          if (nobjs == max_objs)
+            FAIL_EXIT1 ("non expected la_objopen: %s %"PRIxPTR" %ld",
+                        lname, laddr, lmid);
+          objs[nobjs].lname = lname;
+          objs[nobjs].laddr = laddr;
+          objs[nobjs].lmid = lmid;
+          objs[nobjs].closed = false;
+          nobjs++;
+
+          /* This indirectly checks that la_objopen always comes before
+             la_objclose between la_activity calls.  */
+          seen_first_objclose = false;
+        }
+      else if (startswith (buffer, "la_objclose: "))
+        {
+          char *lname = NULL;
+          uintptr_t laddr;
+          Lmid_t lmid;
+          uintptr_t cookie;
+          int r = sscanf (buffer, "la_objclose: %"SCNxPTR" %ms %"SCNxPTR" %ld",
+                          &cookie, &lname, &laddr, &lmid);
+          TEST_COMPARE (r, 4);
+          if (r != 4)
+            {
+              free (lname);
+              continue;
+            }
+
+          for (size_t i = 0; i < nobjs; i++)
+            {
+              if (strcmp (lname, objs[i].lname) == 0 && lmid == objs[i].lmid)
+                {
+                  TEST_COMPARE (objs[i].closed, false);
+                  objs[i].closed = true;
+                  break;
+                }
+            }
+          /* The name was only needed for the lookup above.  */
+          free (lname);
+
+          /* la_objclose should be called after la_activity(LA_ACT_DELETE) for
+             the closed object's namespace.  */
+          TEST_COMPARE (last_act, LA_ACT_DELETE);
+          if (!seen_first_objclose)
+            {
+              TEST_COMPARE (last_act_cookie, cookie);
+              seen_first_objclose = true;
+            }
+        }
+    }
+
+  for (size_t i = 0; i < nobjs; i++)
+    {
+      TEST_COMPARE (objs[i].closed, true);
+      free (objs[i].lname);
+    }
+
+  /* la_activity(LA_ACT_CONSISTENT) should be the last callback received.
+     Since only one link map may be not-CONSISTENT at a time, this also
+     ensures la_activity(LA_ACT_CONSISTENT) is the last callback received
+     for every namespace.  */
+  TEST_COMPARE (last_act, LA_ACT_CONSISTENT);
+
+  free (buffer);
+  xfclose (out);
+  /* OUT referenced the captured buffer, so release it only after the
+     stream has been closed.  */
+  support_capture_subprocess_free (&result);
+
+  return 0;
+}
+
+#define TEST_FUNCTION_ARGV do_test
+#include <support/test-driver.c>
--- /dev/null
+/* Extra module for tst-audit23
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Non-static function that always returns 0. */
+int
+foo (void)
+{
+ return 0;
+}
--- /dev/null
+/* LD_AUDIT test for la_symbind and bind-now.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <support/check.h>
+#include <support/support.h>
+
+/* Declared here; not defined in this file. */
+int tst_audit24amod1_func1 (void);
+int tst_audit24amod1_func2 (void);
+int tst_audit24amod2_func1 (void);
+
+/* Call the three functions declared above and check that they return 1, 2
+ and 10 respectively. Returns 0; mismatches are reported through
+ TEST_COMPARE.
+ NOTE(review): the expected values for tst_audit24amod1_func1 and
+ tst_audit24amod2_func1 presumably come from the audit module rebinding
+ these symbols in la_symbind -- confirm against the audit module. */
+int
+do_test (void)
+{
+ TEST_COMPARE (tst_audit24amod1_func1 (), 1);
+ TEST_COMPARE (tst_audit24amod1_func2 (), 2);
+ TEST_COMPARE (tst_audit24amod2_func1 (), 10);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Module used by tst-audit24a.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+
+/* Never returns: calls abort unconditionally. */
+_Noreturn int
+tst_audit24amod1_func1 (void)
+{
+ abort ();
+}
+
+/* Always returns 2. */
+int
+tst_audit24amod1_func2 (void)
+{
+ return 2;
+}
--- /dev/null
+/* Module used by tst-audit24a.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+
+/* Never returns: calls abort unconditionally. */
+_Noreturn int
+tst_audit24amod2_func1 (void)
+{
+ abort ();
+}
--- /dev/null
+/* LD_AUDIT test for la_symbind and bind-now.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* This is similar to tst-audit24a, with the difference that its modules
+ do not have the .gnu.version section header. */
+
+#include <support/check.h>
+#include <support/support.h>
+
+/* Declared here; not defined in this file. */
+int tst_audit24bmod1_func1 (void);
+int tst_audit24bmod1_func2 (void);
+
+/* Call the two functions declared above and check that they return 1 and
+ 2 respectively. Returns 0; mismatches are reported through
+ TEST_COMPARE.
+ NOTE(review): the expected values presumably come from the audit module
+ rebinding symbols in la_symbind -- confirm against the audit module. */
+int
+do_test (void)
+{
+ TEST_COMPARE (tst_audit24bmod1_func1 (), 1);
+ TEST_COMPARE (tst_audit24bmod1_func2 (), 2);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Module used by tst-audit24b.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Declared here; not defined in this file. Called by
+ tst_audit24bmod1_func2. */
+int tst_audit24bmod2_func1 (void);
+
+/* Always returns -1. */
+int
+tst_audit24bmod1_func1 (void)
+{
+ return -1;
+}
+
+/* Forward to tst_audit24bmod2_func1 and return its result. */
+int
+tst_audit24bmod1_func2 (void)
+{
+ return tst_audit24bmod2_func1 ();
+}
--- /dev/null
+/* Module used by tst-audit24b.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Always returns -1. */
+int
+tst_audit24bmod2_func1 (void)
+{
+ return -1;
+}
--- /dev/null
+/* It tests LD_BIND_NOW=1 instead of linking with -Wl,-z,now */
+#include "tst-audit24a.c"
--- /dev/null
+/* LD_AUDIT test for la_symbind and bind-now.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <support/check.h>
+#include <support/support.h>
+
+/* Declared here; not defined in this file. */
+int tst_audit24dmod1_func1 (void);
+int tst_audit24dmod1_func2 (void);
+int tst_audit24dmod2_func1 (void);
+
+/* Call the three functions declared above and check that they return 1,
+ 32 and 10 respectively. Returns 0; mismatches are reported through
+ TEST_COMPARE.
+ NOTE(review): the expected values presumably come from the audit module
+ rebinding symbols in la_symbind -- confirm against the audit module. */
+int
+do_test (void)
+{
+ TEST_COMPARE (tst_audit24dmod1_func1 (), 1);
+ TEST_COMPARE (tst_audit24dmod1_func2 (), 32);
+ TEST_COMPARE (tst_audit24dmod2_func1 (), 10);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Module used by tst-audit24d.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+
+/* Declared here; not defined in this file. Called by
+ tst_audit24dmod1_func2. */
+int tst_audit24dmod3_func1 (void);
+
+/* Never returns: calls abort unconditionally. */
+_Noreturn int
+tst_audit24dmod1_func1 (void)
+{
+ abort ();
+}
+
+/* Return 2 plus the result of tst_audit24dmod3_func1.  */
+int
+tst_audit24dmod1_func2 (void)
+{
+  return 2 + tst_audit24dmod3_func1 ();
+}
--- /dev/null
+/* Module for tst-audit24d.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+
+/* Declared here; not defined in this file. */
+int tst_audit24dmod4_func1 (void);
+
+/* Call tst_audit24dmod4_func1, discard its result, then abort. Declared
+ _Noreturn, so it never returns to the caller. */
+_Noreturn int
+tst_audit24dmod2_func1 (void)
+{
+ tst_audit24dmod4_func1 ();
+ abort ();
+}
--- /dev/null
+/* Module for tst-audit24d.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+
+/* Never returns: calls abort unconditionally. */
+_Noreturn int
+tst_audit24dmod3_func1 (void)
+{
+ abort ();
+}
+
+/* Always returns 4. */
+int
+tst_audit24dmod3_func2 (void)
+{
+ return 4;
+}
--- /dev/null
+/* Module for tst-audit24d.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+
+/* Never returns: calls abort unconditionally. */
+_Noreturn int
+tst_audit24dmod4_func1 (void)
+{
+ abort ();
+}
--- /dev/null
+/* Check LD_AUDIT and LD_BIND_NOW.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <array_length.h>
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <inttypes.h>
+#include <string.h>
+#include <stdlib.h>
+#include <support/capture_subprocess.h>
+#include <support/check.h>
+#include <support/xstdio.h>
+#include <support/support.h>
+#include <sys/auxv.h>
+
+/* Nonzero in the re-executed child: the "restart" long option below stores
+ 1 here, and do_test uses the value to dispatch to handle_restart. */
+static int restart;
+/* Extra long-option table entry; defined before <support/test-driver.c>
+ is included at the end of the file (presumably consumed there). */
+#define CMDLINE_OPTIONS \
+ { "restart", no_argument, &restart, 1 },
+
+/* Declared here; not defined in this file. Called by handle_restart. */
+void tst_audit25mod1_func1 (void);
+void tst_audit25mod1_func2 (void);
+void tst_audit25mod2_func1 (void);
+void tst_audit25mod2_func2 (void);
+
+/* Child side of the test (run with "--restart"): call the four module
+ functions. Keep the call order: do_test compares the child's stderr
+ against an exact sequence of la_symbind lines, which reordering the
+ calls may change. Always returns 0. */
+static int
+handle_restart (void)
+{
+ tst_audit25mod1_func1 ();
+ tst_audit25mod1_func2 ();
+ tst_audit25mod2_func1 ();
+ tst_audit25mod2_func2 ();
+
+ return 0;
+}
+
+/* Return true if STR begins with the string PRE, false otherwise.  */
+static inline bool
+startswith (const char *str, const char *pre)
+{
+  size_t prefix_len = strlen (pre);
+
+  /* A string shorter than the prefix cannot start with it.  */
+  if (strlen (str) < prefix_len)
+    return false;
+  return memcmp (pre, str, prefix_len) == 0;
+}
+
+/* Run the "--restart" child twice under LD_AUDIT=tst-auditmod25.so -- once
+   as is and once with LD_BIND_NOW=1 -- and compare the la_symbind lines
+   captured from its stderr against the expected sequences.
+   ARGV[1..ARGC-1] is the command used to start the child.  Returns 0;
+   failures are recorded through the TEST_* macros.  */
+static int
+do_test (int argc, char *argv[])
+{
+  /* We must have either:
+     - One or four parameters left if called initially:
+       + path to ld.so         optional
+       + "--library-path"      optional
+       + the library path      optional
+       + the application name  */
+
+  if (restart)
+    return handle_restart ();
+
+  setenv ("LD_AUDIT", "tst-auditmod25.so", 0);
+
+  char *spargv[9];
+  /* Check the capacity before writing: the ARGC - 1 forwarded arguments
+     plus "--direct", "--restart" and the terminating NULL must fit in
+     SPARGV, and SPARGV[0] must name the program to run.  */
+  TEST_VERIFY_EXIT (argc >= 2
+                    && (size_t) argc + 2 <= array_length (spargv));
+  int i = 0;
+  for (; i < argc - 1; i++)
+    spargv[i] = argv[i + 1];
+  spargv[i++] = (char *) "--direct";
+  spargv[i++] = (char *) "--restart";
+  spargv[i] = NULL;
+
+  {
+    struct support_capture_subprocess result
+      = support_capture_subprogram (spargv[0], spargv);
+    support_capture_subprocess_check (&result, "tst-audit25a", 0,
+                                      sc_allow_stderr);
+
+    /* tst-audit25a is built with -Wl,-z,lazy and tst-audit25mod1 with
+       -Wl,-z,now; so only tst_audit25mod3_func1 should be expected to
+       have LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT.  */
+    TEST_COMPARE_STRING (result.err.buffer,
+                         "la_symbind: tst_audit25mod3_func1 1\n"
+                         "la_symbind: tst_audit25mod1_func1 0\n"
+                         "la_symbind: tst_audit25mod1_func2 0\n"
+                         "la_symbind: tst_audit25mod2_func1 0\n"
+                         "la_symbind: tst_audit25mod4_func1 0\n"
+                         "la_symbind: tst_audit25mod2_func2 0\n");
+
+    support_capture_subprocess_free (&result);
+  }
+
+  {
+    setenv ("LD_BIND_NOW", "1", 0);
+    struct support_capture_subprocess result
+      = support_capture_subprogram (spargv[0], spargv);
+    support_capture_subprocess_check (&result, "tst-audit25a", 0,
+                                      sc_allow_stderr);
+
+    /* With LD_BIND_NOW all symbols are expected to have
+       LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT.  Also the resolution
+       order is done in breadth-first order.  */
+    TEST_COMPARE_STRING (result.err.buffer,
+                         "la_symbind: tst_audit25mod4_func1 1\n"
+                         "la_symbind: tst_audit25mod3_func1 1\n"
+                         "la_symbind: tst_audit25mod1_func1 1\n"
+                         "la_symbind: tst_audit25mod2_func1 1\n"
+                         "la_symbind: tst_audit25mod1_func2 1\n"
+                         "la_symbind: tst_audit25mod2_func2 1\n");
+
+    support_capture_subprocess_free (&result);
+  }
+
+  return 0;
+}
+
+#define TEST_FUNCTION_ARGV do_test
+#include <support/test-driver.c>
--- /dev/null
+/* Check LD_AUDIT and LD_BIND_NOW.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <inttypes.h>
+#include <string.h>
+#include <stdlib.h>
+#include <support/capture_subprocess.h>
+#include <support/check.h>
+#include <support/xstdio.h>
+#include <support/support.h>
+#include <sys/auxv.h>
+
+/* Nonzero in the re-executed child: the "restart" long option below stores
+ 1 here, and do_test uses the value to dispatch to handle_restart. */
+static int restart;
+/* Extra long-option table entry; defined before <support/test-driver.c>
+ is included at the end of the file (presumably consumed there). */
+#define CMDLINE_OPTIONS \
+ { "restart", no_argument, &restart, 1 },
+
+/* Declared here; not defined in this file. Called by handle_restart. */
+void tst_audit25mod1_func1 (void);
+void tst_audit25mod1_func2 (void);
+void tst_audit25mod2_func1 (void);
+void tst_audit25mod2_func2 (void);
+
+/* Child side of the test (run with "--restart"): call the four module
+ functions. Keep the call order: do_test compares the child's stderr
+ against an exact sequence of la_symbind lines, which reordering the
+ calls may change. Always returns 0. */
+static int
+handle_restart (void)
+{
+ tst_audit25mod1_func1 ();
+ tst_audit25mod1_func2 ();
+ tst_audit25mod2_func1 ();
+ tst_audit25mod2_func2 ();
+
+ return 0;
+}
+
+/* Return true if STR begins with the string PRE, false otherwise.  */
+static inline bool
+startswith (const char *str, const char *pre)
+{
+  /* strncmp stops at the first difference or at STR's terminating NUL, so
+     a STR shorter than PRE never compares equal.  */
+  return strncmp (str, pre, strlen (pre)) == 0;
+}
+
+/* Run the "--restart" child twice under LD_AUDIT=tst-auditmod25.so -- once
+   as is and once with LD_BIND_NOW=1 -- and compare the la_symbind lines
+   captured from its stderr against the expected sequences.
+   ARGV[1..ARGC-1] is the command used to start the child.  Returns 0;
+   failures are recorded through the TEST_* macros.  */
+static int
+do_test (int argc, char *argv[])
+{
+  /* We must have either:
+     - One or four parameters left if called initially:
+       + path to ld.so         optional
+       + "--library-path"      optional
+       + the library path      optional
+       + the application name  */
+
+  if (restart)
+    return handle_restart ();
+
+  setenv ("LD_AUDIT", "tst-auditmod25.so", 0);
+
+  char *spargv[9];
+  /* The ARGC - 1 forwarded arguments plus "--direct", "--restart" and the
+     terminating NULL must fit in SPARGV, and SPARGV[0] must name the
+     program to run.  */
+  TEST_VERIFY_EXIT (argc >= 2
+                    && ((size_t) argc + 2
+                        <= sizeof (spargv) / sizeof (spargv[0])));
+  int i = 0;
+  for (; i < argc - 1; i++)
+    spargv[i] = argv[i + 1];
+  spargv[i++] = (char *) "--direct";
+  spargv[i++] = (char *) "--restart";
+  spargv[i] = NULL;
+
+  {
+    struct support_capture_subprocess result
+      = support_capture_subprogram (spargv[0], spargv);
+    support_capture_subprocess_check (&result, "tst-audit25b", 0,
+                                      sc_allow_stderr);
+
+    /* tst-audit25b and tst-audit25mod1 are built with -Wl,-z,now, but
+       tst-audit25mod2 is built with -Wl,-z,lazy.  So only
+       tst_audit25mod4_func1 (called by tst_audit25mod2_func1) should not
+       have LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT.  */
+    TEST_COMPARE_STRING (result.err.buffer,
+                         "la_symbind: tst_audit25mod3_func1 1\n"
+                         "la_symbind: tst_audit25mod1_func1 1\n"
+                         "la_symbind: tst_audit25mod2_func1 1\n"
+                         "la_symbind: tst_audit25mod1_func2 1\n"
+                         "la_symbind: tst_audit25mod2_func2 1\n"
+                         "la_symbind: tst_audit25mod4_func1 0\n");
+
+    support_capture_subprocess_free (&result);
+  }
+
+  {
+    setenv ("LD_BIND_NOW", "1", 0);
+    struct support_capture_subprocess result
+      = support_capture_subprogram (spargv[0], spargv);
+    support_capture_subprocess_check (&result, "tst-audit25b", 0,
+                                      sc_allow_stderr);
+
+    /* With LD_BIND_NOW all symbols are expected to have
+       LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT.  Also the resolution
+       order is done in breadth-first order.  */
+    TEST_COMPARE_STRING (result.err.buffer,
+                         "la_symbind: tst_audit25mod4_func1 1\n"
+                         "la_symbind: tst_audit25mod3_func1 1\n"
+                         "la_symbind: tst_audit25mod1_func1 1\n"
+                         "la_symbind: tst_audit25mod2_func1 1\n"
+                         "la_symbind: tst_audit25mod1_func2 1\n"
+                         "la_symbind: tst_audit25mod2_func2 1\n");
+
+    support_capture_subprocess_free (&result);
+  }
+
+  return 0;
+}
+
+#define TEST_FUNCTION_ARGV do_test
+#include <support/test-driver.c>
--- /dev/null
+/* Module used by tst-audit25.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+void tst_audit25mod3_func1 (void);
+
+/* Exported entry point that only forwards to tst_audit25mod3_func1,
+   which is declared above and defined outside this file.  */
+void
+tst_audit25mod1_func1 (void)
+{
+ tst_audit25mod3_func1 ();
+}
+
+/* Exported function with an empty body; presumably only its symbol
+   binding matters to the test.  */
+void
+tst_audit25mod1_func2 (void)
+{
+}
--- /dev/null
+/* Module used by tst-audit25.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+void tst_audit25mod4_func1 (void);
+
+/* Exported entry point that only forwards to tst_audit25mod4_func1,
+   which is declared above and defined outside this file.  */
+void
+tst_audit25mod2_func1 (void)
+{
+ tst_audit25mod4_func1 ();
+}
+
+/* Exported function with an empty body; presumably only its symbol
+   binding matters to the test.  */
+void
+tst_audit25mod2_func2 (void)
+{
+}
--- /dev/null
+/* Module used by tst-audit25.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Exported function with an empty body.  */
+void
+tst_audit25mod3_func1 (void)
+{
+}
--- /dev/null
+/* Module used by tst-audit25.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Exported function with an empty body.  */
+void
+tst_audit25mod4_func1 (void)
+{
+}
--- /dev/null
+/* Check the usability of <dlfcn.h> functions in audit modules.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <gnu/lib-names.h>
+
+#include <support/check.h>
+#include <support/xdlfcn.h>
+
+/* Open the name "mapped to libc" and verify that the handle obtained is
+   the same one returned for LIBC_SO when opened with RTLD_NOLOAD.
+   Returns 0; a mismatch is recorded by TEST_VERIFY.  */
+static int
+do_test (void)
+{
+ /* Check that the audit module has been loaded. */
+ /* "mapped to libc" is not a file name; presumably the audit module
+ redirects it to libc -- confirm in the audit module source. */
+ void *handle = xdlopen ("mapped to libc", RTLD_LOCAL | RTLD_NOW);
+ TEST_VERIFY (handle
+ == xdlopen (LIBC_SO, RTLD_LOCAL | RTLD_NOW | RTLD_NOLOAD));
+
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* DT_AUDIT with modules with TLSDESC.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <link.h>
+
+/* rtld-audit la_version hook: report LAV_CURRENT as the supported
+   interface version.  VERSION is not examined.  */
+unsigned int
+la_version (unsigned int version)
+{
+ return LAV_CURRENT;
+}
--- /dev/null
+/* Check DT_AUDIT with dlmopen.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+#include <link.h>
+
+/* rtld-audit la_version hook: print the hook name to stderr and report
+   LAV_CURRENT.  VERSION is not examined.  */
+unsigned int
+la_version (unsigned int version)
+{
+ fprintf (stderr, "%s\n", __func__);
+ return LAV_CURRENT;
+}
+
+/* rtld-audit la_objsearch hook: print the hook name to stderr and
+   return NAME unchanged (the cast only drops the const qualifier).  */
+char *
+la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag)
+{
+ fprintf (stderr, "%s\n", __func__);
+ return (char *) name;
+}
+
+/* rtld-audit la_activity hook: print the hook name to stderr.  COOKIE
+   and FLAG are ignored.  */
+void
+la_activity (uintptr_t *cookie, unsigned int flag)
+{
+ fprintf (stderr, "%s\n", __func__);
+}
+
+/* rtld-audit la_objopen hook: print the hook name to stderr and return
+   LA_FLG_BINDTO | LA_FLG_BINDFROM for every object.  The arguments are
+   not examined.  */
+unsigned int
+la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
+{
+ fprintf (stderr, "%s\n", __func__);
+ return LA_FLG_BINDTO | LA_FLG_BINDFROM;
+}
+
+/* rtld-audit la_objclose hook: print the hook name to stderr and
+   return 0.  COOKIE is ignored.  */
+unsigned int
+la_objclose (uintptr_t *cookie)
+{
+ fprintf (stderr, "%s\n", __func__);
+ return 0;
+}
+
+/* rtld-audit la_preinit hook: print the hook name to stderr.  COOKIE is
+   ignored.  */
+void
+la_preinit (uintptr_t *cookie)
+{
+ fprintf (stderr, "%s\n", __func__);
+}
+
+/* rtld-audit symbol-binding hook, named la_symbind32 or la_symbind64
+   according to __ELF_NATIVE_CLASS.  Prints the hook name to stderr and
+   returns the symbol's own value, i.e. the binding is not redirected.  */
+uintptr_t
+#if __ELF_NATIVE_CLASS == 32
+la_symbind32 (Elf32_Sym *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, unsigned int *flags, const char *symname)
+#else
+la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, unsigned int *flags, const char *symname)
+#endif
+{
+ fprintf (stderr, "%s\n", __func__);
+ return sym->st_value;
+}
--- /dev/null
+/* Audit module for tst-audit18a.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <link.h>
+
+/* rtld-audit la_version hook: report LAV_CURRENT as the supported
+   interface version.  VERSION is not examined.  */
+unsigned int
+la_version (unsigned int version)
+{
+ return LAV_CURRENT;
+}
--- /dev/null
+/* Audit module for tst-audit18b.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <link.h>
+#include <string.h>
+#include <stdio.h>
+
+/* rtld-audit la_version hook: report LAV_CURRENT as the supported
+   interface version.  VERSION is not examined.  */
+unsigned int
+la_version (unsigned int version)
+{
+ return LAV_CURRENT;
+}
+
+/* rtld-audit la_objopen hook: return LA_FLG_BINDTO | LA_FLG_BINDFROM
+   for every object.  The arguments are not examined.  */
+unsigned int
+la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
+{
+ return LA_FLG_BINDTO | LA_FLG_BINDFROM;
+}
+
+/* rtld-audit symbol-binding hook, named la_symbind32 or la_symbind64
+   according to __ELF_NATIVE_CLASS.  Writes "la_symbind: SYMNAME" to
+   stderr and returns the symbol's own value, so the binding is left
+   unchanged.  */
+uintptr_t
+#if __ELF_NATIVE_CLASS == 32
+la_symbind32 (Elf32_Sym *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, unsigned int *flags, const char *symname)
+#else
+la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, unsigned int *flags, const char *symname)
+#endif
+{
+ fprintf (stderr, "la_symbind: %s\n", symname);
+ return sym->st_value;
+}
--- /dev/null
+/* Check dlopen failure on audit modules.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <dlfcn.h>
+#include <link.h>
+#include <stdlib.h>
+
+/* rtld-audit la_version hook: report LAV_CURRENT as the supported
+   interface version.  V is not examined.  */
+unsigned int
+la_version (unsigned int v)
+{
+ return LAV_CURRENT;
+}
+
+/* Try to load an object named "nonexistent.so", once with dlopen and
+   once with dlmopen on LM_ID_BASE.  Both calls are expected to return
+   NULL; abort if either one yields a handle.  */
+static void
+check (void)
+{
+  if (dlopen ("nonexistent.so", RTLD_NOW) != NULL)
+    abort ();
+
+  if (dlmopen (LM_ID_BASE, "nonexistent.so", RTLD_NOW) != NULL)
+    abort ();
+}
+
+/* rtld-audit la_activity hook: run check only when FLAG is
+   LA_ACT_CONSISTENT; every other notification is ignored.  COOKIE is
+   not used.  */
+void
+la_activity (uintptr_t *cookie, unsigned int flag)
+{
+  if (flag == LA_ACT_CONSISTENT)
+    check ();
+}
+
+/* rtld-audit la_preinit hook: run check unconditionally.  COOKIE is
+   not used.  */
+void
+la_preinit (uintptr_t *cookie)
+{
+ check ();
+}
--- /dev/null
+/* Check LD_AUDIT with static TLS.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <link.h>
+
+/* Attribute selecting the initial-exec TLS model for a variable.  */
+#define tls_ie __attribute__ ((tls_model ("initial-exec")))
+
+/* Thread-local variables owned by this module; tls_var1 starts at 0x10,
+   which la_version verifies.  */
+__thread int tls_var0 tls_ie;
+__thread int tls_var1 tls_ie = 0x10;
+
+/* Defined at tst-auditmod21b.so */
+extern __thread int tls_var2;
+extern __thread int tls_var3;
+
+/* Destination for the isspace result in call_libc; volatile so that the
+   store cannot be dropped.  */
+static volatile int out;
+
+/* Make one call into libc and keep its result in OUT.  */
+static void
+call_libc (void)
+{
+ /* isspace accesses the initial-exec glibc TLS variables, which are
+ setup in glibc initialization. */
+ out = isspace (' ');
+}
+
+/* rtld-audit la_version hook.  Writes the module's own TLS variables
+   and the two imported ones, aborting if tls_var1 or tls_var3 do not
+   hold the expected initial values (0x10 and 0x20).  Then calls into
+   libc and reports LAV_CURRENT.  V is not examined.  */
+unsigned int
+la_version (unsigned int v)
+{
+ /* TLS defined in this module. */
+ tls_var0 = 0x1;
+ if (tls_var1 != 0x10)
+ abort ();
+ tls_var1 = 0x20;
+
+ /* TLS declared extern above, i.e. defined in another object. */
+ tls_var2 = 0x2;
+ if (tls_var3 != 0x20)
+ abort ();
+ tls_var3 = 0x40;
+
+ call_libc ();
+
+ return LAV_CURRENT;
+}
+
+/* rtld-audit la_objopen hook: call into libc, store the link map
+   address in *COOKIE and return 0.  LMID is not used.  */
+unsigned int
+la_objopen (struct link_map* map, Lmid_t lmid, uintptr_t* cookie)
+{
+ call_libc ();
+ *cookie = (uintptr_t) map;
+ return 0;
+}
+
+/* rtld-audit la_activity hook: abort unless tls_var0 and tls_var1 still
+   hold the values stored by la_version (0x1 and 0x20), then call into
+   libc.  COOKIE and FLAG are not used.  */
+void
+la_activity (uintptr_t* cookie, unsigned int flag)
+{
+ if (tls_var0 != 0x1 || tls_var1 != 0x20)
+ abort ();
+ call_libc ();
+}
+
+/* rtld-audit la_preinit hook: call into libc.  COOKIE is not used.  */
+void
+la_preinit (uintptr_t* cookie)
+{
+ call_libc ();
+}
--- /dev/null
+/* Check LD_AUDIT with static TLS.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Attribute selecting the initial-exec TLS model for a variable.  */
+#define tls_ie __attribute__ ((tls_model ("initial-exec")))
+
+/* Initial-exec TLS variables defined by this object; tls_var3 starts
+   at 0x20.  */
+__thread int tls_var2 tls_ie;
+__thread int tls_var3 tls_ie = 0x20;
--- /dev/null
+/* Check DT_AUDIT and vDSO interaction.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <link.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/auxv.h>
+
+/* Return true if the string STR begins with the string PRE.  */
+static inline bool
+startswith (const char *str, const char *pre)
+{
+  size_t prefix_len = strlen (pre);
+
+  /* A string shorter than the prefix can never match.  */
+  if (strlen (str) < prefix_len)
+    return false;
+
+  return memcmp (pre, str, prefix_len) == 0;
+}
+
+/* rtld-audit la_version hook: report LAV_CURRENT as the supported
+   interface version.  VERSION is not examined.  */
+unsigned int
+la_version (unsigned int version)
+{
+ return LAV_CURRENT;
+}
+
+/* rtld-audit la_objopen hook: write a "vdso found: ADDR" line to stderr
+   when MAP looks like the vDSO.  That is the case when l_addr is 0 and
+   the name starts with "linux-gate.so" (a null pointer is printed), or
+   when l_addr equals getauxval (AT_SYSINFO_EHDR) (l_addr is printed).
+   Always returns 0.  LMID and COOKIE are not used.  */
+unsigned int
+la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
+{
+ /* The linux-gate.so is placed at a fixed address, thus l_addr being 0,
+ and it might be the value reported as the AT_SYSINFO_EHDR. */
+ if (map->l_addr == 0 && startswith (map->l_name, "linux-gate.so"))
+ fprintf (stderr, "vdso found: %p\n", NULL);
+ else if (map->l_addr == getauxval (AT_SYSINFO_EHDR))
+ fprintf (stderr, "vdso found: %p\n", (void*) map->l_addr);
+
+ return 0;
+}
--- /dev/null
+/* Audit module loaded by tst-audit23.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <link.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/auxv.h>
+
+/* rtld-audit la_version hook: report LAV_CURRENT as the supported
+   interface version.  VERSION is not examined.  */
+unsigned int
+la_version (unsigned int version)
+{
+ return LAV_CURRENT;
+}
+
+/* Per-object record that la_objopen allocates and stores in the
+   object's cookie; la_objclose reads it back to print the same data.  */
+struct map_desc_t
+{
+ char *lname; /* strdup'ed object name ("mainapp" for an empty name). */
+ uintptr_t laddr; /* Copy of the link map's l_addr. */
+ Lmid_t lmid; /* Namespace id passed to la_objopen. */
+};
+
+/* rtld-audit la_activity hook: write "la_activity: FLAG COOKIE" to
+   stderr, where COOKIE is the address passed in, printed in hex.  */
+void
+la_activity (uintptr_t *cookie, unsigned int flag)
+{
+ fprintf (stderr, "%s: %d %"PRIxPTR"\n", __func__, flag, (uintptr_t) cookie);
+}
+
+/* rtld-audit la_objopen hook.  Writes one line to stderr with the
+   cookie address, the object name ("mainapp" when l_name is empty),
+   l_addr and LMID.  Then records name, address and namespace in a
+   heap-allocated struct map_desc_t whose address is stored in *COOKIE,
+   for la_objclose to print later.  Aborts if either allocation fails.
+   Always returns 0.  */
+unsigned int
+la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
+{
+  const char *l_name = map->l_name[0] == '\0' ? "mainapp" : map->l_name;
+  fprintf (stderr, "%s: %"PRIxPTR" %s %"PRIxPTR" %ld\n", __func__,
+           (uintptr_t) cookie, l_name, map->l_addr, lmid);
+
+  struct map_desc_t *map_desc = malloc (sizeof *map_desc);
+  if (map_desc == NULL)
+    abort ();
+
+  map_desc->lname = strdup (l_name);
+  /* Same policy as for the descriptor itself: give up on allocation
+     failure instead of leaving a NULL name for la_objclose to print.  */
+  if (map_desc->lname == NULL)
+    abort ();
+  map_desc->laddr = map->l_addr;
+  map_desc->lmid = lmid;
+
+  *cookie = (uintptr_t) map_desc;
+
+  return 0;
+}
+
+/* rtld-audit la_objclose hook: interpret *COOKIE as the struct
+   map_desc_t pointer stored by la_objopen and write its name, address
+   and namespace id to stderr, in the same format la_objopen uses.
+   The record is not freed here.  Always returns 0.  */
+unsigned int
+la_objclose (uintptr_t *cookie)
+{
+ struct map_desc_t *map_desc = (struct map_desc_t *) *cookie;
+ fprintf (stderr, "%s: %"PRIxPTR" %s %"PRIxPTR" %ld\n", __func__,
+ (uintptr_t) cookie, map_desc->lname, map_desc->laddr,
+ map_desc->lmid);
+
+ return 0;
+}
--- /dev/null
+/* Auxiliary functions for tst-audit24x.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _TST_AUDITMOD24_H
+#define _TST_AUDITMOD24_H
+
+/* Abort unless FLAGS has at least one of LA_SYMB_NOPLTENTER and
+   LA_SYMB_NOPLTEXIT set.  */
+static void
+test_symbind_flags (unsigned int flags)
+{
+  const unsigned int noplt_mask = LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT;
+
+  if (!(flags & noplt_mask))
+    abort ();
+}
+
+#endif
--- /dev/null
+/* Audit modules for tst-audit24a.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <link.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <tst-auditmod24.h>
+
+/* Cookie values la_objopen assigns to the test program, its mod1 and
+   its mod2 object respectively.  */
+#define AUDIT24_COOKIE 0x1
+#define AUDIT24MOD1_COOKIE 0x2
+#define AUDIT24MOD2_COOKIE 0x3
+
+/* Names used for matching.  Each keeps a value defined before this
+   point and otherwise falls back to the tst-audit24a default; TEST_MOD
+   defaults to TEST_NAME.  */
+#ifndef TEST_NAME
+# define TEST_NAME "tst-audit24a"
+#endif
+#ifndef TEST_MOD
+# define TEST_MOD TEST_NAME
+#endif
+#ifndef TEST_FUNC
+# define TEST_FUNC "tst_audit24a"
+#endif
+
+/* rtld-audit la_version hook: report LAV_CURRENT as the supported
+   interface version.  VERSION is not examined.  */
+unsigned int
+la_version (unsigned int version)
+{
+ return LAV_CURRENT;
+}
+
+/* rtld-audit la_objopen hook.  Tags the objects taking part in the
+   test with a fixed cookie, chosen by the basename of l_name, and
+   requests LA_FLG_BINDFROM | LA_FLG_BINDTO for them.  Any other object
+   gets the cookie -1 and a return value of 0.  LMID is not used.  */
+unsigned int
+la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
+{
+  /* A name without any '/' is treated as the test program itself.  */
+  const char *slash = strrchr (map->l_name, '/');
+  const char *base = slash != NULL ? slash + 1 : TEST_NAME;
+
+  uintptr_t id;
+  if (strcmp (base, TEST_MOD "mod1.so") == 0)
+    id = AUDIT24MOD1_COOKIE;
+  else if (strcmp (base, TEST_MOD "mod2.so") == 0)
+    id = AUDIT24MOD2_COOKIE;
+  else if (strcmp (base, TEST_NAME) == 0)
+    id = AUDIT24_COOKIE;
+  else
+    {
+      /* Not part of the test.  */
+      *cookie = -1;
+      return 0;
+    }
+
+  *cookie = id;
+  return LA_FLG_BINDFROM | LA_FLG_BINDTO;
+}
+
+/* Replacement target that la_symbind returns for the "mod1_func1"
+   symbol; always yields 1.  */
+static int
+tst_func1 (void)
+{
+ return 1;
+}
+
+/* Replacement target that la_symbind returns for the "mod2_func1"
+   symbol; always yields 10.  */
+static int
+tst_func2 (void)
+{
+ return 10;
+}
+
+/* rtld-audit symbol-binding hook, named la_symbind64 or la_symbind32
+   according to __ELF_NATIVE_CLASS.  For references made by the test
+   program: mod1's "mod1_func1" is redirected to tst_func1, mod1's
+   "mod1_func2" is left alone, any other mod1 symbol aborts, and mod2's
+   "mod2_func1" is redirected to tst_func2.  The redirected and mod1
+   cases also require the NOPLT flags via test_symbind_flags.  All
+   other bindings from the test program keep the symbol's own value.
+   For any other referencing object a named symbol aborts and a
+   nameless one is passed through.  */
+#if __ELF_NATIVE_CLASS == 64
+uintptr_t
+la_symbind64 (Elf64_Sym *sym, unsigned int ndx,
+ uintptr_t *refcook, uintptr_t *defcook,
+ unsigned int *flags, const char *symname)
+#else
+uintptr_t
+la_symbind32 (Elf32_Sym *sym, unsigned int ndx,
+ uintptr_t *refcook, uintptr_t *defcook,
+ unsigned int *flags, const char *symname)
+#endif
+{
+ if (*refcook == AUDIT24_COOKIE)
+ {
+ if (*defcook == AUDIT24MOD1_COOKIE)
+ {
+ /* Check if bind-now symbols are advertised to not call the PLT
+ hooks. */
+ test_symbind_flags (*flags);
+
+ if (strcmp (symname, TEST_FUNC "mod1_func1") == 0)
+ return (uintptr_t) tst_func1;
+ else if (strcmp (symname, TEST_FUNC "mod1_func2") == 0)
+ return sym->st_value;
+ abort ();
+ }
+ if (*defcook == AUDIT24MOD2_COOKIE
+ && (strcmp (symname, TEST_FUNC "mod2_func1") == 0))
+ {
+ test_symbind_flags (*flags);
+
+ return (uintptr_t) tst_func2;
+ }
+
+ /* malloc functions. */
+ return sym->st_value;
+ }
+
+ if (symname[0] != '\0')
+ abort ();
+ return sym->st_value;
+}
--- /dev/null
+/* Audit modules for tst-audit24b.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <link.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <tst-auditmod24.h>
+
+/* Object-name and function-name prefixes used for matching below.  */
+#define TEST_NAME "tst-audit24b"
+#define TEST_FUNC "tst_audit24b"
+
+/* Cookie values la_objopen assigns to the test program, its mod1 and
+   its mod2 object respectively.  */
+#define AUDIT24_COOKIE 0x1
+#define AUDIT24MOD1_COOKIE 0x2
+#define AUDIT24MOD2_COOKIE 0x3
+
+/* rtld-audit la_version hook: report LAV_CURRENT as the supported
+   interface version.  VERSION is not examined.  */
+unsigned int
+la_version (unsigned int version)
+{
+ return LAV_CURRENT;
+}
+
+/* rtld-audit la_objopen hook.  Looks up the basename of l_name in a
+   small table of the objects taking part in the test; a match stores
+   the associated cookie and requests LA_FLG_BINDFROM | LA_FLG_BINDTO.
+   Any other object gets the cookie -1 and a return value of 0.  LMID
+   is not used.  */
+unsigned int
+la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
+{
+  const struct
+  {
+    const char *name;
+    uintptr_t cookie;
+  } known[] =
+    {
+      { TEST_NAME "mod1.so", AUDIT24MOD1_COOKIE },
+      { TEST_NAME "mod2.so", AUDIT24MOD2_COOKIE },
+      { TEST_NAME, AUDIT24_COOKIE },
+    };
+
+  /* A name without any '/' is treated as the test program itself.  */
+  const char *slash = strrchr (map->l_name, '/');
+  const char *base = slash == NULL ? TEST_NAME : slash + 1;
+
+  for (size_t i = 0; i < sizeof (known) / sizeof (known[0]); i++)
+    if (strcmp (base, known[i].name) == 0)
+      {
+        *cookie = known[i].cookie;
+        return LA_FLG_BINDFROM | LA_FLG_BINDTO;
+      }
+
+  *cookie = -1;
+  return 0;
+}
+
+/* Replacement target that la_symbind returns for the "mod1_func1"
+   symbol; always yields 1.  */
+static int
+tst_func1 (void)
+{
+ return 1;
+}
+
+/* Replacement target that la_symbind returns for the "mod2_func1"
+   symbol; always yields 2.  */
+static int
+tst_func2 (void)
+{
+ return 2;
+}
+
+/* rtld-audit symbol-binding hook, named la_symbind64 or la_symbind32
+   according to __ELF_NATIVE_CLASS.
+
+   References made by the test program: mod1's "mod1_func1" is
+   redirected to tst_func1, mod1's "mod1_func2" keeps its own value,
+   and any other mod1 symbol aborts.  Everything else keeps its own
+   value.
+
+   References made by mod1: mod2's "mod2_func1" must carry the NOPLT
+   flags and is redirected to tst_func2.
+
+   Any binding not handled above is unexpected if it has a name (abort);
+   a nameless symbol is passed through unchanged.  */
+#if __ELF_NATIVE_CLASS == 64
+uintptr_t
+la_symbind64 (Elf64_Sym *sym, unsigned int ndx,
+              uintptr_t *refcook, uintptr_t *defcook,
+              unsigned int *flags, const char *symname)
+#else
+uintptr_t
+la_symbind32 (Elf32_Sym *sym, unsigned int ndx,
+              uintptr_t *refcook, uintptr_t *defcook,
+              unsigned int *flags, const char *symname)
+#endif
+{
+  if (*refcook == AUDIT24_COOKIE)
+    {
+      if (*defcook == AUDIT24MOD1_COOKIE)
+        {
+          if (strcmp (symname, TEST_FUNC "mod1_func1") == 0)
+            return (uintptr_t) tst_func1;
+          else if (strcmp (symname, TEST_FUNC "mod1_func2") == 0)
+            return sym->st_value;
+          abort ();
+        }
+      /* malloc functions.  */
+      return sym->st_value;
+    }
+  else if (*refcook == AUDIT24MOD1_COOKIE)
+    {
+      if (*defcook == AUDIT24MOD2_COOKIE
+          && (strcmp (symname, TEST_FUNC "mod2_func1") == 0))
+        {
+          test_symbind_flags (*flags);
+          return (uintptr_t) tst_func2;
+        }
+    }
+
+  /* Only a named symbol is an error here; aborting unconditionally
+     would also kill the process for nameless bindings.  */
+  if (symname[0] != '\0')
+    abort ();
+  return sym->st_value;
+}
--- /dev/null
+/* Build the tst-auditmod24a.c code with a different program name while
+   still matching the tst-audit24a module names.  */
+#define TEST_NAME "tst-audit24c"
+#define TEST_MOD "tst-audit24a"
+#include "tst-auditmod24a.c"
--- /dev/null
+/* Audit module for tst-audit24d.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <link.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <tst-auditmod24.h>
+
+/* Cookie values la_objopen assigns to the objects it recognizes.  */
+#define AUDIT24_COOKIE 0x0
+#define AUDIT24MOD1_COOKIE 0x1
+#define AUDIT24MOD2_COOKIE 0x2
+#define AUDIT24MOD3_COOKIE 0x3
+#define AUDIT24MOD4_COOKIE 0x4
+
+/* rtld-audit la_version hook: report LAV_CURRENT as the supported
+   interface version.  VERSION is not examined.  */
+unsigned int
+la_version (unsigned int version)
+{
+ return LAV_CURRENT;
+}
+
+/* rtld-audit la_objopen hook.  Tags the objects it recognizes by the
+   basename of l_name with a fixed cookie and requests
+   LA_FLG_BINDFROM | LA_FLG_BINDTO for them.  Any other object gets the
+   cookie -1 and a return value of 0.  LMID is not used.  */
+unsigned int
+la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
+{
+ /* A name without any '/' is treated as the test program itself. */
+ const char *p = strrchr (map->l_name, '/');
+ const char *l_name = p == NULL ? "tst-audit24d" : p + 1;
+
+ uintptr_t ck = -1;
+ if (strcmp (l_name, "tst-audit24dmod1.so") == 0)
+ ck = AUDIT24MOD1_COOKIE;
+ else if (strcmp (l_name, "tst-audit24dmod2.so") == 0)
+ ck = AUDIT24MOD2_COOKIE;
+ else if (strcmp (l_name, "tst-audit24dmod3.so") == 0)
+ ck = AUDIT24MOD3_COOKIE;
+ /* NOTE(review): this name has no "4" although the cookie is the MOD4
+ one. Confirm the intent before changing it: la_symbind below
+ aborts on any named binding it does not know, so starting to audit
+ a fourth module would need matching changes there. */
+ else if (strcmp (l_name, "tst-audit24dmod.so") == 0)
+ ck = AUDIT24MOD4_COOKIE;
+ else if (strcmp (l_name, "tst-audit24d") == 0)
+ ck = AUDIT24_COOKIE;
+
+ *cookie = ck;
+ return ck == -1 ? 0 : LA_FLG_BINDFROM | LA_FLG_BINDTO;
+}
+
+/* Replacement target that la_symbind returns for the symbol of the
+   same name; always yields 1.  */
+static int
+tst_audit24dmod1_func1 (void)
+{
+ return 1;
+}
+
+/* Replacement target that la_symbind returns for the symbol of the
+   same name; always yields 10.  */
+static int
+tst_audit24dmod2_func1 (void)
+{
+ return 10;
+}
+
+/* Replacement target that la_symbind returns for the symbol of the
+   same name; always yields 30.  */
+static int
+tst_audit24dmod3_func1 (void)
+{
+ return 30;
+}
+
+#include <stdio.h>
+
+/* rtld-audit symbol-binding hook, named la_symbind64 or la_symbind32
+   according to __ELF_NATIVE_CLASS.  For references made by the test
+   program: mod1's "tst_audit24dmod1_func1" and mod2's
+   "tst_audit24dmod2_func1" are redirected to the local functions of
+   the same name, mod1's "tst_audit24dmod1_func2" is left alone, any
+   other mod1 symbol aborts, and everything else keeps its own value.
+   For references made by mod1: mod3's "tst_audit24dmod3_func1" must
+   carry the NOPLT flags and is redirected to the local function.
+   Whatever is left aborts if the symbol has a name and is passed
+   through otherwise.  */
+#if __ELF_NATIVE_CLASS == 64
+uintptr_t
+la_symbind64 (Elf64_Sym *sym, unsigned int ndx,
+ uintptr_t *refcook, uintptr_t *defcook,
+ unsigned int *flags, const char *symname)
+#else
+uintptr_t
+la_symbind32 (Elf32_Sym *sym, unsigned int ndx,
+ uintptr_t *refcook, uintptr_t *defcook,
+ unsigned int *flags, const char *symname)
+#endif
+{
+ if (*refcook == AUDIT24_COOKIE)
+ {
+ if (*defcook == AUDIT24MOD1_COOKIE)
+ {
+ if (strcmp (symname, "tst_audit24dmod1_func1") == 0)
+ return (uintptr_t) tst_audit24dmod1_func1;
+ else if (strcmp (symname, "tst_audit24dmod1_func2") == 0)
+ return sym->st_value;
+ abort ();
+ }
+ if (*defcook == AUDIT24MOD2_COOKIE
+ && (strcmp (symname, "tst_audit24dmod2_func1") == 0))
+ return (uintptr_t) tst_audit24dmod2_func1;
+
+ /* malloc functions. */
+ return sym->st_value;
+ }
+ else if (*refcook == AUDIT24MOD1_COOKIE)
+ {
+ if (*defcook == AUDIT24MOD3_COOKIE
+ && strcmp (symname, "tst_audit24dmod3_func1") == 0)
+ {
+ test_symbind_flags (*flags);
+
+ return (uintptr_t) tst_audit24dmod3_func1;
+ }
+ }
+
+ if (symname[0] != '\0')
+ abort ();
+ return sym->st_value;
+}
--- /dev/null
+/* Audit modules for tst-audit25a.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <link.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+/* Cookie values la_objopen assigns to the objects it recognizes.
+   NOTE(review): MOD3 and MOD4 repeat the MOD1 and MOD2 values.  The
+   code in this file only compares cookies against -1, so the overlap
+   has no visible effect here -- confirm it is intended.  */
+#define AUDIT25_COOKIE 0x1
+#define AUDIT25MOD1_COOKIE 0x2
+#define AUDIT25MOD2_COOKIE 0x3
+#define AUDIT25MOD3_COOKIE 0x2
+#define AUDIT25MOD4_COOKIE 0x3
+
+/* Object-name and function-name prefixes.  */
+#define TEST_NAME "tst-audit25"
+#define TEST_MOD "tst-audit25"
+#define TEST_FUNC "tst_audit25"
+
+/* rtld-audit la_version hook: report LAV_CURRENT as the supported
+   interface version.  VERSION is not examined.  */
+unsigned int
+la_version (unsigned int version)
+{
+ return LAV_CURRENT;
+}
+
+/* rtld-audit la_objopen hook.  Tags the four test modules (matched by
+   exact basename) and the test program (any basename starting with
+   TEST_NAME) with a fixed cookie and requests
+   LA_FLG_BINDFROM | LA_FLG_BINDTO for them.  Any other object gets the
+   cookie -1 and a return value of 0.  LMID is not used.  */
+unsigned int
+la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
+{
+  /* A name without any '/' is treated as the test program itself.  */
+  const char *slash = strrchr (map->l_name, '/');
+  const char *base = slash != NULL ? slash + 1 : TEST_NAME;
+
+  uintptr_t id;
+  if (strcmp (base, TEST_MOD "mod1.so") == 0)
+    id = AUDIT25MOD1_COOKIE;
+  else if (strcmp (base, TEST_MOD "mod2.so") == 0)
+    id = AUDIT25MOD2_COOKIE;
+  else if (strcmp (base, TEST_MOD "mod3.so") == 0)
+    id = AUDIT25MOD3_COOKIE;
+  else if (strcmp (base, TEST_MOD "mod4.so") == 0)
+    id = AUDIT25MOD4_COOKIE;
+  else if (strncmp (base, TEST_NAME, strlen (TEST_NAME)) == 0)
+    /* Prefix match, tried only after the exact module names.  */
+    id = AUDIT25_COOKIE;
+  else
+    {
+      /* Not part of the test.  */
+      *cookie = -1;
+      return 0;
+    }
+
+  *cookie = id;
+  return LA_FLG_BINDFROM | LA_FLG_BINDTO;
+}
+
+/* rtld-audit symbol-binding hook, named la_symbind64 or la_symbind32
+   according to __ELF_NATIVE_CLASS.  For a named symbol whose
+   referencing and defining objects both carry a cookie other than -1,
+   write "la_symbind: SYMNAME N" to stderr.  N is 1 if *FLAGS has
+   LA_SYMB_NOPLTENTER or LA_SYMB_NOPLTEXIT set and 0 otherwise.  The
+   binding itself is never changed: the symbol's own value is
+   returned.  */
+#if __ELF_NATIVE_CLASS == 64
+uintptr_t
+la_symbind64 (Elf64_Sym *sym, unsigned int ndx,
+              uintptr_t *refcook, uintptr_t *defcook,
+              unsigned int *flags, const char *symname)
+#else
+uintptr_t
+la_symbind32 (Elf32_Sym *sym, unsigned int ndx,
+              uintptr_t *refcook, uintptr_t *defcook,
+              unsigned int *flags, const char *symname)
+#endif
+{
+  if (*refcook != -1 && *defcook != -1 && symname[0] != '\0')
+    {
+      /* Kept in an unsigned int so that the argument type matches the
+         %u conversion.  */
+      unsigned int noplt
+        = (*flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT)) != 0;
+      fprintf (stderr, "la_symbind: %s %u\n", symname, noplt);
+    }
+  return sym->st_value;
+}
--- /dev/null
+/* Check the usability of <dlfcn.h> functions in audit modules. Audit module.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <dlfcn.h>
+#include <first-versions.h>
+#include <gnu/lib-names.h>
+#include <link.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <support/check.h>
+#include <support/xdlfcn.h>
+
+unsigned int
+la_version (unsigned int current)
+{
+ /* Exercise various <dlfcn.h> functions from within the audit module.
+ All checks run inside la_version. If support_record_failure_is_failed
+ reports a failure afterwards, the process is terminated with status 1
+ at the end of this function; otherwise LAV_CURRENT is returned. */
+
+ /* Check dlopen, dlsym, dlclose. The lookup of "SINCOS" is expected
+ to fail and to leave a message retrievable through dlerror.
+ NOTE(review): MESSAGE is passed to strstr without a NULL check; this
+ relies on the failed dlsym having recorded an error. */
+ void *handle = xdlopen (LIBM_SO, RTLD_LOCAL | RTLD_NOW);
+ void *ptr = xdlsym (handle, "sincos");
+ TEST_VERIFY (ptr != NULL);
+ ptr = dlsym (handle, "SINCOS");
+ TEST_VERIFY (ptr == NULL);
+ const char *message = dlerror ();
+ TEST_VERIFY (strstr (message, ": undefined symbol: SINCOS") != NULL);
+ ptr = dlsym (handle, "SINCOS");
+ TEST_VERIFY (ptr == NULL);
+ xdlclose (handle);
+ TEST_COMPARE_STRING (dlerror (), NULL);
+
+ handle = xdlopen (LIBC_SO, RTLD_LOCAL | RTLD_NOW | RTLD_NOLOAD);
+
+ /* Check dlvsym. _exit is unlikely to gain another symbol
+ version. */
+ TEST_VERIFY (xdlsym (handle, "_exit")
+ == xdlvsym (handle, "_exit", FIRST_VERSION_libc__exit_STRING));
+
+ /* Check dlinfo. The link map reported for HANDLE is expected to be
+ the handle itself. */
+ {
+ void *handle2 = NULL;
+ TEST_COMPARE (dlinfo (handle, RTLD_DI_LINKMAP, &handle2), 0);
+ TEST_VERIFY (handle2 == handle);
+ }
+
+ /* Check dladdr and dladdr1. */
+ Dl_info info = { };
+ TEST_VERIFY (dladdr (&_exit, &info) != 0);
+ if (strcmp (info.dli_sname, "_Exit") != 0) /* _Exit is an alias. */
+ TEST_COMPARE_STRING (info.dli_sname, "_exit");
+ TEST_VERIFY (info.dli_saddr == &_exit);
+ TEST_VERIFY (strstr (info.dli_fname, LIBC_SO));
+ void *extra_info;
+ memset (&info, 0, sizeof (info));
+ TEST_VERIFY (dladdr1 (&_exit, &info, &extra_info, RTLD_DL_LINKMAP) != 0);
+ TEST_VERIFY (extra_info == handle);
+
+ /* Verify that dlmopen creates a new namespace. The handle must differ
+ from the one obtained above, and _exit looked up through it must
+ resolve to the link map of the new handle. */
+ void *dlmopen_handle = xdlmopen (LM_ID_NEWLM, LIBC_SO, RTLD_NOW);
+ TEST_VERIFY (dlmopen_handle != handle);
+ memset (&info, 0, sizeof (info));
+ extra_info = NULL;
+ ptr = xdlsym (dlmopen_handle, "_exit");
+ TEST_VERIFY (dladdr1 (ptr, &info, &extra_info, RTLD_DL_LINKMAP) != 0);
+ TEST_VERIFY (extra_info == dlmopen_handle);
+ xdlclose (dlmopen_handle);
+
+ /* Terminate the process with an error state. This does not happen
+ automatically because the audit module state is not shared with
+ the main program. */
+ if (support_record_failure_is_failed ())
+ {
+ fflush (stdout);
+ fflush (stderr);
+ _exit (1);
+ }
+
+ return LAV_CURRENT;
+}
+
+char *
+la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag)
+{
+  /* The special name "mapped to libc" is redirected to LIBC_SO; every
+     other name is handed back unchanged.  */
+  const char *result = name;
+  if (strcmp (name, "mapped to libc") == 0)
+    result = LIBC_SO;
+  return (char *) result;
+}
cp $B/elf/libmarkermod2-1.so $L/libmarkermod2.so
cp $B/elf/libmarkermod3-1.so $L/libmarkermod3.so
cp $B/elf/libmarkermod4-1.so $L/libmarkermod4.so
+cp $B/elf/libmarkermod5-1.so $L/libmarkermod5.so
mkdirp 0770 $L/glibc-hwcaps/power9
cp $B/elf/libmarkermod2-2.so $L/glibc-hwcaps/power9/libmarkermod2.so
cp $B/elf/libmarkermod4-2.so $L/glibc-hwcaps/z13/libmarkermod4.so
cp $B/elf/libmarkermod4-3.so $L/glibc-hwcaps/z14/libmarkermod4.so
cp $B/elf/libmarkermod4-4.so $L/glibc-hwcaps/z15/libmarkermod4.so
+mkdirp 0770 $L/glibc-hwcaps/z16
+cp $B/elf/libmarkermod5-2.so $L/glibc-hwcaps/z13/libmarkermod5.so
+cp $B/elf/libmarkermod5-3.so $L/glibc-hwcaps/z14/libmarkermod5.so
+cp $B/elf/libmarkermod5-4.so $L/glibc-hwcaps/z15/libmarkermod5.so
+cp $B/elf/libmarkermod5-5.so $L/glibc-hwcaps/z16/libmarkermod5.so
mkdirp 0770 $L/glibc-hwcaps/x86-64-v2
cp $B/elf/libmarkermod2-2.so $L/glibc-hwcaps/x86-64-v2/libmarkermod2.so
--- /dev/null
+#!/usr/bin/python3
+# Verify scripts/glibcelf.py contents against elf/elf.h.
+# Copyright (C) 2022 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+import argparse
+import enum
+import sys
+
+import glibcelf
+import glibcextract
+
+errors_encountered = 0
+
+def error(message):
+ global errors_encountered
+ sys.stdout.write('error: {}\n'.format(message))
+ errors_encountered += 1
+
+# The enum constants in glibcelf are expected to have exactly these
+# prefixes.
+expected_constant_prefixes = tuple(
+ 'ELFCLASS ELFDATA EM_ ET_ DT_ PF_ PT_ SHF_ SHN_ SHT_ STB_ STT_'.split())
+
+def find_constant_prefix(name):
+ """Returns a matching prefix from expected_constant_prefixes or None."""
+ for prefix in expected_constant_prefixes:
+ if name.startswith(prefix):
+ return prefix
+ return None
+
+def find_enum_types():
+ """A generator for the enum classes defined in glibcelf.
+
+ Yields every class found in the glibcelf module namespace whose
+ first base class is glibcelf._OpenIntEnum, enum.Enum or
+ enum.IntFlag.
+
+ """
+ for obj in vars(glibcelf).values():
+ if isinstance(obj, type) and obj.__bases__[0] in (
+ glibcelf._OpenIntEnum, enum.Enum, enum.IntFlag):
+ yield obj
+
+def check_duplicates():
+ """Verifies that enum types do not have duplicate values.
+
+ Different types must have different member names, too.
+
+ All findings are reported through error(). Members are taken from
+ typ.__members__, so aliases are visited as well.
+
+ NOTE(review): 'last' is only assigned in the duplicate-value
+ branch, so the '... after ...' ordering report can only trigger
+ once a duplicate has been seen. Confirm whether it was meant to
+ track every member.
+
+ """
+ global_seen = {}
+ for typ in find_enum_types():
+ seen = {}
+ last = None
+ for (name, e) in typ.__members__.items():
+ if e.value in seen:
+ error('{} has {}={} and {}={}'.format(
+ typ, seen[e.value], e.value, name, e.value))
+ last = e
+ else:
+ seen[e.value] = name
+ if last is not None and last.value > e.value:
+ error('{} has {}={} after {}={}'.format(
+ typ, name, e.value, last.name, last.value))
+ if name in global_seen:
+ error('{} used in {} and {}'.format(
+ name, global_seen[name], typ))
+ else:
+ global_seen[name] = typ
+
+def check_constant_prefixes():
+ """Check that the constant prefixes match expected_constant_prefixes.
+
+ Reports through error(): constants whose name matches none of the
+ expected prefixes, constants whose prefix differs from the first
+ prefix recorded for their type, types for which no prefix was
+ recorded, and expected prefixes that no type uses.
+
+ """
+ seen = set()
+ for typ in find_enum_types():
+ typ_prefix = None
+ for val in typ:
+ prefix = find_constant_prefix(val.name)
+ if prefix is None:
+ error('constant {!r} for {} has unknown prefix'.format(
+ val, typ))
+ break
+ elif typ_prefix is None:
+ typ_prefix = prefix
+ seen.add(typ_prefix)
+ elif prefix != typ_prefix:
+ error('prefix {!r} for constant {!r}, expected {!r}'.format(
+ prefix, val, typ_prefix))
+ if typ_prefix is None:
+ error('empty enum type {}'.format(typ))
+
+ for prefix in sorted(set(expected_constant_prefixes) - seen):
+ error('missing constant prefix {!r}'.format(prefix))
+ # Reverse difference is already covered inside the loop.
+
+def find_elf_h_constants(cc):
+ """Returns a dictionary of relevant constants from <elf.h>."""
+ return glibcextract.compute_macro_consts(
+ source_text='#include <elf.h>',
+ cc=cc,
+ macro_re='|'.join(
+ prefix + '.*' for prefix in expected_constant_prefixes))
+
+# The first part of the pair is a name of an <elf.h> constant that is
+# dropped from glibcelf. The second part is the constant as it is
+# used in <elf.h>.
+glibcelf_skipped_aliases = (
+ ('EM_ARC_A5', 'EM_ARC_COMPACT'),
+ ('PF_PARISC_SBP', 'PF_HP_SBP')
+)
+
+# Constants that provide little value and are not included in
+# glibcelf: *LO*/*HI* range constants, *NUM constants counting the
+# number of constants. Also includes the alias names from
+# glibcelf_skipped_aliases.
+glibcelf_skipped_constants = frozenset(
+ [e[0] for e in glibcelf_skipped_aliases]) | frozenset("""
+DT_AARCH64_NUM
+DT_ADDRNUM
+DT_ADDRRNGHI
+DT_ADDRRNGLO
+DT_ALPHA_NUM
+DT_ENCODING
+DT_EXTRANUM
+DT_HIOS
+DT_HIPROC
+DT_IA_64_NUM
+DT_LOOS
+DT_LOPROC
+DT_MIPS_NUM
+DT_NUM
+DT_PPC64_NUM
+DT_PPC_NUM
+DT_PROCNUM
+DT_SPARC_NUM
+DT_VALNUM
+DT_VALRNGHI
+DT_VALRNGLO
+DT_VERSIONTAGNUM
+ELFCLASSNUM
+ELFDATANUM
+ET_HIOS
+ET_HIPROC
+ET_LOOS
+ET_LOPROC
+ET_NUM
+PF_MASKOS
+PF_MASKPROC
+PT_HIOS
+PT_HIPROC
+PT_HISUNW
+PT_LOOS
+PT_LOPROC
+PT_LOSUNW
+SHF_MASKOS
+SHF_MASKPROC
+SHN_HIOS
+SHN_HIPROC
+SHN_HIRESERVE
+SHN_LOOS
+SHN_LOPROC
+SHN_LORESERVE
+SHT_HIOS
+SHT_HIPROC
+SHT_HIPROC
+SHT_HISUNW
+SHT_HIUSER
+SHT_LOOS
+SHT_LOPROC
+SHT_LOSUNW
+SHT_LOUSER
+SHT_NUM
+STB_HIOS
+STB_HIPROC
+STB_LOOS
+STB_LOPROC
+STB_NUM
+STT_HIOS
+STT_HIPROC
+STT_LOOS
+STT_LOPROC
+STT_NUM
+""".strip().split())
+
+def check_constant_values(cc):
+ """Checks the values of <elf.h> constants against glibcelf."""
+
+ glibcelf_constants = {
+ e.name: e for typ in find_enum_types() for e in typ}
+ elf_h_constants = find_elf_h_constants(cc=cc)
+
+ missing_in_glibcelf = (set(elf_h_constants) - set(glibcelf_constants)
+ - glibcelf_skipped_constants)
+ for name in sorted(missing_in_glibcelf):
+ error('constant {} is missing from glibcelf'.format(name))
+
+ unexpected_in_glibcelf = \
+ set(glibcelf_constants) & glibcelf_skipped_constants
+ for name in sorted(unexpected_in_glibcelf):
+ error('constant {} is supposed to be filtered from glibcelf'.format(
+ name))
+
+ missing_in_elf_h = set(glibcelf_constants) - set(elf_h_constants)
+ for name in sorted(missing_in_elf_h):
+ error('constant {} is missing from <elf.h>'.format(name))
+
+ expected_in_elf_h = glibcelf_skipped_constants - set(elf_h_constants)
+ for name in expected_in_elf_h:
+ error('filtered constant {} is missing from <elf.h>'.format(name))
+
+ for alias_name, name_in_glibcelf in glibcelf_skipped_aliases:
+ if name_in_glibcelf not in glibcelf_constants:
+ error('alias value {} for {} not in glibcelf'.format(
+ name_in_glibcelf, alias_name))
+ elif (int(elf_h_constants[alias_name])
+ != glibcelf_constants[name_in_glibcelf].value):
+ error('<elf.h> has {}={}, glibcelf has {}={}'.format(
+ alias_name, elf_h_constants[alias_name],
+ name_in_glibcelf, glibcelf_constants[name_in_glibcelf]))
+
+ # Check for value mismatches:
+ for name in sorted(set(glibcelf_constants) & set(elf_h_constants)):
+ glibcelf_value = glibcelf_constants[name].value
+ elf_h_value = int(elf_h_constants[name])
+ # On 32-bit architectures <elf.h> as some constants that are
+ # parsed as signed, while they are unsigned in glibcelf. So
+ # far, this only affects some flag constants, so special-case
+ # them here.
+ if (glibcelf_value != elf_h_value
+ and not (isinstance(glibcelf_constants[name], enum.IntFlag)
+ and glibcelf_value == 1 << 31
+ and elf_h_value == -(1 << 31))):
+ error('{}: glibcelf has {!r}, <elf.h> has {!r}'.format(
+ name, glibcelf_value, elf_h_value))
+
+def main():
+ """The main entry point."""
+ parser = argparse.ArgumentParser(
+ description="Check glibcelf.py and elf.h against each other.")
+ parser.add_argument('--cc', metavar='CC',
+ help='C compiler (including options) to use')
+ args = parser.parse_args()
+
+ check_duplicates()
+ check_constant_prefixes()
+ check_constant_values(cc=args.cc)
+
+ if errors_encountered > 0:
+ print("note: errors encountered:", errors_encountered)
+ sys.exit(1)
+
+if __name__ == '__main__':
+ main()
--- /dev/null
+#!/usr/bin/python3
+# Verify that certain symbols are covered by RELRO.
+# Copyright (C) 2022 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+"""Analyze a (shared) object to verify that certain symbols are
+present and covered by the PT_GNU_RELRO segment.
+
+"""
+
+import argparse
+import os.path
+import sys
+
+# Make available glibc Python modules.
+sys.path.append(os.path.join(
+ os.path.dirname(os.path.realpath(__file__)), os.path.pardir, 'scripts'))
+
+import glibcelf
+
+def find_relro(path: str, img: glibcelf.Image) -> (int, int):
+ """Discover the address range of the PT_GNU_RELRO segment.
+
+ Returns the pair (p_vaddr, p_vaddr + p_memsz) of the first
+ PT_GNU_RELRO program header in IMG. If there is none, an error
+ message naming PATH is written to standard output and the process
+ exits with status 1.
+
+ """
+ for phdr in img.phdrs():
+ if phdr.p_type == glibcelf.Pt.PT_GNU_RELRO:
+ # The computation is not entirely accurate because
+ # _dl_protect_relro in elf/dl-reloc.c rounds both the
+ # start and the end downwards using the run-time page size.
+ return phdr.p_vaddr, phdr.p_vaddr + phdr.p_memsz
+ sys.stdout.write('{}: error: no PT_GNU_RELRO segment\n'.format(path))
+ sys.exit(1)
+
+def check_in_relro(kind, relro_begin, relro_end, name, start, size, error):
+    """Check if a section or symbol falls within the RELRO segment.
+
+    KIND is a label for the message ('symbol' or 'section'), NAME is
+    the object name as bytes, and START and SIZE give its address and
+    length.  [RELRO_BEGIN, RELRO_END) is the RELRO address range.
+    ERROR is called with a message string if the object is not fully
+    contained in that range.  An object of size zero is always
+    reported.
+
+    """
+    # Address of the last byte of the object.  For a one-byte object
+    # this equals START, hence the non-strict comparison below.
+    end = start + size - 1
+    if not (relro_begin <= start <= end < relro_end):
+        error(
+            '{} {!r} of size {} at 0x{:x} is not in RELRO range [0x{:x}, 0x{:x})'.format(
+                kind, name.decode('UTF-8'), size, start,
+                relro_begin, relro_end))
+
+def get_parser():
+ """Return an argument parser for this script."""
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument('object', help='path to object file to check')
+ parser.add_argument('--required', metavar='NAME', default=(),
+ help='required symbol names', nargs='*')
+ parser.add_argument('--optional', metavar='NAME', default=(),
+ help='required symbol names', nargs='*')
+ return parser
+
+def main(argv):
+ """The main entry point."""
+ parser = get_parser()
+ opts = parser.parse_args(argv)
+ img = glibcelf.Image.readfile(opts.object)
+
+ required_symbols = frozenset([sym.encode('UTF-8')
+ for sym in opts.required])
+ optional_symbols = frozenset([sym.encode('UTF-8')
+ for sym in opts.optional])
+ check_symbols = required_symbols | optional_symbols
+
+ # Tracks the symbols in check_symbols that have been found.
+ symbols_found = set()
+
+ # Discover the extent of the RELRO segment.
+ relro_begin, relro_end = find_relro(opts.object, img)
+ symbol_table_found = False
+
+ errors = False
+ def error(msg: str) -> None:
+ """Record an error condition and write a message to standard output."""
+ nonlocal errors
+ errors = True
+ sys.stdout.write('{}: error: {}\n'.format(opts.object, msg))
+
+ # Iterate over section headers to find the symbol table.
+ for shdr in img.shdrs():
+ if shdr.sh_type == glibcelf.Sht.SHT_SYMTAB:
+ symbol_table_found = True
+ for sym in img.syms(shdr):
+ if sym.st_name in check_symbols:
+ symbols_found.add(sym.st_name)
+
+ # Validate symbol type, section, and size.
+ if sym.st_info.type != glibcelf.Stt.STT_OBJECT:
+ error('symbol {!r} has wrong type {}'.format(
+ sym.st_name.decode('UTF-8'), sym.st_info.type))
+ if sym.st_shndx in glibcelf.Shn:
+ error('symbol {!r} has reserved section {}'.format(
+ sym.st_name.decode('UTF-8'), sym.st_shndx))
+ continue
+ if sym.st_size == 0:
+ error('symbol {!r} has size zero'.format(
+ sym.st_name.decode('UTF-8')))
+ continue
+
+ check_in_relro('symbol', relro_begin, relro_end,
+ sym.st_name, sym.st_value, sym.st_size,
+ error)
+ continue # SHT_SYMTAB
+ if shdr.sh_name == b'.data.rel.ro' \
+ or shdr.sh_name.startswith(b'.data.rel.ro.'):
+ check_in_relro('section', relro_begin, relro_end,
+ shdr.sh_name, shdr.sh_addr, shdr.sh_size,
+ error)
+ continue
+
+ if required_symbols - symbols_found:
+ for sym in sorted(required_symbols - symbols_found):
+ error('symbol {!r} not found'.format(sym.decode('UTF-8')))
+
+ if errors:
+ sys.exit(1)
+
+ if not symbol_table_found:
+ sys.stdout.write(
+ '{}: warning: no symbol table found (stripped object)\n'.format(
+ opts.object))
+ sys.exit(77)
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
--- /dev/null
+/* Test case for DSO with readonly dynamic section.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+int foo = -1;
--- /dev/null
+SECTIONS
+{
+ . = SIZEOF_HEADERS;
+ .dynamic : { *(.dynamic) } :text :dynamic
+ .rodata : { *(.data*) *(.bss*) } :text
+ /DISCARD/ : {
+ *(.note.gnu.property)
+ }
+ .note : { *(.note.*) } :text :note
+}
+PHDRS
+{
+ text PT_LOAD FLAGS(5) FILEHDR PHDRS;
+ dynamic PT_DYNAMIC FLAGS(4);
+ note PT_NOTE FLAGS(4);
+}
--- /dev/null
+/* Test case for DSO with readonly dynamic section.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <support/check.h>
+#include <support/test-driver.h>
+
+extern int foo;
+
+static int
+do_test (void)
+{
+ TEST_COMPARE (foo, -1);
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Base for test program with impossibly large PT_TLS segment.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* The actual test binary is patched using scripts/tst-elf-edit.py
+ --maximize-tls-size, and this introduces the expected test
+ allocation failure due to an excessive PT_TLS p_memsz value.
+
+ Patching the binary is required because on some 64-bit targets, TLS
+ relocations can only cover a 32-bit range, and glibc-internal TLS
+ variables such as errno end up outside that range. */
+
+int
+main (void)
+{
+ return 0;
+}
__gconv_get_path ();
for (cnt = 0; __gconv_path_elem[cnt].name != NULL; ++cnt)
- gconv_parseconfdir (__gconv_path_elem[cnt].name,
+ gconv_parseconfdir (NULL, __gconv_path_elem[cnt].name,
__gconv_path_elem[cnt].len);
#endif
# define readdir __readdir
# define closedir __closedir
# define mempcpy __mempcpy
-# define lstat64 __lstat64
+# define struct_stat struct __stat64_t64
+# define lstat __lstat64_time64
# define feof_unlocked __feof_unlocked
+#else
+# define struct_stat struct stat
#endif
/* Name of the file containing the module information in the directories
along the path. */
static const char gconv_conf_filename[] = "gconv-modules";
-static const char gconv_conf_dirname[] = "gconv-modules.d";
static void add_alias (char *);
static void add_module (char *, const char *, size_t, int);
return true;
}
+/* Prefix DIR (with length DIR_LEN) with PREFIX if the latter is non-NULL and
+ parse configuration in it. */
+
static __always_inline bool
-gconv_parseconfdir (const char *dir, size_t dir_len)
+gconv_parseconfdir (const char *prefix, const char *dir, size_t dir_len)
{
- /* No slash needs to be inserted between dir and gconv_conf_filename;
- dir already ends in a slash. */
- char *buf = malloc (dir_len + sizeof (gconv_conf_dirname));
+ /* No slash needs to be inserted between dir and gconv_conf_filename; dir
+ already ends in a slash. The additional 2 is to accommodate the ".d"
+ when looking for configuration files in gconv-modules.d. */
+ size_t buflen = dir_len + sizeof (gconv_conf_filename) + 2;
+ char *buf = malloc (buflen + (prefix != NULL ? strlen (prefix) : 0));
+ char *cp = buf;
bool found = false;
if (buf == NULL)
return false;
- char *cp = mempcpy (mempcpy (buf, dir, dir_len), gconv_conf_filename,
- sizeof (gconv_conf_filename));
+ if (prefix != NULL)
+ cp = stpcpy (cp, prefix);
+
+ cp = mempcpy (mempcpy (cp, dir, dir_len), gconv_conf_filename,
+ sizeof (gconv_conf_filename));
/* Read the gconv-modules configuration file first. */
found = read_conf_file (buf, dir, dir_len);
&& strcmp (ent->d_name + len - strlen (suffix), suffix) == 0)
{
char *conf;
- struct stat64 st;
+ struct_stat st;
if (asprintf (&conf, "%s/%s", buf, ent->d_name) < 0)
continue;
- if (ent->d_type == DT_UNKNOWN
- && (lstat64 (conf, &st) == -1
- || !S_ISREG (st.st_mode)))
- continue;
- found |= read_conf_file (conf, dir, dir_len);
+ if (ent->d_type != DT_UNKNOWN
+ || (lstat (conf, &st) != -1 && S_ISREG (st.st_mode)))
+ found |= read_conf_file (conf, dir, dir_len);
+
free (conf);
}
}
while (++remaining < argc);
/* All done. */
+ if (output != stdout)
+ fclose (output);
free_table (cvtbl);
return status;
}
static int
handle_dir (const char *dir)
{
+ char *newp = NULL;
size_t dirlen = strlen (dir);
bool found = false;
- char *fulldir = xasprintf ("%s%s%s", dir[0] == '/' ? prefix : "",
- dir, dir[dirlen - 1] != '/' ? "/" : "");
+ /* End directory path with a '/' if it doesn't already. */
+ if (dir[dirlen - 1] != '/')
+ {
+ newp = xmalloc (dirlen + 2);
+ memcpy (newp, dir, dirlen);
+ newp[dirlen++] = '/';
+ newp[dirlen] = '\0';
+ dir = newp;
+ }
- found = gconv_parseconfdir (fulldir, strlen (fulldir));
+ found = gconv_parseconfdir (dir[0] == '/' ? prefix : NULL, dir, dirlen);
if (!found)
{
"configuration files with names ending in .conf.");
}
- free (fulldir);
+ free (newp);
return found ? 0 : 1;
}
# Copyright (C) 1997-2021 Free Software Foundation, Inc.
+# Copyright (C) The GNU Toolchain Authors.
# This file is part of the GNU C Library.
# The GNU C Library is free software; you can redistribute it and/or
tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \
- bug-iconv13 bug-iconv14
+ bug-iconv13 bug-iconv14 bug-iconv15
ifeq ($(have-thread-library),yes)
tests += bug-iconv3
endif
$(addprefix $(objpfx),$(modules.so))
$(objpfx)bug-iconv14.out: $(addprefix $(objpfx), $(gconv-modules)) \
$(addprefix $(objpfx),$(modules.so))
+$(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \
+ $(addprefix $(objpfx),$(modules.so))
$(objpfx)iconv-test.out: run-iconv-test.sh \
$(addprefix $(objpfx), $(gconv-modules)) \
--- /dev/null
+/* Bug 28524: Conversion from ISO-2022-JP-3 with iconv
+ may emit spurious NUL character on state reset.
+ Copyright (C) The GNU Toolchain Authors.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stddef.h>
+#include <iconv.h>
+#include <support/check.h>
+
+static int
+do_test (void)
+{
+ char in[] = "\x1b(I";
+ char *inbuf = in;
+ size_t inleft = sizeof (in) - 1;
+ char out[1];
+ char *outbuf = out;
+ size_t outleft = sizeof (out);
+ iconv_t cd;
+
+ cd = iconv_open ("UTF8", "ISO-2022-JP-3");
+ TEST_VERIFY_EXIT (cd != (iconv_t) -1);
+
+ /* First call to iconv should alter internal state.
+ Now, JISX0201_Kana_set is selected and
+ state value != ASCII_set. */
+ TEST_VERIFY (iconv (cd, &inbuf, &inleft, &outbuf, &outleft) != (size_t) -1);
+
+ /* No bytes should have been added to
+ the output buffer at this point. */
+ TEST_VERIFY (outbuf == out);
+ TEST_VERIFY (outleft == sizeof (out));
+
+ /* Second call shall emit spurious NUL character in unpatched glibc. */
+ TEST_VERIFY (iconv (cd, NULL, NULL, &outbuf, &outleft) != (size_t) -1);
+
+ /* No characters are expected to be produced. */
+ TEST_VERIFY (outbuf == out);
+ TEST_VERIFY (outleft == sizeof (out));
+
+ TEST_VERIFY_EXIT (iconv_close (cd) != -1);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
/* Conversion module for ISO-2022-JP-3.
Copyright (C) 1998-2021 Free Software Foundation, Inc.
+ Copyright (C) The GNU Toolchain Authors.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998,
and Bruno Haible <bruno@clisp.org>, 2002.
the output state to the initial state. This has to be done during the
flushing. */
#define EMIT_SHIFT_TO_INIT \
- if (data->__statep->__count != ASCII_set) \
+ if ((data->__statep->__count & ~7) != ASCII_set) \
{ \
if (FROM_DIRECTION) \
{ \
- if (__glibc_likely (outbuf + 4 <= outend)) \
+ uint32_t ch = data->__statep->__count >> 6; \
+ \
+ if (__glibc_unlikely (ch != 0)) \
{ \
- /* Write out the last character. */ \
- *((uint32_t *) outbuf) = data->__statep->__count >> 6; \
- outbuf += sizeof (uint32_t); \
- data->__statep->__count = ASCII_set; \
+ if (__glibc_likely (outbuf + 4 <= outend)) \
+ { \
+ /* Write out the last character. */ \
+ put32u (outbuf, ch); \
+ outbuf += 4; \
+ data->__statep->__count &= 7; \
+ data->__statep->__count |= ASCII_set; \
+ } \
+ else \
+ /* We don't have enough room in the output buffer. */ \
+ status = __GCONV_FULL_OUTPUT; \
} \
else \
- /* We don't have enough room in the output buffer. */ \
- status = __GCONV_FULL_OUTPUT; \
+ { \
+ data->__statep->__count &= 7; \
+ data->__statep->__count |= ASCII_set; \
+ } \
} \
else \
{ \
#define __RTLD_AUDIT 0x08000000
#define __RTLD_SECURE 0x04000000 /* Apply additional security checks. */
#define __RTLD_NOIFUNC 0x02000000 /* Suppress calling ifunc functions. */
+#define __RTLD_VDSO 0x01000000 /* Tell _dl_new_object the object is
+ system-loaded. */
#define __LM_ID_CALLER -2
# warning _FORTIFY_SOURCE requires compiling with optimization (-O)
# elif !__GNUC_PREREQ (4, 1)
# warning _FORTIFY_SOURCE requires GCC 4.1 or later
-# elif _FORTIFY_SOURCE > 2 && __glibc_clang_prereq (9, 0)
+# elif _FORTIFY_SOURCE > 2 && (__glibc_clang_prereq (9, 0) \
+ || __GNUC_PREREQ (12, 0))
+
# if _FORTIFY_SOURCE > 3
# warning _FORTIFY_SOURCE > 3 is treated like 3 on this platform
# endif
#include <hp-timing.h>
-/* Initialize the `__libc_enable_secure' flag. */
-extern void __libc_init_secure (void);
-
/* Discover the tick frequency of the machine if something goes wrong,
we return 0, an impossible hertz. */
extern int __profile_frequency (void);
unsigned int l_free_initfini:1; /* Nonzero if l_initfini can be
freed, ie. not allocated with
the dummy malloc in ld.so. */
+ unsigned int l_ld_readonly:1; /* Nonzero if dynamic section is readonly. */
/* NODELETE status of the map. Only valid for maps of type
lt_loaded. Lazy binding sets l_nodelete_active directly,
unsigned long long int l_serial;
};
+#include <dl-relocate-ld.h>
+
/* Information used by audit modules. For most link maps, this data
immediate follows the link map in memory. For the dynamic linker,
it is allocated separately. See link_map_audit_state in
#if __ELF_NATIVE_CLASS == 32
# define symbind symbind32
+# define LA_SYMBIND "la_symbind32"
#elif __ELF_NATIVE_CLASS == 64
# define symbind symbind64
+# define LA_SYMBIND "la_symbind64"
#else
# error "__ELF_NATIVE_CLASS must be defined"
#endif
void (*parent_handler) (void);
void (*child_handler) (void);
void *dso_handle;
+ uint64_t id;
};
/* Function to call to unregister fork handlers. */
atfork_run_parent
};
-/* Run the atfork handlers and lock/unlock the internal lock depending
- of the WHO argument:
-
- - atfork_run_prepare: run all the PREPARE_HANDLER in reverse order of
- insertion and locks the internal lock.
- - atfork_run_child: run all the CHILD_HANDLER and unlocks the internal
- lock.
- - atfork_run_parent: run all the PARENT_HANDLER and unlocks the internal
- lock.
-
- Perform locking only if DO_LOCKING. */
-extern void __run_fork_handlers (enum __run_fork_handler_type who,
- _Bool do_locking) attribute_hidden;
+/* Run the atfork prepare handlers in the reverse order of registration and
+ return the ID of the last registered handler. If DO_LOCKING is true, the
+ internal lock is held locked upon return. */
+extern uint64_t __run_prefork_handlers (_Bool do_locking) attribute_hidden;
+
+/* Given a handler type (parent or child), run all the atfork handlers in
+ the order of registration up to and including the handler with id equal
+ to LASTRUN. If DO_LOCKING is true, the internal lock is unlocked prior
+ to return. */
+extern void __run_postfork_handlers (enum __run_fork_handler_type who,
+ _Bool do_locking,
+ uint64_t lastrun) attribute_hidden;
/* C library side function to register new fork handlers. */
extern int __register_atfork (void (*__prepare) (void),
libc_hidden_proto (__sendmmsg)
#endif
-/* Receive a message as described by MESSAGE from socket FD.
- Returns the number of bytes read or -1 for errors. */
extern ssize_t __libc_recvmsg (int __fd, struct msghdr *__message,
int __flags);
extern ssize_t __recvmsg (int __fd, struct msghdr *__message,
int __flags) attribute_hidden;
#if __TIMESIZE == 64
+# define __libc_recvmsg64 __libc_recvmsg
+# define __recvmsg64 __recvmsg
# define __recvmmsg64 __recvmmsg
#else
+extern ssize_t __libc_recvmsg64 (int __fd, struct msghdr *__message,
+ int __flags);
+extern ssize_t __recvmsg64 (int __fd, struct msghdr *__message,
+ int __flags);
+/* Receive a message as described by MESSAGE from socket FD.
+ Returns the number of bytes read or -1 for errors. */
extern int __recvmmsg64 (int __fd, struct mmsghdr *vmessages,
unsigned int vlen, int flags,
struct __timespec64 *timeout);
extern int __get_nprocs_conf (void);
libc_hidden_proto (__get_nprocs_conf)
-/* Return number of available processors. */
+/* Return number of available processors (not all of them will be
+ available to the caller process). */
extern int __get_nprocs (void);
libc_hidden_proto (__get_nprocs)
+/* Return the number of available processors on which the process can
+ be scheduled. */
+extern int __get_nprocs_sched (void) attribute_hidden;
+
/* Return number of physical pages of memory in the system. */
extern long int __get_phys_pages (void);
libc_hidden_proto (__get_phys_pages)
#include <socket/sys/un.h>
+
+#ifndef _ISOMAC
+
+/* Set ADDR->sun_family to AF_UNIX and ADDR->sun_path to PATHNAME.
+ Return 0 on success or -1 on failure (due to overlong PATHNAME).
+ The caller should always use sizeof (struct sockaddr_un) as the
+ socket address length, disregarding the length of PATHNAME.
+ Only concrete (non-abstract) pathnames are supported. */
+int __sockaddr_un_set (struct sockaddr_un *addr, const char *pathname)
+ attribute_hidden;
+
+#endif /* _ISOMAC */
and some functions contained in the C library ignore various
environment variables that normally affect them. */
extern int __libc_enable_secure attribute_relro;
-extern int __libc_enable_secure_decided;
rtld_hidden_proto (__libc_enable_secure)
char *hdir, *buf, *tmp;
char myname[1024], *mydomain;
int t, usedefault = 0;
- struct stat64 stb;
+ struct __stat64_t64 stb;
hdir = __libc_secure_getenv("HOME");
if (hdir == NULL) {
break;
case PASSWD:
if (strcmp(*aname, "anonymous") &&
- __fstat64(fileno(cfile), &stb) >= 0 &&
+ __fstat64_time64(fileno(cfile), &stb) >= 0 &&
(stb.st_mode & 077) != 0) {
warnx(_("Error: .netrc file is readable by others."));
warnx(_("Remove 'password' line or make file unreadable by others."));
# define __gettextparse PLURAL_PARSE
#endif
+/* Later we provide those prototypes. Without these macros, bison may
+ generate its own prototypes with possible conflicts. */
+#define YYLEX_IS_DECLARED
+#define YYERROR_IS_DECLARED
+
%}
%parse-param {struct parse_args *arg}
%lex-param {struct parse_args *arg}
tst-closefrom \
tests-time64 := \
+ tst-fcntl-time64 \
+ tst-fts-time64 \
tst-futimens-time64 \
tst-futimes-time64\
- tst-fts-time64 \
+ tst-futimesat-time64 \
+ tst-lchmod-time64 \
tst-lutimes-time64 \
tst-stat-time64 \
- tst-futimesat-time64 \
tst-utime-time64 \
tst-utimensat-time64 \
tst-utimes-time64 \
- tst-fcntl-time64 \
# tests-time64
# Likewise for statx, but we do not need static linking here.
CFLAGS-test-stat.c += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE
CFLAGS-test-lfs.c += -D_LARGEFILE64_SOURCE
+CFLAGS-tst-lchmod.c += -D_FILE_OFFSET_BITS=64
test-stat2-ARGS = Makefile . $(objpfx)test-stat2
__poll_chk)
__warnattr ("poll called with fds buffer too small file nfds entries");
-__fortify_function __attr_access ((__write_only__, 1, 2)) int
+__fortify_function __fortified_attr_access (__write_only__, 1, 2) int
poll (struct pollfd *__fds, nfds_t __nfds, int __timeout)
{
- if (__glibc_objsize (__fds) != (__SIZE_TYPE__) -1)
- {
- if (! __builtin_constant_p (__nfds))
- return __poll_chk (__fds, __nfds, __timeout, __glibc_objsize (__fds));
- else if (__glibc_objsize (__fds) / sizeof (*__fds) < __nfds)
- return __poll_chk_warn (__fds, __nfds, __timeout,
- __glibc_objsize (__fds));
- }
-
- return __poll_alias (__fds, __nfds, __timeout);
+ return __glibc_fortify (poll, __nfds, sizeof (*__fds),
+ __glibc_objsize (__fds),
+ __fds, __nfds, __timeout);
}
__ppoll_chk)
__warnattr ("ppoll called with fds buffer too small file nfds entries");
-__fortify_function __attr_access ((__write_only__, 1, 2)) int
+__fortify_function __fortified_attr_access (__write_only__, 1, 2) int
ppoll (struct pollfd *__fds, nfds_t __nfds, const struct timespec *__timeout,
const __sigset_t *__ss)
{
- if (__glibc_objsize (__fds) != (__SIZE_TYPE__) -1)
- {
- if (! __builtin_constant_p (__nfds))
- return __ppoll_chk (__fds, __nfds, __timeout, __ss,
- __glibc_objsize (__fds));
- else if (__glibc_objsize (__fds) / sizeof (*__fds) < __nfds)
- return __ppoll_chk_warn (__fds, __nfds, __timeout, __ss,
- __glibc_objsize (__fds));
- }
-
- return __ppoll_alias (__fds, __nfds, __timeout, __ss);
+ return __glibc_fortify (ppoll, __nfds, sizeof (*__fds),
+ __glibc_objsize (__fds),
+ __fds, __nfds, __timeout, __ss);
}
#endif
# endif
#else /* __USE_TIME_BITS64 */
# ifdef __REDIRECT
-extern int __REDIRECT (fcntl, (int __fd, int __request, ...),
- __fcntl_time64) __THROW;
-extern int __REDIRECT (fcntl64, (int __fd, int __request, ...),
- __fcntl_time64) __THROW;
+extern int __REDIRECT_NTH (fcntl, (int __fd, int __request, ...),
+ __fcntl_time64);
+extern int __REDIRECT_NTH (fcntl64, (int __fd, int __request, ...),
+ __fcntl_time64);
# else
extern int __fcntl_time64 (int __fd, int __request, ...) __THROW;
# define fcntl64 __fcntl_time64
This function is a cancellation point and therefore not marked with
__THROW. */
extern int poll (struct pollfd *__fds, nfds_t __nfds, int __timeout)
- __attr_access ((__write_only__, 1, 2));
+ __fortified_attr_access (__write_only__, 1, 2);
#ifdef __USE_GNU
/* Like poll, but before waiting the threads signal mask is replaced
extern int ppoll (struct pollfd *__fds, nfds_t __nfds,
const struct timespec *__timeout,
const __sigset_t *__ss)
- __attr_access ((__write_only__, 1, 2));
+ __fortified_attr_access (__write_only__, 1, 2);
# ifdef __USE_TIME_BITS64
# ifdef __REDIRECT
const struct timespec *__timeout,
const __sigset_t *__ss),
__ppoll64)
- __attr_access ((__write_only__, 1, 2));
+ __fortified_attr_access (__write_only__, 1, 2);
# else
# define ppoll __ppoll64
# endif
#include <support/check.h>
#include <support/descriptors.h>
#include <support/xunistd.h>
+#include <support/support.h>
#include <array_length.h>
#define NFDS 100
-static int
-open_multiple_temp_files (void)
-{
- /* Check if the temporary file descriptor has no no gaps. */
- int lowfd = xopen ("/dev/null", O_RDONLY, 0600);
- for (int i = 1; i <= NFDS; i++)
- TEST_COMPARE (xopen ("/dev/null", O_RDONLY, 0600), lowfd + i);
- return lowfd;
-}
-
static int
closefrom_test (void)
{
struct support_descriptors *descrs = support_descriptors_list ();
- int lowfd = open_multiple_temp_files ();
+ int lowfd = support_open_dev_null_range (NFDS, O_RDONLY, 0600);
- const int maximum_fd = lowfd + NFDS;
+ const int maximum_fd = lowfd + NFDS - 1;
const int half_fd = lowfd + NFDS / 2;
- const int gap = maximum_fd / 4;
+ const int gap = lowfd + NFDS / 4;
/* Close half of the descriptors and check result. */
closefrom (half_fd);
TEST_COMPARE (fcntl (i, F_GETFL), -1);
TEST_COMPARE (errno, EBADF);
}
- for (int i = 0; i < half_fd; i++)
+ for (int i = lowfd; i < half_fd; i++)
TEST_VERIFY (fcntl (i, F_GETFL) > -1);
/* Create some gaps, close up to a threshold, and check result. */
TEST_COMPARE (fcntl (i, F_GETFL), -1);
TEST_COMPARE (errno, EBADF);
}
- for (int i = 0; i < gap; i++)
+ for (int i = lowfd; i < gap; i++)
TEST_VERIFY (fcntl (i, F_GETFL) > -1);
/* Close the remmaining but the last one. */
--- /dev/null
+#define CHECK_TIME64
+#include "tst-lchmod.c"
return full_path;
}
+static void
+update_file_time_to_y2038 (const char *fname, int flags)
+{
+#ifdef CHECK_TIME64
+ /* Y2038 threshold plus 1 second. */
+ const struct timespec ts[] = { { 0x80000001LL, 0}, { 0x80000001LL } };
+ TEST_VERIFY_EXIT (utimensat (AT_FDCWD, fname, ts, flags) == 0);
+#endif
+}
+
static void
test_1 (bool do_relative_path, int (*chmod_func) (int fd, const char *, mode_t, int))
{
char *tempdir = support_create_temp_directory ("tst-lchmod-");
+#ifdef CHECK_TIME64
+ if (!support_path_support_time64 (tempdir))
+ {
+ puts ("info: test skipped, filesystem does not support 64 bit time_t");
+ return;
+ }
+#endif
char *path_dangling = xasprintf ("%s/dangling", tempdir);
char *path_file = xasprintf ("%s/file", tempdir);
xsymlink ("loop", path_loop);
xsymlink ("target-does-not-exist", path_dangling);
+ update_file_time_to_y2038 (path_file, 0);
+ update_file_time_to_y2038 (path_to_file, AT_SYMLINK_NOFOLLOW);
+
/* Check that the modes do not collide with what we will use in the
test. */
- struct stat64 st;
+ struct stat st;
xstat (path_file, &st);
TEST_VERIFY ((st.st_mode & 0777) != 1);
xlstat (path_to_file, &st);
TEST_VERIFY_EXIT (fd >= 0);
support_write_file_string (path, "abc");
+ /* This should help to prevent delayed allocation, which may result
+ in a spurious stx_blocks/st_blocks difference. */
+ fsync (fd);
+
bool check_ns = support_stat_nanoseconds (path);
if (!check_ns)
printf ("warning: timestamp with nanoseconds not supported\n");
__wur __warnattr ("fgets called with bigger size than length "
"of destination buffer");
-__fortify_function __wur __attr_access ((__write_only__, 1, 2)) char *
+__fortify_function __wur __fortified_attr_access (__write_only__, 1, 2) char *
fgets (char *__restrict __s, int __n, FILE *__restrict __stream)
{
- if (__glibc_objsize (__s) != (size_t) -1)
- {
- if (!__builtin_constant_p (__n) || __n <= 0)
- return __fgets_chk (__s, __glibc_objsize (__s), __n, __stream);
-
- if ((size_t) __n > __glibc_objsize (__s))
- return __fgets_chk_warn (__s, __glibc_objsize (__s), __n, __stream);
- }
- return __fgets_alias (__s, __n, __stream);
+ size_t sz = __glibc_objsize (__s);
+ if (__glibc_safe_or_unknown_len (__n, sizeof (char), sz))
+ return __fgets_alias (__s, __n, __stream);
+ if (__glibc_unsafe_len (__n, sizeof (char), sz))
+ return __fgets_chk_warn (__s, sz, __n, __stream);
+ return __fgets_chk (__s, sz, __n, __stream);
}
extern size_t __fread_chk (void *__restrict __ptr, size_t __ptrlen,
fread (void *__restrict __ptr, size_t __size, size_t __n,
FILE *__restrict __stream)
{
- if (__glibc_objsize0 (__ptr) != (size_t) -1)
- {
- if (!__builtin_constant_p (__size)
- || !__builtin_constant_p (__n)
- || (__size | __n) >= (((size_t) 1) << (8 * sizeof (size_t) / 2)))
- return __fread_chk (__ptr, __glibc_objsize0 (__ptr), __size, __n,
- __stream);
-
- if (__size * __n > __glibc_objsize0 (__ptr))
- return __fread_chk_warn (__ptr, __glibc_objsize0 (__ptr), __size, __n,
- __stream);
- }
- return __fread_alias (__ptr, __size, __n, __stream);
+ size_t sz = __glibc_objsize0 (__ptr);
+ if (__glibc_safe_or_unknown_len (__n, __size, sz))
+ return __fread_alias (__ptr, __size, __n, __stream);
+ if (__glibc_unsafe_len (__n, __size, sz))
+ return __fread_chk_warn (__ptr, sz, __size, __n, __stream);
+ return __fread_chk (__ptr, sz, __size, __n, __stream);
}
#ifdef __USE_GNU
__wur __warnattr ("fgets_unlocked called with bigger size than length "
"of destination buffer");
-__fortify_function __wur __attr_access ((__write_only__, 1, 2)) char *
+__fortify_function __wur __fortified_attr_access (__write_only__, 1, 2) char *
fgets_unlocked (char *__restrict __s, int __n, FILE *__restrict __stream)
{
- if (__glibc_objsize (__s) != (size_t) -1)
- {
- if (!__builtin_constant_p (__n) || __n <= 0)
- return __fgets_unlocked_chk (__s, __glibc_objsize (__s), __n,
- __stream);
-
- if ((size_t) __n > __glibc_objsize (__s))
- return __fgets_unlocked_chk_warn (__s, __glibc_objsize (__s), __n,
- __stream);
- }
- return __fgets_unlocked_alias (__s, __n, __stream);
+ size_t sz = __glibc_objsize (__s);
+ if (__glibc_safe_or_unknown_len (__n, sizeof (char), sz))
+ return __fgets_unlocked_alias (__s, __n, __stream);
+ if (__glibc_unsafe_len (__n, sizeof (char), sz))
+ return __fgets_unlocked_chk_warn (__s, sz, __n, __stream);
+ return __fgets_unlocked_chk (__s, sz, __n, __stream);
}
#endif
fread_unlocked (void *__restrict __ptr, size_t __size, size_t __n,
FILE *__restrict __stream)
{
- if (__glibc_objsize0 (__ptr) != (size_t) -1)
+ size_t sz = __glibc_objsize0 (__ptr);
+ if (__glibc_safe_or_unknown_len (__n, __size, sz))
{
- if (!__builtin_constant_p (__size)
- || !__builtin_constant_p (__n)
- || (__size | __n) >= (((size_t) 1) << (8 * sizeof (size_t) / 2)))
- return __fread_unlocked_chk (__ptr, __glibc_objsize0 (__ptr), __size,
- __n, __stream);
-
- if (__size * __n > __glibc_objsize0 (__ptr))
- return __fread_unlocked_chk_warn (__ptr, __glibc_objsize0 (__ptr),
- __size, __n, __stream);
- }
-
# ifdef __USE_EXTERN_INLINES
- if (__builtin_constant_p (__size)
- && __builtin_constant_p (__n)
- && (__size | __n) < (((size_t) 1) << (8 * sizeof (size_t) / 2))
- && __size * __n <= 8)
- {
- size_t __cnt = __size * __n;
- char *__cptr = (char *) __ptr;
- if (__cnt == 0)
- return 0;
-
- for (; __cnt > 0; --__cnt)
+ if (__builtin_constant_p (__size)
+ && __builtin_constant_p (__n)
+ && (__size | __n) < (((size_t) 1) << (8 * sizeof (size_t) / 2))
+ && __size * __n <= 8)
{
- int __c = getc_unlocked (__stream);
- if (__c == EOF)
- break;
- *__cptr++ = __c;
+ size_t __cnt = __size * __n;
+ char *__cptr = (char *) __ptr;
+ if (__cnt == 0)
+ return 0;
+
+ for (; __cnt > 0; --__cnt)
+ {
+ int __c = getc_unlocked (__stream);
+ if (__c == EOF)
+ break;
+ *__cptr++ = __c;
+ }
+ return (__cptr - (char *) __ptr) / __size;
}
- return (__cptr - (char *) __ptr) / __size;
- }
# endif
- return __fread_unlocked_alias (__ptr, __size, __n, __stream);
+ return __fread_unlocked_alias (__ptr, __size, __n, __stream);
+ }
+ if (__glibc_unsafe_len (__n, __size, sz))
+ return __fread_unlocked_chk_warn (__ptr, sz, __size, __n, __stream);
+ return __fread_unlocked_chk (__ptr, sz, __size, __n, __stream);
+
}
#endif
This function is a possible cancellation point and therefore not
marked with __THROW. */
extern char *fgets (char *__restrict __s, int __n, FILE *__restrict __stream)
- __wur __attr_access ((__write_only__, 1, 2));
+ __wur __fortified_attr_access (__write_only__, 1, 2);
#if __GLIBC_USE (DEPRECATED_GETS)
/* Get a newline-terminated string from stdin, removing the newline.
therefore not marked with __THROW. */
extern char *fgets_unlocked (char *__restrict __s, int __n,
FILE *__restrict __stream) __wur
- __attr_access ((__write_only__, 1, 2));
+ __fortified_attr_access (__write_only__, 1, 2);
#endif
{
char fullname[fnamelen + 2 * strlen (d->d_name) + 7];
- if (d_type == DT_UNKNOWN)
+ if (d_type == DT_UNKNOWN || d_type == DT_LNK)
{
strcpy (stpcpy (stpcpy (fullname, fname), "/"),
d->d_name);
narenas_limit = mp_.arena_max;
else if (narenas > mp_.arena_test)
{
- int n = __get_nprocs ();
+ int n = __get_nprocs_sched ();
if (n >= 1)
narenas_limit = NARENAS_FROM_NCORES (n);
/* Malloc debug DSO.
Copyright (C) 2021 Free Software Foundation, Inc.
+ Copyright The GNU Toolchain Authors.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
size_t
malloc_usable_size (void *mem)
{
+ if (mem == NULL)
+ return 0;
+
if (__is_malloc_debug_enabled (MALLOC_MCHECK_HOOK))
return mcheck_usable_size (mem);
if (__is_malloc_debug_enabled (MALLOC_CHECK_HOOK))
return malloc_check_get_size (mem);
- if (mem != NULL)
- {
- mchunkptr p = mem2chunk (mem);
- if (DUMPED_MAIN_ARENA_CHUNK (p))
- return chunksize (p) - SIZE_SZ;
- }
+ mchunkptr p = mem2chunk (mem);
+ if (DUMPED_MAIN_ARENA_CHUNK (p))
+ return chunksize (p) - SIZE_SZ;
return musable (mem);
}
/* Malloc implementation for multiple threads without lock contention.
Copyright (C) 1996-2021 Free Software Foundation, Inc.
+ Copyright The GNU Toolchain Authors.
This file is part of the GNU C Library.
Contributed by Wolfram Gloger <wg@malloc.de>
and Doug Lea <dl@cs.oswego.edu>, 2001.
# define __assert_fail(assertion, file, line, function) \
__malloc_assert(assertion, file, line, function)
-extern const char *__progname;
-
-static void
+_Noreturn static void
__malloc_assert (const char *assertion, const char *file, unsigned int line,
const char *function)
{
- (void) __fxprintf (NULL, "%s%s%s:%u: %s%sAssertion `%s' failed.\n",
- __progname, __progname[0] ? ": " : "",
- file, line,
- function ? function : "", function ? ": " : "",
- assertion);
- fflush (stderr);
- abort ();
+ __libc_message (do_abort, "\
+Fatal glibc error: malloc assertion failure in %s: %s\n",
+ function, assertion);
+ __builtin_unreachable ();
}
#endif
#endif
static size_t
musable (void *mem)
{
- mchunkptr p;
- if (mem != 0)
- {
- size_t result = 0;
-
- p = mem2chunk (mem);
+ mchunkptr p = mem2chunk (mem);
- if (chunk_is_mmapped (p))
- result = chunksize (p) - CHUNK_HDR_SZ;
- else if (inuse (p))
- result = memsize (p);
+ if (chunk_is_mmapped (p))
+ return chunksize (p) - CHUNK_HDR_SZ;
+ else if (inuse (p))
+ return memsize (p);
- return result;
- }
return 0;
}
size_t
__malloc_usable_size (void *m)
{
- size_t result;
-
- result = musable (m);
- return result;
+ if (m == NULL)
+ return 0;
+ return musable (m);
}
#endif
MALLOC_CHECK_ exported to a positive value.
Copyright (C) 2012-2021 Free Software Foundation, Inc.
+ Copyright The GNU Toolchain Authors.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
#include <malloc.h>
#include <string.h>
#include <stdio.h>
+#include <support/support.h>
+#include <support/check.h>
static int
do_test (void)
{
size_t usable_size;
void *p = malloc (7);
- if (!p)
- {
- printf ("memory allocation failed\n");
- return 1;
- }
+ TEST_VERIFY_EXIT (p != NULL);
usable_size = malloc_usable_size (p);
- if (usable_size != 7)
- {
- printf ("malloc_usable_size: expected 7 but got %zu\n", usable_size);
- return 1;
- }
-
+ TEST_COMPARE (usable_size, 7);
memset (p, 0, usable_size);
free (p);
+
+ TEST_COMPARE (malloc_usable_size (NULL), 0);
+
return 0;
}
-#define TEST_FUNCTION do_test ()
-#include "../test-skeleton.c"
+#include "support/test-driver.c"
(void)__chdir("/");
if (!noclose) {
- struct stat64 st;
+ struct __stat64_t64 st;
if ((fd = __open_nocancel(_PATH_DEVNULL, O_RDWR, 0)) != -1
- && (__builtin_expect (__fstat64 (fd, &st), 0)
- == 0)) {
+ && __glibc_likely (__fstat64_time64 (fd, &st) == 0)) {
if (__builtin_expect (S_ISCHR (st.st_mode), 1) != 0
#if defined DEV_NULL_MAJOR && defined DEV_NULL_MINOR
&& (st.st_rdev
link_warning (get_nprocs, "warning: get_nprocs will always return 1")
+int
+__get_nprocs_sched (void)
+{
+ return 1;
+}
+
long int
__get_phys_pages (void)
{
{
char **sp, *cp;
FILE *fp;
- struct stat64 statb;
+ struct __stat64_t64 statb;
size_t flen;
free(shells);
strings = NULL;
if ((fp = fopen(_PATH_SHELLS, "rce")) == NULL)
goto init_okshells_noclose;
- if (__fstat64(fileno(fp), &statb) == -1) {
+ if (__fstat64_time64(fileno(fp), &statb) == -1) {
init_okshells:
(void)fclose(fp);
init_okshells_noclose:
#define __bos0(ptr) __builtin_object_size (ptr, 0)
/* Use __builtin_dynamic_object_size at _FORTIFY_SOURCE=3 when available. */
-#if __USE_FORTIFY_LEVEL == 3 && __glibc_clang_prereq (9, 0)
+#if __USE_FORTIFY_LEVEL == 3 && (__glibc_clang_prereq (9, 0) \
+ || __GNUC_PREREQ (12, 0))
# define __glibc_objsize0(__o) __builtin_dynamic_object_size (__o, 0)
# define __glibc_objsize(__o) __builtin_dynamic_object_size (__o, 1)
#else
# define __glibc_objsize(__o) __bos (__o)
#endif
+/* Compile time conditions to choose between the regular, _chk and _chk_warn
+ variants. These conditions should get evaluated to constant and optimized
+ away. */
+
+#define __glibc_safe_len_cond(__l, __s, __osz) ((__l) <= (__osz) / (__s))
+#define __glibc_unsigned_or_positive(__l) \
+ ((__typeof (__l)) 0 < (__typeof (__l)) -1 \
+ || (__builtin_constant_p (__l) && (__l) > 0))
+
+/* Length is known to be safe at compile time if the __L * __S <= __OBJSZ
+ condition folds to a true constant, or if __OBJSZ is unknown (-1). */
+#define __glibc_safe_or_unknown_len(__l, __s, __osz) \
+ ((__builtin_constant_p (__osz) && (__osz) == (__SIZE_TYPE__) -1) \
+ || (__glibc_unsigned_or_positive (__l) \
+ && __builtin_constant_p (__glibc_safe_len_cond ((__SIZE_TYPE__) (__l), \
+ (__s), (__osz))) \
+ && __glibc_safe_len_cond ((__SIZE_TYPE__) (__l), (__s), (__osz))))
+
+/* Conversely, we know at compile time that the length is unsafe if the
+ __L * __S <= __OBJSZ condition can be folded to a constant and if it is
+ false. */
+#define __glibc_unsafe_len(__l, __s, __osz) \
+ (__glibc_unsigned_or_positive (__l) \
+ && __builtin_constant_p (__glibc_safe_len_cond ((__SIZE_TYPE__) (__l), \
+ __s, __osz)) \
+ && !__glibc_safe_len_cond ((__SIZE_TYPE__) (__l), __s, __osz))
+
+/* Fortify function f. __f_alias, __f_chk and __f_chk_warn must be
+ declared. */
+
+#define __glibc_fortify(f, __l, __s, __osz, ...) \
+ (__glibc_safe_or_unknown_len (__l, __s, __osz) \
+ ? __ ## f ## _alias (__VA_ARGS__) \
+ : (__glibc_unsafe_len (__l, __s, __osz) \
+ ? __ ## f ## _chk_warn (__VA_ARGS__, __osz) \
+ : __ ## f ## _chk (__VA_ARGS__, __osz))) \
+
+/* Fortify function f, where object size argument passed to f is the number of
+ elements and not total size. */
+
+#define __glibc_fortify_n(f, __l, __s, __osz, ...) \
+ (__glibc_safe_or_unknown_len (__l, __s, __osz) \
+ ? __ ## f ## _alias (__VA_ARGS__) \
+ : (__glibc_unsafe_len (__l, __s, __osz) \
+ ? __ ## f ## _chk_warn (__VA_ARGS__, (__osz) / (__s)) \
+ : __ ## f ## _chk (__VA_ARGS__, (__osz) / (__s)))) \
+
#if __GNUC_PREREQ (4,3)
# define __warnattr(msg) __attribute__((__warning__ (msg)))
# define __errordecl(name, msg) \
size-index is not provided:
access (access-mode, <ref-index> [, <size-index>]) */
# define __attr_access(x) __attribute__ ((__access__ x))
+/* For _FORTIFY_SOURCE == 3 we use __builtin_dynamic_object_size, which may
+ use the access attribute to get object sizes from function definition
+ arguments, so we can't use them on functions we fortify. Drop the object
+ size hints for such functions. */
+# if __USE_FORTIFY_LEVEL == 3
+# define __fortified_attr_access(a, o, s) __attribute__ ((__access__ (a, o)))
+# else
+# define __fortified_attr_access(a, o, s) __attr_access ((a, o, s))
+# endif
# if __GNUC_PREREQ (11, 0)
# define __attr_access_none(argno) __attribute__ ((__access__ (__none__, argno)))
# else
# define __attr_access_none(argno)
# endif
#else
+# define __fortified_attr_access(a, o, s)
# define __attr_access(x)
# define __attr_access_none(argno)
#endif
extern int ioctl (int __fd, unsigned long int __request, ...) __THROW;
#else
# ifdef __REDIRECT
-extern int __REDIRECT (ioctl, (int __fd, unsigned long int __request, ...),
- __ioctl_time64) __THROW;
+extern int __REDIRECT_NTH (ioctl, (int __fd, unsigned long int __request, ...),
+ __ioctl_time64);
# else
extern int __ioctl_time64 (int __fd, unsigned long int __request, ...) __THROW;
# define ioctl __ioctl_time64
#include <futex-internal.h>
#include <kernel-features.h>
#include <nptl-stack.h>
+#include <libc-lock.h>
/* Default alignment of stack. */
#ifndef STACK_ALIGN
/* Cancellation handling is back to the default. */
result->cancelhandling = 0;
- result->cancelstate = PTHREAD_CANCEL_ENABLE;
- result->canceltype = PTHREAD_CANCEL_DEFERRED;
result->cleanup = NULL;
result->setup_failed = 0;
/* No pending event. */
result->nextevent = NULL;
+ result->exiting = false;
+ __libc_lock_init (result->exit_lock);
result->tls_state = (struct tls_internal_t) { 0 };
/* Clear the DTV. */
memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
/* Re-initialize the TLS. */
- _dl_allocate_tls_init (TLS_TPADJ (result));
+ _dl_allocate_tls_init (TLS_TPADJ (result), true);
return result;
}
__pthread_enable_asynccancel (void)
{
struct pthread *self = THREAD_SELF;
+ int oldval = atomic_load_relaxed (&self->cancelhandling);
- int oldval = THREAD_GETMEM (self, canceltype);
- THREAD_SETMEM (self, canceltype, PTHREAD_CANCEL_ASYNCHRONOUS);
+ while (1)
+ {
+ int newval = oldval | CANCELTYPE_BITMASK;
- int ch = THREAD_GETMEM (self, cancelhandling);
+ if (newval == oldval)
+ break;
- if (self->cancelstate == PTHREAD_CANCEL_ENABLE
- && (ch & CANCELED_BITMASK)
- && !(ch & EXITING_BITMASK)
- && !(ch & TERMINATED_BITMASK))
- {
- THREAD_SETMEM (self, result, PTHREAD_CANCELED);
- __do_cancel ();
+ if (atomic_compare_exchange_weak_acquire (&self->cancelhandling,
+ &oldval, newval))
+ {
+ if (cancel_enabled_and_canceled_and_async (newval))
+ {
+ self->result = PTHREAD_CANCELED;
+ __do_cancel ();
+ }
+
+ break;
+ }
}
return oldval;
{
/* If asynchronous cancellation was enabled before we do not have
anything to do. */
- if (oldtype == PTHREAD_CANCEL_ASYNCHRONOUS)
+ if (oldtype & CANCELTYPE_BITMASK)
return;
struct pthread *self = THREAD_SELF;
- self->canceltype = PTHREAD_CANCEL_DEFERRED;
+ int newval;
+ int oldval = atomic_load_relaxed (&self->cancelhandling);
+ do
+ {
+ newval = oldval & ~CANCELTYPE_BITMASK;
+ }
+ while (!atomic_compare_exchange_weak_acquire (&self->cancelhandling,
+ &oldval, newval));
+
+ /* We cannot return when we are being canceled. Upon return the
+ thread might be doing things which would have to be undone. The
+ following loop should loop until the cancellation signal is
+ delivered. */
+ while (__glibc_unlikely ((newval & (CANCELING_BITMASK | CANCELED_BITMASK))
+ == CANCELING_BITMASK))
+ {
+ futex_wait_simple ((unsigned int *) &self->cancelhandling, newval,
+ FUTEX_PRIVATE);
+ newval = atomic_load_relaxed (&self->cancelhandling);
+ }
}
libc_hidden_def (__pthread_disable_asynccancel)
ibuf->priv.data.prev = THREAD_GETMEM (self, cleanup_jmp_buf);
ibuf->priv.data.cleanup = THREAD_GETMEM (self, cleanup);
- /* Disable asynchronous cancellation for now. */
- ibuf->priv.data.canceltype = THREAD_GETMEM (self, canceltype);
- THREAD_SETMEM (self, canceltype, PTHREAD_CANCEL_DEFERRED);
+ int cancelhandling = atomic_load_relaxed (&self->cancelhandling);
+ if (__glibc_unlikely (cancelhandling & CANCELTYPE_BITMASK))
+ {
+ int newval;
+ do
+ {
+ newval = cancelhandling & ~CANCELTYPE_BITMASK;
+ }
+ while (!atomic_compare_exchange_weak_acquire (&self->cancelhandling,
+ &cancelhandling,
+ newval));
+ }
+
+ ibuf->priv.data.canceltype = (cancelhandling & CANCELTYPE_BITMASK
+ ? PTHREAD_CANCEL_ASYNCHRONOUS
+ : PTHREAD_CANCEL_DEFERRED);
/* Store the new cleanup handler info. */
THREAD_SETMEM (self, cleanup_jmp_buf, (struct pthread_unwind_buf *) buf);
THREAD_SETMEM (self, cleanup_jmp_buf, ibuf->priv.data.prev);
- THREAD_SETMEM (self, canceltype, ibuf->priv.data.canceltype);
- if (ibuf->priv.data.canceltype == PTHREAD_CANCEL_ASYNCHRONOUS)
- __pthread_testcancel ();
+ if (ibuf->priv.data.canceltype == PTHREAD_CANCEL_DEFERRED)
+ return;
+
+ int cancelhandling = atomic_load_relaxed (&self->cancelhandling);
+ if ((cancelhandling & CANCELTYPE_BITMASK) == 0)
+ {
+ int newval;
+ do
+ {
+ newval = cancelhandling | CANCELTYPE_BITMASK;
+ }
+ while (!atomic_compare_exchange_weak_acquire (&self->cancelhandling,
+ &cancelhandling, newval));
+
+ if (cancel_enabled_and_canceled (cancelhandling))
+ {
+ self->result = PTHREAD_CANCELED;
+ __do_cancel ();
+ }
+ }
}
versioned_symbol (libc, ___pthread_unregister_cancel_restore,
__pthread_unregister_cancel_restore, GLIBC_2_34);
/* Flags determining processing of cancellation. */
int cancelhandling;
+ /* Bit set if cancellation is disabled. */
+#define CANCELSTATE_BIT 0
+#define CANCELSTATE_BITMASK (1 << CANCELSTATE_BIT)
+ /* Bit set if asynchronous cancellation mode is selected. */
+#define CANCELTYPE_BIT 1
+#define CANCELTYPE_BITMASK (1 << CANCELTYPE_BIT)
+ /* Bit set if canceling has been initiated. */
+#define CANCELING_BIT 2
+#define CANCELING_BITMASK (1 << CANCELING_BIT)
/* Bit set if canceled. */
#define CANCELED_BIT 3
-#define CANCELED_BITMASK (0x01 << CANCELED_BIT)
+#define CANCELED_BITMASK (1 << CANCELED_BIT)
/* Bit set if thread is exiting. */
#define EXITING_BIT 4
-#define EXITING_BITMASK (0x01 << EXITING_BIT)
+#define EXITING_BITMASK (1 << EXITING_BIT)
/* Bit set if thread terminated and TCB is freed. */
#define TERMINATED_BIT 5
-#define TERMINATED_BITMASK (0x01 << TERMINATED_BIT)
+#define TERMINATED_BITMASK (1 << TERMINATED_BIT)
/* Bit set if thread is supposed to change XID. */
#define SETXID_BIT 6
-#define SETXID_BITMASK (0x01 << SETXID_BIT)
+#define SETXID_BITMASK (1 << SETXID_BIT)
/* Flags. Including those copied from the thread attribute. */
int flags;
/* Indicates whether is a C11 thread created by thrd_creat. */
bool c11;
- /* Thread cancel state (PTHREAD_CANCEL_ENABLE or
- PTHREAD_CANCEL_DISABLE). */
- unsigned char cancelstate;
-
- /* Thread cancel type (PTHREAD_CANCEL_DEFERRED or
- PTHREAD_CANCEL_ASYNCHRONOUS). */
- unsigned char canceltype;
+ /* Used in __pthread_kill_internal to detect a thread that has
+ exited or is about to exit. exit_lock must only be acquired
+ after blocking signals. */
+ bool exiting;
+ int exit_lock; /* A low-level lock (for use with __libc_lock_init etc). */
/* Used on strsignal. */
struct tls_internal_t tls_state;
(sizeof (struct pthread) - offsetof (struct pthread, end_padding))
} __attribute ((aligned (TCB_ALIGNMENT)));
+static inline bool
+cancel_enabled_and_canceled (int value)
+{
+ return (value & (CANCELSTATE_BITMASK | CANCELED_BITMASK | EXITING_BITMASK
+ | TERMINATED_BITMASK))
+ == CANCELED_BITMASK;
+}
+
+static inline bool
+cancel_enabled_and_canceled_and_async (int value)
+{
+ return ((value) & (CANCELSTATE_BITMASK | CANCELTYPE_BITMASK | CANCELED_BITMASK
+ | EXITING_BITMASK | TERMINATED_BITMASK))
+ == (CANCELTYPE_BITMASK | CANCELED_BITMASK);
+}
+
/* This yields the pointer that TLS support code calls the thread pointer. */
#if TLS_TCB_AT_TP
# define TLS_TPADJ(pd) (pd)
buffer->__prev = THREAD_GETMEM (self, cleanup);
+ int cancelhandling = atomic_load_relaxed (&self->cancelhandling);
+
/* Disable asynchronous cancellation for now. */
- buffer->__canceltype = THREAD_GETMEM (self, canceltype);
- THREAD_SETMEM (self, canceltype, PTHREAD_CANCEL_DEFERRED);
+ if (__glibc_unlikely (cancelhandling & CANCELTYPE_BITMASK))
+ {
+ int newval;
+ do
+ {
+ newval = cancelhandling & ~CANCELTYPE_BITMASK;
+ }
+ while (!atomic_compare_exchange_weak_acquire (&self->cancelhandling,
+ &cancelhandling,
+ newval));
+ }
+
+ buffer->__canceltype = (cancelhandling & CANCELTYPE_BITMASK
+ ? PTHREAD_CANCEL_ASYNCHRONOUS
+ : PTHREAD_CANCEL_DEFERRED);
THREAD_SETMEM (self, cleanup, buffer);
}
THREAD_SETMEM (self, cleanup, buffer->__prev);
- THREAD_SETMEM (self, canceltype, buffer->__canceltype);
- if (buffer->__canceltype == PTHREAD_CANCEL_ASYNCHRONOUS)
- __pthread_testcancel ();
+ int cancelhandling = atomic_load_relaxed (&self->cancelhandling);
+ if (buffer->__canceltype != PTHREAD_CANCEL_DEFERRED
+ && (cancelhandling & CANCELTYPE_BITMASK) == 0)
+ {
+ int newval;
+ do
+ {
+ newval = cancelhandling | CANCELTYPE_BITMASK;
+ }
+ while (!atomic_compare_exchange_weak_acquire (&self->cancelhandling,
+ &cancelhandling, newval));
+
+ if (cancel_enabled_and_canceled (cancelhandling))
+ {
+ self->result = PTHREAD_CANCELED;
+ __do_cancel ();
+ }
+ }
}
libc_hidden_def (__libc_cleanup_pop_restore)
struct pthread *self = THREAD_SELF;
- int ch = atomic_load_relaxed (&self->cancelhandling);
- /* Cancelation not enabled, not cancelled, or already exitting. */
- if (self->cancelstate == PTHREAD_CANCEL_DISABLE
- || (ch & CANCELED_BITMASK) == 0
- || (ch & EXITING_BITMASK) != 0)
- return;
-
- /* Set the return value. */
- THREAD_SETMEM (self, result, PTHREAD_CANCELED);
- /* Make sure asynchronous cancellation is still enabled. */
- if (self->canceltype == PTHREAD_CANCEL_ASYNCHRONOUS)
- __do_cancel ();
+ int oldval = atomic_load_relaxed (&self->cancelhandling);
+ while (1)
+ {
+ /* We are canceled now. When canceled by another thread this flag
+ is already set but if the signal is directly sent (internally or
+ from another process) it has to be done here. */
+ int newval = oldval | CANCELING_BITMASK | CANCELED_BITMASK;
+
+ if (oldval == newval || (oldval & EXITING_BITMASK) != 0)
+ /* Already canceled or exiting. */
+ break;
+
+ if (atomic_compare_exchange_weak_acquire (&self->cancelhandling,
+ &oldval, newval))
+ {
+ self->result = PTHREAD_CANCELED;
+
+ /* Make sure asynchronous cancellation is still enabled. */
+ if ((oldval & CANCELTYPE_BITMASK) != 0)
+ /* Run the registered destructors and terminate the thread. */
+ __do_cancel ();
+ }
+ }
}
int
{
volatile struct pthread *pd = (volatile struct pthread *) th;
- /* Make sure the descriptor is valid. */
- if (INVALID_TD_P (pd))
- /* Not a valid thread handle. */
- return ESRCH;
+ if (pd->tid == 0)
+ /* The thread has already exited on the kernel side. Its outcome
+ (regular exit, other cancelation) has already been
+ determined. */
+ return 0;
static int init_sigcancel = 0;
if (atomic_load_relaxed (&init_sigcancel) == 0)
}
#endif
- int oldch = atomic_fetch_or_acquire (&pd->cancelhandling, CANCELED_BITMASK);
- if ((oldch & CANCELED_BITMASK) != 0)
- return 0;
-
- if (pd == THREAD_SELF)
+ /* Some syscalls are never restarted after being interrupted by a signal
+ handler, regardless of the use of SA_RESTART (they always fail with
+ EINTR). So pthread_cancel cannot send SIGCANCEL unless the cancellation
+ is enabled and set as asynchronous (in this case the cancellation will
+ be acted upon in the cancellation handler instead of by the syscall
+ wrapper). Otherwise the target thread is set as 'cancelling'
+ (CANCELING_BITMASK) by atomically setting 'cancelhandling' and the
+ cancellation will be acted upon at the next cancellation entrypoint in
+ the target thread.
+
+ This also requires atomically checking whether cancellation is enabled
+ and asynchronous, so both cancellation state and type are tracked in
+ 'cancelhandling'. */
+
+ int result = 0;
+ int oldval = atomic_load_relaxed (&pd->cancelhandling);
+ int newval;
+ do
{
- /* A single-threaded process should be able to kill itself, since there
- is nothing in the POSIX specification that says that it cannot. So
- we set multiple_threads to true so that cancellation points get
- executed. */
- THREAD_SETMEM (THREAD_SELF, header.multiple_threads, 1);
+ again:
+ newval = oldval | CANCELING_BITMASK | CANCELED_BITMASK;
+ if (oldval == newval)
+ break;
+
+ /* If the cancellation is handled asynchronously just send a
+ signal. We avoid this if possible since it's more
+ expensive. */
+ if (cancel_enabled_and_canceled_and_async (newval))
+ {
+ /* Mark the cancellation as "in progress". */
+ int newval2 = oldval | CANCELING_BITMASK;
+ if (!atomic_compare_exchange_weak_acquire (&pd->cancelhandling,
+ &oldval, newval2))
+ goto again;
+
+ if (pd == THREAD_SELF)
+ /* This is not merely an optimization: An application may
+ call pthread_cancel (pthread_self ()) without calling
+ pthread_create, so the signal handler may not have been
+ set up for a self-cancel. */
+ {
+ pd->result = PTHREAD_CANCELED;
+ if ((newval & CANCELTYPE_BITMASK) != 0)
+ __do_cancel ();
+ }
+ else
+ /* The cancellation handler will take care of marking the
+ thread as canceled. */
+ result = __pthread_kill_internal (th, SIGCANCEL);
+
+ break;
+ }
+
+ /* A single-threaded process should be able to kill itself, since
+ there is nothing in the POSIX specification that says that it
+ cannot. So we set multiple_threads to true so that cancellation
+ points get executed. */
+ THREAD_SETMEM (THREAD_SELF, header.multiple_threads, 1);
#ifndef TLS_MULTIPLE_THREADS_IN_TCB
__libc_multiple_threads = 1;
#endif
-
- THREAD_SETMEM (pd, result, PTHREAD_CANCELED);
- if (pd->cancelstate == PTHREAD_CANCEL_ENABLE
- && pd->canceltype == PTHREAD_CANCEL_ASYNCHRONOUS)
- __do_cancel ();
- return 0;
}
+ while (!atomic_compare_exchange_weak_acquire (&pd->cancelhandling, &oldval,
+ newval));
- return __pthread_kill_internal (th, SIGCANCEL);
+ return result;
}
versioned_symbol (libc, __pthread_cancel, pthread_cancel, GLIBC_2_34);
#include <sys/single_threaded.h>
#include <version.h>
#include <clone_internal.h>
+#include <futex-internal.h>
#include <shlib-compat.h>
unwind_buf.priv.data.prev = NULL;
unwind_buf.priv.data.cleanup = NULL;
- __libc_signal_restore_set (&pd->sigmask);
-
/* Allow setxid from now onwards. */
if (__glibc_unlikely (atomic_exchange_acq (&pd->setxid_futex, 0) == -2))
futex_wake (&pd->setxid_futex, 1, FUTEX_PRIVATE);
/* Store the new cleanup handler info. */
THREAD_SETMEM (pd, cleanup_jmp_buf, &unwind_buf);
+ __libc_signal_restore_set (&pd->sigmask);
+
LIBC_PROBE (pthread_start, 3, (pthread_t) pd, pd->start_routine, pd->arg);
/* Run the code the user provided. */
/* This was the last thread. */
exit (0);
+ /* This prevents sending a signal from this thread to itself during
+ its final stages. This must come after the exit call above
+ because atexit handlers must not run with signals blocked.
+
+ Do not block SIGSETXID. The setxid handshake below expects the
+ signal to be delivered. (SIGSETXID cannot run application code,
+ nor does it use pthread_kill.) Reuse the pd->sigmask space for
+ computing the signal mask, to save stack space. */
+ __sigfillset (&pd->sigmask);
+ __sigdelset (&pd->sigmask, SIGSETXID);
+ INTERNAL_SYSCALL_CALL (rt_sigprocmask, SIG_BLOCK, &pd->sigmask, NULL,
+ __NSIG_BYTES);
+
+ /* Tell __pthread_kill_internal that this thread is about to exit.
+ If there is a __pthread_kill_internal in progress, this delays
+ the thread exit until the signal has been queued by the kernel
+ (so that the TID used to send it remains valid). */
+ __libc_lock_lock (pd->exit_lock);
+ pd->exiting = true;
+ __libc_lock_unlock (pd->exit_lock);
+
#ifndef __ASSUME_SET_ROBUST_LIST
/* If this thread has any robust mutexes locked, handle them now. */
# if __PTHREAD_MUTEX_HAVE_PREV
if ((pd == self
|| (self->joinid == pd
&& (pd->cancelhandling
- & (CANCELED_BITMASK | EXITING_BITMASK
+ & (CANCELING_BITMASK | CANCELED_BITMASK | EXITING_BITMASK
| TERMINATED_BITMASK)) == 0))
- && !(self->cancelstate == PTHREAD_CANCEL_ENABLE
- && (pd->cancelhandling & (CANCELED_BITMASK | EXITING_BITMASK
- | TERMINATED_BITMASK))
- == CANCELED_BITMASK))
+ && !cancel_enabled_and_canceled (self->cancelhandling))
/* This is a deadlock situation. The threads are waiting for each
other to finish. Note that this is a "may" error. To be 100%
sure we catch this error we would have to lock the data
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
+#include <libc-lock.h>
#include <unistd.h>
#include <pthreadP.h>
#include <shlib-compat.h>
-int
-__pthread_kill_internal (pthread_t threadid, int signo)
+/* Sends SIGNO to THREADID. If the thread is about to exit or has
+ already exited on the kernel side, return NO_TID. Otherwise return
+ 0 or an error code. */
+static int
+__pthread_kill_implementation (pthread_t threadid, int signo, int no_tid)
{
- pid_t tid;
struct pthread *pd = (struct pthread *) threadid;
-
if (pd == THREAD_SELF)
- /* It is a special case to handle raise() implementation after a vfork
- call (which does not update the PD tid field). */
- tid = INLINE_SYSCALL_CALL (gettid);
- else
- /* Force load of pd->tid into local variable or register. Otherwise
- if a thread exits between ESRCH test and tgkill, we might return
- EINVAL, because pd->tid would be cleared by the kernel. */
- tid = atomic_forced_read (pd->tid);
-
- int val;
- if (__glibc_likely (tid > 0))
{
- pid_t pid = __getpid ();
-
- val = INTERNAL_SYSCALL_CALL (tgkill, pid, tid, signo);
- val = (INTERNAL_SYSCALL_ERROR_P (val)
- ? INTERNAL_SYSCALL_ERRNO (val) : 0);
+ /* Use the actual TID from the kernel, so that it refers to the
+ current thread even if called after vfork. There is no
+ signal blocking in this case, so that the signal is delivered
+ immediately, before __pthread_kill_internal returns: a signal
+ sent to the thread itself needs to be delivered
+ synchronously. (It is unclear if Linux guarantees the
+ delivery of all pending signals after unblocking in the code
+ below. POSIX only guarantees delivery of a single signal,
+ which may not be the right one.) */
+ pid_t tid = INTERNAL_SYSCALL_CALL (gettid);
+ int ret = INTERNAL_SYSCALL_CALL (tgkill, __getpid (), tid, signo);
+ return INTERNAL_SYSCALL_ERROR_P (ret) ? INTERNAL_SYSCALL_ERRNO (ret) : 0;
}
+
+ /* Block all signals, as required by pd->exit_lock. */
+ sigset_t old_mask;
+ __libc_signal_block_all (&old_mask);
+ __libc_lock_lock (pd->exit_lock);
+
+ int ret;
+ if (pd->exiting)
+ /* The thread is about to exit (or has exited). Sending the
+ signal is either not observable (the target thread has already
+ blocked signals at this point), or it will fail, or it might be
+ delivered to a new, unrelated thread that has reused the TID.
+ So do not actually send the signal. */
+ ret = no_tid;
else
- val = ESRCH;
+ {
+ ret = INTERNAL_SYSCALL_CALL (tgkill, __getpid (), pd->tid, signo);
+ ret = INTERNAL_SYSCALL_ERROR_P (ret) ? INTERNAL_SYSCALL_ERRNO (ret) : 0;
+ }
+
+ __libc_lock_unlock (pd->exit_lock);
+ __libc_signal_restore_set (&old_mask);
- return val;
+ return ret;
+}
+
+int
+__pthread_kill_internal (pthread_t threadid, int signo)
+{
+ /* Pass 0 as the NO_TID value, so that a target thread which is about
+ to exit or has already exited is reported as success.
+ Do not report an error in the no-tid case because the threadid
+ argument is still valid (the thread ID lifetime has not ended),
+ and ESRCH (for example) would be misleading. */
+ return __pthread_kill_implementation (threadid, signo, 0);
}
int
return __pthread_kill_internal (threadid, signo);
}
+
/* Some architectures (for instance arm) might pull raise through libgcc, so
avoid the symbol version if it ends up being used on ld.so. */
#if !IS_IN(rtld)
versioned_symbol (libc, __pthread_kill, pthread_kill, GLIBC_2_34);
# if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_0, GLIBC_2_34)
-compat_symbol (libc, __pthread_kill, pthread_kill, GLIBC_2_0);
+/* Variant which returns ESRCH in the no-TID case, for backwards
+ compatibility. Signals for which __is_internal_signal is true are
+ rejected with EINVAL before anything is sent. */
+int
+attribute_compat_text_section
+__pthread_kill_esrch (pthread_t threadid, int signo)
+{
+ if (__is_internal_signal (signo))
+ return EINVAL;
+
+ return __pthread_kill_implementation (threadid, signo, ESRCH);
+}
+compat_symbol (libc, __pthread_kill_esrch, pthread_kill, GLIBC_2_0);
# endif
#endif
self = THREAD_SELF;
- if (oldstate != NULL)
- *oldstate = self->cancelstate;
- self->cancelstate = state;
+ int oldval = atomic_load_relaxed (&self->cancelhandling);
+ while (1)
+ {
+ int newval = (state == PTHREAD_CANCEL_DISABLE
+ ? oldval | CANCELSTATE_BITMASK
+ : oldval & ~CANCELSTATE_BITMASK);
+
+ if (oldstate != NULL)
+ *oldstate = ((oldval & CANCELSTATE_BITMASK)
+ ? PTHREAD_CANCEL_DISABLE : PTHREAD_CANCEL_ENABLE);
+
+ if (oldval == newval)
+ break;
+
+ if (atomic_compare_exchange_weak_acquire (&self->cancelhandling,
+ &oldval, newval))
+ {
+ if (cancel_enabled_and_canceled_and_async (newval))
+ __do_cancel ();
+
+ break;
+ }
+ }
return 0;
}
volatile struct pthread *self = THREAD_SELF;
- if (oldtype != NULL)
- *oldtype = self->canceltype;
- self->canceltype = type;
- if (type == PTHREAD_CANCEL_ASYNCHRONOUS)
- __pthread_testcancel ();
+ int oldval = atomic_load_relaxed (&self->cancelhandling);
+ while (1)
+ {
+ int newval = (type == PTHREAD_CANCEL_ASYNCHRONOUS
+ ? oldval | CANCELTYPE_BITMASK
+ : oldval & ~CANCELTYPE_BITMASK);
+
+ if (oldtype != NULL)
+ *oldtype = ((oldval & CANCELTYPE_BITMASK)
+ ? PTHREAD_CANCEL_ASYNCHRONOUS : PTHREAD_CANCEL_DEFERRED);
+
+ if (oldval == newval)
+ break;
+
+ if (atomic_compare_exchange_weak_acquire (&self->cancelhandling,
+ &oldval, newval))
+ {
+ if (cancel_enabled_and_canceled_and_async (newval))
+ {
+ THREAD_SETMEM (self, result, PTHREAD_CANCELED);
+ __do_cancel ();
+ }
+
+ break;
+ }
+ }
return 0;
}
___pthread_testcancel (void)
{
struct pthread *self = THREAD_SELF;
- int cancelhandling = THREAD_GETMEM (self, cancelhandling);
- if (self->cancelstate == PTHREAD_CANCEL_ENABLE
- && (cancelhandling & CANCELED_BITMASK)
- && !(cancelhandling & EXITING_BITMASK)
- && !(cancelhandling & TERMINATED_BITMASK))
+ int cancelhandling = atomic_load_relaxed (&self->cancelhandling);
+ if (cancel_enabled_and_canceled (cancelhandling))
{
- THREAD_SETMEM (self, result, PTHREAD_CANCELED);
+ self->result = PTHREAD_CANCELED;
__do_cancel ();
}
}
#include <jmpbuf-unwind.h>
#include <shlib-compat.h>
-#ifdef _STACK_GROWS_DOWN
+#if _STACK_GROWS_DOWN
# define FRAME_LEFT(frame, other, adj) \
((uintptr_t) frame - adj >= (uintptr_t) other - adj)
#elif _STACK_GROWS_UP
tst-nss-test1 \
tst-nss-test2 \
tst-nss-test4 \
- tst-nss-test5
+ tst-nss-test5 \
+ tst-nss-test_errno
xtests = bug-erange
tests-container = \
ifeq ($(build-static-nss),yes)
tests-static += tst-nss-static
endif
-extra-test-objs += nss_test1.os nss_test2.os
+extra-test-objs += nss_test1.os nss_test2.os nss_test_errno.os
include ../Rules
libof-nss_test1 = extramodules
libof-nss_test2 = extramodules
+libof-nss_test_errno = extramodules
$(objpfx)/libnss_test1.so: $(objpfx)nss_test1.os $(link-libc-deps)
$(build-module)
$(objpfx)/libnss_test2.so: $(objpfx)nss_test2.os $(link-libc-deps)
$(build-module)
+$(objpfx)/libnss_test_errno.so: $(objpfx)nss_test_errno.os $(link-libc-deps)
+ $(build-module)
$(objpfx)nss_test2.os : nss_test1.c
-ifdef libnss_test1.so-version
-$(objpfx)/libnss_test1.so$(libnss_test1.so-version): $(objpfx)/libnss_test1.so
+# Use the nss_files suffix for these objects as well.
+$(objpfx)/libnss_test1.so$(libnss_files.so-version): $(objpfx)/libnss_test1.so
$(make-link)
-endif
-ifdef libnss_test2.so-version
-$(objpfx)/libnss_test2.so$(libnss_test2.so-version): $(objpfx)/libnss_test2.so
+$(objpfx)/libnss_test2.so$(libnss_files.so-version): $(objpfx)/libnss_test2.so
+ $(make-link)
+$(objpfx)/libnss_test_errno.so$(libnss_files.so-version): \
+ $(objpfx)/libnss_test_errno.so
$(make-link)
-endif
$(patsubst %,$(objpfx)%.out,$(tests) $(tests-container)) : \
- $(objpfx)/libnss_test1.so$(libnss_test1.so-version) \
- $(objpfx)/libnss_test2.so$(libnss_test2.so-version)
+ $(objpfx)/libnss_test1.so$(libnss_files.so-version) \
+ $(objpfx)/libnss_test2.so$(libnss_files.so-version) \
+ $(objpfx)/libnss_test_errno.so$(libnss_files.so-version)
ifeq (yes,$(have-thread-library))
$(objpfx)tst-cancel-getpwuid_r: $(shared-thread-library)
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
+#include <assert.h>
#include "nsswitch.h"
/*******************************************************************\
|* ALTERNATE_NAME - name of another service which is examined in *|
|* case DATABASE_NAME is not found *|
|* *|
-|* DEFAULT_CONFIG - string for default conf (e.g. "dns files") *|
+|* DEFAULT_CONFIG - string for default conf (e.g. "files dns") *|
|* *|
\*******************************************************************/
*ni = DATABASE_NAME_SYMBOL;
+ /* We want to know about it if we've somehow got a NULL action list;
+ in the past, we had bad state if seccomp interfered with setup. */
+ assert(*ni != NULL);
+
return __nss_lookup (ni, fct_name, fct2_name, fctp);
}
libc_hidden_def (DB_LOOKUP_FCT)
{
nss_database_default_defconfig = 0, /* "nis [NOTFOUND=return] files". */
nss_database_default_compat, /* "compat [NOTFOUND=return] files". */
- nss_database_default_dns, /* "dns [!UNAVAIL=return] files". */
+ nss_database_default_dns, /* "files dns". */
nss_database_default_files, /* "files". */
nss_database_default_nis, /* "nis". */
nss_database_default_nis_nisplus, /* "nis nisplus". */
#endif
case nss_database_default_dns:
- line = "dns [!UNAVAIL=return] files";
+ line = "files dns";
break;
case nss_database_default_files:
return true;
}
- /* Before we reload, verify that "/" hasn't changed. We assume that
- errors here are very unlikely, but the chance that we're entering
- a container is also very unlikely, so we err on the side of both
- very unlikely things not happening at the same time. */
- if (__stat64_time64 ("/", &str) != 0
- || (local->root_ino != 0
- && (str.st_ino != local->root_ino
- || str.st_dev != local->root_dev)))
+ int stat_rv = __stat64_time64 ("/", &str);
+
+ if (local->data.services[database_index] != NULL)
{
- /* Change detected; disable reloading and return current state. */
- atomic_store_release (&local->data.reload_disabled, 1);
- *result = local->data.services[database_index];
- __libc_lock_unlock (local->lock);
- return true;
+ /* Before we reload, verify that "/" hasn't changed. We assume that
+ errors here are very unlikely, but the chance that we're entering
+ a container is also very unlikely, so we err on the side of both
+ very unlikely things not happening at the same time. */
+ if (stat_rv != 0
+ || (local->root_ino != 0
+ && (str.st_ino != local->root_ino
+ || str.st_dev != local->root_dev)))
+ {
+ /* Change detected; disable reloading and return current state. */
+ atomic_store_release (&local->data.reload_disabled, 1);
+ *result = local->data.services[database_index];
+ __libc_lock_unlock (local->lock);
+ return true;
+ }
+ }
+ if (stat_rv == 0)
+ {
+ local->root_ino = str.st_ino;
+ local->root_dev = str.st_dev;
}
- local->root_ino = str.st_ino;
- local->root_dev = str.st_dev;
+
__libc_lock_unlock (local->lock);
/* Avoid overwriting the global configuration until we have loaded
void *
__nss_module_get_function (struct nss_module *module, const char *name)
{
+ /* A successful dlopen might clobber errno. */
+ int saved_errno = errno;
+
if (!__nss_module_load (module))
- return NULL;
+ {
+ /* Reporting module load failure is currently inaccurate. See
+ bug 22041. Not changing errno is the conservative choice. */
+ __set_errno (saved_errno);
+ return NULL;
+ }
+
+ __set_errno (saved_errno);
function_name *name_entry = bsearch (name, nss_function_name_array,
array_length (nss_function_name_array),
--- /dev/null
+/* NSS service provider with errno clobber.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <nss.h>
+#include <stdlib.h>
+
+/* Catch misnamed functions. */
+#pragma GCC diagnostic error "-Wmissing-prototypes"
+NSS_DECLARE_MODULE_FUNCTIONS (test_errno)
+
+static void __attribute__ ((constructor))
+init (void)
+{
+ /* An arbitrary error code which is otherwise not used. Because this
+ function is marked as a constructor, errno is overwritten as a
+ side effect of loading this module. */
+ errno = -1009;
+}
+
+/* Lookup functions for pwd follow that do not return any data. */
+
+/* Catch misnamed function definitions. */
+
+/* Record that this function was called by setting the environment
+ variable named after it to "yes", then report success. STAYOPEN
+ is not used. */
+enum nss_status
+_nss_test_errno_setpwent (int stayopen)
+{
+ setenv ("_nss_test_errno_setpwent", "yes", 1);
+ return NSS_STATUS_SUCCESS;
+}
+
+/* Record that this function was called by setting the environment
+ variable named after it to "yes", then report that no entry was
+ found. RESULT, BUFFER, SIZE and ERRNOP are not touched. */
+enum nss_status
+_nss_test_errno_getpwent_r (struct passwd *result,
+ char *buffer, size_t size, int *errnop)
+{
+ setenv ("_nss_test_errno_getpwent_r", "yes", 1);
+ return NSS_STATUS_NOTFOUND;
+}
+
+/* Record that this function was called by setting the environment
+ variable named after it to "yes", then report success. */
+enum nss_status
+_nss_test_errno_endpwent (void)
+{
+ setenv ("_nss_test_errno_endpwent", "yes", 1);
+ return NSS_STATUS_SUCCESS;
+}
--- /dev/null
+hosts: files
--- /dev/null
+/* getpwent failure when dlopen clobbers errno (bug 28953).
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <nss.h>
+#include <support/check.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <pwd.h>
+#include <string.h>
+
+/* Enumerate the passwd database through the "files" service followed
+ by the test_errno module, and check that setpwent, getpwent and
+ endpwent all leave errno at zero, that a "root" entry is seen, and
+ that each of the module's three entry points was called. */
+static int
+do_test (void)
+{
+ __nss_configure_lookup ("passwd", "files test_errno");
+
+ errno = 0;
+ setpwent ();
+ TEST_COMPARE (errno, 0);
+
+ bool root_seen = false;
+ while (true)
+ {
+ /* Reset errno before every call so that the check after the loop
+ reflects only the final getpwent call, which returned NULL. */
+ errno = 0;
+ struct passwd *e = getpwent ();
+ if (e == NULL)
+ break;
+ /* strcmp returns zero on a match; testing the bare result would
+ set the flag for every entry that is not root. */
+ if (strcmp (e->pw_name, "root") == 0)
+ root_seen = true;
+ }
+
+ TEST_COMPARE (errno, 0);
+ TEST_VERIFY (root_seen);
+
+ errno = 0;
+ endpwent ();
+ TEST_COMPARE (errno, 0);
+
+ /* The test_errno module records each call in the environment. */
+ TEST_COMPARE_STRING (getenv ("_nss_test_errno_setpwent"), "yes");
+ TEST_COMPARE_STRING (getenv ("_nss_test_errno_getpwent_r"), "yes");
+ TEST_COMPARE_STRING (getenv ("_nss_test_errno_endpwent"), "yes");
+
+ return 0;
+}
+
+#include <support/test-driver.c>
tst-sysconf-empty-chroot tst-glob_symlinks tst-fexecve \
tst-glob-tilde test-ssize-max tst-spawn4 bug-regex37 \
bug-regex38 tst-regcomp-truncated tst-spawn-chdir \
- tst-wordexp-nocmd tst-execveat tst-spawn5
+ tst-wordexp-nocmd tst-execveat tst-spawn5 \
+ tst-sched_getaffinity
# Test for the glob symbol version that was replaced in glibc 2.27.
ifeq ($(have-GLIBC_2.26)$(build-shared),yesyes)
__fortify_function __wur ssize_t
read (int __fd, void *__buf, size_t __nbytes)
{
- if (__glibc_objsize0 (__buf) != (size_t) -1)
- {
- if (!__builtin_constant_p (__nbytes))
- return __read_chk (__fd, __buf, __nbytes, __glibc_objsize0 (__buf));
-
- if (__nbytes > __glibc_objsize0 (__buf))
- return __read_chk_warn (__fd, __buf, __nbytes,
- __glibc_objsize0 (__buf));
- }
- return __read_alias (__fd, __buf, __nbytes);
+ return __glibc_fortify (read, __nbytes, sizeof (char),
+ __glibc_objsize0 (__buf),
+ __fd, __buf, __nbytes);
}
-#ifdef __USE_UNIX98
+#if defined __USE_UNIX98 || defined __USE_XOPEN2K8
extern ssize_t __pread_chk (int __fd, void *__buf, size_t __nbytes,
__off_t __offset, size_t __bufsize)
__wur __attr_access ((__write_only__, 2, 3));
__fortify_function __wur ssize_t
pread (int __fd, void *__buf, size_t __nbytes, __off_t __offset)
{
- if (__glibc_objsize0 (__buf) != (size_t) -1)
- {
- if (!__builtin_constant_p (__nbytes))
- return __pread_chk (__fd, __buf, __nbytes, __offset,
- __glibc_objsize0 (__buf));
-
- if ( __nbytes > __glibc_objsize0 (__buf))
- return __pread_chk_warn (__fd, __buf, __nbytes, __offset,
- __glibc_objsize0 (__buf));
- }
- return __pread_alias (__fd, __buf, __nbytes, __offset);
+ return __glibc_fortify (pread, __nbytes, sizeof (char),
+ __glibc_objsize0 (__buf),
+ __fd, __buf, __nbytes, __offset);
}
# else
__fortify_function __wur ssize_t
pread (int __fd, void *__buf, size_t __nbytes, __off64_t __offset)
{
- if (__glibc_objsize0 (__buf) != (size_t) -1)
- {
- if (!__builtin_constant_p (__nbytes))
- return __pread64_chk (__fd, __buf, __nbytes, __offset,
- __glibc_objsize0 (__buf));
-
- if ( __nbytes > __glibc_objsize0 (__buf))
- return __pread64_chk_warn (__fd, __buf, __nbytes, __offset,
- __glibc_objsize0 (__buf));
- }
-
- return __pread64_alias (__fd, __buf, __nbytes, __offset);
+ return __glibc_fortify (pread64, __nbytes, sizeof (char),
+ __glibc_objsize0 (__buf),
+ __fd, __buf, __nbytes, __offset);
}
# endif
__fortify_function __wur ssize_t
pread64 (int __fd, void *__buf, size_t __nbytes, __off64_t __offset)
{
- if (__glibc_objsize0 (__buf) != (size_t) -1)
- {
- if (!__builtin_constant_p (__nbytes))
- return __pread64_chk (__fd, __buf, __nbytes, __offset,
- __glibc_objsize0 (__buf));
-
- if ( __nbytes > __glibc_objsize0 (__buf))
- return __pread64_chk_warn (__fd, __buf, __nbytes, __offset,
- __glibc_objsize0 (__buf));
- }
-
- return __pread64_alias (__fd, __buf, __nbytes, __offset);
+ return __glibc_fortify (pread64, __nbytes, sizeof (char),
+ __glibc_objsize0 (__buf),
+ __fd, __buf, __nbytes, __offset);
}
# endif
#endif
__NTH (readlink (const char *__restrict __path, char *__restrict __buf,
size_t __len))
{
- if (__glibc_objsize (__buf) != (size_t) -1)
- {
- if (!__builtin_constant_p (__len))
- return __readlink_chk (__path, __buf, __len, __glibc_objsize (__buf));
-
- if ( __len > __glibc_objsize (__buf))
- return __readlink_chk_warn (__path, __buf, __len,
- __glibc_objsize (__buf));
- }
- return __readlink_alias (__path, __buf, __len);
+ return __glibc_fortify (readlink, __len, sizeof (char),
+ __glibc_objsize (__buf),
+ __path, __buf, __len);
}
#endif
__NTH (readlinkat (int __fd, const char *__restrict __path,
char *__restrict __buf, size_t __len))
{
- if (__glibc_objsize (__buf) != (size_t) -1)
- {
- if (!__builtin_constant_p (__len))
- return __readlinkat_chk (__fd, __path, __buf, __len,
- __glibc_objsize (__buf));
-
- if (__len > __glibc_objsize (__buf))
- return __readlinkat_chk_warn (__fd, __path, __buf, __len,
- __glibc_objsize (__buf));
- }
- return __readlinkat_alias (__fd, __path, __buf, __len);
+ return __glibc_fortify (readlinkat, __len, sizeof (char),
+ __glibc_objsize (__buf),
+ __fd, __path, __buf, __len);
}
#endif
extern char *__getcwd_chk (char *__buf, size_t __size, size_t __buflen)
- __THROW __wur __attr_access ((__write_only__, 1, 2));
+ __THROW __wur;
extern char *__REDIRECT_NTH (__getcwd_alias,
- (char *__buf, size_t __size), getcwd)
- __wur __attr_access ((__write_only__, 1, 2));
+ (char *__buf, size_t __size), getcwd) __wur;
extern char *__REDIRECT_NTH (__getcwd_chk_warn,
(char *__buf, size_t __size, size_t __buflen),
__getcwd_chk)
__fortify_function __wur char *
__NTH (getcwd (char *__buf, size_t __size))
{
- if (__glibc_objsize (__buf) != (size_t) -1)
- {
- if (!__builtin_constant_p (__size))
- return __getcwd_chk (__buf, __size, __glibc_objsize (__buf));
-
- if (__size > __glibc_objsize (__buf))
- return __getcwd_chk_warn (__buf, __size, __glibc_objsize (__buf));
- }
- return __getcwd_alias (__buf, __size);
+ return __glibc_fortify (getcwd, __size, sizeof (char),
+ __glibc_objsize (__buf),
+ __buf, __size);
}
#if defined __USE_MISC || defined __USE_XOPEN_EXTENDED
__fortify_function size_t
__NTH (confstr (int __name, char *__buf, size_t __len))
{
- if (__glibc_objsize (__buf) != (size_t) -1)
- {
- if (!__builtin_constant_p (__len))
- return __confstr_chk (__name, __buf, __len, __glibc_objsize (__buf));
-
- if (__glibc_objsize (__buf) < __len)
- return __confstr_chk_warn (__name, __buf, __len,
- __glibc_objsize (__buf));
- }
- return __confstr_alias (__name, __buf, __len);
+ return __glibc_fortify (confstr, __len, sizeof (char),
+ __glibc_objsize (__buf),
+ __name, __buf, __len);
}
__fortify_function int
__NTH (getgroups (int __size, __gid_t __list[]))
{
- if (__glibc_objsize (__list) != (size_t) -1)
- {
- if (!__builtin_constant_p (__size) || __size < 0)
- return __getgroups_chk (__size, __list, __glibc_objsize (__list));
-
- if (__size * sizeof (__gid_t) > __glibc_objsize (__list))
- return __getgroups_chk_warn (__size, __list, __glibc_objsize (__list));
- }
- return __getgroups_alias (__size, __list);
+ return __glibc_fortify (getgroups, __size, sizeof (__gid_t),
+ __glibc_objsize (__list),
+ __size, __list);
}
__fortify_function int
__NTH (ttyname_r (int __fd, char *__buf, size_t __buflen))
{
- if (__glibc_objsize (__buf) != (size_t) -1)
- {
- if (!__builtin_constant_p (__buflen))
- return __ttyname_r_chk (__fd, __buf, __buflen,
- __glibc_objsize (__buf));
-
- if (__buflen > __glibc_objsize (__buf))
- return __ttyname_r_chk_warn (__fd, __buf, __buflen,
- __glibc_objsize (__buf));
- }
- return __ttyname_r_alias (__fd, __buf, __buflen);
+ return __glibc_fortify (ttyname_r, __buflen, sizeof (char),
+ __glibc_objsize (__buf),
+ __fd, __buf, __buflen);
}
__fortify_function int
getlogin_r (char *__buf, size_t __buflen)
{
- if (__glibc_objsize (__buf) != (size_t) -1)
- {
- if (!__builtin_constant_p (__buflen))
- return __getlogin_r_chk (__buf, __buflen, __glibc_objsize (__buf));
-
- if (__buflen > __glibc_objsize (__buf))
- return __getlogin_r_chk_warn (__buf, __buflen,
- __glibc_objsize (__buf));
- }
- return __getlogin_r_alias (__buf, __buflen);
+ return __glibc_fortify (getlogin_r, __buflen, sizeof (char),
+ __glibc_objsize (__buf),
+ __buf, __buflen);
}
#endif
__fortify_function int
__NTH (gethostname (char *__buf, size_t __buflen))
{
- if (__glibc_objsize (__buf) != (size_t) -1)
- {
- if (!__builtin_constant_p (__buflen))
- return __gethostname_chk (__buf, __buflen, __glibc_objsize (__buf));
-
- if (__buflen > __glibc_objsize (__buf))
- return __gethostname_chk_warn (__buf, __buflen,
- __glibc_objsize (__buf));
- }
- return __gethostname_alias (__buf, __buflen);
+ return __glibc_fortify (gethostname, __buflen, sizeof (char),
+ __glibc_objsize (__buf),
+ __buf, __buflen);
}
#endif
__fortify_function int
__NTH (getdomainname (char *__buf, size_t __buflen))
{
- if (__glibc_objsize (__buf) != (size_t) -1)
- {
- if (!__builtin_constant_p (__buflen))
- return __getdomainname_chk (__buf, __buflen, __glibc_objsize (__buf));
-
- if (__buflen > __glibc_objsize (__buf))
- return __getdomainname_chk_warn (__buf, __buflen,
- __glibc_objsize (__buf));
- }
- return __getdomainname_alias (__buf, __buflen);
+ return __glibc_fortify (getdomainname, __buflen, sizeof (char),
+ __glibc_objsize (__buf),
+ __buf, __buflen);
}
#endif
best effort to make is async-signal-safe at least for single-thread
case. */
bool multiple_threads = __libc_single_threaded == 0;
+ uint64_t lastrun;
- __run_fork_handlers (atfork_run_prepare, multiple_threads);
+ lastrun = __run_prefork_handlers (multiple_threads);
struct nss_database_data nss_database_data;
/* Reset the lock the dynamic loader uses to protect its data. */
__rtld_lock_initialize (GL(dl_load_lock));
+ /* Reset the lock protecting dynamic TLS related data. */
+ __rtld_lock_initialize (GL(dl_load_tls_lock));
+
reclaim_stacks ();
/* Run the handlers registered for the child. */
- __run_fork_handlers (atfork_run_child, multiple_threads);
+ __run_postfork_handlers (atfork_run_child, multiple_threads, lastrun);
}
else
{
}
/* Run the handlers registered for the parent. */
- __run_fork_handlers (atfork_run_parent, multiple_threads);
+ __run_postfork_handlers (atfork_run_parent, multiple_threads, lastrun);
if (pid < 0)
__set_errno (save_errno);
optimizes away the pattern == NULL test below. */
# define _GL_ARG_NONNULL(params)
-# include <config.h>
+# include <libc-config.h>
#endif
#include <glob.h>
#include <errno.h>
+#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <stdbool.h>
# define sysconf(id) __sysconf (id)
# define closedir(dir) __closedir (dir)
# define opendir(name) __opendir (name)
+# undef dirfd
+# define dirfd(str) __dirfd (str)
# define readdir(str) __readdir64 (str)
# define getpwnam_r(name, bufp, buf, len, res) \
__getpwnam_r (name, bufp, buf, len, res)
# ifndef GLOB_LSTAT
# define GLOB_LSTAT gl_lstat
# endif
-# ifndef GLOB_STAT64
-# define GLOB_STAT64 __stat64
-# endif
-# ifndef GLOB_LSTAT64
-# define GLOB_LSTAT64 __lstat64
+# ifndef GLOB_FSTATAT64
+# define GLOB_FSTATAT64 __fstatat64
# endif
# include <shlib-compat.h>
#else /* !_LIBC */
# define struct_stat struct stat
# define struct_stat64 struct stat
# define GLOB_LSTAT gl_lstat
-# define GLOB_STAT64 stat
-# define GLOB_LSTAT64 lstat
+# define GLOB_FSTATAT64 fstatat
#endif /* _LIBC */
#include <fnmatch.h>
} ust;
return (__glibc_unlikely (flags & GLOB_ALTDIRFUNC)
? pglob->GLOB_LSTAT (fullname, &ust.st)
- : GLOB_LSTAT64 (fullname, &ust.st64));
+ : GLOB_FSTATAT64 (AT_FDCWD, fullname, &ust.st64,
+ AT_SYMLINK_NOFOLLOW));
}
/* Set *R = A + B. Return true if the answer is mathematically
struct_stat64 st64;
return (__glibc_unlikely (flags & GLOB_ALTDIRFUNC)
? pglob->gl_stat (filename, &st) == 0 && S_ISDIR (st.st_mode)
- : GLOB_STAT64 (filename, &st64) == 0 && S_ISDIR (st64.st_mode));
+ : (GLOB_FSTATAT64 (AT_FDCWD, filename, &st64, 0) == 0
+ && S_ISDIR (st64.st_mode)));
}
/* Find the end of the sub-pattern in a brace expression. */
else
{
#ifndef WINDOWS32
+ /* Recognize ~user as a shorthand for the specified user's home
+ directory. */
char *end_name = strchr (dirname, '/');
char *user_name;
int malloc_user_name = 0;
}
scratch_buffer_free (&pwtmpbuf);
}
-#endif /* !WINDOWS32 */
+#else /* WINDOWS32 */
+ /* On native Windows, access to a user's home directory
+ (via GetUserProfileDirectory) or to a user's environment
+ variables (via ExpandEnvironmentStringsForUser) requires
+ the credentials of the user. Therefore we cannot support
+ the ~user syntax on this platform.
+ Handling ~user specially (and treat it like plain ~) if
+ user is getenv ("USERNAME") would not be a good idea,
+ since it would make people think that ~user is supported
+ in general. */
+ if (flags & GLOB_TILDE_CHECK)
+ {
+ retval = GLOB_NOMATCH;
+ goto out;
+ }
+#endif /* WINDOWS32 */
}
}
{
size_t dirlen = strlen (directory);
void *stream = NULL;
+ struct scratch_buffer s;
+ scratch_buffer_init (&s);
# define GLOBNAMES_MEMBERS(nnames) \
struct globnames *next; size_t count; char *name[nnames];
struct globnames { GLOBNAMES_MEMBERS (FLEXIBLE_ARRAY_MEMBER) };
}
else
{
+ int dfd = dirfd (stream);
int fnm_flags = ((!(flags & GLOB_PERIOD) ? FNM_PERIOD : 0)
| ((flags & GLOB_NOESCAPE) ? FNM_NOESCAPE : 0));
flags |= GLOB_MAGCHAR;
if (flags & GLOB_ONLYDIR)
switch (readdir_result_type (d))
{
- case DT_DIR: case DT_LNK: case DT_UNKNOWN: break;
default: continue;
+ case DT_DIR: break;
+ case DT_LNK: case DT_UNKNOWN:
+ /* The filesystem was too lazy to give us a hint,
+ so we have to do it the hard way. */
+ if (__glibc_unlikely (dfd < 0 || flags & GLOB_ALTDIRFUNC))
+ {
+ size_t namelen = strlen (d.name);
+ size_t need = dirlen + 1 + namelen + 1;
+ if (s.length < need
+ && !scratch_buffer_set_array_size (&s, need, 1))
+ goto memory_error;
+ char *p = mempcpy (s.data, directory, dirlen);
+ *p = '/';
+ p += p[-1] != '/';
+ memcpy (p, d.name, namelen + 1);
+ if (! is_dir (s.data, flags, pglob))
+ continue;
+ }
+ else
+ {
+ struct_stat64 st64;
+ if (! (GLOB_FSTATAT64 (dfd, d.name, &st64, 0) == 0
+ && S_ISDIR (st64.st_mode)))
+ continue;
+ }
}
if (fnmatch (pattern, d.name, fnm_flags) == 0)
__set_errno (save);
}
+ scratch_buffer_free (&s);
return result;
}
#include <libc-lock.h>
#include <stdbool.h>
#include <register-atfork.h>
+#include <intprops.h>
+#include <stdio.h>
#define DYNARRAY_ELEMENT struct fork_handler
#define DYNARRAY_STRUCT fork_handler_list
#include <malloc/dynarray-skeleton.c>
static struct fork_handler_list fork_handlers;
-static bool fork_handler_init = false;
+static uint64_t fork_handler_counter;
static int atfork_lock = LLL_LOCK_INITIALIZER;
{
lll_lock (atfork_lock, LLL_PRIVATE);
- if (!fork_handler_init)
- {
- fork_handler_list_init (&fork_handlers);
- fork_handler_init = true;
- }
+ if (fork_handler_counter == 0)
+ fork_handler_list_init (&fork_handlers);
struct fork_handler *newp = fork_handler_list_emplace (&fork_handlers);
if (newp != NULL)
newp->parent_handler = parent;
newp->child_handler = child;
newp->dso_handle = dso_handle;
+
+ /* IDs assigned to handlers start at 1 and increment with handler
+ registration. Un-registering a handler discards the corresponding
+ ID. It is not reused in future registrations. */
+ if (INT_ADD_OVERFLOW (fork_handler_counter, 1))
+ __libc_fatal ("fork handler counter overflow");
+ newp->id = ++fork_handler_counter;
}
/* Release the lock. */
lll_unlock (atfork_lock, LLL_PRIVATE);
}
-void
-__run_fork_handlers (enum __run_fork_handler_type who, _Bool do_locking)
+uint64_t
+__run_prefork_handlers (_Bool do_locking)
{
- struct fork_handler *runp;
+ uint64_t lastrun;
- if (who == atfork_run_prepare)
+ if (do_locking)
+ lll_lock (atfork_lock, LLL_PRIVATE);
+
+ /* We run prepare handlers from last to first. After fork, only
+ handlers up to the last handler found here (pre-fork) will be run.
+ Handlers registered during __run_prefork_handlers or
+ __run_postfork_handlers will be positioned after this last handler, and
+ since their prepare handlers won't be run now, their parent/child
+ handlers should also be ignored. */
+ lastrun = fork_handler_counter;
+
+ size_t sl = fork_handler_list_size (&fork_handlers);
+ for (size_t i = sl; i > 0;)
{
- if (do_locking)
- lll_lock (atfork_lock, LLL_PRIVATE);
- size_t sl = fork_handler_list_size (&fork_handlers);
- for (size_t i = sl; i > 0; i--)
- {
- runp = fork_handler_list_at (&fork_handlers, i - 1);
- if (runp->prepare_handler != NULL)
- runp->prepare_handler ();
- }
+ struct fork_handler *runp
+ = fork_handler_list_at (&fork_handlers, i - 1);
+
+ uint64_t id = runp->id;
+
+ if (runp->prepare_handler != NULL)
+ {
+ if (do_locking)
+ lll_unlock (atfork_lock, LLL_PRIVATE);
+
+ runp->prepare_handler ();
+
+ if (do_locking)
+ lll_lock (atfork_lock, LLL_PRIVATE);
+ }
+
+ /* We unlocked, ran the handler, and locked again. In the
+ meanwhile, one or more deregistrations could have occurred leading
+ to the current (just run) handler being moved up the list or even
+ removed from the list itself. Since handler IDs are guaranteed to
+ be in increasing order, the next handler has to have: */
+
+ /* A. An earlier position than the current one has. */
+ i--;
+
+ /* B. A lower ID than the current one does. The code below skips
+ any newly added handlers with higher IDs. */
+ while (i > 0
+ && fork_handler_list_at (&fork_handlers, i - 1)->id >= id)
+ i--;
}
- else
+
+ return lastrun;
+}
+
+void
+__run_postfork_handlers (enum __run_fork_handler_type who, _Bool do_locking,
+ uint64_t lastrun)
+{
+ size_t sl = fork_handler_list_size (&fork_handlers);
+ for (size_t i = 0; i < sl;)
{
- size_t sl = fork_handler_list_size (&fork_handlers);
- for (size_t i = 0; i < sl; i++)
- {
- runp = fork_handler_list_at (&fork_handlers, i);
- if (who == atfork_run_child && runp->child_handler)
- runp->child_handler ();
- else if (who == atfork_run_parent && runp->parent_handler)
- runp->parent_handler ();
- }
+ struct fork_handler *runp = fork_handler_list_at (&fork_handlers, i);
+ uint64_t id = runp->id;
+
+ /* prepare handlers were not run for handlers with ID > LASTRUN.
+ Thus, parent/child handlers will also not be run. */
+ if (id > lastrun)
+ break;
+
if (do_locking)
- lll_unlock (atfork_lock, LLL_PRIVATE);
+ lll_unlock (atfork_lock, LLL_PRIVATE);
+
+ if (who == atfork_run_child && runp->child_handler)
+ runp->child_handler ();
+ else if (who == atfork_run_parent && runp->parent_handler)
+ runp->parent_handler ();
+
+ if (do_locking)
+ lll_lock (atfork_lock, LLL_PRIVATE);
+
+ /* We unlocked, ran the handler, and locked again. In the meanwhile,
+ one or more [de]registrations could have occurred. Due to this,
+ the list size must be updated. */
+ sl = fork_handler_list_size (&fork_handlers);
+
+ /* The just-run handler could also have moved up the list. */
+
+ if (sl > i && fork_handler_list_at (&fork_handlers, i)->id == id)
+ /* The position of the recently run handler hasn't changed. The
+ next handler to be run is an easy increment away. */
+ i++;
+ else
+ {
+ /* The next handler to be run is the first handler in the list
+ to have an ID higher than the current one. */
+ for (i = 0; i < sl; i++)
+ {
+ if (fork_handler_list_at (&fork_handlers, i)->id > id)
+ break;
+ }
+ }
}
+
+ if (do_locking)
+ lll_unlock (atfork_lock, LLL_PRIVATE);
}
--- /dev/null
+/* Tests for sched_getaffinity with large buffers.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <array_length.h>
+#include <sched.h>
+#include <support/check.h>
+
+/* NB: this test may fail on systems with more than 32k cpus. */
+
+static int
+do_test (void)
+{
+ /* The values are larger than the default cpu_set_t. */
+ const int bufsize[] = { 1<<11, 1<<12, 1<<13, 1<<14, 1<<15, 1<<16, 1<<17 };
+ int cpucount[array_length (bufsize)];
+
+ for (int i = 0; i < array_length (bufsize); i++)
+ {
+ cpu_set_t *cpuset = CPU_ALLOC (bufsize[i]);
+ TEST_VERIFY (cpuset != NULL);
+ size_t size = CPU_ALLOC_SIZE (bufsize[i]);
+ TEST_COMPARE (sched_getaffinity (0, size, cpuset), 0);
+ cpucount[i] = CPU_COUNT_S (size, cpuset);
+ CPU_FREE (cpuset);
+ }
+
+ for (int i = 0; i < array_length (cpucount) - 1; i++)
+ TEST_COMPARE (cpucount[i], cpucount[i + 1]);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
#define NFDS 100
-static int
-open_multiple_temp_files (void)
-{
- /* Check if the temporary file descriptor has no no gaps. */
- int lowfd = xopen ("/dev/null", O_RDONLY, 0600);
- for (int i = 1; i <= NFDS; i++)
- TEST_COMPARE (xopen ("/dev/null", O_RDONLY, 0600),
- lowfd + i);
- return lowfd;
-}
-
static int
parse_fd (const char *str)
{
static void
do_test_closefrom (void)
{
- int lowfd = open_multiple_temp_files ();
+ int lowfd = support_open_dev_null_range (NFDS, O_RDONLY, 0600);
const int half_fd = lowfd + NFDS / 2;
/* Close half of the descriptors and check result. */
This function is a cancellation point and therefore not marked with
__THROW. */
extern ssize_t read (int __fd, void *__buf, size_t __nbytes) __wur
- __attr_access ((__write_only__, 2, 3));
+ __fortified_attr_access (__write_only__, 2, 3);
/* Write N bytes of BUF to FD. Return the number written, or -1.
__THROW. */
extern ssize_t pread (int __fd, void *__buf, size_t __nbytes,
__off_t __offset) __wur
- __attr_access ((__write_only__, 2, 3));
+ __fortified_attr_access (__write_only__, 2, 3);
/* Write N bytes of BUF to FD at the given position OFFSET without
changing the file pointer. Return the number written, or -1.
extern ssize_t __REDIRECT (pread, (int __fd, void *__buf, size_t __nbytes,
__off64_t __offset),
pread64) __wur
- __attr_access ((__write_only__, 2, 3));
+ __fortified_attr_access (__write_only__, 2, 3);
extern ssize_t __REDIRECT (pwrite, (int __fd, const void *__buf,
size_t __nbytes, __off64_t __offset),
pwrite64) __wur
or 0 for EOF. */
extern ssize_t pread64 (int __fd, void *__buf, size_t __nbytes,
__off64_t __offset) __wur
- __attr_access ((__write_only__, 2, 3));
+ __fortified_attr_access (__write_only__, 2, 3);
/* Write N bytes of BUF to FD at the given position OFFSET without
changing the file pointer. Return the number written, or -1. */
extern ssize_t pwrite64 (int __fd, const void *__buf, size_t __n,
an array is allocated with `malloc'; the array is SIZE
bytes long, unless SIZE == 0, in which case it is as
big as necessary. */
-extern char *getcwd (char *__buf, size_t __size) __THROW __wur
- __attr_access ((__write_only__, 1, 2));
+extern char *getcwd (char *__buf, size_t __size) __THROW __wur;
#ifdef __USE_GNU
/* Return a malloc'd string containing the current directory name.
#ifdef __USE_POSIX2
/* Get the value of the string-valued system variable NAME. */
extern size_t confstr (int __name, char *__buf, size_t __len) __THROW
- __attr_access ((__write_only__, 2, 3));
+ __fortified_attr_access (__write_only__, 2, 3);
#endif
the calling process is in. Otherwise, fill in the group IDs
of its supplementary groups in LIST and return the number written. */
extern int getgroups (int __size, __gid_t __list[]) __THROW __wur
- __attr_access ((__write_only__, 2, 1));
+ __fortified_attr_access (__write_only__, 2, 1);
#ifdef __USE_GNU
/* Return nonzero iff the calling process is in group GID. */
extern int group_member (__gid_t __gid) __THROW;
/* Store at most BUFLEN characters of the pathname of the terminal FD is
open on in BUF. Return 0 on success, otherwise an error number. */
extern int ttyname_r (int __fd, char *__buf, size_t __buflen)
- __THROW __nonnull ((2)) __wur __attr_access ((__write_only__, 2, 3));
+ __THROW __nonnull ((2)) __wur
+ __fortified_attr_access (__write_only__, 2, 3);
/* Return 1 if FD is a valid descriptor associated
with a terminal, zero if not. */
Returns the number of characters read, or -1 for errors. */
extern ssize_t readlink (const char *__restrict __path,
char *__restrict __buf, size_t __len)
- __THROW __nonnull ((1, 2)) __wur __attr_access ((__write_only__, 2, 3));
+ __THROW __nonnull ((1, 2)) __wur
+ __fortified_attr_access (__write_only__, 2, 3);
#endif /* Use POSIX.1-2001. */
/* Like readlink but a relative PATH is interpreted relative to FD. */
extern ssize_t readlinkat (int __fd, const char *__restrict __path,
char *__restrict __buf, size_t __len)
- __THROW __nonnull ((2, 3)) __wur __attr_access ((__write_only__, 3, 4));
+ __THROW __nonnull ((2, 3)) __wur
+ __fortified_attr_access (__write_only__, 3, 4);
#endif
/* Remove the link NAME. */
This function is a possible cancellation point and therefore not
marked with __THROW. */
extern int getlogin_r (char *__name, size_t __name_len) __nonnull ((1))
- __attr_access ((__write_only__, 1, 2));
+ __fortified_attr_access (__write_only__, 1, 2);
#endif
#ifdef __USE_MISC
The result is null-terminated if LEN is large enough for the full
name and the terminator. */
extern int gethostname (char *__name, size_t __len) __THROW __nonnull ((1))
- __attr_access ((__write_only__, 1, 2));
+ __fortified_attr_access (__write_only__, 1, 2);
#endif
Called just like `gethostname' and `sethostname'.
The NIS domain name is usually the empty string when not using NIS. */
extern int getdomainname (char *__name, size_t __len)
- __THROW __nonnull ((1)) __wur __attr_access ((__write_only__, 1, 2));
+ __THROW __nonnull ((1)) __wur
+ __fortified_attr_access (__write_only__, 1, 2);
extern int setdomainname (const char *__name, size_t __len)
__THROW __nonnull ((1)) __wur __attr_access ((__read_only__, 1, 2));
tst-aio7 tst-aio8 tst-aio9 tst-aio10 \
tst-mqueue1 tst-mqueue2 tst-mqueue3 tst-mqueue4 \
tst-mqueue5 tst-mqueue6 tst-mqueue7 tst-mqueue8 tst-mqueue9 \
+ tst-bz28213 \
tst-timer3 tst-timer4 tst-timer5 \
tst-cpuclock2 tst-cputimer1 tst-cputimer2 tst-cputimer3 \
tst-shm-cancel \
--- /dev/null
+/* Bug 28213: test for NULL pointer dereference in mq_notify.
+ Copyright (C) The GNU Toolchain Authors.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <mqueue.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <support/check.h>
+
+static mqd_t m = -1;
+static const char msg[] = "hello";
+
+static void
+check_bz28213_cb (union sigval sv)
+{
+ char buf[sizeof (msg)];
+
+ (void) sv;
+
+ TEST_VERIFY_EXIT ((size_t) mq_receive (m, buf, sizeof (buf), NULL)
+ == sizeof (buf));
+ TEST_VERIFY_EXIT (memcmp (buf, msg, sizeof (buf)) == 0);
+
+ exit (0);
+}
+
+static void
+check_bz28213 (void)
+{
+ struct sigevent sev;
+
+ memset (&sev, '\0', sizeof (sev));
+ sev.sigev_notify = SIGEV_THREAD;
+ sev.sigev_notify_function = check_bz28213_cb;
+
+ /* Step 1: Register & unregister notifier.
+ Helper thread should receive NOTIFY_REMOVED notification.
+ In a vulnerable version of glibc, NULL pointer dereference follows. */
+ TEST_VERIFY_EXIT (mq_notify (m, &sev) == 0);
+ TEST_VERIFY_EXIT (mq_notify (m, NULL) == 0);
+
+ /* Step 2: Once again, register notification.
+ Try to send one message.
+ Test is considered successful, if the callback does exit (0). */
+ TEST_VERIFY_EXIT (mq_notify (m, &sev) == 0);
+ TEST_VERIFY_EXIT (mq_send (m, msg, sizeof (msg), 1) == 0);
+
+ /* Wait... */
+ pause ();
+}
+
+static int
+do_test (void)
+{
+ static const char m_name[] = "/bz28213_queue";
+ struct mq_attr m_attr;
+
+ memset (&m_attr, '\0', sizeof (m_attr));
+ m_attr.mq_maxmsg = 1;
+ m_attr.mq_msgsize = sizeof (msg);
+
+ m = mq_open (m_name,
+ O_RDWR | O_CREAT | O_EXCL,
+ 0600,
+ &m_attr);
+
+ if (m < 0)
+ {
+ if (errno == ENOSYS)
+ FAIL_UNSUPPORTED ("POSIX message queues are not implemented\n");
+ FAIL_EXIT1 ("Failed to create POSIX message queue: %m\n");
+ }
+
+ TEST_VERIFY_EXIT (mq_unlink (m_name) == 0);
+
+ check_bz28213 ();
+
+ return 0;
+}
+
+#include <support/test-driver.c>
'gcc': 'vcs-11',
'glibc': 'vcs-mainline',
'gmp': '6.2.1',
- 'linux': '5.13',
+ 'linux': '5.14',
'mpc': '1.2.1',
'mpfr': '4.1.0',
'mig': 'vcs-mainline',
--- /dev/null
+#!/usr/bin/python3
+# ELF support functionality for Python.
+# Copyright (C) 2022 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+"""Basic ELF parser.
+
+Use Image.readfile(path) to read an ELF file into memory and begin
+parsing it.
+
+"""
+
+import collections
+import enum
+import struct
+
+if not hasattr(enum, 'IntFlag'):
+ import sys
+ sys.stdout.write(
+ 'warning: glibcelf.py needs Python 3.6 for enum support\n')
+ sys.exit(77)
+
+class _OpenIntEnum(enum.IntEnum):
+ """Integer enumeration that supports arbitrary int values."""
+ @classmethod
+ def _missing_(cls, value):
+ # See enum.IntFlag._create_pseudo_member_. This allows
+ # creating of enum constants with arbitrary integer values.
+ pseudo_member = int.__new__(cls, value)
+ pseudo_member._name_ = None
+ pseudo_member._value_ = value
+ return pseudo_member
+
+ def __repr__(self):
+ name = self._name_
+ if name is not None:
+ # The names have prefixes like SHT_, implying their type.
+ return name
+ return '{}({})'.format(self.__class__.__name__, self._value_)
+
+ def __str__(self):
+ name = self._name_
+ if name is not None:
+ return name
+ return str(self._value_)
+
+class ElfClass(_OpenIntEnum):
+ """ELF word size. Type of EI_CLASS values."""
+ ELFCLASSNONE = 0
+ ELFCLASS32 = 1
+ ELFCLASS64 = 2
+
+class ElfData(_OpenIntEnum):
+ """ELF endianness. Type of EI_DATA values."""
+ ELFDATANONE = 0
+ ELFDATA2LSB = 1
+ ELFDATA2MSB = 2
+
+class Machine(_OpenIntEnum):
+ """ELF machine type. Type of values in Ehdr.e_machine field."""
+ EM_NONE = 0
+ EM_M32 = 1
+ EM_SPARC = 2
+ EM_386 = 3
+ EM_68K = 4
+ EM_88K = 5
+ EM_IAMCU = 6
+ EM_860 = 7
+ EM_MIPS = 8
+ EM_S370 = 9
+ EM_MIPS_RS3_LE = 10
+ EM_PARISC = 15
+ EM_VPP500 = 17
+ EM_SPARC32PLUS = 18
+ EM_960 = 19
+ EM_PPC = 20
+ EM_PPC64 = 21
+ EM_S390 = 22
+ EM_SPU = 23
+ EM_V800 = 36
+ EM_FR20 = 37
+ EM_RH32 = 38
+ EM_RCE = 39
+ EM_ARM = 40
+ EM_FAKE_ALPHA = 41
+ EM_SH = 42
+ EM_SPARCV9 = 43
+ EM_TRICORE = 44
+ EM_ARC = 45
+ EM_H8_300 = 46
+ EM_H8_300H = 47
+ EM_H8S = 48
+ EM_H8_500 = 49
+ EM_IA_64 = 50
+ EM_MIPS_X = 51
+ EM_COLDFIRE = 52
+ EM_68HC12 = 53
+ EM_MMA = 54
+ EM_PCP = 55
+ EM_NCPU = 56
+ EM_NDR1 = 57
+ EM_STARCORE = 58
+ EM_ME16 = 59
+ EM_ST100 = 60
+ EM_TINYJ = 61
+ EM_X86_64 = 62
+ EM_PDSP = 63
+ EM_PDP10 = 64
+ EM_PDP11 = 65
+ EM_FX66 = 66
+ EM_ST9PLUS = 67
+ EM_ST7 = 68
+ EM_68HC16 = 69
+ EM_68HC11 = 70
+ EM_68HC08 = 71
+ EM_68HC05 = 72
+ EM_SVX = 73
+ EM_ST19 = 74
+ EM_VAX = 75
+ EM_CRIS = 76
+ EM_JAVELIN = 77
+ EM_FIREPATH = 78
+ EM_ZSP = 79
+ EM_MMIX = 80
+ EM_HUANY = 81
+ EM_PRISM = 82
+ EM_AVR = 83
+ EM_FR30 = 84
+ EM_D10V = 85
+ EM_D30V = 86
+ EM_V850 = 87
+ EM_M32R = 88
+ EM_MN10300 = 89
+ EM_MN10200 = 90
+ EM_PJ = 91
+ EM_OPENRISC = 92
+ EM_ARC_COMPACT = 93
+ EM_XTENSA = 94
+ EM_VIDEOCORE = 95
+ EM_TMM_GPP = 96
+ EM_NS32K = 97
+ EM_TPC = 98
+ EM_SNP1K = 99
+ EM_ST200 = 100
+ EM_IP2K = 101
+ EM_MAX = 102
+ EM_CR = 103
+ EM_F2MC16 = 104
+ EM_MSP430 = 105
+ EM_BLACKFIN = 106
+ EM_SE_C33 = 107
+ EM_SEP = 108
+ EM_ARCA = 109
+ EM_UNICORE = 110
+ EM_EXCESS = 111
+ EM_DXP = 112
+ EM_ALTERA_NIOS2 = 113
+ EM_CRX = 114
+ EM_XGATE = 115
+ EM_C166 = 116
+ EM_M16C = 117
+ EM_DSPIC30F = 118
+ EM_CE = 119
+ EM_M32C = 120
+ EM_TSK3000 = 131
+ EM_RS08 = 132
+ EM_SHARC = 133
+ EM_ECOG2 = 134
+ EM_SCORE7 = 135
+ EM_DSP24 = 136
+ EM_VIDEOCORE3 = 137
+ EM_LATTICEMICO32 = 138
+ EM_SE_C17 = 139
+ EM_TI_C6000 = 140
+ EM_TI_C2000 = 141
+ EM_TI_C5500 = 142
+ EM_TI_ARP32 = 143
+ EM_TI_PRU = 144
+ EM_MMDSP_PLUS = 160
+ EM_CYPRESS_M8C = 161
+ EM_R32C = 162
+ EM_TRIMEDIA = 163
+ EM_QDSP6 = 164
+ EM_8051 = 165
+ EM_STXP7X = 166
+ EM_NDS32 = 167
+ EM_ECOG1X = 168
+ EM_MAXQ30 = 169
+ EM_XIMO16 = 170
+ EM_MANIK = 171
+ EM_CRAYNV2 = 172
+ EM_RX = 173
+ EM_METAG = 174
+ EM_MCST_ELBRUS = 175
+ EM_ECOG16 = 176
+ EM_CR16 = 177
+ EM_ETPU = 178
+ EM_SLE9X = 179
+ EM_L10M = 180
+ EM_K10M = 181
+ EM_AARCH64 = 183
+ EM_AVR32 = 185
+ EM_STM8 = 186
+ EM_TILE64 = 187
+ EM_TILEPRO = 188
+ EM_MICROBLAZE = 189
+ EM_CUDA = 190
+ EM_TILEGX = 191
+ EM_CLOUDSHIELD = 192
+ EM_COREA_1ST = 193
+ EM_COREA_2ND = 194
+ EM_ARCV2 = 195
+ EM_OPEN8 = 196
+ EM_RL78 = 197
+ EM_VIDEOCORE5 = 198
+ EM_78KOR = 199
+ EM_56800EX = 200
+ EM_BA1 = 201
+ EM_BA2 = 202
+ EM_XCORE = 203
+ EM_MCHP_PIC = 204
+ EM_INTELGT = 205
+ EM_KM32 = 210
+ EM_KMX32 = 211
+ EM_EMX16 = 212
+ EM_EMX8 = 213
+ EM_KVARC = 214
+ EM_CDP = 215
+ EM_COGE = 216
+ EM_COOL = 217
+ EM_NORC = 218
+ EM_CSR_KALIMBA = 219
+ EM_Z80 = 220
+ EM_VISIUM = 221
+ EM_FT32 = 222
+ EM_MOXIE = 223
+ EM_AMDGPU = 224
+ EM_RISCV = 243
+ EM_BPF = 247
+ EM_CSKY = 252
+ EM_NUM = 253
+ EM_ALPHA = 0x9026
+
+class Et(_OpenIntEnum):
+ """ELF file type. Type of ET_* values and the Ehdr.e_type field."""
+ ET_NONE = 0
+ ET_REL = 1
+ ET_EXEC = 2
+ ET_DYN = 3
+ ET_CORE = 4
+
+class Shn(_OpenIntEnum):
+ """ELF reserved section indices."""
+ SHN_UNDEF = 0
+ SHN_BEFORE = 0xff00
+ SHN_AFTER = 0xff01
+ SHN_ABS = 0xfff1
+ SHN_COMMON = 0xfff2
+ SHN_XINDEX = 0xffff
+
+class ShnMIPS(enum.Enum):
+ """Supplemental SHN_* constants for EM_MIPS."""
+ SHN_MIPS_ACOMMON = 0xff00
+ SHN_MIPS_TEXT = 0xff01
+ SHN_MIPS_DATA = 0xff02
+ SHN_MIPS_SCOMMON = 0xff03
+ SHN_MIPS_SUNDEFINED = 0xff04
+
+class ShnPARISC(enum.Enum):
+ """Supplemental SHN_* constants for EM_PARISC."""
+ SHN_PARISC_ANSI_COMMON = 0xff00
+ SHN_PARISC_HUGE_COMMON = 0xff01
+
+class Sht(_OpenIntEnum):
+ """ELF section types. Type of SHT_* values."""
+ SHT_NULL = 0
+ SHT_PROGBITS = 1
+ SHT_SYMTAB = 2
+ SHT_STRTAB = 3
+ SHT_RELA = 4
+ SHT_HASH = 5
+ SHT_DYNAMIC = 6
+ SHT_NOTE = 7
+ SHT_NOBITS = 8
+ SHT_REL = 9
+ SHT_SHLIB = 10
+ SHT_DYNSYM = 11
+ SHT_INIT_ARRAY = 14
+ SHT_FINI_ARRAY = 15
+ SHT_PREINIT_ARRAY = 16
+ SHT_GROUP = 17
+ SHT_SYMTAB_SHNDX = 18
+ SHT_GNU_ATTRIBUTES = 0x6ffffff5
+ SHT_GNU_HASH = 0x6ffffff6
+ SHT_GNU_LIBLIST = 0x6ffffff7
+ SHT_CHECKSUM = 0x6ffffff8
+ SHT_SUNW_move = 0x6ffffffa
+ SHT_SUNW_COMDAT = 0x6ffffffb
+ SHT_SUNW_syminfo = 0x6ffffffc
+ SHT_GNU_verdef = 0x6ffffffd
+ SHT_GNU_verneed = 0x6ffffffe
+ SHT_GNU_versym = 0x6fffffff
+
+class ShtALPHA(enum.Enum):
+ """Supplemental SHT_* constants for EM_ALPHA."""
+ SHT_ALPHA_DEBUG = 0x70000001
+ SHT_ALPHA_REGINFO = 0x70000002
+
+class ShtARM(enum.Enum):
+ """Supplemental SHT_* constants for EM_ARM."""
+ SHT_ARM_EXIDX = 0x70000001
+ SHT_ARM_PREEMPTMAP = 0x70000002
+ SHT_ARM_ATTRIBUTES = 0x70000003
+
+class ShtCSKY(enum.Enum):
+ """Supplemental SHT_* constants for EM_CSKY."""
+ SHT_CSKY_ATTRIBUTES = 0x70000001
+
+class ShtIA_64(enum.Enum):
+ """Supplemental SHT_* constants for EM_IA_64."""
+ SHT_IA_64_EXT = 0x70000000
+ SHT_IA_64_UNWIND = 0x70000001
+
+class ShtMIPS(enum.Enum):
+ """Supplemental SHT_* constants for EM_MIPS."""
+ SHT_MIPS_LIBLIST = 0x70000000
+ SHT_MIPS_MSYM = 0x70000001
+ SHT_MIPS_CONFLICT = 0x70000002
+ SHT_MIPS_GPTAB = 0x70000003
+ SHT_MIPS_UCODE = 0x70000004
+ SHT_MIPS_DEBUG = 0x70000005
+ SHT_MIPS_REGINFO = 0x70000006
+ SHT_MIPS_PACKAGE = 0x70000007
+ SHT_MIPS_PACKSYM = 0x70000008
+ SHT_MIPS_RELD = 0x70000009
+ SHT_MIPS_IFACE = 0x7000000b
+ SHT_MIPS_CONTENT = 0x7000000c
+ SHT_MIPS_OPTIONS = 0x7000000d
+ SHT_MIPS_SHDR = 0x70000010
+ SHT_MIPS_FDESC = 0x70000011
+ SHT_MIPS_EXTSYM = 0x70000012
+ SHT_MIPS_DENSE = 0x70000013
+ SHT_MIPS_PDESC = 0x70000014
+ SHT_MIPS_LOCSYM = 0x70000015
+ SHT_MIPS_AUXSYM = 0x70000016
+ SHT_MIPS_OPTSYM = 0x70000017
+ SHT_MIPS_LOCSTR = 0x70000018
+ SHT_MIPS_LINE = 0x70000019
+ SHT_MIPS_RFDESC = 0x7000001a
+ SHT_MIPS_DELTASYM = 0x7000001b
+ SHT_MIPS_DELTAINST = 0x7000001c
+ SHT_MIPS_DELTACLASS = 0x7000001d
+ SHT_MIPS_DWARF = 0x7000001e
+ SHT_MIPS_DELTADECL = 0x7000001f
+ SHT_MIPS_SYMBOL_LIB = 0x70000020
+ SHT_MIPS_EVENTS = 0x70000021
+ SHT_MIPS_TRANSLATE = 0x70000022
+ SHT_MIPS_PIXIE = 0x70000023
+ SHT_MIPS_XLATE = 0x70000024
+ SHT_MIPS_XLATE_DEBUG = 0x70000025
+ SHT_MIPS_WHIRL = 0x70000026
+ SHT_MIPS_EH_REGION = 0x70000027
+ SHT_MIPS_XLATE_OLD = 0x70000028
+ SHT_MIPS_PDR_EXCEPTION = 0x70000029
+ SHT_MIPS_XHASH = 0x7000002b
+
+class ShtPARISC(enum.Enum):
+ """Supplemental SHT_* constants for EM_PARISC."""
+ SHT_PARISC_EXT = 0x70000000
+ SHT_PARISC_UNWIND = 0x70000001
+ SHT_PARISC_DOC = 0x70000002
+
+class Pf(enum.IntFlag):
+ """Program header flags. Type of Phdr.p_flags values."""
+ PF_X = 1
+ PF_W = 2
+ PF_R = 4
+
+class PfARM(enum.IntFlag):
+ """Supplemental PF_* flags for EM_ARM."""
+ PF_ARM_SB = 0x10000000
+ PF_ARM_PI = 0x20000000
+ PF_ARM_ABS = 0x40000000
+
+class PfPARISC(enum.IntFlag):
+ """Supplemental PF_* flags for EM_PARISC."""
+ PF_HP_PAGE_SIZE = 0x00100000
+ PF_HP_FAR_SHARED = 0x00200000
+ PF_HP_NEAR_SHARED = 0x00400000
+ PF_HP_CODE = 0x01000000
+ PF_HP_MODIFY = 0x02000000
+ PF_HP_LAZYSWAP = 0x04000000
+ PF_HP_SBP = 0x08000000
+
+class PfIA_64(enum.IntFlag):
+ """Supplemental PF_* flags for EM_IA_64."""
+ PF_IA_64_NORECOV = 0x80000000
+
+class PfMIPS(enum.IntFlag):
+ """Supplemental PF_* flags for EM_MIPS."""
+ PF_MIPS_LOCAL = 0x10000000
+
+class Shf(enum.IntFlag):
+ """Section flags. Type of Shdr.sh_flags values."""
+ SHF_WRITE = 1 << 0
+ SHF_ALLOC = 1 << 1
+ SHF_EXECINSTR = 1 << 2
+ SHF_MERGE = 1 << 4
+ SHF_STRINGS = 1 << 5
+ SHF_INFO_LINK = 1 << 6
+ SHF_LINK_ORDER = 1 << 7
+ SHF_OS_NONCONFORMING = 256
+ SHF_GROUP = 1 << 9
+ SHF_TLS = 1 << 10
+ SHF_COMPRESSED = 1 << 11
+ SHF_GNU_RETAIN = 1 << 21
+ SHF_ORDERED = 1 << 30
+ SHF_EXCLUDE = 1 << 31
+
+class ShfALPHA(enum.IntFlag):
+ """Supplemental SHF_* constants for EM_ALPHA."""
+ SHF_ALPHA_GPREL = 0x10000000
+
+class ShfARM(enum.IntFlag):
+ """Supplemental SHF_* constants for EM_ARM."""
+ SHF_ARM_ENTRYSECT = 0x10000000
+ SHF_ARM_COMDEF = 0x80000000
+
+class ShfIA_64(enum.IntFlag):
+ """Supplemental SHF_* constants for EM_IA_64."""
+ SHF_IA_64_SHORT = 0x10000000
+ SHF_IA_64_NORECOV = 0x20000000
+
+class ShfMIPS(enum.IntFlag):
+ """Supplemental SHF_* constants for EM_MIPS."""
+ SHF_MIPS_GPREL = 0x10000000
+ SHF_MIPS_MERGE = 0x20000000
+ SHF_MIPS_ADDR = 0x40000000
+ SHF_MIPS_STRINGS = 0x80000000
+ SHF_MIPS_NOSTRIP = 0x08000000
+ SHF_MIPS_LOCAL = 0x04000000
+ SHF_MIPS_NAMES = 0x02000000
+ SHF_MIPS_NODUPE = 0x01000000
+
+class ShfPARISC(enum.IntFlag):
+ """Supplemental SHF_* constants for EM_PARISC."""
+ SHF_PARISC_SHORT = 0x20000000
+ SHF_PARISC_HUGE = 0x40000000
+ SHF_PARISC_SBP = 0x80000000
+
+class Stb(_OpenIntEnum):
+ """ELF symbol binding type."""
+ STB_LOCAL = 0
+ STB_GLOBAL = 1
+ STB_WEAK = 2
+ STB_GNU_UNIQUE = 10
+ STB_MIPS_SPLIT_COMMON = 13
+
+class Stt(_OpenIntEnum):
+ """ELF symbol type."""
+ STT_NOTYPE = 0
+ STT_OBJECT = 1
+ STT_FUNC = 2
+ STT_SECTION = 3
+ STT_FILE = 4
+ STT_COMMON = 5
+ STT_TLS = 6
+ STT_GNU_IFUNC = 10
+
+class SttARM(enum.Enum):
+ """Supplemental STT_* constants for EM_ARM."""
+ STT_ARM_TFUNC = 13
+ STT_ARM_16BIT = 15
+
+class SttPARISC(enum.Enum):
+ """Supplemental STT_* constants for EM_PARISC."""
+ STT_HP_OPAQUE = 11
+ STT_HP_STUB = 12
+ STT_PARISC_MILLICODE = 13
+
+class SttSPARC(enum.Enum):
+ """Supplemental STT_* constants for EM_SPARC."""
+ STT_SPARC_REGISTER = 13
+
+class SttX86_64(enum.Enum):
+ """Supplemental STT_* constants for EM_X86_64."""
+ SHT_X86_64_UNWIND = 0x70000001
+
+class Pt(_OpenIntEnum):
+ """ELF program header types. Type of Phdr.p_type."""
+ PT_NULL = 0
+ PT_LOAD = 1
+ PT_DYNAMIC = 2
+ PT_INTERP = 3
+ PT_NOTE = 4
+ PT_SHLIB = 5
+ PT_PHDR = 6
+ PT_TLS = 7
+ PT_NUM = 8
+ PT_GNU_EH_FRAME = 0x6474e550
+ PT_GNU_STACK = 0x6474e551
+ PT_GNU_RELRO = 0x6474e552
+ PT_GNU_PROPERTY = 0x6474e553
+ PT_SUNWBSS = 0x6ffffffa
+ PT_SUNWSTACK = 0x6ffffffb
+
+class PtARM(enum.Enum):
+ """Supplemental PT_* constants for EM_ARM."""
+ PT_ARM_EXIDX = 0x70000001
+
+class PtIA_64(enum.Enum):
+ """Supplemental PT_* constants for EM_IA_64."""
+ PT_IA_64_HP_OPT_ANOT = 0x60000012
+ PT_IA_64_HP_HSL_ANOT = 0x60000013
+ PT_IA_64_HP_STACK = 0x60000014
+ PT_IA_64_ARCHEXT = 0x70000000
+ PT_IA_64_UNWIND = 0x70000001
+
+class PtMIPS(enum.Enum):
+ """Supplemental PT_* constants for EM_MIPS."""
+ PT_MIPS_REGINFO = 0x70000000
+ PT_MIPS_RTPROC = 0x70000001
+ PT_MIPS_OPTIONS = 0x70000002
+ PT_MIPS_ABIFLAGS = 0x70000003
+
+class PtPARISC(enum.Enum):
+ """Supplemental PT_* constants for EM_PARISC."""
+ PT_HP_TLS = 0x60000000
+ PT_HP_CORE_NONE = 0x60000001
+ PT_HP_CORE_VERSION = 0x60000002
+ PT_HP_CORE_KERNEL = 0x60000003
+ PT_HP_CORE_COMM = 0x60000004
+ PT_HP_CORE_PROC = 0x60000005
+ PT_HP_CORE_LOADABLE = 0x60000006
+ PT_HP_CORE_STACK = 0x60000007
+ PT_HP_CORE_SHM = 0x60000008
+ PT_HP_CORE_MMF = 0x60000009
+ PT_HP_PARALLEL = 0x60000010
+ PT_HP_FASTBIND = 0x60000011
+ PT_HP_OPT_ANNOT = 0x60000012
+ PT_HP_HSL_ANNOT = 0x60000013
+ PT_HP_STACK = 0x60000014
+ PT_PARISC_ARCHEXT = 0x70000000
+ PT_PARISC_UNWIND = 0x70000001
+
+class Dt(_OpenIntEnum):
+ """ELF dynamic segment tags. Type of Dyn.d_tag."""
+ DT_NULL = 0
+ DT_NEEDED = 1
+ DT_PLTRELSZ = 2
+ DT_PLTGOT = 3
+ DT_HASH = 4
+ DT_STRTAB = 5
+ DT_SYMTAB = 6
+ DT_RELA = 7
+ DT_RELASZ = 8
+ DT_RELAENT = 9
+ DT_STRSZ = 10
+ DT_SYMENT = 11
+ DT_INIT = 12
+ DT_FINI = 13
+ DT_SONAME = 14
+ DT_RPATH = 15
+ DT_SYMBOLIC = 16
+ DT_REL = 17
+ DT_RELSZ = 18
+ DT_RELENT = 19
+ DT_PLTREL = 20
+ DT_DEBUG = 21
+ DT_TEXTREL = 22
+ DT_JMPREL = 23
+ DT_BIND_NOW = 24
+ DT_INIT_ARRAY = 25
+ DT_FINI_ARRAY = 26
+ DT_INIT_ARRAYSZ = 27
+ DT_FINI_ARRAYSZ = 28
+ DT_RUNPATH = 29
+ DT_FLAGS = 30
+ DT_PREINIT_ARRAY = 32
+ DT_PREINIT_ARRAYSZ = 33
+ DT_SYMTAB_SHNDX = 34
+ DT_GNU_PRELINKED = 0x6ffffdf5
+ DT_GNU_CONFLICTSZ = 0x6ffffdf6
+ DT_GNU_LIBLISTSZ = 0x6ffffdf7
+ DT_CHECKSUM = 0x6ffffdf8
+ DT_PLTPADSZ = 0x6ffffdf9
+ DT_MOVEENT = 0x6ffffdfa
+ DT_MOVESZ = 0x6ffffdfb
+ DT_FEATURE_1 = 0x6ffffdfc
+ DT_POSFLAG_1 = 0x6ffffdfd
+ DT_SYMINSZ = 0x6ffffdfe
+ DT_SYMINENT = 0x6ffffdff
+ DT_GNU_HASH = 0x6ffffef5
+ DT_TLSDESC_PLT = 0x6ffffef6
+ DT_TLSDESC_GOT = 0x6ffffef7
+ DT_GNU_CONFLICT = 0x6ffffef8
+ DT_GNU_LIBLIST = 0x6ffffef9
+ DT_CONFIG = 0x6ffffefa
+ DT_DEPAUDIT = 0x6ffffefb
+ DT_AUDIT = 0x6ffffefc
+ DT_PLTPAD = 0x6ffffefd
+ DT_MOVETAB = 0x6ffffefe
+ DT_SYMINFO = 0x6ffffeff
+ DT_VERSYM = 0x6ffffff0
+ DT_RELACOUNT = 0x6ffffff9
+ DT_RELCOUNT = 0x6ffffffa
+ DT_FLAGS_1 = 0x6ffffffb
+ DT_VERDEF = 0x6ffffffc
+ DT_VERDEFNUM = 0x6ffffffd
+ DT_VERNEED = 0x6ffffffe
+ DT_VERNEEDNUM = 0x6fffffff
+ DT_AUXILIARY = 0x7ffffffd
+ DT_FILTER = 0x7fffffff
+
+class DtAARCH64(enum.Enum):
+ """Supplemental DT_* constants for EM_AARCH64."""
+ DT_AARCH64_BTI_PLT = 0x70000001
+ DT_AARCH64_PAC_PLT = 0x70000003
+ DT_AARCH64_VARIANT_PCS = 0x70000005
+
+class DtALPHA(enum.Enum):
+ """Supplemental DT_* constants for EM_ALPHA."""
+ DT_ALPHA_PLTRO = 0x70000000
+
+class DtALTERA_NIOS2(enum.Enum):
+ """Supplemental DT_* constants for EM_ALTERA_NIOS2."""
+ DT_NIOS2_GP = 0x70000002
+
+class DtIA_64(enum.Enum):
+ """Supplemental DT_* constants for EM_IA_64."""
+ DT_IA_64_PLT_RESERVE = 0x70000000
+
+class DtMIPS(enum.Enum):
+ """Supplemental DT_* constants for EM_MIPS."""
+ DT_MIPS_RLD_VERSION = 0x70000001
+ DT_MIPS_TIME_STAMP = 0x70000002
+ DT_MIPS_ICHECKSUM = 0x70000003
+ DT_MIPS_IVERSION = 0x70000004
+ DT_MIPS_FLAGS = 0x70000005
+ DT_MIPS_BASE_ADDRESS = 0x70000006
+ DT_MIPS_MSYM = 0x70000007
+ DT_MIPS_CONFLICT = 0x70000008
+ DT_MIPS_LIBLIST = 0x70000009
+ DT_MIPS_LOCAL_GOTNO = 0x7000000a
+ DT_MIPS_CONFLICTNO = 0x7000000b
+ DT_MIPS_LIBLISTNO = 0x70000010
+ DT_MIPS_SYMTABNO = 0x70000011
+ DT_MIPS_UNREFEXTNO = 0x70000012
+ DT_MIPS_GOTSYM = 0x70000013
+ DT_MIPS_HIPAGENO = 0x70000014
+ DT_MIPS_RLD_MAP = 0x70000016
+ DT_MIPS_DELTA_CLASS = 0x70000017
+ DT_MIPS_DELTA_CLASS_NO = 0x70000018
+ DT_MIPS_DELTA_INSTANCE = 0x70000019
+ DT_MIPS_DELTA_INSTANCE_NO = 0x7000001a
+ DT_MIPS_DELTA_RELOC = 0x7000001b
+ DT_MIPS_DELTA_RELOC_NO = 0x7000001c
+ DT_MIPS_DELTA_SYM = 0x7000001d
+ DT_MIPS_DELTA_SYM_NO = 0x7000001e
+ DT_MIPS_DELTA_CLASSSYM = 0x70000020
+ DT_MIPS_DELTA_CLASSSYM_NO = 0x70000021
+ DT_MIPS_CXX_FLAGS = 0x70000022
+ DT_MIPS_PIXIE_INIT = 0x70000023
+ DT_MIPS_SYMBOL_LIB = 0x70000024
+ DT_MIPS_LOCALPAGE_GOTIDX = 0x70000025
+ DT_MIPS_LOCAL_GOTIDX = 0x70000026
+ DT_MIPS_HIDDEN_GOTIDX = 0x70000027
+ DT_MIPS_PROTECTED_GOTIDX = 0x70000028
+ DT_MIPS_OPTIONS = 0x70000029
+ DT_MIPS_INTERFACE = 0x7000002a
+ DT_MIPS_DYNSTR_ALIGN = 0x7000002b
+ DT_MIPS_INTERFACE_SIZE = 0x7000002c
+ DT_MIPS_RLD_TEXT_RESOLVE_ADDR = 0x7000002d
+ DT_MIPS_PERF_SUFFIX = 0x7000002e
+ DT_MIPS_COMPACT_SIZE = 0x7000002f
+ DT_MIPS_GP_VALUE = 0x70000030
+ DT_MIPS_AUX_DYNAMIC = 0x70000031
+ DT_MIPS_PLTGOT = 0x70000032
+ DT_MIPS_RWPLT = 0x70000034
+ DT_MIPS_RLD_MAP_REL = 0x70000035
+ DT_MIPS_XHASH = 0x70000036
+
+class DtPPC(enum.Enum):
+ """Supplemental DT_* constants for EM_PPC."""
+ DT_PPC_GOT = 0x70000000
+ DT_PPC_OPT = 0x70000001
+
+class DtPPC64(enum.Enum):
+ """Supplemental DT_* constants for EM_PPC64."""
+ DT_PPC64_GLINK = 0x70000000
+ DT_PPC64_OPD = 0x70000001
+ DT_PPC64_OPDSZ = 0x70000002
+ DT_PPC64_OPT = 0x70000003
+
+class DtSPARC(enum.Enum):
+ """Supplemental DT_* constants for EM_SPARC."""
+ DT_SPARC_REGISTER = 0x70000001
+
+class StInfo:
+ """ELF symbol binding and type. Type of the Sym.st_info field."""
+ def __init__(self, arg0, arg1=None):
+ if isinstance(arg0, int) and arg1 is None:
+ self.bind = Stb(arg0 >> 4)
+ self.type = Stt(arg0 & 15)
+ else:
+ self.bind = Stb(arg0)
+ self.type = Stt(arg1)
+
+ def value(self):
+ """Returns the raw value for the bind/type combination."""
+ return (self.bind.value() << 4) | (self.type.value())
+
+# Type in an ELF file. Used for deserialization.
+_Layout = collections.namedtuple('_Layout', 'unpack size')
+
+def _define_layouts(baseclass: type, layout32: str, layout64: str,
+ types=None, fields32=None):
+ """Assign variants dict to baseclass.
+
+ The variants dict is indexed by (ElfClass, ElfData) pairs, and its
+ values are _Layout instances.
+
+ """
+ struct32 = struct.Struct(layout32)
+ struct64 = struct.Struct(layout64)
+
+ # Check that the struct formats yield the right number of components.
+ for s in (struct32, struct64):
+ example = s.unpack(b' ' * s.size)
+ if len(example) != len(baseclass._fields):
+ raise ValueError('{!r} yields wrong field count: {} != {}'.format(
+ s.format, len(example), len(baseclass._fields)))
+
+ # Check that field names in types are correct.
+ if types is None:
+ types = ()
+ for n in types:
+ if n not in baseclass._fields:
+ raise ValueError('{} does not have field {!r}'.format(
+ baseclass.__name__, n))
+
+ if fields32 is not None \
+ and set(fields32) != set(baseclass._fields):
+ raise ValueError('{!r} is not a permutation of the fields {!r}'.format(
+ fields32, baseclass._fields))
+
+ def unique_name(name, used_names = (set((baseclass.__name__,))
+ | set(baseclass._fields)
+ | {n.__name__
+ for n in (types or {}).values()})):
+ """Find a name that is not used for a class or field name."""
+ candidate = name
+ n = 0
+ while candidate in used_names:
+ n += 1
+ candidate = '{}{}'.format(name, n)
+ used_names.add(candidate)
+ return candidate
+
+ blob_name = unique_name('blob')
+ struct_unpack_name = unique_name('struct_unpack')
+ comps_name = unique_name('comps')
+
+ layouts = {}
+ for (bits, elfclass, layout, fields) in (
+ (32, ElfClass.ELFCLASS32, layout32, fields32),
+ (64, ElfClass.ELFCLASS64, layout64, None),
+ ):
+ for (elfdata, structprefix, funcsuffix) in (
+ (ElfData.ELFDATA2LSB, '<', 'LE'),
+ (ElfData.ELFDATA2MSB, '>', 'BE'),
+ ):
+ env = {
+ baseclass.__name__: baseclass,
+ struct_unpack_name: struct.unpack,
+ }
+
+ # Add the type converters.
+ if types:
+ for cls in types.values():
+ env[cls.__name__] = cls
+
+ funcname = ''.join(
+ ('unpack_', baseclass.__name__, str(bits), funcsuffix))
+
+ code = '''
+def {funcname}({blob_name}):
+'''.format(funcname=funcname, blob_name=blob_name)
+
+ indent = ' ' * 4
+ unpack_call = '{}({!r}, {})'.format(
+ struct_unpack_name, structprefix + layout, blob_name)
+ field_names = ', '.join(baseclass._fields)
+ if types is None and fields is None:
+ code += '{}return {}({})\n'.format(
+ indent, baseclass.__name__, unpack_call)
+ else:
+ # Destructuring tuple assignment.
+ if fields is None:
+ code += '{}{} = {}\n'.format(
+ indent, field_names, unpack_call)
+ else:
+ # Use custom field order.
+ code += '{}{} = {}\n'.format(
+ indent, ', '.join(fields), unpack_call)
+
+ # Perform the type conversions.
+ for n in baseclass._fields:
+ if n in types:
+ code += '{}{} = {}({})\n'.format(
+ indent, n, types[n].__name__, n)
+ # Create the named tuple.
+ code += '{}return {}({})\n'.format(
+ indent, baseclass.__name__, field_names)
+
+ exec(code, env)
+ layouts[(elfclass, elfdata)] = _Layout(
+ env[funcname], struct.calcsize(layout))
+ baseclass.layouts = layouts
+
+
+# Corresponds to EI_* indices into Elf*_Ehdr.e_ident.
+class Ident(collections.namedtuple('Ident',
+    'ei_mag ei_class ei_data ei_version ei_osabi ei_abiversion ei_pad')):
+
+    def __new__(cls, *args):
+        """Construct an object from a blob or its constituent fields."""
+        # A single argument is treated as the raw blob to be parsed.
+        if len(args) == 1:
+            return cls.unpack(args[0])
+        return cls.__base__.__new__(cls, *args)
+
+    @staticmethod
+    def unpack(blob: memoryview) -> 'Ident':
+        """Parse raw data into a tuple."""
+        ei_mag, ei_class, ei_data, ei_version, ei_osabi, ei_abiversion, \
+            ei_pad = struct.unpack('4s5B7s', blob)
+        # ei_class and ei_data are converted to their enum types; the
+        # remaining components are kept as unpacked.
+        return Ident(ei_mag, ElfClass(ei_class), ElfData(ei_data),
+                     ei_version, ei_osabi, ei_abiversion, ei_pad)
+    # Number of bytes consumed by unpack (the size of '4s5B7s').
+    size = 16
+
+# Corresponds to Elf32_Ehdr and Elf64_Ehdr.
+Ehdr = collections.namedtuple('Ehdr',
+ 'e_ident e_type e_machine e_version e_entry e_phoff e_shoff e_flags'
+ + ' e_ehsize e_phentsize e_phnum e_shentsize e_shnum e_shstrndx')
+_define_layouts(Ehdr,
+ layout32='16s2H5I6H',
+ layout64='16s2HI3QI6H',
+ types=dict(e_ident=Ident,
+ e_machine=Machine,
+ e_type=Et,
+ e_shstrndx=Shn))
+
+# Corresponds to Elf32_Phdr and Elf64_Phdr. Order follows the latter.
+Phdr = collections.namedtuple('Phdr',
+ 'p_type p_flags p_offset p_vaddr p_paddr p_filesz p_memsz p_align')
+_define_layouts(Phdr,
+ layout32='8I',
+ fields32=('p_type', 'p_offset', 'p_vaddr', 'p_paddr',
+ 'p_filesz', 'p_memsz', 'p_flags', 'p_align'),
+ layout64='2I6Q',
+ types=dict(p_type=Pt, p_flags=Pf))
+
+
+# Corresponds to Elf32_Shdr and Elf64_Shdr.
+class Shdr(collections.namedtuple('Shdr',
+ 'sh_name sh_type sh_flags sh_addr sh_offset sh_size sh_link sh_info'
+ + ' sh_addralign sh_entsize')):
+ def resolve(self, strtab: 'StringTable') -> 'Shdr':
+ """Resolve sh_name using a string table."""
+ return self.__class__(strtab.get(self[0]), *self[1:])
+_define_layouts(Shdr,
+ layout32='10I',
+ layout64='2I4Q2I2Q',
+ types=dict(sh_type=Sht,
+ sh_flags=Shf,
+ sh_link=Shn))
+
+# Corresponds to Elf32_Dyn and Elf64_Dyn. The nesting through the
+# d_un union is skipped, and d_ptr is missing (its representation in
+# Python would be identical to d_val).
+Dyn = collections.namedtuple('Dyn', 'd_tag d_val')
+_define_layouts(Dyn,
+ layout32='2i',
+ layout64='2q',
+ types=dict(d_tag=Dt))
+
+# Corresponds to Elf32_Sym and Elf64_Sym.
+class Sym(collections.namedtuple('Sym',
+ 'st_name st_info st_other st_shndx st_value st_size')):
+ def resolve(self, strtab: 'StringTable') -> 'Sym':
+ """Resolve st_name using a string table."""
+ return self.__class__(strtab.get(self[0]), *self[1:])
+_define_layouts(Sym,
+ layout32='3I2BH',
+ layout64='I2BH2Q',
+ fields32=('st_name', 'st_value', 'st_size', 'st_info',
+ 'st_other', 'st_shndx'),
+ types=dict(st_shndx=Shn,
+ st_info=StInfo))
+
+# Corresponds to Elf32_Rel and Elf64_Rel.
+Rel = collections.namedtuple('Rel', 'r_offset r_info')
+_define_layouts(Rel,
+ layout32='2I',
+ layout64='2Q')
+
+# Corresponds to Elf32_Rel and Elf64_Rel.
+Rela = collections.namedtuple('Rela', 'r_offset r_info r_addend')
+_define_layouts(Rela,
+ layout32='3I',
+ layout64='3Q')
+
+class StringTable:
+ """ELF string table."""
+ def __init__(self, blob):
+ """Create a new string table backed by the data in the blob.
+
+ blob: a memoryview-like object
+
+ """
+ self.blob = blob
+
+ def get(self, index) -> bytes:
+ """Returns the null-terminated byte string at the index."""
+ blob = self.blob
+ endindex = index
+ while True:
+ if blob[endindex] == 0:
+ return bytes(blob[index:endindex])
+ endindex += 1
+
+class Image:
+ """ELF image parser."""
+ def __init__(self, image):
+ """Create an ELF image from binary image data.
+
+ image: a memoryview-like object that supports efficient range
+ subscripting.
+
+ """
+ self.image = image
+ ident = self.read(Ident, 0)
+ classdata = (ident.ei_class, ident.ei_data)
+ # Set self.Ehdr etc. to the subtypes with the right parsers.
+ for typ in (Ehdr, Phdr, Shdr, Dyn, Sym, Rel, Rela):
+ setattr(self, typ.__name__, typ.layouts.get(classdata, None))
+
+ if self.Ehdr is not None:
+ self.ehdr = self.read(self.Ehdr, 0)
+ self._shdr_num = self._compute_shdr_num()
+ else:
+ self.ehdr = None
+ self._shdr_num = 0
+
+ self._section = {}
+ self._stringtab = {}
+
+ if self._shdr_num > 0:
+ self._shdr_strtab = self._find_shdr_strtab()
+ else:
+ self._shdr_strtab = None
+
+ @staticmethod
+ def readfile(path: str) -> 'Image':
+ """Reads the ELF file at the specified path."""
+ with open(path, 'rb') as inp:
+ return Image(memoryview(inp.read()))
+
+ def _compute_shdr_num(self) -> int:
+ """Computes the actual number of section headers."""
+ shnum = self.ehdr.e_shnum
+ if shnum == 0:
+ if self.ehdr.e_shoff == 0 or self.ehdr.e_shentsize == 0:
+ # No section headers.
+ return 0
+ # Otherwise the extension mechanism is used (which may be
+ # needed because e_shnum is just 16 bits).
+ return self.read(self.Shdr, self.ehdr.e_shoff).sh_size
+ return shnum
+
+ def _find_shdr_strtab(self) -> StringTable:
+ """Finds the section header string table (maybe via extensions)."""
+ shstrndx = self.ehdr.e_shstrndx
+ if shstrndx == Shn.SHN_XINDEX:
+ shstrndx = self.read(self.Shdr, self.ehdr.e_shoff).sh_link
+ return self._find_stringtab(shstrndx)
+
+    def read(self, typ: type, offset:int ):
+        """Reads an object at a specific offset.
+
+        typ must provide a size attribute and an unpack callable, as
+        Ident does and as the _Layout objects created by
+        _define_layouts do.  The size bytes of the image starting at
+        offset are passed to unpack, and its result is returned.
+
+        """
+        return typ.unpack(self.image[offset: offset + typ.size])
+
+ def phdrs(self) -> Phdr:
+ """Generator iterating over the program headers."""
+ if self.ehdr is None:
+ return
+ size = self.ehdr.e_phentsize
+ if size != self.Phdr.size:
+ raise ValueError('Unexpected Phdr size in ELF header: {} != {}'
+ .format(size, self.Phdr.size))
+
+ offset = self.ehdr.e_phoff
+ for _ in range(self.ehdr.e_phnum):
+ yield self.read(self.Phdr, offset)
+ offset += size
+
+ def shdrs(self, resolve: bool=True) -> Shdr:
+ """Generator iterating over the section headers.
+
+ If resolve, section names are automatically translated
+ using the section header string table.
+
+ """
+ if self._shdr_num == 0:
+ return
+
+ size = self.ehdr.e_shentsize
+ if size != self.Shdr.size:
+ raise ValueError('Unexpected Shdr size in ELF header: {} != {}'
+ .format(size, self.Shdr.size))
+
+ offset = self.ehdr.e_shoff
+ for _ in range(self._shdr_num):
+ shdr = self.read(self.Shdr, offset)
+ if resolve:
+ shdr = shdr.resolve(self._shdr_strtab)
+ yield shdr
+ offset += size
+
+ def dynamic(self) -> Dyn:
+ """Generator iterating over the dynamic segment."""
+ for phdr in self.phdrs():
+ if phdr.p_type == Pt.PT_DYNAMIC:
+ # Pick the first dynamic segment, like the loader.
+ if phdr.p_filesz == 0:
+ # Probably separated debuginfo.
+ return
+ offset = phdr.p_offset
+ end = offset + phdr.p_memsz
+ size = self.Dyn.size
+ while True:
+ next_offset = offset + size
+ if next_offset > end:
+ raise ValueError(
+ 'Dynamic segment size {} is not a multiple of Dyn size {}'.format(
+ phdr.p_memsz, size))
+ yield self.read(self.Dyn, offset)
+ if next_offset == end:
+ return
+ offset = next_offset
+
+ def syms(self, shdr: Shdr, resolve: bool=True) -> Sym:
+ """A generator iterating over a symbol table.
+
+ If resolve, symbol names are automatically translated using
+ the string table for the symbol table.
+
+ """
+ assert shdr.sh_type == Sht.SHT_SYMTAB
+ size = shdr.sh_entsize
+ if size != self.Sym.size:
+ raise ValueError('Invalid symbol table entry size {}'.format(size))
+ offset = shdr.sh_offset
+ end = shdr.sh_offset + shdr.sh_size
+ if resolve:
+ strtab = self._find_stringtab(shdr.sh_link)
+ while offset < end:
+ sym = self.read(self.Sym, offset)
+ if resolve:
+ sym = sym.resolve(strtab)
+ yield sym
+ offset += size
+ if offset != end:
+ raise ValueError('Symbol table is not a multiple of entry size')
+
+ def lookup_string(self, strtab_index: int, strtab_offset: int) -> bytes:
+ """Looks up a string in a string table identified by its link index."""
+ try:
+ strtab = self._stringtab[strtab_index]
+ except KeyError:
+ strtab = self._find_stringtab(strtab_index)
+ return strtab.get(strtab_offset)
+
+ def find_section(self, shndx: Shn) -> Shdr:
+ """Returns the section header for the indexed section.
+
+ The section name is not resolved.
+ """
+ try:
+ return self._section[shndx]
+ except KeyError:
+ pass
+ if shndx in Shn:
+ raise ValueError('Reserved section index {}'.format(shndx))
+ idx = shndx.value
+ if idx < 0 or idx > self._shdr_num:
+ raise ValueError('Section index {} out of range [0, {})'.format(
+ idx, self._shdr_num))
+ shdr = self.read(
+ self.Shdr, self.ehdr.e_shoff + idx * self.Shdr.size)
+ self._section[shndx] = shdr
+ return shdr
+
+ def _find_stringtab(self, sh_link: int) -> StringTable:
+ if sh_link in self._stringtab:
+ return self._stringtab
+ if sh_link < 0 or sh_link >= self._shdr_num:
+ raise ValueError('Section index {} out of range [0, {})'.format(
+ sh_link, self._shdr_num))
+ shdr = self.read(
+ self.Shdr, self.ehdr.e_shoff + sh_link * self.Shdr.size)
+ if shdr.sh_type != Sht.SHT_STRTAB:
+ raise ValueError(
+ 'Section {} is not a string table: {}'.format(
+ sh_link, shdr.sh_type))
+ strtab = StringTable(
+ self.image[shdr.sh_offset:shdr.sh_offset + shdr.sh_size])
+ # This could retrain essentially arbitrary amounts of data,
+ # but caching string tables seems important for performance.
+ self._stringtab[sh_link] = strtab
+ return strtab
+
+
+__all__ = [name for name in dir() if name[0].isupper()]
printer files.
"""
+ # Disable debuginfod to avoid GDB messages like:
+ #
+ # This GDB supports auto-downloading debuginfo from the following URLs:
+ # https://debuginfod.fedoraproject.org/
+ # Enable debuginfod for this session? (y or [n])
+ #
+ try:
+ test('set debuginfod enabled off')
+ except Exception:
+ pass
+
# Load all the pretty printer files. We're assuming these are safe.
for printer_file in printer_files:
test('source {0}'.format(printer_file))
--- /dev/null
+#!/usr/bin/python3
+# ELF editor for load align tests.
+# Copyright (C) 2022 Free Software Foundation, Inc.
+# Copyright The GNU Toolchain Authors.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+import argparse
+import os
+import sys
+import struct
+
+EI_NIDENT=16
+
+EI_MAG0=0
+ELFMAG0=b'\x7f'
+EI_MAG1=1
+ELFMAG1=b'E'
+EI_MAG2=2
+ELFMAG2=b'L'
+EI_MAG3=3
+ELFMAG3=b'F'
+
+EI_CLASS=4
+ELFCLASSNONE=b'0'
+ELFCLASS32=b'\x01'
+ELFCLASS64=b'\x02'
+
+EI_DATA=5
+ELFDATA2LSB=b'\x01'
+ELFDATA2MSB=b'\x02'
+
+ET_EXEC=2
+ET_DYN=3
+
+PT_LOAD=1
+PT_TLS=7
+
+def elf_types_fmts(e_ident):
+    """Return the struct format codes (endian, addr, off) for e_ident.
+
+    endian is '<' if e_ident[EI_DATA] is ELFDATA2LSB and '>' otherwise.
+    addr and off are both 'I' if e_ident[EI_CLASS] is ELFCLASS32 and
+    'Q' otherwise.
+    """
+    endian = '<' if e_ident[EI_DATA] == ELFDATA2LSB else '>'
+    addr = 'I' if e_ident[EI_CLASS] == ELFCLASS32 else 'Q'
+    off = 'I' if e_ident[EI_CLASS] == ELFCLASS32 else 'Q'
+    return (endian, addr, off)
+
+class Elf_Ehdr:
+ def __init__(self, e_ident):
+ endian, addr, off = elf_types_fmts(e_ident)
+ self.fmt = '{0}HHI{1}{2}{2}IHHHHHH'.format(endian, addr, off)
+ self.len = struct.calcsize(self.fmt)
+
+ def read(self, f):
+ buf = f.read(self.len)
+ if not buf:
+ error('{}: header too small'.format(f.name))
+ data = struct.unpack(self.fmt, buf)
+ self.e_type = data[0]
+ self.e_machine = data[1]
+ self.e_version = data[2]
+ self.e_entry = data[3]
+ self.e_phoff = data[4]
+ self.e_shoff = data[5]
+ self.e_flags = data[6]
+ self.e_ehsize = data[7]
+ self.e_phentsize= data[8]
+ self.e_phnum = data[9]
+ self.e_shstrndx = data[10]
+
+
+class Elf_Phdr:
+ def __init__(self, e_ident):
+ endian, addr, off = elf_types_fmts(e_ident)
+ self.ei_class = e_ident[EI_CLASS]
+ if self.ei_class == ELFCLASS32:
+ self.fmt = '{0}I{2}{1}{1}IIII'.format(endian, addr, off)
+ else:
+ self.fmt = '{0}II{2}{1}{1}QQQ'.format(endian, addr, off)
+ self.len = struct.calcsize(self.fmt)
+
+ def read(self, f):
+ buf = f.read(self.len)
+ if len(buf) < self.len:
+ error('{}: program header too small'.format(f.name))
+ data = struct.unpack(self.fmt, buf)
+ if self.ei_class == ELFCLASS32:
+ self.p_type = data[0]
+ self.p_offset = data[1]
+ self.p_vaddr = data[2]
+ self.p_paddr = data[3]
+ self.p_filesz = data[4]
+ self.p_memsz = data[5]
+ self.p_flags = data[6]
+ self.p_align = data[7]
+ else:
+ self.p_type = data[0]
+ self.p_flags = data[1]
+ self.p_offset = data[2]
+ self.p_vaddr = data[3]
+ self.p_paddr = data[4]
+ self.p_filesz = data[5]
+ self.p_memsz = data[6]
+ self.p_align = data[7]
+
+ def write(self, f):
+ if self.ei_class == ELFCLASS32:
+ data = struct.pack(self.fmt,
+ self.p_type,
+ self.p_offset,
+ self.p_vaddr,
+ self.p_paddr,
+ self.p_filesz,
+ self.p_memsz,
+ self.p_flags,
+ self.p_align)
+ else:
+ data = struct.pack(self.fmt,
+ self.p_type,
+ self.p_flags,
+ self.p_offset,
+ self.p_vaddr,
+ self.p_paddr,
+ self.p_filesz,
+ self.p_memsz,
+ self.p_align)
+ f.write(data)
+
+
+def error(msg):
+ print(msg, file=sys.stderr)
+ sys.exit(1)
+
+
+def elf_edit_align(phdr, align):
+ if align == 'half':
+ phdr.p_align = phdr.p_align >> 1
+ else:
+ phdr.p_align = int(align)
+
+def elf_edit_maximize_tls_size(phdr, elfclass):
+ if elfclass == ELFCLASS32:
+ # It is possible that the kernel can allocate half of the
+ # address space, so use something larger.
+ phdr.p_memsz = 0xfff00000
+ else:
+ phdr.p_memsz = 1 << 63
+
+def elf_edit(f, opts):
+ ei_nident_fmt = 'c' * EI_NIDENT
+ ei_nident_len = struct.calcsize(ei_nident_fmt)
+
+ data = f.read(ei_nident_len)
+ if len(data) < ei_nident_len:
+ error('{}: e_nident too small'.format(f.name))
+ e_ident = struct.unpack(ei_nident_fmt, data)
+
+ if e_ident[EI_MAG0] != ELFMAG0 \
+ or e_ident[EI_MAG1] != ELFMAG1 \
+ or e_ident[EI_MAG2] != ELFMAG2 \
+ or e_ident[EI_MAG3] != ELFMAG3:
+ error('{}: bad ELF header'.format(f.name))
+
+ if e_ident[EI_CLASS] != ELFCLASS32 \
+ and e_ident[EI_CLASS] != ELFCLASS64:
+ error('{}: unsupported ELF class: {}'.format(f.name, e_ident[EI_CLASS]))
+
+ if e_ident[EI_DATA] != ELFDATA2LSB \
+ and e_ident[EI_DATA] != ELFDATA2MSB: \
+ error('{}: unsupported ELF data: {}'.format(f.name, e_ident[EI_DATA]))
+
+ ehdr = Elf_Ehdr(e_ident)
+ ehdr.read(f)
+ if ehdr.e_type not in (ET_EXEC, ET_DYN):
+ error('{}: not an executable or shared library'.format(f.name))
+
+ phdr = Elf_Phdr(e_ident)
+ maximize_tls_size_done = False
+ for i in range(0, ehdr.e_phnum):
+ f.seek(ehdr.e_phoff + i * phdr.len)
+ phdr.read(f)
+ if phdr.p_type == PT_LOAD and opts.align is not None:
+ elf_edit_align(phdr, opts.align)
+ f.seek(ehdr.e_phoff + i * phdr.len)
+ phdr.write(f)
+ break
+ if phdr.p_type == PT_TLS and opts.maximize_tls_size:
+ elf_edit_maximize_tls_size(phdr, e_ident[EI_CLASS])
+ f.seek(ehdr.e_phoff + i * phdr.len)
+ phdr.write(f)
+ maximize_tls_size_done = True
+ break
+
+ if opts.maximize_tls_size and not maximize_tls_size_done:
+ error('{}: TLS maximum size was not updated'.format(f.name))
+
+def get_parser():
+ parser = argparse.ArgumentParser(description=__doc__)
+ parser.add_argument('-a', dest='align',
+ help='How to set the LOAD alignment')
+ parser.add_argument('--maximize-tls-size', action='store_true',
+ help='Set maximum PT_TLS size')
+ parser.add_argument('output',
+ help='ELF file to edit')
+ return parser
+
+
+def main(argv):
+ parser = get_parser()
+ opts = parser.parse_args(argv)
+ with open(opts.output, 'r+b') as fout:
+ elf_edit(fout, opts)
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
libnss_hesiod=2
libnss_db=2
-# Tests for NSS. They must have the same NSS_SHLIB_REVISION number as
-# the rest.
-libnss_test1=2
-libnss_test2=2
-
# Version for libnsl with YP and NIS+ functions.
libnsl=1
routines := accept bind connect getpeername getsockname getsockopt \
listen recv recvfrom recvmsg send sendmsg sendto \
setsockopt shutdown socket socketpair isfdtype opensock \
- sockatmark accept4 recvmmsg sendmmsg
+ sockatmark accept4 recvmmsg sendmmsg sockaddr_un_set
tests := \
tst-accept4 \
tst-sockopt \
+ tst-cmsghdr \
# tests
+tests-internal := \
+ tst-sockaddr_un_set \
+ # tests-internal
+
tests-time64 := \
tst-sockopt-time64 \
# tests
__fortify_function ssize_t
recv (int __fd, void *__buf, size_t __n, int __flags)
{
- if (__glibc_objsize0 (__buf) != (size_t) -1)
- {
- if (!__builtin_constant_p (__n))
- return __recv_chk (__fd, __buf, __n, __glibc_objsize0 (__buf),
- __flags);
-
- if (__n > __glibc_objsize0 (__buf))
- return __recv_chk_warn (__fd, __buf, __n, __glibc_objsize0 (__buf),
- __flags);
- }
- return __recv_alias (__fd, __buf, __n, __flags);
+ size_t sz = __glibc_objsize0 (__buf);
+ if (__glibc_safe_or_unknown_len (__n, sizeof (char), sz))
+ return __recv_alias (__fd, __buf, __n, __flags);
+ if (__glibc_unsafe_len (__n, sizeof (char), sz))
+ return __recv_chk_warn (__fd, __buf, __n, sz, __flags);
+ return __recv_chk (__fd, __buf, __n, sz, __flags);
}
extern ssize_t __recvfrom_chk (int __fd, void *__restrict __buf, size_t __n,
recvfrom (int __fd, void *__restrict __buf, size_t __n, int __flags,
__SOCKADDR_ARG __addr, socklen_t *__restrict __addr_len)
{
- if (__glibc_objsize0 (__buf) != (size_t) -1)
- {
- if (!__builtin_constant_p (__n))
- return __recvfrom_chk (__fd, __buf, __n, __glibc_objsize0 (__buf),
- __flags, __addr, __addr_len);
- if (__n > __glibc_objsize0 (__buf))
- return __recvfrom_chk_warn (__fd, __buf, __n, __glibc_objsize0 (__buf),
- __flags, __addr, __addr_len);
- }
- return __recvfrom_alias (__fd, __buf, __n, __flags, __addr, __addr_len);
+ size_t sz = __glibc_objsize0 (__buf);
+ if (__glibc_safe_or_unknown_len (__n, sizeof (char), sz))
+ return __recvfrom_alias (__fd, __buf, __n, __flags, __addr, __addr_len);
+ if (__glibc_unsafe_len (__n, sizeof (char), sz))
+ return __recvfrom_chk_warn (__fd, __buf, __n, sz, __flags, __addr,
+ __addr_len);
+ return __recvfrom_chk (__fd, __buf, __n, sz, __flags, __addr, __addr_len);
}
-/* Copyright (C) 1999-2021 Free Software Foundation, Inc.
+/* Create socket with an unspecified address family for use with ioctl.
+ Copyright (C) 1999-2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include <stdio.h>
+#include <errno.h>
#include <sys/socket.h>
-#include <libc-lock.h>
/* Return a socket of any type. The socket can be used in subsequent
ioctl calls to talk to the kernel. */
int
__opensock (void)
{
- /* Cache the last AF that worked, to avoid many redundant calls to
- socket(). */
- static int sock_af = -1;
- int fd = -1;
- __libc_lock_define_initialized (static, lock);
-
- if (sock_af != -1)
- {
- fd = __socket (sock_af, SOCK_DGRAM, 0);
- if (fd != -1)
- return fd;
- }
-
- __libc_lock_lock (lock);
-
- if (sock_af != -1)
- fd = __socket (sock_af, SOCK_DGRAM, 0);
-
- if (fd == -1)
- {
-#ifdef AF_INET
- fd = __socket (sock_af = AF_INET, SOCK_DGRAM, 0);
-#endif
-#ifdef AF_INET6
- if (fd < 0)
- fd = __socket (sock_af = AF_INET6, SOCK_DGRAM, 0);
-#endif
-#ifdef AF_IPX
- if (fd < 0)
- fd = __socket (sock_af = AF_IPX, SOCK_DGRAM, 0);
-#endif
-#ifdef AF_AX25
- if (fd < 0)
- fd = __socket (sock_af = AF_AX25, SOCK_DGRAM, 0);
-#endif
-#ifdef AF_APPLETALK
- if (fd < 0)
- fd = __socket (sock_af = AF_APPLETALK, SOCK_DGRAM, 0);
-#endif
- }
-
- __libc_lock_unlock (lock);
+ /* SOCK_DGRAM is supported by all address families. */
+ int type = SOCK_DGRAM | SOCK_CLOEXEC;
+ int fd;
+
+ fd = __socket (AF_UNIX, type, 0);
+ if (fd >= 0)
+ return fd;
+ fd = __socket (AF_INET, type, 0);
+ if (fd >= 0)
+ return fd;
+ fd = __socket (AF_INET6, type, 0);
+ if (fd >= 0)
+ return fd;
+ __set_errno (ENOENT);
return fd;
}
--- /dev/null
+/* Set the sun_path member of struct sockaddr_un.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+/* Initialize *ADDR as an AF_UNIX address for PATHNAME.  Return 0 on
+   success.  If PATHNAME, including its null terminator, does not fit
+   into ADDR->sun_path, set errno to EINVAL and return -1 without
+   changing *ADDR.  */
+int
+__sockaddr_un_set (struct sockaddr_un *addr, const char *pathname)
+{
+  size_t name_length = strlen (pathname);
+
+  /* The kernel supports names of exactly sizeof (addr->sun_path)
+     bytes, without a null terminator, but userspace does not; see the
+     SUN_LEN macro.  */
+  if (name_length >= sizeof (addr->sun_path))
+    {
+      __set_errno (EINVAL);	/* Error code used by the kernel.  */
+      return -1;
+    }
+
+  addr->sun_family = AF_UNIX;
+  /* The "+ 1" copies the null terminator as well.  */
+  memcpy (addr->sun_path, pathname, name_length + 1);
+  return 0;
+}
# else
extern ssize_t __sendmsg64 (int __fd, const struct msghdr *__message,
int __flags);
-# defien sendmsg __sendmsg64
+# define sendmsg __sendmsg64
# endif
#endif
--- /dev/null
+/* Test ancillary data header creation.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* We use the preprocessor to generate the function/macro tests instead of
+ using indirection because having all the macro expansions alongside
+ each other lets the compiler warn us about suspicious pointer
+ arithmetic across subsequent CMSG_{FIRST,NXT}HDR expansions. */
+
+#include <stdint.h>
+#include <stddef.h>
+
+#define RUN_TEST_CONCAT(suffix) run_test_##suffix
+#define RUN_TEST_FUNCNAME(suffix) RUN_TEST_CONCAT (suffix)
+
+/* Exercise CMSG_NXTHDR_IMPL on a zero-initialized control buffer sized
+   for three PAYLOAD messages, checking the boundary cases in which a
+   following header does or does not fit.  The including file is
+   expected to define CMSG_NXTHDR_IMPL and PAYLOAD first; the function
+   name is run_test_ followed by the expansion of CMSG_NXTHDR_IMPL.
+   Any failed check terminates the test (TEST_VERIFY_EXIT).  */
+static void
+RUN_TEST_FUNCNAME (CMSG_NXTHDR_IMPL) (void)
+{
+ struct msghdr m = {0};
+ struct cmsghdr *cmsg;
+ char cmsgbuf[3 * CMSG_SPACE (sizeof (PAYLOAD))] = {0};
+
+ m.msg_control = cmsgbuf;
+ m.msg_controllen = sizeof (cmsgbuf);
+
+ /* First header should point to the start of the buffer. */
+ cmsg = CMSG_FIRSTHDR (&m);
+ TEST_VERIFY_EXIT ((char *) cmsg == cmsgbuf);
+
+ /* If the first header length consumes the entire buffer, there is no
+ space remaining for additional headers. */
+ cmsg->cmsg_len = sizeof (cmsgbuf);
+ cmsg = CMSG_NXTHDR_IMPL (&m, cmsg);
+ TEST_VERIFY_EXIT (cmsg == NULL);
+
+ /* The first header length is so big, using it would cause an overflow. */
+ cmsg = CMSG_FIRSTHDR (&m);
+ TEST_VERIFY_EXIT ((char *) cmsg == cmsgbuf);
+ cmsg->cmsg_len = SIZE_MAX;
+ cmsg = CMSG_NXTHDR_IMPL (&m, cmsg);
+ TEST_VERIFY_EXIT (cmsg == NULL);
+
+ /* The first header leaves just enough space to hold another header. */
+ cmsg = CMSG_FIRSTHDR (&m);
+ TEST_VERIFY_EXIT ((char *) cmsg == cmsgbuf);
+ cmsg->cmsg_len = sizeof (cmsgbuf) - sizeof (struct cmsghdr);
+ cmsg = CMSG_NXTHDR_IMPL (&m, cmsg);
+ TEST_VERIFY_EXIT (cmsg != NULL);
+
+ /* The first header leaves space but not enough for another header. */
+ cmsg = CMSG_FIRSTHDR (&m);
+ TEST_VERIFY_EXIT ((char *) cmsg == cmsgbuf);
+ /* cmsg_len still holds the value stored by the previous case, so one
+ more byte leaves less than a full header. */
+ cmsg->cmsg_len ++;
+ cmsg = CMSG_NXTHDR_IMPL (&m, cmsg);
+ TEST_VERIFY_EXIT (cmsg == NULL);
+
+ /* The second header leaves just enough space to hold another header. */
+ cmsg = CMSG_FIRSTHDR (&m);
+ TEST_VERIFY_EXIT ((char *) cmsg == cmsgbuf);
+ cmsg->cmsg_len = CMSG_LEN (sizeof (PAYLOAD));
+ cmsg = CMSG_NXTHDR_IMPL (&m, cmsg);
+ TEST_VERIFY_EXIT (cmsg != NULL);
+ cmsg->cmsg_len = sizeof (cmsgbuf)
+ - CMSG_SPACE (sizeof (PAYLOAD)) /* First header. */
+ - sizeof (struct cmsghdr);
+ cmsg = CMSG_NXTHDR_IMPL (&m, cmsg);
+ TEST_VERIFY_EXIT (cmsg != NULL);
+
+ /* The second header leaves space but not enough for another header. */
+ cmsg = CMSG_FIRSTHDR (&m);
+ TEST_VERIFY_EXIT ((char *) cmsg == cmsgbuf);
+ cmsg = CMSG_NXTHDR_IMPL (&m, cmsg);
+ TEST_VERIFY_EXIT (cmsg != NULL);
+ /* Both headers keep the lengths stored by the previous case; only the
+ second one grows by a byte here. */
+ cmsg->cmsg_len ++;
+ cmsg = CMSG_NXTHDR_IMPL (&m, cmsg);
+ TEST_VERIFY_EXIT (cmsg == NULL);
+
+ return;
+}
--- /dev/null
+/* Test ancillary data header creation.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sys/socket.h>
+#include <gnu/lib-names.h>
+#include <support/xdlfcn.h>
+#include <support/check.h>
+
+#define PAYLOAD "Hello, World!"
+
+/* CMSG_NXTHDR is a macro that calls an inline function defined in
+ bits/socket.h. In case the function cannot be inlined, libc.so carries
+ a copy. Both versions need to be tested. */
+
+#define CMSG_NXTHDR_IMPL CMSG_NXTHDR
+#include "tst-cmsghdr-skeleton.c"
+#undef CMSG_NXTHDR_IMPL
+
+static struct cmsghdr * (* cmsg_nxthdr) (struct msghdr *, struct cmsghdr *);
+
+#define CMSG_NXTHDR_IMPL cmsg_nxthdr
+#include "tst-cmsghdr-skeleton.c"
+#undef CMSG_NXTHDR_IMPL
+
+/* Run the skeleton twice: once against the CMSG_NXTHDR macro and once
+   against the __cmsg_nxthdr symbol looked up in LIBC_SO.  */
+static int
+do_test (void)
+{
+  static void *libc_handle;
+
+  /* Macro (possibly inlined) version first.  */
+  run_test_CMSG_NXTHDR ();
+
+  /* Then the out-of-line copy, called through the cmsg_nxthdr
+     function pointer.  */
+  libc_handle = xdlopen (LIBC_SO, RTLD_LAZY);
+  cmsg_nxthdr = (__typeof (cmsg_nxthdr)) xdlsym (libc_handle,
+                                                 "__cmsg_nxthdr");
+  run_test_cmsg_nxthdr ();
+
+  return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Test the __sockaddr_un_set function.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Re-compile the function because the version in libc is not
+ exported. */
+#include "sockaddr_un_set.c"
+
+#include <support/check.h>
+
+/* Check that __sockaddr_un_set accepts the empty string, a short path
+   and a 107-character path, and that it rejects a 108-character path
+   with EINVAL.  SUN is filled with 0xcc before each case so that stale
+   contents cannot satisfy a check by accident.  */
+static int
+do_test (void)
+{
+ struct sockaddr_un sun;
+
+ /* Empty path name: the family must be set and the call must succeed. */
+ memset (&sun, 0xcc, sizeof (sun));
+ __sockaddr_un_set (&sun, "");
+ TEST_COMPARE (sun.sun_family, AF_UNIX);
+ TEST_COMPARE (__sockaddr_un_set (&sun, ""), 0);
+
+ /* Ordinary short path name. */
+ memset (&sun, 0xcc, sizeof (sun));
+ TEST_COMPARE (__sockaddr_un_set (&sun, "/example"), 0);
+ TEST_COMPARE_STRING (sun.sun_path, "/example");
+
+ /* Longest accepted name: 107 characters plus the null terminator. */
+ {
+ char pathname[108]; /* Length of sun_path (ABI constant). */
+ memset (pathname, 'x', sizeof (pathname));
+ pathname[sizeof (pathname) - 1] = '\0';
+ memset (&sun, 0xcc, sizeof (sun));
+ TEST_COMPARE (__sockaddr_un_set (&sun, pathname), 0);
+ TEST_COMPARE (sun.sun_family, AF_UNIX);
+ TEST_COMPARE_STRING (sun.sun_path, pathname);
+ }
+
+ /* One character too many: 108 characters leave no room for the
+ terminator, so the call must fail with EINVAL. */
+ {
+ char pathname[109];
+ memset (pathname, 'x', sizeof (pathname));
+ pathname[sizeof (pathname) - 1] = '\0';
+ memset (&sun, 0xcc, sizeof (sun));
+ errno = 0;
+ TEST_COMPARE (__sockaddr_un_set (&sun, pathname), -1);
+ TEST_COMPARE (errno, EINVAL);
+ }
+
+ return 0;
+}
+
+#include <support/test-driver.c>
static-only-routines = atexit at_quick_exit
test-srcs := tst-fmtmsg
-tests := tst-strtol tst-strtod testmb testrand testsort testdiv \
- test-canon test-canon2 tst-strtoll tst-environ \
- tst-xpg-basename tst-random tst-random2 tst-bsearch \
- tst-limits tst-rand48 bug-strtod tst-setcontext \
- tst-setcontext2 test-a64l tst-qsort testmb2 \
- bug-strtod2 tst-atof1 tst-atof2 tst-strtod2 \
- tst-rand48-2 tst-makecontext tst-strtod5 \
- tst-qsort2 tst-makecontext2 tst-strtod6 tst-unsetenv1 \
- tst-makecontext3 bug-getcontext bug-fmtmsg1 \
- tst-secure-getenv tst-strtod-overflow tst-strtod-round \
- tst-tininess tst-strtod-underflow tst-setcontext3 \
- tst-strtol-locale tst-strtod-nan-locale tst-strfmon_l \
- tst-quick_exit tst-thread-quick_exit tst-width \
- tst-width-stdint tst-strfrom tst-strfrom-locale \
- tst-getrandom tst-atexit tst-at_quick_exit \
- tst-cxa_atexit tst-on_exit test-atexit-race \
- test-at_quick_exit-race test-cxa_atexit-race \
- test-cxa_atexit-race2 \
- test-on_exit-race test-dlclose-exit-race \
- tst-makecontext-align test-bz22786 tst-strtod-nan-sign \
- tst-swapcontext1 tst-setcontext4 tst-setcontext5 \
- tst-setcontext6 tst-setcontext7 tst-setcontext8 \
- tst-setcontext9 tst-bz20544 tst-canon-bz26341 \
- tst-realpath
+tests := \
+ bug-fmtmsg1 \
+ bug-getcontext \
+ bug-strtod \
+ bug-strtod2 \
+ test-a64l \
+ test-at_quick_exit-race \
+ test-atexit-race \
+ test-bz22786 \
+ test-canon \
+ test-canon2 \
+ test-cxa_atexit-race \
+ test-cxa_atexit-race2 \
+ test-dlclose-exit-race \
+ test-on_exit-race \
+ testdiv \
+ testmb \
+ testmb2 \
+ testrand \
+ testsort \
+ tst-at_quick_exit \
+ tst-atexit \
+ tst-atof1 \
+ tst-atof2 \
+ tst-bsearch \
+ tst-bz20544 \
+ tst-canon-bz26341 \
+ tst-cxa_atexit \
+ tst-environ \
+ tst-getrandom \
+ tst-limits \
+ tst-makecontext \
+ tst-makecontext-align \
+ tst-makecontext2 \
+ tst-makecontext3 \
+ tst-on_exit \
+ tst-qsort \
+ tst-qsort2 \
+ tst-quick_exit \
+ tst-rand48 \
+ tst-rand48-2 \
+ tst-random \
+ tst-random2 \
+ tst-realpath \
+ tst-realpath-toolong \
+ tst-secure-getenv \
+ tst-setcontext \
+ tst-setcontext2 \
+ tst-setcontext3 \
+ tst-setcontext4 \
+ tst-setcontext5 \
+ tst-setcontext6 \
+ tst-setcontext7 \
+ tst-setcontext8 \
+ tst-setcontext9 \
+ tst-strfmon_l \
+ tst-strfrom \
+ tst-strfrom-locale \
+ tst-strtod \
+ tst-strtod-nan-locale \
+ tst-strtod-nan-sign \
+ tst-strtod-overflow \
+ tst-strtod-round \
+ tst-strtod-underflow \
+ tst-strtod2 \
+ tst-strtod5 \
+ tst-strtod6 \
+ tst-strtol \
+ tst-strtol-locale \
+ tst-strtoll \
+ tst-swapcontext1 \
+ tst-thread-quick_exit \
+ tst-tininess \
+ tst-unsetenv1 \
+ tst-width \
+ tst-width-stdint \
+ tst-xpg-basename \
+# tests
tests-internal := tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \
tst-tls-atexit tst-tls-atexit-nodelete
CFLAGS-tst-makecontext.c += -funwind-tables
CFLAGS-tst-makecontext2.c += $(stack-align-test-flags)
+CFLAGS-testmb.c += -D_FORTIFY_SOURCE=2 -Wall -Werror
+
+
# Run a test on the header files we use.
tests-special += $(objpfx)isomac.out
__fortify_function __wur char *
__NTH (realpath (const char *__restrict __name, char *__restrict __resolved))
{
- if (__glibc_objsize (__resolved) != (size_t) -1)
- {
+ size_t sz = __glibc_objsize (__resolved);
+
+ if (sz == (size_t) -1)
+ return __realpath_alias (__name, __resolved);
+
#if defined _LIBC_LIMITS_H_ && defined PATH_MAX
- if (__glibc_objsize (__resolved) < PATH_MAX)
- return __realpath_chk_warn (__name, __resolved,
- __glibc_objsize (__resolved));
+ if (__glibc_unsafe_len (PATH_MAX, sizeof (char), sz))
+ return __realpath_chk_warn (__name, __resolved, sz);
#endif
- return __realpath_chk (__name, __resolved, __glibc_objsize (__resolved));
- }
-
- return __realpath_alias (__name, __resolved);
+ return __realpath_chk (__name, __resolved, sz);
}
__fortify_function int
__NTH (ptsname_r (int __fd, char *__buf, size_t __buflen))
{
- if (__glibc_objsize (__buf) != (size_t) -1)
- {
- if (!__builtin_constant_p (__buflen))
- return __ptsname_r_chk (__fd, __buf, __buflen,
- __glibc_objsize (__buf));
- if (__buflen > __glibc_objsize (__buf))
- return __ptsname_r_chk_warn (__fd, __buf, __buflen,
- __glibc_objsize (__buf));
- }
- return __ptsname_r_alias (__fd, __buf, __buflen);
+ return __glibc_fortify (ptsname_r, __buflen, sizeof (char),
+ __glibc_objsize (__buf),
+ __fd, __buf, __buflen);
}
const char *__restrict __src,
size_t __len, size_t __dstlen) __THROW
__attr_access ((__write_only__, 1, 3)) __attr_access ((__read_only__, 2));
+extern size_t __REDIRECT_NTH (__mbstowcs_nulldst,
+ (wchar_t *__restrict __dst,
+ const char *__restrict __src,
+ size_t __len), mbstowcs)
+ __attr_access ((__read_only__, 2));
extern size_t __REDIRECT_NTH (__mbstowcs_alias,
(wchar_t *__restrict __dst,
const char *__restrict __src,
__NTH (mbstowcs (wchar_t *__restrict __dst, const char *__restrict __src,
size_t __len))
{
- if (__glibc_objsize (__dst) != (size_t) -1)
- {
- if (!__builtin_constant_p (__len))
- return __mbstowcs_chk (__dst, __src, __len,
- __glibc_objsize (__dst) / sizeof (wchar_t));
-
- if (__len > __glibc_objsize (__dst) / sizeof (wchar_t))
- return __mbstowcs_chk_warn (__dst, __src, __len,
- (__glibc_objsize (__dst)
- / sizeof (wchar_t)));
- }
- return __mbstowcs_alias (__dst, __src, __len);
+ if (__builtin_constant_p (__dst == NULL) && __dst == NULL)
+ return __mbstowcs_nulldst (__dst, __src, __len);
+ else
+ return __glibc_fortify_n (mbstowcs, __len, sizeof (wchar_t),
+ __glibc_objsize (__dst), __dst, __src, __len);
}
-
extern size_t __wcstombs_chk (char *__restrict __dst,
const wchar_t *__restrict __src,
size_t __len, size_t __dstlen) __THROW
__NTH (wcstombs (char *__restrict __dst, const wchar_t *__restrict __src,
size_t __len))
{
- if (__glibc_objsize (__dst) != (size_t) -1)
- {
- if (!__builtin_constant_p (__len))
- return __wcstombs_chk (__dst, __src, __len, __glibc_objsize (__dst));
- if (__len > __glibc_objsize (__dst))
- return __wcstombs_chk_warn (__dst, __src, __len,
- __glibc_objsize (__dst));
- }
- return __wcstombs_alias (__dst, __src, __len);
+ return __glibc_fortify (wcstombs, __len, sizeof (char),
+ __glibc_objsize (__dst),
+ __dst, __src, __len);
}
error:
*dest++ = '\0';
- if (resolved != NULL && dest - rname <= get_path_max ())
- rname = strcpy (resolved, rname);
+ if (resolved != NULL)
+ {
+ if (dest - rname <= get_path_max ())
+ rname = strcpy (resolved, rname);
+ else if (!failed)
+ {
+ failed = true;
+ __set_errno (ENAMETOOLONG);
+ }
+ }
error_nomem:
scratch_buffer_free (&extra_buffer);
extern size_t wcstombs (char *__restrict __s,
const wchar_t *__restrict __pwcs, size_t __n)
__THROW
- __attr_access ((__write_only__, 1, 3)) __attr_access ((__read_only__, 2));
+ __fortified_attr_access (__write_only__, 1, 3)
+ __attr_access ((__read_only__, 2));
#ifdef __USE_MISC
/* Determine whether the string value of RESPONSE matches the affirmation
terminal associated with the master FD is open on in BUF.
Return 0 on success, otherwise an error number. */
extern int ptsname_r (int __fd, char *__buf, size_t __buflen)
- __THROW __nonnull ((2)) __attr_access ((__write_only__, 2, 3));
+ __THROW __nonnull ((2)) __fortified_attr_access (__write_only__, 2, 3);
/* Open a master pseudo terminal and return its file descriptor. */
extern int getpt (void);
lose = 1;
}
+ i = mbstowcs (NULL, "bar", 4);
+ if (!(i == 3 && w[1] == 'a'))
+ {
+ puts ("mbstowcs FAILED2!");
+ lose = 1;
+ }
+
mbstowcs (w, "blah", 5);
i = wcstombs (c, w, 10);
if (i != 4)
--- /dev/null
+/* Verify that realpath returns NULL with ENAMETOOLONG if the result exceeds
+ PATH_MAX.
+ Copyright The GNU Toolchain Authors.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <support/check.h>
+#include <support/temp_file.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#define BASENAME "tst-realpath-toolong."
+
+#ifndef PATH_MAX
+# define PATH_MAX 1024
+#endif
+
+/* Change into the directory set up by
+   support_create_and_chdir_toolong_temp_directory and check that
+   realpath (".") into a PATH_MAX + 1 byte buffer fails with
+   ENAMETOOLONG.  */
+int
+do_test (void)
+{
+ /* The helper also changes the current directory (per its name); the
+ returned string is freed below. */
+ char *base = support_create_and_chdir_toolong_temp_directory (BASENAME);
+
+ char buf[PATH_MAX + 1];
+ const char *res = realpath (".", buf);
+
+ /* canonicalize.c states that if the real path is >= PATH_MAX, then
+ realpath returns NULL and sets ENAMETOOLONG. */
+ TEST_VERIFY (res == NULL);
+ TEST_VERIFY (errno == ENAMETOOLONG);
+
+ free (base);
+ return 0;
+}
+
+#include <support/test-driver.c>
# include <bits/strings_fortified.h>
void __explicit_bzero_chk (void *__dest, size_t __len, size_t __destlen)
- __THROW __nonnull ((1)) __attr_access ((__write_only__, 1, 2));
+ __THROW __nonnull ((1)) __fortified_attr_access (__write_only__, 1, 2);
__fortify_function void
__NTH (explicit_bzero (void *__dest, size_t __len))
return __builtin___strcpy_chk (__dest, __src, __glibc_objsize (__dest));
}
-#ifdef __USE_GNU
+#ifdef __USE_XOPEN2K8
__fortify_function char *
__NTH (stpcpy (char *__restrict __dest, const char *__restrict __src))
{
__glibc_objsize (__dest));
}
-#if __GNUC_PREREQ (4, 7) || __glibc_clang_prereq (2, 6)
+#ifdef __USE_XOPEN2K8
+# if __GNUC_PREREQ (4, 7) || __glibc_clang_prereq (2, 6)
__fortify_function char *
__NTH (stpncpy (char *__dest, const char *__src, size_t __n))
{
return __builtin___stpncpy_chk (__dest, __src, __n,
__glibc_objsize (__dest));
}
-#else
+# else
extern char *__stpncpy_chk (char *__dest, const char *__src, size_t __n,
size_t __destlen) __THROW
- __attr_access ((__write_only__, 1, 3)) __attr_access ((__read_only__, 2));
+ __fortified_attr_access (__write_only__, 1, 3)
+ __attr_access ((__read_only__, 2));
extern char *__REDIRECT_NTH (__stpncpy_alias, (char *__dest, const char *__src,
size_t __n), stpncpy);
return __stpncpy_chk (__dest, __src, __n, __bos (__dest));
return __stpncpy_alias (__dest, __src, __n);
}
+# endif
#endif
/* Set N bytes of S to 0. The compiler will not delete a call to this
function, even if S is dead after the call. */
extern void explicit_bzero (void *__s, size_t __n) __THROW __nonnull ((1))
- __attr_access ((__write_only__, 1, 2));
+ __fortified_attr_access (__write_only__, 1, 2);
/* Return the next DELIM-delimited token from *STRINGP,
terminating it with a '\0', and update *STRINGP to point past it. */
<https://www.gnu.org/licenses/>. */
#include <assert.h>
+#include <support/xunistd.h>
#define TEST_MAIN
#define TEST_NAME "rawmemchr"
}
}
+/* Regression check named after bug 29234.  Maps 8192 bytes filled with
+   0xff, plants eight 0x1 bytes just before the search area, and
+   verifies that every implementation finds the single in-range 0x1
+   byte rather than one of the bytes in front of the start pointer.  */
+static void
+do_test_bz29234 (void)
+{
+ size_t i, j;
+ char *ptr_start;
+ char *buf = xmmap (0, 8192, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1);
+
+ memset (buf, -1, 8192);
+
+ /* Start searching 8 bytes before offset 4096 of the mapping. */
+ ptr_start = buf + 4096 - 8;
+
+ /* Out of range matches before the start of a page. */
+ memset (ptr_start - 8, 0x1, 8);
+
+ /* J shifts the start pointer, I is the distance from that start to the
+ byte that must be found. */
+ for (j = 0; j < 8; ++j)
+ {
+ for (i = 0; i < 128; ++i)
+ {
+ /* Plant the target byte for this iteration. */
+ ptr_start[i + j] = 0x1;
+
+ FOR_EACH_IMPL (impl, 0)
+ do_one_test (impl, (char *) (ptr_start + j), 0x1,
+ ptr_start + i + j);
+
+ /* Restore the filler so the next iteration has a single target. */
+ ptr_start[i + j] = 0xff;
+ }
+ }
+
+ xmunmap (buf, 8192);
+}
+
static void
do_test (size_t align, size_t pos, size_t len, int seek_char)
{
size_t i;
char *result;
- align &= 7;
+ align &= getpagesize () - 1;
if (align + len >= page_size)
return;
}
}
+ if (align)
+ {
+ p[align - 1] = seek_char;
+ if (align > 4)
+ p[align - 4] = seek_char;
+ }
+
assert (pos < len);
size_t r = random ();
if ((r & 31) == 0)
result, p);
ret = 1;
}
+
+ if (align)
+ {
+ p[align - 1] = seek_char;
+ if (align > 4)
+ p[align - 4] = seek_char;
+ }
}
}
do_test (i, 64, 256, 23);
do_test (0, 16 << i, 2048, 0);
do_test (i, 64, 256, 0);
+
+ do_test (getpagesize () - i, 64, 256, 23);
+ do_test (getpagesize () - i, 64, 256, 0);
}
for (i = 1; i < 32; ++i)
{
do_test (0, i, i + 1, 23);
do_test (0, i, i + 1, 0);
+
+ do_test (getpagesize () - 7, i, i + 1, 23);
+ do_test (getpagesize () - i / 2, i, i + 1, 23);
+ do_test (getpagesize () - i, i, i + 1, 23);
}
do_random_tests ();
+ do_test_bz29234 ();
return ret;
}
# define TEST_NAME "strcmp"
#endif
#include "test-string.h"
+#include <support/test-driver.h>
#ifdef WIDE
# include <wchar.h>
}
}
+/* Compare two long path names that share a long common prefix, placed
+   at fixed byte offsets (0x8f8 and 0xcff3) inside two freshly mapped
+   buffers.  Every implementation must agree with SIMPLE_STRCMP.  The
+   test exits as unsupported if either mapping cannot be created.  */
+static void
+check3 (void)
+{
+  size_t size = 0xd000 + 0x4000;
+  CHAR *s1, *s2;
+  CHAR *buffer1 = mmap (NULL, size, PROT_READ | PROT_WRITE,
+                        MAP_PRIVATE | MAP_ANON, -1, 0);
+  CHAR *buffer2 = mmap (NULL, size, PROT_READ | PROT_WRITE,
+                        MAP_PRIVATE | MAP_ANON, -1, 0);
+  /* Both mappings have to be checked; a failed second mapping would
+     otherwise be written through below.  */
+  if (buffer1 == MAP_FAILED || buffer2 == MAP_FAILED)
+    error (EXIT_UNSUPPORTED, errno, "mmap failed");
+
+  s1 = (CHAR *) (buffer1 + 0x8f8 / sizeof (CHAR));
+  s2 = (CHAR *) (buffer2 + 0xcff3 / sizeof (CHAR));
+
+  STRCPY(s1, L("/export/redhat/rpms/BUILD/java-1.8.0-openjdk-1.8.0.312.b07-2.fc35.x86_64/openjdk/langtools/src/share/classes/com/sun/tools/doclets/internal/toolkit/util/PathDocFileFactory.java"));
+  STRCPY(s2, L("/export/redhat/rpms/BUILD/java-1.8.0-openjdk-1.8.0.312.b07-2.fc35.x86_64/openjdk/langtools/src/share/classes/com/sun/tools/doclets/internal/toolkit/taglets/ThrowsTaglet.java"));
+
+  int exp_result = SIMPLE_STRCMP (s1, s2);
+  FOR_EACH_IMPL (impl, 0)
+    check_result (impl, s1, s2, exp_result);
+
+  munmap ((void *) buffer1, size);
+  munmap ((void *) buffer2, size);
+}
+
int
test_main (void)
{
test_init ();
check();
check2 ();
+ check3 ();
printf ("%23s", "");
FOR_EACH_IMPL (impl, 0)
}
}
+/* Regression check for bug 28895: compare the first SIZE characters of
+   two strings placed close to the end of a page in buf1 and buf2 and
+   require every implementation to agree with SIMPLE_STRNCMP.  */
+static void
+check4 (void)
+{
+ /* To trigger bug 28895 we need: 1) both s1 and s2 to be within 32 bytes
+ of the end of the page; 2) no mismatch/null byte before the first page
+ cross; 3) a length (`n`) large enough for one string to cross the
+ page; and 4) either mismatch/null bytes before the start of the
+ strings. */
+
+ size_t size = 10;
+ /* Keeps the in-page offset bits and clears the bits below the CHAR
+ size. */
+ size_t addr_mask = (getpagesize () - 1) ^ (sizeof (CHAR) - 1);
+ CHAR *s1 = (CHAR *)(buf1 + (addr_mask & 0xffa));
+ CHAR *s2 = (CHAR *)(buf2 + (addr_mask & 0xfed));
+ int exp_result;
+
+ STRCPY (s1, L ("tst-tlsmod%"));
+ STRCPY (s2, L ("tst-tls-manydynamic73mod"));
+ exp_result = SIMPLE_STRNCMP (s1, s2, size);
+ FOR_EACH_IMPL (impl, 0)
+ check_result (impl, s1, s2, size, exp_result);
+}
+
int
test_main (void)
{
check1 ();
check2 ();
check3 ();
+ check4 ();
printf ("%23s", "");
FOR_EACH_IMPL (impl, 0)
endif
tests = tst-xdrmem tst-xdrmem2 test-rpcent tst-udp-error tst-udp-timeout \
- tst-udp-nonblocking
+ tst-udp-nonblocking tst-bug22542 tst-bug28768
+
xtests := tst-getmyaddr
ifeq ($(have-thread-library),yes)
$(objpfx)tst-udp-garbage: \
$(common-objpfx)linkobj/libc.so $(shared-thread-library)
+$(objpfx)tst-bug22542: $(common-objpfx)linkobj/libc.so
+
else # !have-GLIBC_2.31
routines = $(routines-for-nss)
if (strcmp (proto, "unix") == 0)
{
- memset ((char *)&sun, 0, sizeof (sun));
- sun.sun_family = AF_UNIX;
- strcpy (sun.sun_path, hostname);
+ if (__sockaddr_un_set (&sun, hostname) < 0)
+ {
+ struct rpc_createerr *ce = &get_rpc_createerr ();
+ ce->cf_stat = RPC_SYSTEMERROR;
+ ce->cf_error.re_errno = errno;
+ return NULL;
+ }
sock = RPC_ANYSOCK;
client = clntunix_create (&sun, prog, vers, &sock, 0, 0);
if (client == NULL)
SVCXPRT *xprt;
struct unix_rendezvous *r;
struct sockaddr_un addr;
- socklen_t len = sizeof (struct sockaddr_in);
+ socklen_t len = sizeof (addr);
+
+ if (__sockaddr_un_set (&addr, path) < 0)
+ return NULL;
if (sock == RPC_ANYSOCK)
{
}
madesock = TRUE;
}
- memset (&addr, '\0', sizeof (addr));
- addr.sun_family = AF_UNIX;
- len = strlen (path) + 1;
- memcpy (addr.sun_path, path, len);
- len += sizeof (addr.sun_family);
-
__bind (sock, (struct sockaddr *) &addr, len);
if (__getsockname (sock, (struct sockaddr *) &addr, &len) != 0
--- /dev/null
+/* Test to verify that overlong hostname is rejected by clnt_create
+ and doesn't cause a buffer overflow (bug 22542).
+
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <rpc/clnt.h>
+#include <string.h>
+#include <support/check.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+/* Call clnt_create with the "unix" protocol and a host name twice as
+   long as sun_path, and check that it fails with errno set to EINVAL
+   (bug 22542).  */
+static int
+do_test (void)
+{
+ /* Create an arbitrary hostname that's longer than fits in sun_path. */
+ char name [sizeof ((struct sockaddr_un*)0)->sun_path * 2];
+ memset (name, 'x', sizeof name - 1);
+ name [sizeof name - 1] = '\0';
+
+ /* Clear errno so that the comparison below sees only what clnt_create
+ left behind. */
+ errno = 0;
+ CLIENT *clnt = clnt_create (name, 0, 0, "unix");
+
+ TEST_VERIFY (clnt == NULL);
+ TEST_COMPARE (errno, EINVAL);
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Test to verify that long path is rejected by svcunix_create (bug 28768).
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <rpc/svc.h>
+#include <shlib-compat.h>
+#include <string.h>
+#include <support/check.h>
+
+/* svcunix_create does not have a default version in linkobj/libc.so. */
+compat_symbol_reference (libc, svcunix_create, svcunix_create, GLIBC_2_1);
+
+/* Call svcunix_create with a 108-character path name and check that it
+   returns NULL with errno set to EINVAL (bug 28768).  */
+static int
+do_test (void)
+{
+ /* 108 'x' characters followed by the null terminator. */
+ char pathname[109];
+ memset (pathname, 'x', sizeof (pathname));
+ pathname[sizeof (pathname) - 1] = '\0';
+
+ errno = 0;
+ TEST_VERIFY (svcunix_create (RPC_ANYSOCK, 4096, 4096, pathname) == NULL);
+ TEST_COMPARE (errno, EINVAL);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
support_path_support_time64 \
support_process_state \
support_ptrace \
+ support-open-dev-null-range \
support_openpty \
support_paths \
support_quote_blob \
support_set_small_thread_stack_size \
support_shared_allocate \
support_small_stack_thread_attribute \
+ support_socket_so_timestamp_time64 \
support_stat_nanoseconds \
support_subprocess \
support_test_compare_blob \
support_test_compare_failure \
support_test_compare_string \
- support_write_file_string \
support_test_main \
support_test_verify_impl \
+ support_wait_for_thread_exit \
+ support_write_file_string \
temp_file \
timespec \
timespec-time64 \
tst-support_capture_subprocess \
tst-support_descriptors \
tst-support_format_dns_packet \
+ tst-support-open-dev-null-range \
tst-support-process_state \
tst-support_quote_blob \
tst-support_quote_string \
--- /dev/null
+/* Return a range of open file descriptors.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <support/support.h>
+#include <support/check.h>
+#include <support/xunistd.h>
+#include <stdlib.h>
+#include <sys/resource.h>
+
+/* Raise the soft RLIMIT_NOFILE limit of the process by 128
+   descriptors.  Terminates the test with FAIL_EXIT1 if the limit
+   cannot be read or updated.  */
+static void
+increase_nofile (void)
+{
+  struct rlimit rl;
+  if (getrlimit (RLIMIT_NOFILE, &rl) == -1)
+    FAIL_EXIT1 ("getrlimit (RLIMIT_NOFILE): %m");
+
+  rl.rlim_cur += 128;
+
+  /* setrlimit reports failure with -1, just like getrlimit above.  */
+  if (setrlimit (RLIMIT_NOFILE, &rl) == -1)
+    FAIL_EXIT1 ("setrlimit (RLIMIT_NOFILE): %m");
+}
+
+/* Open "/dev/null" with FLAGS and MODE and return the descriptor.  If
+   the first attempt fails with EMFILE, raise the descriptor limit via
+   increase_nofile and retry with xopen.  Any other failure terminates
+   the test.  */
+static int
+open_dev_null (int flags, mode_t mode)
+{
+  int fd = open64 ("/dev/null", flags, mode);
+
+  if (fd < 0)
+    {
+      /* Only running out of descriptors is recoverable.  */
+      if (errno != EMFILE)
+        FAIL_EXIT1 ("open64 (\"/dev/null\", 0x%x, 0%o): %m", flags, mode);
+
+      increase_nofile ();
+      fd = xopen ("/dev/null", flags, mode);
+    }
+
+  return fd;
+}
+
+/* A run of consecutive file descriptors: LEN descriptors starting at
+   LOWFD.  */
+struct range
+{
+ int lowfd; /* First descriptor of the run. */
+ size_t len; /* Number of descriptors in the run. */
+};
+
+/* Growable array of struct range entries.  */
+struct range_list
+{
+ size_t total; /* Number of entries allocated in RANGES. */
+ size_t used; /* Number of entries currently filled in. */
+ struct range *ranges; /* Heap-allocated storage, freed by range_close. */
+};
+
+/* Set up *R as an empty list with room for eight ranges.  */
+static void
+range_init (struct range_list *r)
+{
+  r->used = 0;
+  r->total = 8;
+  r->ranges = xmalloc (r->total * sizeof (*r->ranges));
+}
+
+/* Append the range of LEN descriptors starting at LOWFD to *R,
+   doubling the storage first when it is full.  */
+static void
+range_add (struct range_list *r, int lowfd, size_t len)
+{
+  if (r->used == r->total)
+    {
+      size_t new_total = r->total * 2;
+      r->ranges = xrealloc (r->ranges, new_total * sizeof (*r->ranges));
+      r->total = new_total;
+    }
+
+  struct range *slot = &r->ranges[r->used];
+  slot->lowfd = lowfd;
+  slot->len = len;
+  r->used++;
+}
+
+/* Close every descriptor recorded in *R and release the list
+   storage.  */
+static void
+range_close (struct range_list *r)
+{
+  for (size_t idx = 0; idx < r->used; idx++)
+    {
+      const struct range *cur = &r->ranges[idx];
+      int fd = cur->lowfd;
+      int endfd = cur->lowfd + cur->len;
+
+      while (fd < endfd)
+        {
+          xclose (fd);
+          fd++;
+        }
+    }
+
+  free (r->ranges);
+}
+
+/* Open NUM consecutive descriptors on "/dev/null" using FLAGS and MODE
+   and return the lowest one.  Whenever a newly opened descriptor is not
+   the direct successor of the previous one, the run collected so far is
+   remembered and a new run starts at the new descriptor; the abandoned
+   runs are closed once a complete run has been obtained.  */
+int
+support_open_dev_null_range (int num, int flags, mode_t mode)
+{
+ /* We keep track of the ranges that hit an already opened descriptor, so
+ we close them after we get a working range. */
+ struct range_list rl;
+ range_init (&rl);
+
+ int lowfd = open_dev_null (flags, mode);
+ int prevfd = lowfd;
+ while (true)
+ {
+ /* LOWFD is already open, so NUM - 1 more descriptors are needed. */
+ int i = 1;
+ for (; i < num; i++)
+ {
+ int fd = open_dev_null (flags, mode);
+ if (fd != lowfd + i)
+ {
+ /* Gap found: record the run [lowfd, prevfd] for later closing and
+ restart the search at FD. */
+ range_add (&rl, lowfd, prevfd - lowfd + 1);
+
+ prevfd = lowfd = fd;
+ break;
+ }
+ prevfd = fd;
+ }
+ /* The inner loop ran to completion, so the run is contiguous. */
+ if (i == num)
+ break;
+ }
+
+ range_close (&rl);
+
+ return lowfd;
+}
tv_usec larger than 1000000. */
extern bool support_select_normalizes_timeout (void);
+/* Return true if socket FD supports 64-bit timestamps with the SOL_SOCKET
+ and SO_TIMESTAMP/SO_TIMESTAMPNS. */
+extern bool support_socket_so_timestamp_time64 (int fd);
+
/* Create a timer that trigger after SEC seconds and NSEC nanoseconds. If
REPEAT is true the timer will repeat indefinitely. If CALLBACK is not
NULL, the function will be called when the timer expires; otherwise a
/* Disable the timer TIMER. */
void support_delete_timer (timer_t timer);
+/* Wait until all threads except the current thread have exited (as
+ far as the kernel is concerned). */
+void support_wait_for_thread_exit (void);
+
struct support_stack
{
void *stack;
/* Deallocate the STACK. */
void support_stack_free (struct support_stack *stack);
+
+/* Create a range of NUM opened '/dev/null' file descriptors using FLAGS and
+ MODE. The function takes care of restarting the open range if a file
+ descriptor is found within the specified range and also increases
+ RLIMIT_NOFILE if required.
+ The returned value is the lowest file descriptor number. */
+int support_open_dev_null_range (int num, int flags, mode_t mode);
+
__END_DECLS
#endif /* SUPPORT_H */
support_subprogram because we only want the program exit status, not the
contents. */
ret = 0;
+ infd = outfd = -1;
char * const args[] = {execname, child_id, NULL};
--- /dev/null
+/* Return whether socket supports 64-bit timestamps.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/socket.h>
+#include <support/support.h>
+#ifdef __linux__
+# include <socket-constants-time64.h>
+#endif
+
+/* Report whether socket FD accepts the 64-bit timestamp socket option.
+   Outside Linux the answer is always false.  On Linux it is true without
+   probing when the configured minimum kernel version is at least
+   0x050100 or the (syscall) word size is 64 bits; otherwise the kernel
+   is asked directly with a raw getsockopt call.  */
+bool
+support_socket_so_timestamp_time64 (int fd)
+{
+#ifndef __linux__
+  return false;
+#elif __LINUX_KERNEL_VERSION >= 0x050100 \
+  || __WORDSIZE == 64 \
+  || (defined __SYSCALL_WORDSIZE && __SYSCALL_WORDSIZE == 64)
+  return true;
+#else
+  /* The option value itself is irrelevant; only whether the kernel
+     accepts COMPAT_SO_TIMESTAMP_NEW matters.  */
+  int value;
+  socklen_t value_len = sizeof (value);
+  int rc = syscall (__NR_getsockopt, fd, SOL_SOCKET, COMPAT_SO_TIMESTAMP_NEW,
+                    &value, &value_len);
+  return rc != -1;
+#endif
+}
--- /dev/null
+/* Wait until all threads except the current thread have exited.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <dirent.h>
+#include <errno.h>
+#include <string.h>
+#include <support/check.h>
+#include <support/support.h>
+#include <unistd.h>
+
+/* Block until /proc/self/task lists no thread other than the caller.
+   The directory is rescanned from the start, after a short sleep, every
+   time a foreign thread ID is found.  Without /proc (non-Linux) the
+   function merely sleeps for a fixed, generous interval.  */
+void
+support_wait_for_thread_exit (void)
+{
+#ifdef __linux__
+  DIR *task_dir = opendir ("/proc/self/task");
+  TEST_VERIFY_EXIT (task_dir != NULL);
+
+  for (;;)
+    {
+      /* readdir returns NULL both at end of directory and on error;
+         errno tells the two apart.  */
+      errno = 0;
+      struct dirent *entry = readdir (task_dir);
+      if (entry == NULL)
+        {
+          if (errno != 0)
+            FAIL_EXIT1 ("readdir: %m");
+
+          /* A complete scan found no other thread.  Testing may
+             continue.  */
+          closedir (task_dir);
+          return;
+        }
+
+      const char *name = entry->d_name;
+
+      /* In some kernels, "0" entries denote a thread that has just
+         exited.  */
+      if (strcmp (name, ".") == 0 || strcmp (name, "..") == 0
+          || strcmp (name, "0") == 0)
+        continue;
+
+      int tid = atoi (name);
+      if (tid <= 0)
+        FAIL_EXIT1 ("Invalid /proc/self/task entry: %s", name);
+
+      if (tid != gettid ())
+        {
+          /* Another thread is still running.  Give it a little time to
+             exit, then scan the directory again from the start.  */
+          usleep (50 * 1000);
+          rewinddir (task_dir);
+        }
+    }
+#else
+  /* Use a large timeout because we cannot verify that the thread has
+     exited.  */
+  usleep (5 * 1000 * 1000);
+#endif
+}
/* Temporary file handling for tests.
- Copyright (C) 1998-2021 Free Software Foundation, Inc.
+ Copyright (C) 1998-2022 Free Software Foundation, Inc.
+ Copyright The GNU Tools Authors.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
some 32-bit platforms. */
#define _FILE_OFFSET_BITS 64
+#include <support/check.h>
#include <support/temp_file.h>
#include <support/temp_file-internal.h>
#include <support/support.h>
+#include <errno.h>
#include <paths.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <unistd.h>
+#include <xunistd.h>
/* List of temporary files. */
static struct temp_name_list
struct temp_name_list *next;
char *name;
pid_t owner;
+ bool toolong;
} *temp_name_list;
/* Location of the temporary files. Set by the test skeleton via
support_set_test_dir. The string is not be freed. */
static const char *test_dir = _PATH_TMP;
-void
-add_temp_file (const char *name)
+/* Name of subdirectories in a too long temporary directory tree. */
+static char toolong_subdir[NAME_MAX + 1];
+static bool toolong_initialized;
+static size_t toolong_path_max;
+
+static void
+add_temp_file_internal (const char *name, bool toolong)
{
struct temp_name_list *newp
= (struct temp_name_list *) xcalloc (sizeof (*newp), 1);
newp->name = newname;
newp->next = temp_name_list;
newp->owner = getpid ();
+ newp->toolong = toolong;
temp_name_list = newp;
}
else
free (newp);
}
+/* Register NAME as a temporary file through add_temp_file_internal.  The
+   entry is not flagged as a too-long directory tree (toolong is false).  */
+void
+add_temp_file (const char *name)
+{
+ add_temp_file_internal (name, false);
+}
+
int
create_temp_file_in_dir (const char *base, const char *dir, char **filename)
{
return create_temp_file_in_dir (base, test_dir, filename);
}
-char *
-support_create_temp_directory (const char *base)
+static char *
+create_temp_directory_internal (const char *base, bool toolong)
{
char *path = xasprintf ("%s/%sXXXXXX", test_dir, base);
if (mkdtemp (path) == NULL)
printf ("error: mkdtemp (\"%s\"): %m", path);
exit (1);
}
- add_temp_file (path);
+ add_temp_file_internal (path, toolong);
return path;
}
-/* Helper functions called by the test skeleton follow. */
+/* Create a temporary directory whose name starts with BASE and return its
+   path.  This is the plain variant: the directory is registered without
+   the toolong flag.  */
+char *
+support_create_temp_directory (const char *base)
+{
+ return create_temp_directory_internal (base, false);
+}
+
+/* Fail the test unless initialize_toolong has already set up the
+   too-long directory state (toolong_initialized).  */
+static void
+ensure_toolong_initialized (void)
+{
+  if (toolong_initialized)
+    return;
+
+  FAIL_EXIT1 ("uninitialized toolong directory tree\n");
+}
+
+/* Derive the component name (toolong_subdir, a run of 'X' characters) and
+   the path length limit (toolong_path_max) from the pathconf limits of
+   BASE.  The first call records the values; later calls only verify that
+   BASE yields the same limits and mark the test unsupported otherwise.  */
+static void
+initialize_toolong (const char *base)
+{
+  /* Component length: fall back to 64 when the limit is unknown and never
+     exceed what toolong_subdir can hold including its terminator.  */
+  long name_max = pathconf (base, _PC_NAME_MAX);
+  if (name_max < 0)
+    name_max = 64;
+  else if (name_max >= sizeof (toolong_subdir))
+    name_max = sizeof (toolong_subdir) - 1;
+
+  /* Path length: fall back to 1024 when unknown, cap at PTRDIFF_MAX.  */
+  long path_max = pathconf (base, _PC_PATH_MAX);
+  if (path_max < 0)
+    path_max = 1024;
+  else if (path_max > PTRDIFF_MAX)
+    path_max = PTRDIFF_MAX;
+
+  if (!toolong_initialized)
+    {
+      /* toolong_subdir is a zero-initialized static, so filling only the
+         first NAME_MAX bytes leaves it null-terminated.  */
+      memset (toolong_subdir, 'X', name_max);
+      toolong_path_max = path_max;
+      toolong_initialized = true;
+      return;
+    }
+
+  /* Sanity check to ensure that the test does not create temporary
+     directories in different filesystems because this API doesn't support
+     it.  */
+  if (name_max != strlen (toolong_subdir))
+    FAIL_UNSUPPORTED ("name_max: Temporary directories in different"
+                      " filesystems not supported yet\n");
+  if (path_max != toolong_path_max)
+    FAIL_UNSUPPORTED ("path_max: Temporary directories in different"
+                      " filesystems not supported yet\n");
+}
+
+/* Create a temporary directory named after BASENAME, then build a chain of
+   nested toolong_subdir directories below it and change into each one, so
+   that the process ends up in a directory whose path exceeds the recorded
+   path limit.  Returns the path of the top-level temporary directory.  */
+char *
+support_create_and_chdir_toolong_temp_directory (const char *basename)
+{
+  char *base = create_temp_directory_internal (basename, true);
+  xchdir (base);
+
+  initialize_toolong (base);
+
+  /* One level more than toolong_path_max / component length, so that the
+     final path is larger than PATH_MAX.  */
+  size_t levels = toolong_path_max / strlen (toolong_subdir) + 1;
+
+  for (size_t remaining = levels; remaining > 0; remaining--)
+    {
+      if (mkdir (toolong_subdir, S_IRWXU) != 0)
+        {
+          if (errno == ENAMETOOLONG)
+            FAIL_UNSUPPORTED ("Filesystem does not support creating too long "
+                              "directory trees\n");
+          else
+            FAIL_EXIT1 ("Failed to create directory tree: %m\n");
+        }
+      xchdir (toolong_subdir);
+    }
+  return base;
+}
void
-support_set_test_dir (const char *path)
+support_chdir_toolong_temp_directory (const char *base)
{
- test_dir = path;
+ /* The depth and component name come from toolong_path_max and
+    toolong_subdir, so the tree must have been set up beforehand.  */
+ ensure_toolong_initialized ();
+
+ xchdir (base);
+
+ /* Descend the same number of levels that
+    support_create_and_chdir_toolong_temp_directory created.  */
+ size_t sz = strlen (toolong_subdir);
+ for (size_t i = 0; i <= toolong_path_max / sz; i++)
+ xchdir (toolong_subdir);
+}
+
+/* Helper functions called by the test skeleton follow. */
+
+/* Remove the chain of nested toolong_subdir directories below BASE.  The
+   function first descends as far as it can, then walks back up, removing
+   one level per step.  Problems are reported as warnings on standard
+   output and end the cleanup early; BASE itself is not removed here.  */
+static void
+remove_toolong_subdirs (const char *base)
+{
+  ensure_toolong_initialized ();
+
+  if (chdir (base) != 0)
+    {
+      printf ("warning: toolong cleanup base failed: chdir (\"%s\"): %m\n",
+              base);
+      return;
+    }
+
+  /* Descend.  DEPTH counts the levels actually entered.  */
+  size_t wanted = toolong_path_max / strlen (toolong_subdir) + 1;
+  int depth = 0;
+  while (depth < wanted)
+    {
+      if (chdir (toolong_subdir) != 0)
+        {
+          printf ("warning: toolong cleanup failed: chdir (\"%s\"): %m\n",
+                  toolong_subdir);
+          break;
+        }
+      depth++;
+    }
+
+  /* Ascend and remove, once per level entered above.  */
+  for (; depth > 0; depth--)
+    {
+      if (chdir ("..") != 0)
+        {
+          printf ("warning: toolong cleanup failed: chdir (\"..\"): %m\n");
+          return;
+        }
+      if (remove (toolong_subdir) != 0)
+        {
+          printf ("warning: could not remove subdirectory: %s: %m\n",
+                  toolong_subdir);
+          return;
+        }
+    }
}
void
around, to prevent PID reuse.) */
if (temp_name_list->owner == pid)
{
+ if (temp_name_list->toolong)
+ remove_toolong_subdirs (temp_name_list->name);
+
if (remove (temp_name_list->name) != 0)
printf ("warning: could not remove temporary file: %s: %m\n",
temp_name_list->name);
fprintf (f, ")\n");
}
}
+
+/* Make PATH the directory in which temporary files and directories are
+   created.  Only the pointer is stored in test_dir; the string is not
+   copied.  */
+void
+support_set_test_dir (const char *path)
+{
+ test_dir = path;
+}
returns. The caller should free this string. */
char *support_create_temp_directory (const char *base);
+/* Create a temporary directory tree that is longer than PATH_MAX, change
+ the current directory to its innermost directory, and schedule the tree
+ for deletion. BASENAME is used as a prefix for the unique directory
+ name, which the function returns. The caller should free this string. */
+char *support_create_and_chdir_toolong_temp_directory (const char *basename);
+
+/* Change into the innermost directory of the directory tree BASE, which was
+ created using support_create_and_chdir_toolong_temp_directory. */
+void support_chdir_toolong_temp_directory (const char *base);
+
__END_DECLS
#endif /* SUPPORT_TEMP_FILE_H */
--- /dev/null
+/* Tests for support_open_dev_null_range.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <support/check.h>
+#include <support/support.h>
+#include <support/xunistd.h>
+#include <sys/resource.h>
+#include <stdlib.h>
+
+#ifndef PATH_MAX
+# define PATH_MAX 1024
+#endif
+
+#include <stdio.h>
+
+/* Verify that descriptor FD refers to /dev/null by reading the
+   /proc/self/fd/FD symbolic link.  Exits the test if the link cannot be
+   read; records a test failure if the target differs.  */
+static void
+check_path (int fd)
+{
+  char *proc_fd_path = xasprintf ("/proc/self/fd/%d", fd);
+  char file_path[PATH_MAX];
+  /* readlink does not null-terminate and may fill the whole buffer, so
+     reserve one byte for the terminator written below.  */
+  ssize_t file_path_length
+    = readlink (proc_fd_path, file_path, sizeof (file_path) - 1);
+  if (file_path_length < 0)
+    FAIL_EXIT1 ("readlink (%s, %p, %zu)", proc_fd_path, file_path,
+                sizeof (file_path) - 1);
+  /* Release the path only after its last use in the message above.  */
+  free (proc_fd_path);
+  file_path[file_path_length] = '\0';
+  TEST_COMPARE_STRING (file_path, "/dev/null");
+}
+
+/* Count the descriptors listed in /proc/self/fd, leaving out the one
+   that is used for the enumeration itself.  Any failure to open or read
+   the directory, or an entry that is not a valid descriptor number,
+   terminates the test.  */
+static int
+number_of_opened_files (void)
+{
+  DIR *dir = opendir ("/proc/self/fd");
+  if (dir == NULL)
+    FAIL_EXIT1 ("opendir (\"/proc/self/fd\"): %m");
+
+  int count = 0;
+  for (;;)
+    {
+      /* A NULL result with errno still zero means end of directory.  */
+      errno = 0;
+      struct dirent64 *entry = readdir64 (dir);
+      if (entry == NULL && errno != 0)
+        FAIL_EXIT1 ("readdir: %m");
+      if (entry == NULL)
+        break;
+
+      /* Skip "." and "..".  */
+      if (entry->d_name[0] == '.')
+        continue;
+
+      char *end;
+      long int value = strtol (entry->d_name, &end, 10);
+      if (*end != '\0' || value < 0 || value > INT_MAX)
+        FAIL_EXIT1 ("readdir: invalid file descriptor name: /proc/self/fd/%s",
+                    entry->d_name);
+
+      /* The descriptor which is used to enumerate the descriptors does
+         not count.  */
+      if (value != dirfd (dir))
+        count++;
+    }
+
+  closedir (dir);
+
+  return count;
+}
+
+/* Exercise support_open_dev_null_range in four situations: a first
+   range, a second range after punching gaps into the first, a range
+   requested after lowering the RLIMIT_NOFILE soft limit to the number of
+   currently open descriptors, and a range requested after punching many
+   gaps.  Every descriptor of every returned range must be open and must
+   refer to /dev/null.  */
+static int
+do_test (void)
+{
+  const int nfds1 = 8;
+  int lowfd = support_open_dev_null_range (nfds1, O_RDONLY, 0600);
+  for (int i = 0; i < nfds1; i++)
+    {
+      TEST_VERIFY (fcntl (lowfd + i, F_GETFL) > -1);
+      check_path (lowfd + i);
+    }
+
+  /* create some gaps.  */
+  xclose (lowfd + 1);
+  xclose (lowfd + 5);
+  xclose (lowfd + 6);
+
+  const int nfds2 = 16;
+  int lowfd2 = support_open_dev_null_range (nfds2, O_RDONLY, 0600);
+  for (int i = 0; i < nfds2; i++)
+    {
+      TEST_VERIFY (fcntl (lowfd2 + i, F_GETFL) > -1);
+      check_path (lowfd2 + i);
+    }
+
+  /* Decrease the maximum number of files.  */
+  {
+    struct rlimit rl;
+    if (getrlimit (RLIMIT_NOFILE, &rl) == -1)
+      FAIL_EXIT1 ("getrlimit (RLIMIT_NOFILE): %m");
+
+    rl.rlim_cur = number_of_opened_files ();
+
+    /* setrlimit signals failure with -1; comparing against 1 would let
+       a failed limit change go unnoticed.  */
+    if (setrlimit (RLIMIT_NOFILE, &rl) == -1)
+      FAIL_EXIT1 ("setrlimit (RLIMIT_NOFILE): %m");
+  }
+
+  const int nfds3 = 16;
+  int lowfd3 = support_open_dev_null_range (nfds3, O_RDONLY, 0600);
+  for (int i = 0; i < nfds3; i++)
+    {
+      TEST_VERIFY (fcntl (lowfd3 + i, F_GETFL) > -1);
+      check_path (lowfd3 + i);
+    }
+
+  /* create a lot of gaps to trigger the range extension.  */
+  xclose (lowfd3 + 1);
+  xclose (lowfd3 + 3);
+  xclose (lowfd3 + 5);
+  xclose (lowfd3 + 7);
+  xclose (lowfd3 + 9);
+  xclose (lowfd3 + 11);
+  xclose (lowfd3 + 13);
+
+  const int nfds4 = 16;
+  int lowfd4 = support_open_dev_null_range (nfds4, O_RDONLY, 0600);
+  for (int i = 0; i < nfds4; i++)
+    {
+      TEST_VERIFY (fcntl (lowfd4 + i, F_GETFL) > -1);
+      check_path (lowfd4 + i);
+    }
+
+  return 0;
+}
+
+#include <support/test-driver.c>
terminate the process on error. */
void xraise (int sig);
+#ifdef _GNU_SOURCE
sighandler_t xsignal (int sig, sighandler_t handler);
+#endif
void xsigaction (int sig, const struct sigaction *newact,
struct sigaction *oldact);
ifeq ($(subdir),elf)
sysdep-dl-routines += dl-bti
+
+tests += tst-audit26 \
+ tst-audit27
+
+modules-names += \
+ tst-audit26mod \
+ tst-auditmod26 \
+ tst-audit27mod \
+ tst-auditmod27
+
+$(objpfx)tst-audit26: $(objpfx)tst-audit26mod.so \
+ $(objpfx)tst-auditmod26.so
+LDFLAGS-tst-audit26 += -Wl,-z,lazy
+tst-audit26-ENV = LD_AUDIT=$(objpfx)tst-auditmod26.so
+
+$(objpfx)tst-audit27: $(objpfx)tst-audit27mod.so \
+ $(objpfx)tst-auditmod27.so
+$(objpfx)tst-audit27mod.so: $(libsupport)
+LDFLAGS-tst-audit27 += -Wl,-z,lazy
+tst-audit27-ENV = LD_AUDIT=$(objpfx)tst-auditmod27.so
endif
ifeq ($(subdir),elf)
# error "Never include <bits/link.h> directly; use <link.h> instead."
#endif
+typedef union
+{
+ float s; /* Contents viewed as a float. */
+ double d; /* Contents viewed as a double. */
+ long double q; /* Contents viewed as a long double. */
+} La_aarch64_vector;
+
/* Registers for entry into PLT on AArch64. */
typedef struct La_aarch64_regs
{
- uint64_t lr_xreg[8];
- uint64_t lr_dreg[8];
- uint64_t lr_sp;
- uint64_t lr_lr;
+ uint64_t lr_xreg[9]; /* x0-x7, followed by x8 in the last slot. */
+ La_aarch64_vector lr_vreg[8]; /* q0-q7 as saved by the PLT trampoline. */
+ uint64_t lr_sp;
+ uint64_t lr_lr;
+ void *lr_vpcs; /* Stored as zero by the PLT trampoline. */
} La_aarch64_regs;
/* Return values for calls from PLT on AArch64. */
typedef struct La_aarch64_retval
{
- /* Up to two integer registers can be used for a return value. */
- uint64_t lrv_xreg[2];
- /* Up to four D registers can be used for a return value. */
- uint64_t lrv_dreg[4];
-
+ /* Up to eight integer registers can be used for a return value. */
+ uint64_t lrv_xreg[8];
+ /* Up to eight V registers can be used for a return value. */
+ La_aarch64_vector lrv_vreg[8];
+ void *lrv_vpcs; /* NOTE(review): presumably the counterpart of lr_vpcs in La_aarch64_regs; confirm. */
} La_aarch64_retval;
__BEGIN_DECLS
--- /dev/null
+/* rtld-audit version check. AArch64 version.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+static inline bool
+_dl_audit_check_version (unsigned int lav)
+{
+ /* Return true only if the audit module's version LAV equals LAV_CURRENT.
+ Audit version 1 does not save x8 or NEON registers, which required
+ changing the La_aarch64_regs and La_aarch64_retval layout (BZ#26643). The
+ missing indirect result save/restore makes _dl_runtime_profile
+ potentially trigger undefined behavior if the function returns a large
+ struct (even when PLT trace is not requested). */
+ return lav == LAV_CURRENT;
+}
DL_SIZEOF_RV sizeof(struct La_aarch64_retval)
DL_OFFSET_RG_X0 offsetof(struct La_aarch64_regs, lr_xreg)
-DL_OFFSET_RG_D0 offsetof(struct La_aarch64_regs, lr_dreg)
+DL_OFFSET_RG_V0 offsetof(struct La_aarch64_regs, lr_vreg)
DL_OFFSET_RG_SP offsetof(struct La_aarch64_regs, lr_sp)
DL_OFFSET_RG_LR offsetof(struct La_aarch64_regs, lr_lr)
+DL_OFFSET_RG_VPCS offsetof(struct La_aarch64_regs, lr_vpcs)
DL_OFFSET_RV_X0 offsetof(struct La_aarch64_retval, lrv_xreg)
-DL_OFFSET_RV_D0 offsetof(struct La_aarch64_retval, lrv_dreg)
+DL_OFFSET_RV_V0 offsetof(struct La_aarch64_retval, lrv_vreg)
+DL_OFFSET_RV_VPCS offsetof(struct La_aarch64_retval, lrv_vpcs)
#include <sysdep.h>
#include <tls.h>
#include <dl-tlsdesc.h>
+#include <dl-static-tls.h>
#include <dl-irel.h>
+#include <dl-machine-rel.h>
#include <cpu-features.c>
/* Translate a processor specific dynamic tag to the index in l_info array. */
entries will jump to the on-demand fixup code in dl-runtime.c. */
static inline int __attribute__ ((unused))
-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ int lazy, int profile)
{
if (l->l_info[DT_JMPREL] && lazy)
{
#define ELF_MACHINE_JMP_SLOT AARCH64_R(JUMP_SLOT)
-/* AArch64 uses RELA not REL */
-#define ELF_MACHINE_NO_REL 1
-#define ELF_MACHINE_NO_RELA 0
-
#define DL_PLATFORM_INIT dl_platform_init ()
static inline void __attribute__ ((unused))
#ifdef RESOLVE_MAP
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
- const ElfW(Sym) *sym, const struct r_found_version *version,
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
+ const ElfW(Rela) *reloc, const ElfW(Sym) *sym,
+ const struct r_found_version *version,
void *const reloc_addr_arg, int skip_ifunc)
{
ElfW(Addr) *const reloc_addr = reloc_addr_arg;
else
{
const ElfW(Sym) *const refsym = sym;
- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
+ r_type);
ElfW(Addr) value = SYMBOL_ADDRESS (sym_map, sym, true);
if (sym != NULL
}
}
-inline void
+static inline void
__attribute__ ((always_inline))
elf_machine_rela_relative (ElfW(Addr) l_addr,
const ElfW(Rela) *reloc,
*reloc_addr = l_addr + reloc->r_addend;
}
-inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_lazy_rel (struct link_map *map,
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
ElfW(Addr) l_addr,
const ElfW(Rela) *reloc,
int skip_ifunc)
(const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
version = &map->l_versions[vernum[symndx] & 0x7fff];
}
- elf_machine_rela (map, reloc, sym, version, reloc_addr,
+ elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
skip_ifunc);
return;
}
/* Always initialize TLS descriptors completely, because lazy
initialization requires synchronization at every TLS access. */
- elf_machine_rela (map, reloc, sym, version, reloc_addr, skip_ifunc);
+ elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
+ skip_ifunc);
}
else if (__glibc_unlikely (r_type == AARCH64_R(IRELATIVE)))
{
cfi_rel_offset (lr, 8)
- /* Save arguments. */
+ /* Note: Saving x9 is not required by the ABI but the assembler requires
+ the immediate values of operand 3 to be a multiple of 16 */
stp x8, x9, [sp, #-(80+8*16)]!
cfi_adjust_cfa_offset (80+8*16)
cfi_rel_offset (x8, 0)
Stack frame layout:
[sp, #...] lr
[sp, #...] &PLTGOT[n]
- [sp, #96] La_aarch64_regs
+ [sp, #256] La_aarch64_regs
[sp, #48] La_aarch64_retval
[sp, #40] frame size return from pltenter
[sp, #32] dl_profile_call saved x1
stp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
cfi_rel_offset (x6, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 0)
cfi_rel_offset (x7, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 8)
-
- stp d0, d1, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
- cfi_rel_offset (d0, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0)
- cfi_rel_offset (d1, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0 + 8)
- stp d2, d3, [X29, #OFFSET_RG+ DL_OFFSET_RG_D0 + 16*1]
- cfi_rel_offset (d2, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 0)
- cfi_rel_offset (d3, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 8)
- stp d4, d5, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
- cfi_rel_offset (d4, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 0)
- cfi_rel_offset (d5, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 8)
- stp d6, d7, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
- cfi_rel_offset (d6, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 0)
- cfi_rel_offset (d7, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 8)
+ str x8, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4 + 0]
+ cfi_rel_offset (x8, OFFSET_RG + DL_OFFSET_RG_X0 + 16*4 + 0)
+ /* Note 8 bytes of padding is in the stack frame for alignment */
+
+ stp q0, q1, [X29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*0]
+ cfi_rel_offset (q0, OFFSET_RG + DL_OFFSET_RG_V0 + 32*0)
+ cfi_rel_offset (q1, OFFSET_RG + DL_OFFSET_RG_V0 + 32*0 + 16)
+ stp q2, q3, [X29, #OFFSET_RG+ DL_OFFSET_RG_V0 + 32*1]
+ cfi_rel_offset (q2, OFFSET_RG + DL_OFFSET_RG_V0 + 32*1 + 0)
+ cfi_rel_offset (q3, OFFSET_RG + DL_OFFSET_RG_V0 + 32*1 + 16)
+ stp q4, q5, [X29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*2]
+ cfi_rel_offset (q4, OFFSET_RG + DL_OFFSET_RG_V0 + 32*2 + 0)
+ cfi_rel_offset (q5, OFFSET_RG + DL_OFFSET_RG_V0 + 32*2 + 16)
+ stp q6, q7, [X29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*3]
+ cfi_rel_offset (q6, OFFSET_RG + DL_OFFSET_RG_V0 + 32*3 + 0)
+ cfi_rel_offset (q7, OFFSET_RG + DL_OFFSET_RG_V0 + 32*3 + 16)
+
+ /* No APCS extension supported. */
+ str xzr, [X29, #OFFSET_RG + DL_OFFSET_RG_VPCS]
add x0, x29, #SF_SIZE + 16
ldr x1, [x29, #OFFSET_LR]
ldp x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1]
ldp x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2]
ldp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
- ldp d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
- ldp d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1]
- ldp d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
- ldp d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
+ ldr x8, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4]
+ ldp q0, q1, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*0]
+ ldp q2, q3, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*1]
+ ldp q4, q5, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*2]
+ ldp q6, q7, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*3]
cfi_def_cfa_register (sp)
ldp x29, x30, [x29, #0]
ldp x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1]
ldp x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2]
ldp x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
- ldp d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
- ldp d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1]
- ldp d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
- ldp d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
+ ldr x8, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4]
+ ldp q0, q1, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*0]
+ ldp q2, q3, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*1]
+ ldp q4, q5, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*2]
+ ldp q6, q7, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*3]
blr ip0
- stp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0]
- stp d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0]
- stp d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1]
+ /* Save the callee's return registers into La_aarch64_retval. */
+ stp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*0]
+ stp x2, x3, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*1]
+ stp x4, x5, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*2]
+ stp x6, x7, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*3]
+ /* NOTE(review): x8 goes into the La_aarch64_regs area (lr_xreg[8]);
+ La_aarch64_retval declares only eight lrv_xreg slots. Confirm that
+ this is intended. */
+ str x8, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4]
+ stp q0, q1, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*0]
+ stp q2, q3, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*1]
+ stp q4, q5, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*2]
+ stp q6, q7, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*3]
+ /* Clear lrv_vpcs. The retval base must be paired with the retval
+ offset: DL_OFFSET_RG_VPCS is the offset of lr_vpcs inside
+ La_aarch64_regs, so using it here would leave lrv_vpcs unset and
+ store outside La_aarch64_retval. */
+ str xzr, [X29, #OFFSET_RV + DL_OFFSET_RV_VPCS]
/* Setup call to pltexit */
ldp x0, x1, [x29, #OFFSET_SAVED_CALL_X0]
add x2, x29, #OFFSET_RG
add x3, x29, #OFFSET_RV
- bl _dl_call_pltexit
+ bl _dl_audit_pltexit
+
+ ldp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*0]
+ ldp x2, x3, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*1]
+ ldp x4, x5, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*2]
+ ldp x6, x7, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*3]
+ ldr x8, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*4]
+ ldp q0, q1, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*0]
+ ldp q2, q3, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*1]
+ ldp q4, q5, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*2]
+ ldp q6, q7, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*3]
- ldp x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0]
- ldp d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0]
- ldp d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1]
/* LR from within La_aarch64_reg */
ldr lr, [x29, #OFFSET_RG + DL_OFFSET_RG_LR]
cfi_restore(lr)
|| IS_NEOVERSE_V1 (midr)
? __memcpy_simd
# if HAVE_AARCH64_SVE_ASM
- : (IS_A64FX (midr)
+ : (IS_A64FX (midr) && sve
? __memcpy_a64fx
: __memcpy_generic))))));
# else
|| IS_NEOVERSE_V1 (midr)
? __memmove_simd
# if HAVE_AARCH64_SVE_ASM
- : (IS_A64FX (midr)
+ : (IS_A64FX (midr) && sve
? __memmove_a64fx
: __memmove_generic))))));
# else
: (IS_EMAG (midr) && zva_size == 64
? __memset_emag
# if HAVE_AARCH64_SVE_ASM
- : (IS_A64FX (midr)
+ : (IS_A64FX (midr) && sve
? __memset_a64fx
: __memset_generic))));
# else
--- /dev/null
+/* Check LD_AUDIT for aarch64 ABI specifics.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <array_length.h>
+#include <string.h>
+#include <support/check.h>
+#include "tst-audit26mod.h"
+
+/* Call tst_audit26_func through the PLT and check that the large struct
+   it returns arrives intact.  All three members are compared, so that
+   damage to any part of the indirectly returned object is detected.  */
+int
+do_test (void)
+{
+  /* Returning a large struct uses 'x8' as indirect result location.  */
+  struct large_struct r = tst_audit26_func (ARG1, ARG2, ARG3);
+
+  struct large_struct e = set_large_struct (ARG1, ARG2, ARG3);
+
+  TEST_COMPARE_BLOB (r.a, sizeof (r.a), e.a, sizeof (e.a));
+  TEST_COMPARE_BLOB (r.b, sizeof (r.b), e.b, sizeof (e.b));
+  TEST_COMPARE_BLOB (r.c, sizeof (r.c), e.c, sizeof (e.c));
+
+  return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Check LD_AUDIT for aarch64 ABI specifics.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+#include "tst-audit26mod.h"
+
+/* Abort unless the arguments arrived as ARG1, ARG2 and ARG3, then return
+   the large struct built from them by set_large_struct.  */
+struct large_struct
+tst_audit26_func (char a, short b, long int c)
+{
+  if (a != ARG1 || b != ARG2 || c != ARG3)
+    abort ();
+
+  return set_large_struct (a, b, c);
+}
--- /dev/null
+/* Check LD_AUDIT for aarch64 specific ABI.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* The guard carries this header's own number: it declares
+   tst_audit26_func and is included as "tst-audit26mod.h".  */
+#ifndef _TST_AUDIT26MOD_H
+#define _TST_AUDIT26MOD_H 1
+
+#include <array_length.h>
+
+/* Structure returned by value from tst_audit26_func.  */
+struct large_struct
+{
+  char a[16];
+  short b[8];
+  long int c[4];
+};
+
+/* Return a large_struct in which every element of member a is A, every
+   element of b is B and every element of c is C.  */
+static inline struct large_struct
+set_large_struct (char a, short b, long int c)
+{
+  struct large_struct r;
+  for (int i = 0; i < array_length (r.a); i++)
+    r.a[i] = a;
+  for (int i = 0; i < array_length (r.b); i++)
+    r.b[i] = b;
+  for (int i = 0; i < array_length (r.c); i++)
+    r.c[i] = c;
+  return r;
+}
+
+/* Argument values passed to tst_audit26_func by the test.  */
+#define ARG1 0x12
+#define ARG2 0x1234
+#define ARG3 0x12345678
+
+struct large_struct tst_audit26_func (char a, short b, long int c);
+
+#endif
--- /dev/null
+/* Check LD_AUDIT for aarch64 ABI specifics.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <array_length.h>
+#include <string.h>
+#include <support/check.h>
+#include "tst-audit27mod.h"
+
+/* Call the float, double and long double functions of the audited module
+   in turn, each with its eight FUNC_*_ARGn arguments, and exit with a
+   failure as soon as one of them returns something other than the
+   matching FUNC_*_RET value.  */
+int
+do_test (void)
+{
+  float float_ret
+    = tst_audit27_func_float (FUNC_FLOAT_ARG0, FUNC_FLOAT_ARG1,
+                              FUNC_FLOAT_ARG2, FUNC_FLOAT_ARG3,
+                              FUNC_FLOAT_ARG4, FUNC_FLOAT_ARG5,
+                              FUNC_FLOAT_ARG6, FUNC_FLOAT_ARG7);
+  if (float_ret != FUNC_FLOAT_RET)
+    FAIL_EXIT1 ("tst_audit27_func_float() returned %a, expected %a",
+                float_ret, FUNC_FLOAT_RET);
+
+  double double_ret
+    = tst_audit27_func_double (FUNC_DOUBLE_ARG0, FUNC_DOUBLE_ARG1,
+                               FUNC_DOUBLE_ARG2, FUNC_DOUBLE_ARG3,
+                               FUNC_DOUBLE_ARG4, FUNC_DOUBLE_ARG5,
+                               FUNC_DOUBLE_ARG6, FUNC_DOUBLE_ARG7);
+  if (double_ret != FUNC_DOUBLE_RET)
+    FAIL_EXIT1 ("tst_audit27_func_double() returned %la, expected %la",
+                double_ret, FUNC_DOUBLE_RET);
+
+  long double ldouble_ret
+    = tst_audit27_func_ldouble (FUNC_LDOUBLE_ARG0, FUNC_LDOUBLE_ARG1,
+                                FUNC_LDOUBLE_ARG2, FUNC_LDOUBLE_ARG3,
+                                FUNC_LDOUBLE_ARG4, FUNC_LDOUBLE_ARG5,
+                                FUNC_LDOUBLE_ARG6, FUNC_LDOUBLE_ARG7);
+  if (ldouble_ret != FUNC_LDOUBLE_RET)
+    FAIL_EXIT1 ("tst_audit27_func_ldouble() returned %La, expected %La",
+                ldouble_ret, FUNC_LDOUBLE_RET);
+
+  return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Check LD_AUDIT for aarch64 ABI specifics.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <array_length.h>
+#include <stdlib.h>
+#include <support/check.h>
+#include "tst-audit27mod.h"
+
+float
+tst_audit27_func_float (float a0, float a1, float a2, float a3, float a4,
+ float a5, float a6, float a7)
+{ /* Check that each of the eight float arguments equals the matching
+ FUNC_FLOAT_ARGn constant (FAIL_EXIT1 is invoked with both values
+ on a mismatch), then return FUNC_FLOAT_RET. */
+ if (a0 != FUNC_FLOAT_ARG0)
+ FAIL_EXIT1 ("a0: %a != %a", a0, FUNC_FLOAT_ARG0);
+ if (a1 != FUNC_FLOAT_ARG1)
+ FAIL_EXIT1 ("a1: %a != %a", a1, FUNC_FLOAT_ARG1);
+ if (a2 != FUNC_FLOAT_ARG2)
+ FAIL_EXIT1 ("a2: %a != %a", a2, FUNC_FLOAT_ARG2);
+ if (a3 != FUNC_FLOAT_ARG3)
+ FAIL_EXIT1 ("a3: %a != %a", a3, FUNC_FLOAT_ARG3);
+ if (a4 != FUNC_FLOAT_ARG4)
+ FAIL_EXIT1 ("a4: %a != %a", a4, FUNC_FLOAT_ARG4);
+ if (a5 != FUNC_FLOAT_ARG5)
+ FAIL_EXIT1 ("a5: %a != %a", a5, FUNC_FLOAT_ARG5);
+ if (a6 != FUNC_FLOAT_ARG6)
+ FAIL_EXIT1 ("a6: %a != %a", a6, FUNC_FLOAT_ARG6);
+ if (a7 != FUNC_FLOAT_ARG7)
+ FAIL_EXIT1 ("a7: %a != %a", a7, FUNC_FLOAT_ARG7);
+
+ return FUNC_FLOAT_RET; /* Same constant do_test and the audit module compare against. */
+}
+
+double
+tst_audit27_func_double (double a0, double a1, double a2, double a3, double a4,
+ double a5, double a6, double a7)
+{ /* Check that each of the eight double arguments equals the matching
+ FUNC_DOUBLE_ARGn constant (FAIL_EXIT1 is invoked with both values
+ on a mismatch), then return FUNC_DOUBLE_RET. */
+ if (a0 != FUNC_DOUBLE_ARG0)
+ FAIL_EXIT1 ("a0: %la != %la", a0, FUNC_DOUBLE_ARG0);
+ if (a1 != FUNC_DOUBLE_ARG1)
+ FAIL_EXIT1 ("a1: %la != %la", a1, FUNC_DOUBLE_ARG1);
+ if (a2 != FUNC_DOUBLE_ARG2)
+ FAIL_EXIT1 ("a2: %la != %la", a2, FUNC_DOUBLE_ARG2);
+ if (a3 != FUNC_DOUBLE_ARG3)
+ FAIL_EXIT1 ("a3: %la != %la", a3, FUNC_DOUBLE_ARG3);
+ if (a4 != FUNC_DOUBLE_ARG4)
+ FAIL_EXIT1 ("a4: %la != %la", a4, FUNC_DOUBLE_ARG4);
+ if (a5 != FUNC_DOUBLE_ARG5)
+ FAIL_EXIT1 ("a5: %la != %la", a5, FUNC_DOUBLE_ARG5);
+ if (a6 != FUNC_DOUBLE_ARG6)
+ FAIL_EXIT1 ("a6: %la != %la", a6, FUNC_DOUBLE_ARG6);
+ if (a7 != FUNC_DOUBLE_ARG7)
+ FAIL_EXIT1 ("a7: %la != %la", a7, FUNC_DOUBLE_ARG7);
+
+ return FUNC_DOUBLE_RET; /* Same constant do_test and the audit module compare against. */
+}
+
+long double
+tst_audit27_func_ldouble (long double a0, long double a1, long double a2,
+ long double a3, long double a4, long double a5,
+ long double a6, long double a7)
+{ /* Check that each of the eight long double arguments equals the
+ matching FUNC_LDOUBLE_ARGn constant (FAIL_EXIT1 is invoked with
+ both values on a mismatch), then return FUNC_LDOUBLE_RET. */
+ if (a0 != FUNC_LDOUBLE_ARG0)
+ FAIL_EXIT1 ("a0: %La != %La", a0, FUNC_LDOUBLE_ARG0);
+ if (a1 != FUNC_LDOUBLE_ARG1)
+ FAIL_EXIT1 ("a1: %La != %La", a1, FUNC_LDOUBLE_ARG1);
+ if (a2 != FUNC_LDOUBLE_ARG2)
+ FAIL_EXIT1 ("a2: %La != %La", a2, FUNC_LDOUBLE_ARG2);
+ if (a3 != FUNC_LDOUBLE_ARG3)
+ FAIL_EXIT1 ("a3: %La != %La", a3, FUNC_LDOUBLE_ARG3);
+ if (a4 != FUNC_LDOUBLE_ARG4)
+ FAIL_EXIT1 ("a4: %La != %La", a4, FUNC_LDOUBLE_ARG4);
+ if (a5 != FUNC_LDOUBLE_ARG5)
+ FAIL_EXIT1 ("a5: %La != %La", a5, FUNC_LDOUBLE_ARG5);
+ if (a6 != FUNC_LDOUBLE_ARG6)
+ FAIL_EXIT1 ("a6: %La != %La", a6, FUNC_LDOUBLE_ARG6);
+ if (a7 != FUNC_LDOUBLE_ARG7)
+ FAIL_EXIT1 ("a7: %La != %La", a7, FUNC_LDOUBLE_ARG7);
+
+ return FUNC_LDOUBLE_RET; /* Same constant do_test and the audit module compare against. */
+}
--- /dev/null
+/* Check LD_AUDIT for aarch64 specific ABI.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _TST_AUDIT27MOD_H
+#define _TST_AUDIT27MOD_H 1
+
+#include <float.h>
+
+#define FUNC_FLOAT_ARG0 FLT_MIN
+#define FUNC_FLOAT_ARG1 FLT_MAX
+#define FUNC_FLOAT_ARG2 FLT_EPSILON
+#define FUNC_FLOAT_ARG3 FLT_TRUE_MIN
+#define FUNC_FLOAT_ARG4 0.0f
+#define FUNC_FLOAT_ARG5 1.0f
+#define FUNC_FLOAT_ARG6 2.0f
+#define FUNC_FLOAT_ARG7 3.0f
+#define FUNC_FLOAT_RET 4.0f
+
+float
+tst_audit27_func_float (float a0, float a1, float a2, float a3, float a4,
+ float a5, float a6, float a7);
+
+#define FUNC_DOUBLE_ARG0 DBL_MIN
+#define FUNC_DOUBLE_ARG1 DBL_MAX
+#define FUNC_DOUBLE_ARG2 DBL_EPSILON
+#define FUNC_DOUBLE_ARG3 DBL_TRUE_MIN
+#define FUNC_DOUBLE_ARG4 0.0
+#define FUNC_DOUBLE_ARG5 1.0
+#define FUNC_DOUBLE_ARG6 2.0
+#define FUNC_DOUBLE_ARG7 3.0
+#define FUNC_DOUBLE_RET 0x1.fffffe0000001p+127
+
+double
+tst_audit27_func_double (double a0, double a1, double a2, double a3, double a4,
+ double a5, double a6, double a7);
+
+#define FUNC_LDOUBLE_ARG0 DBL_MAX + 1.0L /* NOTE(review): ARG0..ARG7 expand to unparenthesized sums; the uses visible in this patch are operands of ==/!= or call arguments, where that is harmless. */
+#define FUNC_LDOUBLE_ARG1 DBL_MAX + 2.0L /* NOTE(review): if long double is IEEE binary128, DBL_MAX + k for k = 1..8 presumably rounds back to DBL_MAX, making all eight arguments equal -- TODO confirm. */
+#define FUNC_LDOUBLE_ARG2 DBL_MAX + 3.0L
+#define FUNC_LDOUBLE_ARG3 DBL_MAX + 4.0L
+#define FUNC_LDOUBLE_ARG4 DBL_MAX + 5.0L
+#define FUNC_LDOUBLE_ARG5 DBL_MAX + 6.0L
+#define FUNC_LDOUBLE_ARG6 DBL_MAX + 7.0L
+#define FUNC_LDOUBLE_ARG7 DBL_MAX + 8.0L
+#define FUNC_LDOUBLE_RET 0x1.fffffffffffff000000000000001p+1023L /* 28 hex fraction digits (112 bits); only the first 13 and the very last digit are nonzero. */
+
+long double
+tst_audit27_func_ldouble (long double a0, long double a1, long double a2,
+ long double a3, long double a4, long double a5,
+ long double a6, long double a7);
+
+#endif
--- /dev/null
+/* Check LD_AUDIT for aarch64 specific ABI.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <assert.h>
+#include <link.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "tst-audit26mod.h"
+
+#define TEST_NAME "tst-audit26"
+
+#define AUDIT26_COOKIE 0
+
+unsigned int
+la_version (unsigned int v)
+{
+ return v; /* Hand back the version V that was passed in, unchanged. */
+}
+
+unsigned int
+la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
+{ /* Store a cookie for the object MAP, log it, and return the binding
+ flags: LA_FLG_BINDFROM | LA_FLG_BINDTO when the file name starts
+ with TEST_NAME, 0 otherwise. */
+ const char *p = strrchr (map->l_name, '/'); /* Last '/' in the path, if any. */
+ const char *l_name = p == NULL ? map->l_name : p + 1; /* File-name component only. */
+ uintptr_t ck = -1; /* Cookie for objects whose name does not match. */
+ if (strncmp (l_name, TEST_NAME, strlen (TEST_NAME)) == 0) /* Prefix match on TEST_NAME, not an exact comparison. */
+ ck = AUDIT26_COOKIE;
+ *cookie = ck;
+ printf ("objopen: %ld, %s [cookie=%ld]\n", lmid, l_name, ck); /* NOTE(review): ck is a uintptr_t but is formatted with %ld. */
+ return ck == -1 ? 0 : LA_FLG_BINDFROM | LA_FLG_BINDTO;
+}
+
+ElfW(Addr)
+la_aarch64_gnu_pltenter (ElfW(Sym) *sym __attribute__ ((unused)),
+ unsigned int ndx __attribute__ ((unused)),
+ uintptr_t *refcook, uintptr_t *defcook,
+ La_aarch64_regs *regs, unsigned int *flags,
+ const char *symname, long int *framesizep)
+{ /* PLT-entry hook for tst-audit26: log the call, assert that the first
+ three saved x registers hold ARG1..ARG3 for tst_audit26_func (any
+ other symbol aborts), then return SYM's own st_value.
+ NOTE(review): SYM and NDX carry the unused attribute although
+ both are read by the printf below. */
+ printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+ symname, (long int) sym->st_value, ndx, *flags);
+
+ if (strcmp (symname, "tst_audit26_func") == 0)
+ {
+ assert (regs->lr_xreg[0] == ARG1);
+ assert (regs->lr_xreg[1] == ARG2);
+ assert (regs->lr_xreg[2] == ARG3);
+ }
+ else
+ abort (); /* Only tst_audit26_func is expected to reach this hook. */
+
+ assert (regs->lr_vpcs == 0);
+
+ /* Clobber 'x8'. Presumably the test relies on the code that invoked
+ this hook to restore x8 afterwards -- TODO confirm against
+ tst-audit26.c. */
+ asm volatile ("mov x8, -1" : : : "x8");
+
+ *framesizep = 1024; /* Presumably a non-negative frame size is what makes the pltexit hook run -- see rtld-audit(7), TODO confirm. */
+
+ return sym->st_value; /* Keep the binding the symbol already has. */
+}
+
+unsigned int
+la_aarch64_gnu_pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook,
+ const struct La_aarch64_regs *inregs,
+ struct La_aarch64_retval *outregs, const char *symname)
+{ /* PLT-exit hook for tst-audit26: log the call, assert that INREGS
+ still shows ARG1..ARG3 in the first three x registers for
+ tst_audit26_func (any other symbol aborts), and return 0. */
+ printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u\n",
+ symname, (long int) sym->st_value, ndx);
+
+ if (strcmp (symname, "tst_audit26_func") == 0)
+ {
+ assert (inregs->lr_xreg[0] == ARG1);
+ assert (inregs->lr_xreg[1] == ARG2);
+ assert (inregs->lr_xreg[2] == ARG3);
+ }
+ else
+ abort (); /* Only tst_audit26_func is expected to reach this hook. */
+
+ assert (inregs->lr_vpcs == 0);
+ assert (outregs->lrv_vpcs == 0);
+
+ /* Clobber 'x8'. Presumably the test relies on the code that invoked
+ this hook to restore x8 afterwards -- TODO confirm against
+ tst-audit26.c. */
+ asm volatile ("mov x8, -1" : : : "x8");
+
+ return 0;
+}
--- /dev/null
+/* Check LD_AUDIT for aarch64 specific ABI.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <assert.h>
+#include <link.h>
+#include <string.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "tst-audit27mod.h"
+
+#define TEST_NAME "tst-audit27"
+
+#define AUDIT27_COOKIE 0
+
+unsigned int
+la_version (unsigned int v)
+{
+ return v; /* Hand back the version V that was passed in, unchanged. */
+}
+
+unsigned int
+la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
+{ /* Store a cookie for the object MAP, log it, and return the binding
+ flags: LA_FLG_BINDFROM | LA_FLG_BINDTO when the file name starts
+ with TEST_NAME, 0 otherwise. */
+ const char *p = strrchr (map->l_name, '/'); /* Last '/' in the path, if any. */
+ const char *l_name = p == NULL ? map->l_name : p + 1; /* File-name component only. */
+ uintptr_t ck = -1; /* Cookie for objects whose name does not match. */
+ if (strncmp (l_name, TEST_NAME, strlen (TEST_NAME)) == 0) /* Prefix match on TEST_NAME, not an exact comparison. */
+ ck = AUDIT27_COOKIE;
+ *cookie = ck;
+ printf ("objopen: %ld, %s [%ld]\n", lmid, l_name, ck); /* NOTE(review): ck is a uintptr_t but is formatted with %ld. */
+ return ck == -1 ? 0 : LA_FLG_BINDFROM | LA_FLG_BINDTO;
+}
+
+ElfW(Addr)
+la_aarch64_gnu_pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook, La_aarch64_regs *regs,
+ unsigned int *flags, const char *symname,
+ long int *framesizep)
+{ /* PLT-entry hook for tst-audit27: log the call, assert that the saved
+ vector registers 0..7 hold the FUNC_*_ARGn constants of whichever
+ tst_audit27_func_* symbol is being entered (any other symbol
+ aborts), overwrite v0-v7, and return SYM's own st_value. */
+ printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
+ symname, (long int) sym->st_value, ndx, *flags);
+
+ if (strcmp (symname, "tst_audit27_func_float") == 0)
+ { /* float arguments are read through the .s member. */
+ assert (regs->lr_vreg[0].s == FUNC_FLOAT_ARG0);
+ assert (regs->lr_vreg[1].s == FUNC_FLOAT_ARG1);
+ assert (regs->lr_vreg[2].s == FUNC_FLOAT_ARG2);
+ assert (regs->lr_vreg[3].s == FUNC_FLOAT_ARG3);
+ assert (regs->lr_vreg[4].s == FUNC_FLOAT_ARG4);
+ assert (regs->lr_vreg[5].s == FUNC_FLOAT_ARG5);
+ assert (regs->lr_vreg[6].s == FUNC_FLOAT_ARG6);
+ assert (regs->lr_vreg[7].s == FUNC_FLOAT_ARG7);
+ }
+ else if (strcmp (symname, "tst_audit27_func_double") == 0)
+ { /* double arguments are read through the .d member. */
+ assert (regs->lr_vreg[0].d == FUNC_DOUBLE_ARG0);
+ assert (regs->lr_vreg[1].d == FUNC_DOUBLE_ARG1);
+ assert (regs->lr_vreg[2].d == FUNC_DOUBLE_ARG2);
+ assert (regs->lr_vreg[3].d == FUNC_DOUBLE_ARG3);
+ assert (regs->lr_vreg[4].d == FUNC_DOUBLE_ARG4);
+ assert (regs->lr_vreg[5].d == FUNC_DOUBLE_ARG5);
+ assert (regs->lr_vreg[6].d == FUNC_DOUBLE_ARG6);
+ assert (regs->lr_vreg[7].d == FUNC_DOUBLE_ARG7);
+ }
+ else if (strcmp (symname, "tst_audit27_func_ldouble") == 0)
+ { /* long double arguments are read through the .q member. */
+ assert (regs->lr_vreg[0].q == FUNC_LDOUBLE_ARG0);
+ assert (regs->lr_vreg[1].q == FUNC_LDOUBLE_ARG1);
+ assert (regs->lr_vreg[2].q == FUNC_LDOUBLE_ARG2);
+ assert (regs->lr_vreg[3].q == FUNC_LDOUBLE_ARG3);
+ assert (regs->lr_vreg[4].q == FUNC_LDOUBLE_ARG4);
+ assert (regs->lr_vreg[5].q == FUNC_LDOUBLE_ARG5);
+ assert (regs->lr_vreg[6].q == FUNC_LDOUBLE_ARG6);
+ assert (regs->lr_vreg[7].q == FUNC_LDOUBLE_ARG7);
+ }
+ else
+ abort (); /* No other symbol is expected to reach this hook. */
+
+ assert (regs->lr_vpcs == 0);
+
+ /* Overwrite v0-v7 just before this hook returns: each 'dup' fills the
+ eight byte lanes named by '.8b' with 0xff. Presumably the test
+ relies on the code that invoked this hook to restore the argument
+ registers afterwards -- TODO confirm. */
+ uint8_t v = 0xff;
+ asm volatile ("dup v0.8b, %w0" : : "r" (v) : "v0");
+ asm volatile ("dup v1.8b, %w0" : : "r" (v) : "v1");
+ asm volatile ("dup v2.8b, %w0" : : "r" (v) : "v2");
+ asm volatile ("dup v3.8b, %w0" : : "r" (v) : "v3");
+ asm volatile ("dup v4.8b, %w0" : : "r" (v) : "v4");
+ asm volatile ("dup v5.8b, %w0" : : "r" (v) : "v5");
+ asm volatile ("dup v6.8b, %w0" : : "r" (v) : "v6");
+ asm volatile ("dup v7.8b, %w0" : : "r" (v) : "v7");
+
+ *framesizep = 1024; /* Presumably a non-negative frame size is what makes the pltexit hook run -- see rtld-audit(7), TODO confirm. */
+
+ return sym->st_value; /* Keep the binding the symbol already has. */
+}
+
+unsigned int
+la_aarch64_gnu_pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+ uintptr_t *defcook,
+ const struct La_aarch64_regs *inregs,
+ struct La_aarch64_retval *outregs,
+ const char *symname)
+{ /* PLT-exit hook for tst-audit27: log the call, assert that INREGS
+ still shows the FUNC_*_ARGn constants and that vector register 0
+ of OUTREGS holds the matching FUNC_*_RET constant (any symbol
+ other than the three tst_audit27_func_* ones aborts), overwrite
+ v0-v7, and return 0. */
+ printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u\n",
+ symname, (long int) sym->st_value, ndx);
+
+ if (strcmp (symname, "tst_audit27_func_float") == 0)
+ { /* float values are read through the .s member. */
+ assert (inregs->lr_vreg[0].s == FUNC_FLOAT_ARG0);
+ assert (inregs->lr_vreg[1].s == FUNC_FLOAT_ARG1);
+ assert (inregs->lr_vreg[2].s == FUNC_FLOAT_ARG2);
+ assert (inregs->lr_vreg[3].s == FUNC_FLOAT_ARG3);
+ assert (inregs->lr_vreg[4].s == FUNC_FLOAT_ARG4);
+ assert (inregs->lr_vreg[5].s == FUNC_FLOAT_ARG5);
+ assert (inregs->lr_vreg[6].s == FUNC_FLOAT_ARG6);
+ assert (inregs->lr_vreg[7].s == FUNC_FLOAT_ARG7);
+
+ assert (outregs->lrv_vreg[0].s == FUNC_FLOAT_RET);
+ }
+ else if (strcmp (symname, "tst_audit27_func_double") == 0)
+ { /* double values are read through the .d member. */
+ assert (inregs->lr_vreg[0].d == FUNC_DOUBLE_ARG0);
+ assert (inregs->lr_vreg[1].d == FUNC_DOUBLE_ARG1);
+ assert (inregs->lr_vreg[2].d == FUNC_DOUBLE_ARG2);
+ assert (inregs->lr_vreg[3].d == FUNC_DOUBLE_ARG3);
+ assert (inregs->lr_vreg[4].d == FUNC_DOUBLE_ARG4);
+ assert (inregs->lr_vreg[5].d == FUNC_DOUBLE_ARG5);
+ assert (inregs->lr_vreg[6].d == FUNC_DOUBLE_ARG6);
+ assert (inregs->lr_vreg[7].d == FUNC_DOUBLE_ARG7);
+
+ assert (outregs->lrv_vreg[0].d == FUNC_DOUBLE_RET);
+ }
+ else if (strcmp (symname, "tst_audit27_func_ldouble") == 0)
+ { /* long double values are read through the .q member. */
+ assert (inregs->lr_vreg[0].q == FUNC_LDOUBLE_ARG0);
+ assert (inregs->lr_vreg[1].q == FUNC_LDOUBLE_ARG1);
+ assert (inregs->lr_vreg[2].q == FUNC_LDOUBLE_ARG2);
+ assert (inregs->lr_vreg[3].q == FUNC_LDOUBLE_ARG3);
+ assert (inregs->lr_vreg[4].q == FUNC_LDOUBLE_ARG4);
+ assert (inregs->lr_vreg[5].q == FUNC_LDOUBLE_ARG5);
+ assert (inregs->lr_vreg[6].q == FUNC_LDOUBLE_ARG6);
+ assert (inregs->lr_vreg[7].q == FUNC_LDOUBLE_ARG7);
+
+ assert (outregs->lrv_vreg[0].q == FUNC_LDOUBLE_RET);
+ }
+ else
+ abort (); /* No other symbol is expected to reach this hook. */
+
+ assert (inregs->lr_vpcs == 0);
+ assert (outregs->lrv_vpcs == 0);
+
+ /* Overwrite v0-v7 just before this hook returns: each 'dup' fills the
+ eight byte lanes named by '.8b' with 0xff. Presumably the test
+ relies on the code that invoked this hook to reload the return
+ value afterwards -- TODO confirm. */
+ uint8_t v = 0xff;
+ asm volatile ("dup v0.8b, %w0" : : "r" (v) : "v0");
+ asm volatile ("dup v1.8b, %w0" : : "r" (v) : "v1");
+ asm volatile ("dup v2.8b, %w0" : : "r" (v) : "v2");
+ asm volatile ("dup v3.8b, %w0" : : "r" (v) : "v3");
+ asm volatile ("dup v4.8b, %w0" : : "r" (v) : "v4");
+ asm volatile ("dup v5.8b, %w0" : : "r" (v) : "v5");
+ asm volatile ("dup v6.8b, %w0" : : "r" (v) : "v6");
+ asm volatile ("dup v7.8b, %w0" : : "r" (v) : "v7");
+
+ return 0;
+}
#define ELF_MACHINE_NAME "alpha"
#include <string.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
/* Mask identifying addresses reserved for the user program,
entries will jump to the on-demand fixup code in dl-runtime.c. */
static inline int
-elf_machine_runtime_setup (struct link_map *map, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *map, struct r_scope_elem *scope[],
+ int lazy, int profile)
{
extern char _dl_runtime_resolve_new[] attribute_hidden;
extern char _dl_runtime_profile_new[] attribute_hidden;
/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
#define ELF_MACHINE_JMP_SLOT R_ALPHA_JMP_SLOT
-/* The alpha never uses Elf64_Rel relocations. */
-#define ELF_MACHINE_NO_REL 1
-#define ELF_MACHINE_NO_RELA 0
-
/* We define an initialization functions. This is called very early in
* _dl_sysdep_start. */
#define DL_PLATFORM_INIT dl_platform_init ()
/* Perform the relocation specified by RELOC and SYM (which is fully resolved).
MAP is the object containing the reloc. */
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_rela (struct link_map *map,
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
const Elf64_Rela *reloc,
const Elf64_Sym *sym,
const struct r_found_version *version,
return;
else
{
- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
+ r_type);
Elf64_Addr sym_value;
Elf64_Addr sym_raw_value;
can be skipped. */
#define ELF_MACHINE_REL_RELATIVE 1
-auto inline void
+static inline void
__attribute__ ((always_inline))
elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc,
void *const reloc_addr_arg)
memcpy (reloc_addr_arg, &reloc_addr_val, 8);
}
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_lazy_rel (struct link_map *map,
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
Elf64_Addr l_addr, const Elf64_Rela *reloc,
int skip_ifunc)
{
jsr $26, ($27), 0
ldgp $29, 0($26)
- /* Set up for call to _dl_call_pltexit. */
+ /* Set up for call to _dl_audit_pltexit. */
ldq $16, 16*8($15)
ldq $17, 17*8($15)
stq $0, 16*8($15)
lda $19, 16*8($15)
stt $f0, 18*8($15)
stt $f1, 19*8($15)
- bsr $26, _dl_call_pltexit !samegp
+ bsr $26, _dl_audit_pltexit !samegp
mov $15, $30
cfi_def_cfa_register (30)
jsr $26, ($27), 0
ldgp $29, 0($26)
- /* Set up for call to _dl_call_pltexit. */
+ /* Set up for call to _dl_audit_pltexit. */
ldq $16, 48*8($15)
ldq $17, 49*8($15)
stq $0, 46*8($15)
lda $19, 46*8($15)
stt $f0, 48*8($15)
stt $f1, 49*8($15)
- bsr $26, _dl_call_pltexit !samegp
+ bsr $26, _dl_audit_pltexit !samegp
mov $15, $30
cfi_def_cfa_register (30)
#include <string.h>
#include <link.h>
#include <dl-tls.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
/* Dynamic Linking ABI for ARCv2 ISA.
static inline int
__attribute__ ((always_inline))
-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ int lazy, int profile)
{
extern void _dl_runtime_resolve (void);
/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
#define ELF_MACHINE_JMP_SLOT R_ARC_JUMP_SLOT
-/* ARC uses Rela relocations. */
-#define ELF_MACHINE_NO_REL 1
-#define ELF_MACHINE_NO_RELA 0
-
/* Fixup a PLT entry to bounce directly to the function at VALUE. */
static inline ElfW(Addr)
#ifdef RESOLVE_MAP
-inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
- const ElfW(Sym) *sym, const struct r_found_version *version,
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
+ const ElfW(Rela) *reloc, const ElfW(Sym) *sym,
+ const struct r_found_version *version,
void *const reloc_addr_arg, int skip_ifunc)
{
ElfW(Addr) r_info = reloc->r_info;
else
{
const ElfW(Sym) *const refsym = sym;
- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
+ r_type);
ElfW(Addr) value = SYMBOL_ADDRESS (sym_map, sym, true);
switch (r_type)
}
}
-inline void
+static inline void
__attribute__ ((always_inline))
elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc,
void *const reloc_addr_arg)
*reloc_addr += l_addr;
}
-inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_lazy_rel (struct link_map *map, ElfW(Addr) l_addr,
- const ElfW(Rela) *reloc, int skip_ifunc)
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
+ ElfW(Addr) l_addr, const ElfW(Rela) *reloc,
+ int skip_ifunc)
{
ElfW(Addr) *const reloc_addr = (void *) (l_addr + reloc->r_offset);
const unsigned int r_type = ELFW (R_TYPE) (reloc->r_info);
--- /dev/null
+/* ELF dynamic relocation type supported by the architecture. ARM version.
+ Copyright (C) 2001-2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _DL_MACHINE_REL_H
+#define _DL_MACHINE_REL_H
+
+/* ARM never uses Elf32_Rela relocations for the dynamic linker.
+ Prelinked libraries may use Elf32_Rela though. */
+#define ELF_MACHINE_NO_RELA defined RTLD_BOOTSTRAP
+#define ELF_MACHINE_NO_REL 0
+
+/* ARM never uses Elf32_Rela relocations for the dynamic linker.
+ Prelinked libraries may use Elf32_Rela though. */
+#define ELF_MACHINE_PLT_REL 1
+
+#define PLTREL ElfW(Rel)
+
+#endif
#define ELF_MACHINE_NAME "ARM"
+#include <assert.h>
#include <sys/param.h>
#include <tls.h>
#include <dl-tlsdesc.h>
#include <dl-irel.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
#ifndef CLEAR_CACHE
# error CLEAR_CACHE definition required to handle TEXTREL
entries will jump to the on-demand fixup code in dl-runtime.c. */
static inline int __attribute__ ((unused))
-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ int lazy, int profile)
{
Elf32_Addr *got;
extern void _dl_runtime_resolve (Elf32_Word);
/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
#define ELF_MACHINE_JMP_SLOT R_ARM_JUMP_SLOT
-/* ARM never uses Elf32_Rela relocations for the dynamic linker.
- Prelinked libraries may use Elf32_Rela though. */
-#define ELF_MACHINE_PLT_REL 1
-
/* We define an initialization functions. This is called very early in
_dl_sysdep_start. */
#define DL_PLATFORM_INIT dl_platform_init ()
#endif /* !dl_machine_h */
-/* ARM never uses Elf32_Rela relocations for the dynamic linker.
- Prelinked libraries may use Elf32_Rela though. */
-#define ELF_MACHINE_NO_RELA defined RTLD_BOOTSTRAP
-#define ELF_MACHINE_NO_REL 0
-
/* Names of the architecture-specific auditing callback functions. */
#define ARCH_LA_PLTENTER arm_gnu_pltenter
#define ARCH_LA_PLTEXIT arm_gnu_pltexit
#ifdef RESOLVE_MAP
/* Handle a PC24 reloc, including the out-of-range case. */
-auto void
+static void
relocate_pc24 (struct link_map *map, Elf32_Addr value,
Elf32_Addr *const reloc_addr, Elf32_Sword addend)
{
/* Perform the relocation specified by RELOC and SYM (which is fully resolved).
MAP is the object containing the reloc. */
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc,
- const Elf32_Sym *sym, const struct r_found_version *version,
+elf_machine_rel (struct link_map *map, struct r_scope_elem *scope[],
+ const Elf32_Rel *reloc, const Elf32_Sym *sym,
+ const struct r_found_version *version,
void *const reloc_addr_arg, int skip_ifunc)
{
Elf32_Addr *const reloc_addr = reloc_addr_arg;
#endif
{
const Elf32_Sym *const refsym = sym;
- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
+ r_type);
Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true);
if (sym != NULL
}
# ifndef RTLD_BOOTSTRAP
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
- const Elf32_Sym *sym, const struct r_found_version *version,
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
+ const Elf32_Rela *reloc, const Elf32_Sym *sym,
+ const struct r_found_version *version,
void *const reloc_addr_arg, int skip_ifunc)
{
Elf32_Addr *const reloc_addr = reloc_addr_arg;
# ifndef RESOLVE_CONFLICT_FIND_MAP
const Elf32_Sym *const refsym = sym;
# endif
- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type);
Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true);
if (sym != NULL
}
# endif
-auto inline void
+static inline void
__attribute__ ((always_inline))
elf_machine_rel_relative (Elf32_Addr l_addr, const Elf32_Rel *reloc,
void *const reloc_addr_arg)
}
# ifndef RTLD_BOOTSTRAP
-auto inline void
+static inline void
__attribute__ ((always_inline))
elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc,
void *const reloc_addr_arg)
}
# endif
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_lazy_rel (struct link_map *map,
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
Elf32_Addr l_addr, const Elf32_Rel *reloc,
int skip_ifunc)
{
/* Always initialize TLS descriptors completely, because lazy
initialization requires synchronization at every TLS access. */
- elf_machine_rel (map, reloc, sym, version, reloc_addr, skip_ifunc);
+ elf_machine_rel (map, scope, reloc, sym, version, reloc_addr, skip_ifunc);
}
else
_dl_reloc_bad_type (map, r_type, 1);
ldmia ip, {r0,r1}
add r2, r7, #72
add r3, r7, #0
- bl _dl_call_pltexit
+ bl _dl_audit_pltexit
@ Return to caller.
ldmia r7, {r0-r3}
#include <sys/param.h>
#include <sysdep.h>
#include <dl-tls.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
/* Return nonzero if ELF header is compatible with the running host. */
static inline int
entries will jump to the on-demand fixup code in dl-runtime.c. */
static inline int __attribute__ ((always_inline))
-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ int lazy, int profile)
{
Elf32_Addr *got;
extern void _dl_runtime_resolve (Elf32_Word);
/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
#define ELF_MACHINE_JMP_SLOT R_CKCORE_JUMP_SLOT
-/* C-SKY never uses Elf32_Rel relocations. */
-#define ELF_MACHINE_NO_REL 1
-#define ELF_MACHINE_NO_RELA 0
-
/* We define an initialization functions. This is called very early in
_dl_sysdep_start. */
#define DL_PLATFORM_INIT dl_platform_init ()
/* Perform the relocation specified by RELOC and SYM (which is fully resolved).
MAP is the object containing the reloc. */
-auto inline void __attribute__ ((unused, always_inline))
-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
- const Elf32_Sym *sym, const struct r_found_version *version,
+static inline void __attribute__ ((unused, always_inline))
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
+ const Elf32_Rela *reloc, const Elf32_Sym *sym,
+ const struct r_found_version *version,
void *const reloc_addr_arg, int skip_ifunc)
{
Elf32_Addr *const reloc_addr = reloc_addr_arg;
else
{
const Elf32_Sym *const refsym = sym;
- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
+ r_type);
ElfW(Addr) value = SYMBOL_ADDRESS (sym_map, sym, true);
opcode16_addr = (unsigned short *)reloc_addr;
}
}
-auto inline void __attribute__ ((unused, always_inline))
+static inline void __attribute__ ((unused, always_inline))
elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc,
void *const reloc_addr_arg)
{
*reloc_addr = l_addr + reloc->r_addend;
}
-auto inline void __attribute__ ((unused, always_inline))
-elf_machine_lazy_rel (struct link_map *map,
+static inline void __attribute__ ((unused, always_inline))
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
Elf32_Addr l_addr, const Elf32_Rela *reloc,
int skip_ifunc)
{
--- /dev/null
+/* rtld-audit version check. Generic version.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+static inline bool
+_dl_audit_check_version (unsigned int lav)
+{ /* Generic rtld-audit version check: true when LAV does not exceed
+ LAV_CURRENT, false otherwise. */
+ return lav <= LAV_CURRENT;
+}
--- /dev/null
+/* ABI specifics for lazy resolution functions.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _DL_FIXUP_ATTRIBUTE_H
+#define _DL_FIXUP_ATTRIBUTE_H
+
+#define DL_ARCH_FIXUP_ATTRIBUTE
+
+#endif
#define DL_FIXUP_VALUE_CODE_ADDR(value) (value)
#define DL_FIXUP_VALUE_ADDR(value) (value)
#define DL_FIXUP_ADDR_VALUE(addr) (addr)
+#define DL_FIXUP_BINDNOW_ADDR_VALUE(addr) (addr)
+#define DL_FIXUP_BINDNOW_RELOC(value, new_value, st_value) \
+ (*value) = st_value;
--- /dev/null
+/* ELF dynamic relocation type supported by the architecture.
+ Copyright (C) 2001-2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _DL_MACHINE_REL_H
+#define _DL_MACHINE_REL_H
+
+/* Nonzero if the architecture never uses Elf{32,64}_Rel relocations. */
+#define ELF_MACHINE_NO_REL 1
+/* Nonzero if the architecture never uses Elf{32,64}_Rela relocations. */
+#define ELF_MACHINE_NO_RELA 0
+/* Used to calculate the index of link_map l_reloc_result. */
+#define PLTREL ElfW(Rela)
+
+#endif
#include <string.h>
#include <link.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
/* Return nonzero iff ELF header is compatible with the running host. */
--- /dev/null
+/* Check if dynamic section should be relocated. Generic version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _DL_RELOCATE_LD_H
+#define _DL_RELOCATE_LD_H
+
+/* The dynamic section is writable. */
+#define DL_RO_DYN_SECTION 0
+
+#endif /* _DL_RELOCATE_LD_H */
#include <link.h>
#include <dl-lookupcfg.h>
#include <dl-sysdep.h>
+#include <dl-fixup-attribute.h>
#include <libc-lock.h>
#include <hp-timing.h>
#include <tls.h>
`ElfW(TYPE)' is used in place of `Elf32_TYPE' or `Elf64_TYPE'. */
#define ELFW(type) _ElfW (ELF, __ELF_NATIVE_CLASS, type)
+/* Return true if dynamic section in the shared library L should be
+ relocated. */
+
+static inline bool
+dl_relocate_ld (const struct link_map *l)
+{
+ /* Don't relocate dynamic section if it is readonly */
+ return !(l->l_ld_readonly || DL_RO_DYN_SECTION);
+}
+
/* All references to the value of l_info[DT_PLTGOT],
l_info[DT_STRTAB], l_info[DT_SYMTAB], l_info[DT_RELA],
l_info[DT_REL], l_info[DT_JMPREL], and l_info[VERSYMIDX (DT_VERSYM)]
have to be accessed via the D_PTR macro. The macro is needed since for
most architectures the entry is already relocated - but for some not
and we need to relocate at access time. */
-#ifdef DL_RO_DYN_SECTION
-# define D_PTR(map, i) ((map)->i->d_un.d_ptr + (map)->l_addr)
-#else
-# define D_PTR(map, i) (map)->i->d_un.d_ptr
-#endif
+#define D_PTR(map, i) \
+ ((map)->i->d_un.d_ptr + (dl_relocate_ld (map) ? 0 : (map)->l_addr))
/* Result of the lookup functions and how to retrieve the base address. */
typedef struct link_map *lookup_t;
list of loaded objects while an object is added to or removed
from that list. */
__rtld_lock_define_recursive (EXTERN, _dl_load_write_lock)
+ /* This lock protects global and module specific TLS related data.
+ E.g. it is held in dlopen and dlclose when GL(dl_tls_generation),
+ GL(dl_tls_max_dtv_idx) or GL(dl_tls_dtv_slotinfo_list) are
+ accessed and when TLS related relocations are processed for a
+ module. It was introduced to keep pthread_create accessing TLS
+ state that is being set up. */
+ __rtld_lock_define_recursive (EXTERN, _dl_load_tls_lock)
/* Incremented whenever something may have been added to dl_loaded. */
EXTERN unsigned long long _dl_load_adds;
# endif
#endif
+/* Perform early memory allocation, avoiding a TCB dependency.
+ Terminate the process if allocation fails. May attempt to use
+ brk. */
+void *_dl_early_allocate (size_t size) attribute_hidden;
+
/* Initialization of libpthread for statically linked applications.
If libpthread is not linked in, this is an empty function. */
void __pthread_initialize_minimal (void) weak_function;
/* These are internal entry points to the two halves of _dl_allocate_tls,
only used within rtld.c itself at startup time. */
extern void *_dl_allocate_tls_storage (void) attribute_hidden;
-extern void *_dl_allocate_tls_init (void *);
+extern void *_dl_allocate_tls_init (void *, bool);
rtld_hidden_proto (_dl_allocate_tls_init)
/* Deallocate memory allocated with _dl_allocate_tls. */
/* Add module to slot information data. If DO_ADD is false, only the
required memory is allocated. Must be called with GL
- (dl_load_lock) acquired. If the function has already been called
+ (dl_load_tls_lock) acquired. If the function has already been called
for the link map L with !do_add, then this function will not raise
an exception, otherwise it is possible that it encounters a memory
allocation failure. */
return &base[index];
}
}
+
+/* Call the la_objsearch from the audit modules from the link map L. If
+ ORIGNAME is non NULL, it is updated with the previous name prior to calling
+ la_objsearch. */
+const char *_dl_audit_objsearch (const char *name, struct link_map *l,
+ unsigned int code)
+ attribute_hidden;
+
+/* Call the la_activity from the audit modules from the link map L and issues
+ the ACTION argument. */
+void _dl_audit_activity_map (struct link_map *l, int action)
+ attribute_hidden;
+
+/* Call the la_activity from the audit modules from the link map from the
+ namespace NSID and issues the ACTION argument. */
+void _dl_audit_activity_nsid (Lmid_t nsid, int action)
+ attribute_hidden;
+
+/* Call the la_objopen from the audit modules for the link_map L on the
+ namespace identification NSID. */
+void _dl_audit_objopen (struct link_map *l, Lmid_t nsid)
+ attribute_hidden;
+
+/* Call the la_objclose from the audit modules for the link_map L. */
+void _dl_audit_objclose (struct link_map *l)
+ attribute_hidden;
+
+/* Call the la_preinit from the audit modules for the link_map L. */
+void _dl_audit_preinit (struct link_map *l);
+
+/* Call the la_symbind{32,64} from the audit modules for the link_map L. If
+ RELOC_RESULT is NULL it assumes the symbol to be bind-now and will set
+ the flags with LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT prior to calling
+ la_symbind{32,64}. */
+void _dl_audit_symbind (struct link_map *l, struct reloc_result *reloc_result,
+ const ElfW(Sym) *defsym, DL_FIXUP_VALUE_TYPE *value,
+ lookup_t result)
+ attribute_hidden;
+/* Same as _dl_audit_symbind, but also sets LA_SYMB_DLSYM flag. */
+void _dl_audit_symbind_alt (struct link_map *l, const ElfW(Sym) *ref,
+ void **value, lookup_t result);
+rtld_hidden_proto (_dl_audit_symbind_alt)
+void _dl_audit_pltenter (struct link_map *l, struct reloc_result *reloc_result,
+ DL_FIXUP_VALUE_TYPE *value, void *regs,
+ long int *framesize)
+ attribute_hidden;
+void DL_ARCH_FIXUP_ATTRIBUTE _dl_audit_pltexit (struct link_map *l,
+ ElfW(Word) reloc_arg,
+ const void *inregs,
+ void *outregs)
+ attribute_hidden;
#endif /* SHARED */
#if PTHREAD_IN_LIBC && defined SHARED
/* Use macro instead of inline function to avoid including <stdio.h>. */
#define _startup_fatal(message) __libc_fatal ((message))
-
-static inline uid_t
-startup_getuid (void)
-{
- return __getuid ();
-}
-
-static inline uid_t
-startup_geteuid (void)
-{
- return __geteuid ();
-}
-
-static inline gid_t
-startup_getgid (void)
-{
- return __getgid ();
-}
-
-static inline gid_t
-startup_getegid (void)
-{
- return __getegid ();
-}
#include <ldsodefs.h>
#include <elf/dynamic-link.h>
#include <dl-fptr.h>
+#include <dl-runtime.h>
#include <dl-unmap-segments.h>
#include <atomic.h>
#include <libc-pointer-arith.h>
{
ElfW(Addr) addr = (ElfW(Addr)) address;
ElfW(Word) reloc_arg;
- volatile unsigned int *desc;
- unsigned int *gptr;
+ unsigned int *desc, *gptr;
/* Return ADDR if the least-significant two bits of ADDR are not consistent
with ADDR being a linker defined function pointer. The normal value for
a code address in a backtrace is 3. */
- if (((unsigned int) addr & 3) != 2)
+ if (((uintptr_t) addr & 3) != 2)
return addr;
/* Handle special case where ADDR points to page 0. */
- if ((unsigned int) addr < 4096)
+ if ((uintptr_t) addr < 4096)
return addr;
/* Clear least-significant two bits from descriptor address. */
- desc = (unsigned int *) ((unsigned int) addr & ~3);
+ desc = (unsigned int *) ((uintptr_t) addr & ~3);
if (!_dl_read_access_allowed (desc))
return addr;
/* Then load first word of candidate descriptor. It should be a pointer
with word alignment and point to memory that can be read. */
gptr = (unsigned int *) desc[0];
- if (((unsigned int) gptr & 3) != 0
+ if (((uintptr_t) gptr & 3) != 0
|| !_dl_read_access_allowed (gptr))
return addr;
/* If gp has been resolved, we need to hunt for relocation offset. */
if (!(reloc_arg & PA_GP_RELOC))
- reloc_arg = _dl_fix_reloc_arg (addr, l);
+ reloc_arg = _dl_fix_reloc_arg ((struct fdesc *) addr, l);
_dl_fixup (l, reloc_arg);
}
return (ElfW(Addr)) desc[0];
}
+rtld_hidden_def (_dl_lookup_address)
#define DL_SYMBOL_ADDRESS(map, ref) _dl_symbol_address(map, ref)
Elf32_Addr _dl_lookup_address (const void *address);
+rtld_hidden_proto (_dl_lookup_address)
#define DL_LOOKUP_ADDRESS(addr) _dl_lookup_address ((const void *) addr)
/* Extract the code address from a fixup value */
#define DL_FIXUP_VALUE_CODE_ADDR(value) ((value).ip)
#define DL_FIXUP_VALUE_ADDR(value) ((uintptr_t) &(value))
-#define DL_FIXUP_ADDR_VALUE(addr) (*(struct fdesc *) (addr))
+/* Clear the plabel bit to get the actual address of the descriptor. */
+#define DL_FIXUP_ADDR_VALUE(addr) \
+ (*(DL_FIXUP_VALUE_TYPE *) ((uintptr_t) (addr) & ~2))
+#define DL_FIXUP_BINDNOW_ADDR_VALUE(addr) (addr)
+#define DL_FIXUP_BINDNOW_RELOC(value, new_value, st_value) \
+ *(value) = *(DL_FIXUP_VALUE_TYPE *) ((uintptr_t) (new_value) & ~2)
#include <string.h>
#include <link.h>
#include <errno.h>
+#include <ldsodefs.h>
#include <dl-fptr.h>
#include <abort-instr.h>
#include <tls.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
/* These two definitions must match the definition of the stub in
bfd/elf32-hppa.c (see plt_stub[]).
map->l_mach.fptr_table = boot_table;
}
-#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) \
- __hppa_init_bootstrap_fdesc_table (BOOTSTRAP_MAP); \
+#define ELF_MACHINE_BEFORE_RTLD_RELOC(map, dynamic_info) \
+ __hppa_init_bootstrap_fdesc_table (map); \
_dl_fptr_init();
/* Return nonzero iff ELF header is compatible with the running host. */
return (struct fdesc) { value.ip + reloc->r_addend, value.gp };
}
+static inline struct link_map *
+elf_machine_main_map (void)
+{
+ struct link_map *main_map;
+
+#if defined SHARED && IS_IN (rtld)
+ asm (
+" bl 1f,%0\n"
+" addil L'_rtld_local - ($PIC_pcrel$0 - 1),%0\n"
+"1: ldw R'_rtld_local - ($PIC_pcrel$0 - 5)(%%r1),%0\n"
+ : "=r" (main_map) : : "r1");
+#else
+ main_map = NULL;
+#endif
+
+ return main_map;
+}
+
/* Set up the loaded object described by L so its unrelocated PLT
entries will jump to the on-demand fixup code in dl-runtime.c. */
static inline int
-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ int lazy, int profile)
{
Elf32_Addr *got = NULL;
Elf32_Addr l_addr, iplt, jmprel, end_jmprel, r_type, r_sym;
Elf32_Addr i[2];
} sig = {{0x00,0xc0,0xff,0xee, 0xde,0xad,0xbe,0xef}};
+ /* Initialize dp register for main executable. */
+ if (l == elf_machine_main_map ())
+ {
+ register Elf32_Addr dp asm ("%r27");
+
+ dp = D_PTR (l, l_info[DT_PLTGOT]);
+ asm volatile ("" : : "r" (dp));
+ }
+
/* If we don't have a PLT we can just skip all this... */
if (__builtin_expect (l->l_info[DT_JMPREL] == NULL,0))
return lazy;
its return value is the user program's entry point. */
#define RTLD_START \
-/* Set up dp for any non-PIC lib constructors that may be called. */ \
-static struct link_map * __attribute__((used)) \
-set_dp (struct link_map *map) \
-{ \
- register Elf32_Addr dp asm ("%r27"); \
- dp = D_PTR (map, l_info[DT_PLTGOT]); \
- asm volatile ("" : : "r" (dp)); \
- return map; \
-} \
- \
asm ( \
" .text\n" \
" .globl _start\n" \
"_start:\n" \
/* The kernel does not give us an initial stack frame. */ \
" ldo 64(%sp),%sp\n" \
- /* Save the relevant arguments (yes, those are the correct \
- registers, the kernel is weird) in their stack slots. */ \
-" stw %r25,-40(%sp)\n" /* argc */ \
-" stw %r24,-44(%sp)\n" /* argv */ \
\
/* We need the LTP, and we need it now. \
$PIC_pcrel$0 points 8 bytes past the current instruction, \
So, obviously, we can't just pass %sp to _dl_start. That's \
okay, argv-4 will do just fine. \
\
- The pleasant part of this is that if we need to skip \
- arguments we can just decrement argc and move argv, because \
- the stack pointer is utterly unrelated to the location of \
- the environment and argument vectors. */ \
- \
- /* This is always within range so we'll be okay. */ \
+ This is always within range so we'll be okay. */ \
" bl _dl_start,%rp\n" \
" ldo -4(%r24),%r26\n" \
\
/* Save the entry point in %r3. */ \
" copy %ret0,%r3\n" \
\
- /* See if we were called as a command with the executable file \
- name as an extra leading argument. */ \
-" addil LT'_dl_skip_args,%r19\n" \
-" ldw RT'_dl_skip_args(%r1),%r20\n" \
-" ldw 0(%r20),%r20\n" \
+ /* The loader adjusts argc, argv, env, and the aux vectors \
+ directly on the stack to remove any arguments used for \
+ direct loader invocation. Thus, argc and argv must be \
+ reloaded from _dl_argc and _dl_argv. */ \
\
-" ldw -40(%sp),%r25\n" /* argc */ \
-" comib,= 0,%r20,.Lnofix\n" /* FIXME: Mispredicted branch */\
-" ldw -44(%sp),%r24\n" /* argv (delay slot) */ \
- \
-" sub %r25,%r20,%r25\n" \
+ /* Load argc from _dl_argc. */ \
+" addil LT'_dl_argc,%r19\n" \
+" ldw RT'_dl_argc(%r1),%r20\n" \
+" ldw 0(%r20),%r25\n" \
" stw %r25,-40(%sp)\n" \
-" sh2add %r20,%r24,%r24\n" \
+ \
+ /* Same for argv with _dl_argv. */ \
+" addil LT'_dl_argv,%r19\n" \
+" ldw RT'_dl_argv(%r1),%r20\n" \
+" ldw 0(%r20),%r24\n" \
" stw %r24,-44(%sp)\n" \
\
-".Lnofix:\n" \
+ /* Call _dl_init(main_map, argc, argv, envp). */ \
" addil LT'_rtld_local,%r19\n" \
" ldw RT'_rtld_local(%r1),%r26\n" \
-" bl set_dp, %r2\n" \
" ldw 0(%r26),%r26\n" \
\
- /* Call _dl_init(_dl_loaded, argc, argv, envp). */ \
-" copy %r28,%r26\n" \
- \
/* envp = argv + argc + 1 */ \
" sh2add %r25,%r24,%r23\n" \
" bl _dl_init,%r2\n" \
#define ELF_MACHINE_JMP_SLOT R_PARISC_IPLT
#define ELF_MACHINE_SIZEOF_JMP_SLOT PLT_ENTRY_SIZE
-/* We only use RELA. */
-#define ELF_MACHINE_NO_REL 1
-#define ELF_MACHINE_NO_RELA 0
-
/* Return the address of the entry point. */
#define ELF_MACHINE_START_ADDRESS(map, start) \
({ \
( (((as14) & 0x1fff) << 1) \
| (((as14) & 0x2000) >> 13))
-auto void __attribute__((always_inline))
-elf_machine_rela (struct link_map *map,
+static void __attribute__((always_inline))
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
const Elf32_Rela *reloc,
const Elf32_Sym *sym,
const struct r_found_version *version,
zeros, and an all zero Elf32_Sym has a binding of STB_LOCAL.)
See RESOLVE_MAP definition in elf/dl-reloc.c */
# ifdef RTLD_BOOTSTRAP
- /* RESOLVE_MAP in rtld.c doesn't have the local sym test. */
- sym_map = (ELF32_ST_BIND (sym->st_info) != STB_LOCAL
- ? RESOLVE_MAP (&sym, version, r_type) : map);
+ sym_map = map;
# else
- sym_map = RESOLVE_MAP (&sym, version, r_type);
+ sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type);
# endif
if (sym_map)
/* hppa doesn't have an R_PARISC_RELATIVE reloc, but uses relocs with
ELF32_R_SYM (info) == 0 for a similar purpose. */
-auto void __attribute__((always_inline))
+static void __attribute__((always_inline))
elf_machine_rela_relative (Elf32_Addr l_addr,
const Elf32_Rela *reloc,
void *const reloc_addr_arg)
*reloc_addr = value;
}
-auto void __attribute__((always_inline))
-elf_machine_lazy_rel (struct link_map *map,
+static void __attribute__((always_inline))
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
Elf32_Addr l_addr, const Elf32_Rela *reloc,
int skip_ifunc)
{
return that to the caller. The caller will continue on to call
_dl_fixup with the relocation offset. */
-ElfW(Word)
-attribute_hidden __attribute ((noinline)) ARCH_FIXUP_ATTRIBUTE
+ElfW(Word) __attribute ((noinline)) DL_ARCH_FIXUP_ATTRIBUTE
_dl_fix_reloc_arg (struct fdesc *fptr, struct link_map *l)
{
Elf32_Addr l_addr, iplt, jmprel, end_jmprel, r_type;
ABORT_INSTRUCTION;
return 0;
}
+rtld_hidden_def (_dl_fix_reloc_arg)
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
+ElfW(Word) _dl_fix_reloc_arg (struct fdesc *, struct link_map *);
+rtld_hidden_proto (_dl_fix_reloc_arg)
+
/* Clear PA_GP_RELOC bit in relocation offset. */
static inline uintptr_t
reloc_offset (uintptr_t plt0, uintptr_t pltn)
ldw -4(%sp),%r1
copy %r1, %sp
- /* Arguments to _dl_call_pltexit */
+ /* Arguments to _dl_audit_pltexit */
ldw -116(%sp), %r26 /* (1) got[1] == struct link_map */
ldw -120(%sp), %r25 /* (2) reloc offsets */
ldo -56(%sp), %r24 /* (3) *La_hppa_regs */
ldo -128(%sp), %r1
fstd %fr4,0(%r1)
- /* Call _dl_call_pltexit */
- bl _dl_call_pltexit,%rp
+ /* Call _dl_audit_pltexit */
+ bl _dl_audit_pltexit,%rp
nop
/* Restore *La_hppa_retval */
ldouble: 1
Function: "tgamma_downward":
-double: 8
+double: 9
float: 7
Function: "tgamma_towardzero":
--- /dev/null
+/* ABI specifics for lazy resolution functions. i386 version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _DL_FIXUP_ATTRIBUTE_H
+#define _DL_FIXUP_ATTRIBUTE_H
+
+/* We cannot use this scheme for profiling because the _mcount call destroys
+ the passed register information. */
+#ifndef PROF
+# define DL_ARCH_FIXUP_ATTRIBUTE __attribute__ ((regparm (3), stdcall, unused))
+#else
+# define DL_ARCH_FIXUP_ATTRIBUTE
+#endif
+
+#endif
--- /dev/null
+/* ELF dynamic relocation type supported by the architecture. i386 version.
+ Copyright (C) 2001-2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _DL_MACHINE_REL_H
+#define _DL_MACHINE_REL_H
+
+/* The i386 never uses Elf32_Rela relocations for the dynamic linker.
+ Prelinked libraries may use Elf32_Rela though. */
+#define ELF_MACHINE_NO_RELA defined RTLD_BOOTSTRAP
+#define ELF_MACHINE_NO_REL 0
+
+/* The i386 never uses Elf32_Rela relocations for the dynamic linker.
+ Prelinked libraries may use Elf32_Rela though. */
+#define ELF_MACHINE_PLT_REL 1
+
+#define PLTREL ElfW(Rel)
+
+#endif
#define ELF_MACHINE_NAME "i386"
+#include <assert.h>
#include <sys/param.h>
#include <sysdep.h>
#include <tls.h>
#include <dl-tlsdesc.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
/* Return nonzero iff ELF header is compatible with the running host. */
static inline int __attribute__ ((unused))
entries will jump to the on-demand fixup code in dl-runtime.c. */
static inline int __attribute__ ((unused, always_inline))
-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ int lazy, int profile)
{
Elf32_Addr *got;
extern void _dl_runtime_resolve (Elf32_Word) attribute_hidden;
return lazy;
}
-#ifdef IN_DL_RUNTIME
-
-# ifndef PROF
-/* We add a declaration of this function here so that in dl-runtime.c
- the ELF_MACHINE_RUNTIME_TRAMPOLINE macro really can pass the parameters
- in registers.
-
- We cannot use this scheme for profiling because the _mcount call
- destroys the passed register information. */
-#define ARCH_FIXUP_ATTRIBUTE __attribute__ ((regparm (3), stdcall, unused))
-
-extern ElfW(Addr) _dl_fixup (struct link_map *l,
- ElfW(Word) reloc_offset)
- ARCH_FIXUP_ATTRIBUTE;
-extern ElfW(Addr) _dl_profile_fixup (struct link_map *l,
- ElfW(Word) reloc_offset,
- ElfW(Addr) retaddr, void *regs,
- long int *framesizep)
- ARCH_FIXUP_ATTRIBUTE;
-# endif
-
-#endif
-
/* Mask identifying addresses reserved for the user program,
where the dynamic linker should not map anything. */
#define ELF_MACHINE_USER_ADDRESS_MASK 0xf0000000UL
/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
#define ELF_MACHINE_JMP_SLOT R_386_JMP_SLOT
-/* The i386 never uses Elf32_Rela relocations for the dynamic linker.
- Prelinked libraries may use Elf32_Rela though. */
-#define ELF_MACHINE_PLT_REL 1
-
/* We define an initialization functions. This is called very early in
_dl_sysdep_start. */
#define DL_PLATFORM_INIT dl_platform_init ()
#endif /* !dl_machine_h */
-/* The i386 never uses Elf32_Rela relocations for the dynamic linker.
- Prelinked libraries may use Elf32_Rela though. */
-#define ELF_MACHINE_NO_RELA defined RTLD_BOOTSTRAP
-#define ELF_MACHINE_NO_REL 0
-
#ifdef RESOLVE_MAP
/* Perform the relocation specified by RELOC and SYM (which is fully resolved).
MAP is the object containing the reloc. */
-auto inline void
+static inline void
__attribute ((always_inline))
-elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc,
+elf_machine_rel (struct link_map *map, struct r_scope_elem *scope[],
+ const Elf32_Rel *reloc,
const Elf32_Sym *sym, const struct r_found_version *version,
void *const reloc_addr_arg, int skip_ifunc)
{
# ifndef RTLD_BOOTSTRAP
const Elf32_Sym *const refsym = sym;
# endif
- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
+ r_type);
Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true);
if (sym != NULL
}
# ifndef RTLD_BOOTSTRAP
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
- const Elf32_Sym *sym, const struct r_found_version *version,
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
+ const Elf32_Rela *reloc, const Elf32_Sym *sym,
+ const struct r_found_version *version,
void *const reloc_addr_arg, int skip_ifunc)
{
Elf32_Addr *const reloc_addr = reloc_addr_arg;
# ifndef RESOLVE_CONFLICT_FIND_MAP
const Elf32_Sym *const refsym = sym;
# endif
- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
+ r_type);
Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true);
if (sym != NULL
}
# endif /* !RTLD_BOOTSTRAP */
-auto inline void
+static inline void
__attribute ((always_inline))
elf_machine_rel_relative (Elf32_Addr l_addr, const Elf32_Rel *reloc,
void *const reloc_addr_arg)
}
# ifndef RTLD_BOOTSTRAP
-auto inline void
+static inline void
__attribute__ ((always_inline))
elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc,
void *const reloc_addr_arg)
}
# endif /* !RTLD_BOOTSTRAP */
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_lazy_rel (struct link_map *map,
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
Elf32_Addr l_addr, const Elf32_Rel *reloc,
int skip_ifunc)
{
const ElfW(Half) *const version =
(const void *) D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
ElfW(Half) ndx = version[ELFW(R_SYM) (r->r_info)] & 0x7fff;
- elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)],
+ elf_machine_rel (map, scope, r, &symtab[ELFW(R_SYM) (r->r_info)],
&map->l_versions[ndx],
(void *) (l_addr + r->r_offset), skip_ifunc);
}
# ifndef RTLD_BOOTSTRAP
else
- elf_machine_rel (map, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL,
+ elf_machine_rel (map, scope, r, &symtab[ELFW(R_SYM) (r->r_info)], NULL,
(void *) (l_addr + r->r_offset), skip_ifunc);
# endif
}
# ifndef RTLD_BOOTSTRAP
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_lazy_rela (struct link_map *map,
+elf_machine_lazy_rela (struct link_map *map, struct r_scope_elem *scope[],
Elf32_Addr l_addr, const Elf32_Rela *reloc,
int skip_ifunc)
{
/* Always initialize TLS descriptors completely at load time, in
case static TLS is allocated for it that requires locking. */
- elf_machine_rela (map, reloc, sym, version, reloc_addr, skip_ifunc);
+ elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
+ skip_ifunc);
}
else if (__glibc_unlikely (r_type == R_386_IRELATIVE))
{
movl (LRV_SIZE + 4 + LR_SIZE)(%esp), %eax
# PLT1
movl (LRV_SIZE + 4 + LR_SIZE + 4)(%esp), %edx
- call _dl_call_pltexit
+ call _dl_audit_pltexit
movl LRV_EAX_OFFSET(%esp), %eax
movl LRV_EDX_OFFSET(%esp), %edx
fldt LRV_ST1_OFFSET(%esp)
Function: Imaginary part of "clog10":
double: 2
-float: 1
+float: 2
float128: 2
ldouble: 2
Function: Imaginary part of "clog10":
double: 2
-float: 1
+float: 2
float128: 2
ldouble: 2
#define DL_FIXUP_VALUE_ADDR(value) ((uintptr_t) &(value))
#define DL_FIXUP_ADDR_VALUE(addr) (*(struct fdesc *) (addr))
+#define DL_FIXUP_BINDNOW_ADDR_VALUE(addr) (addr)
+#define DL_FIXUP_BINDNOW_RELOC(value, new_value, st_value) \
+ (*value) = *(struct fdesc *) (st_value)
#include <errno.h>
#include <dl-fptr.h>
#include <tls.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
/* Translate a processor specific dynamic tag to the index
in l_info array. */
map->l_mach.fptr_table = boot_table;
}
-#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) \
- __ia64_init_bootstrap_fdesc_table (BOOTSTRAP_MAP);
+#define ELF_MACHINE_BEFORE_RTLD_RELOC(map, dynamic_info) \
+ __ia64_init_bootstrap_fdesc_table (map);
/* Return nonzero iff ELF header is compatible with the running host. */
static inline int __attribute__ ((unused))
entries will jump to the on-demand fixup code in dl-runtime.c. */
static inline int __attribute__ ((unused, always_inline))
-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ int lazy, int profile)
{
extern void _dl_runtime_resolve (void);
extern void _dl_runtime_profile (void);
/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
#define ELF_MACHINE_JMP_SLOT R_IA64_IPLTLSB
-/* According to the IA-64 specific documentation, Rela is always used. */
-#define ELF_MACHINE_NO_REL 1
-#define ELF_MACHINE_NO_RELA 0
-
/* Return the address of the entry point. */
#define ELF_MACHINE_START_ADDRESS(map, start) \
({ \
/* Perform the relocation specified by RELOC and SYM (which is fully
resolved). MAP is the object containing the reloc. */
-auto inline void
+static inline void
__attribute ((always_inline))
-elf_machine_rela (struct link_map *map,
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
const Elf64_Rela *reloc,
const Elf64_Sym *sym,
const struct r_found_version *version,
return;
else
{
- struct link_map *sym_map;
+ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
+ r_type);
/* RESOLVE_MAP() will return NULL if it fail to locate the symbol. */
- if ((sym_map = RESOLVE_MAP (&sym, version, r_type)))
+ if (sym_map != NULL)
{
value = SYMBOL_ADDRESS (sym_map, sym, true) + reloc->r_addend;
can be skipped. */
#define ELF_MACHINE_REL_RELATIVE 1
-auto inline void
+static inline void
__attribute ((always_inline))
elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc,
void *const reloc_addr_arg)
}
/* Perform a RELATIVE reloc on the .got entry that transfers to the .plt. */
-auto inline void
+static inline void
__attribute ((always_inline))
-elf_machine_lazy_rel (struct link_map *map,
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
Elf64_Addr l_addr, const Elf64_Rela *reloc,
int skip_ifunc)
{
/* The fourth argument to _dl_profile_fixup and the third one to
- _dl_call_pltexit are a pointer to La_ia64_regs:
+ _dl_audit_pltexit are a pointer to La_ia64_regs:
8byte r8
8byte r9
8byte sp
The fifth argument to _dl_profile_fixup is a pointer to long int.
- The fourth argument to _dl_call_pltexit is a pointer to
+ The fourth argument to _dl_audit_pltexit is a pointer to
La_ia64_retval:
8byte r8
}
{ .mii
mov r18 = ar.unat /* save it in La_ia64_regs */
- mov loc7 = out3 /* save it for _dl_call_pltexit */
+ mov loc7 = out3 /* save it for _dl_audit_pltexit */
mov loc5 = r11 /* preserve language specific register */
}
{ .mmi
}
{ .mii
mov ar.unat = r17 /* restore it for function call */
- mov loc8 = r16 /* save it for _dl_call_pltexit */
+ mov loc8 = r16 /* save it for _dl_audit_pltexit */
nop.i 0x0
}
{ .mmi
{ .mmi
stf.spill [r2] = f14, 32
stf.spill [r3] = f15, 24
- mov loc9 = out1 /* save it for _dl_call_pltexit */
+ mov loc9 = out1 /* save it for _dl_audit_pltexit */
;;
}
{ .mmb
br.call.sptk.many b0 = b6
}
{ .mii
- /* Prepare stack for _dl_call_pltexit. Loc10 has the original
+ /* Prepare stack for _dl_audit_pltexit. Loc10 has the original
stack pointer. */
adds r12 = -PLTEXIT_FRAME_SIZE, loc10
adds r2 = -(PLTEXIT_FRAME_SIZE - 16), loc10
{ .mmi
stf.spill [r2] = f12, 32
stf.spill [r3] = f13, 32
- /* We need to restore gp for _dl_call_pltexit. */
+ /* We need to restore gp for _dl_audit_pltexit. */
mov gp = loc11
;;
}
{ .mmb
stf.spill [r2] = f14
stf.spill [r3] = f15
- br.call.sptk.many b0 = _dl_call_pltexit
+ br.call.sptk.many b0 = _dl_audit_pltexit
}
{ .mmi
/* Load all the non-floating and floating return values. Skip
#include <sys/param.h>
#include <sysdep.h>
#include <dl-tls.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
/* Return nonzero iff ELF header is compatible with the running host. */
static inline int
entries will jump to the on-demand fixup code in dl-runtime.c. */
static inline int __attribute__ ((always_inline))
-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ int lazy, int profile)
{
Elf32_Addr *got;
extern void _dl_runtime_resolve (Elf32_Word);
/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
#define ELF_MACHINE_JMP_SLOT R_68K_JMP_SLOT
-/* The m68k never uses Elf32_Rel relocations. */
-#define ELF_MACHINE_NO_REL 1
-#define ELF_MACHINE_NO_RELA 0
-
static inline Elf32_Addr
elf_machine_fixup_plt (struct link_map *map, lookup_t t,
const ElfW(Sym) *refsym, const ElfW(Sym) *sym,
/* Perform the relocation specified by RELOC and SYM (which is fully resolved).
MAP is the object containing the reloc. */
-auto inline void __attribute__ ((unused, always_inline))
-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
- const Elf32_Sym *sym, const struct r_found_version *version,
+static inline void __attribute__ ((unused, always_inline))
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
+ const Elf32_Rela *reloc, const Elf32_Sym *sym,
+ const struct r_found_version *version,
void *const reloc_addr_arg, int skip_ifunc)
{
Elf32_Addr *const reloc_addr = reloc_addr_arg;
else
{
const Elf32_Sym *const refsym = sym;
- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
+ r_type);
Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true);
switch (r_type)
}
}
-auto inline void __attribute__ ((unused, always_inline))
+static inline void __attribute__ ((unused, always_inline))
elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc,
void *const reloc_addr_arg)
{
*reloc_addr = l_addr + reloc->r_addend;
}
-auto inline void __attribute__ ((unused, always_inline))
-elf_machine_lazy_rel (struct link_map *map,
+static inline void __attribute__ ((unused, always_inline))
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
Elf32_Addr l_addr, const Elf32_Rela *reloc,
int skip_ifunc)
{
cfi_adjust_cfa_offset (4)
move.l (32+FPSPACE)(%sp), -(%sp)
cfi_adjust_cfa_offset (4)
- jbsr _dl_call_pltexit
+ jbsr _dl_audit_pltexit
lea 16(%sp), %sp
cfi_adjust_cfa_offset (-16)
move.l (%sp)+, %d0
libc_hidden_def (__get_nprocs)
weak_alias (__get_nprocs, get_nprocs)
+int
+__get_nprocs_sched (void)
+{
+ return __get_nprocs ();
+}
+
/* Return the number of physical pages on the system. */
long int
__get_phys_pages (void)
+ CMSG_ALIGN (sizeof (struct cmsghdr)))
#define CMSG_LEN(len) (CMSG_ALIGN (sizeof (struct cmsghdr)) + (len))
+/* Given a length, return the additional padding necessary such that
+ len + __CMSG_PADDING(len) == CMSG_ALIGN (len). */
+#define __CMSG_PADDING(len) ((sizeof (size_t) \
+ - ((len) & (sizeof (size_t) - 1))) \
+ & (sizeof (size_t) - 1))
+
extern struct cmsghdr *__cmsg_nxthdr (struct msghdr *__mhdr,
struct cmsghdr *__cmsg) __THROW;
#ifdef __USE_EXTERN_INLINES
_EXTERN_INLINE struct cmsghdr *
__NTH (__cmsg_nxthdr (struct msghdr *__mhdr, struct cmsghdr *__cmsg))
{
+ /* We may safely assume that __cmsg lies between __mhdr->msg_control and
+ the end of the buffer (msg_control + msg_controllen) because the user is
+ required to obtain the first cmsg via CMSG_FIRSTHDR, set its length, then
+ obtain subsequent cmsgs via CMSG_NXTHDR, setting lengths along the way.
+ However, we don't yet trust the value of __cmsg->cmsg_len and therefore
+ do not use it in any pointer arithmetic until we check its value. */
+
+ unsigned char * __msg_control_ptr = (unsigned char *) __mhdr->msg_control;
+ unsigned char * __cmsg_ptr = (unsigned char *) __cmsg;
+
+ size_t __size_needed = sizeof (struct cmsghdr)
+ + __CMSG_PADDING (__cmsg->cmsg_len);
+
+ /* The current header is malformed, too small to be a full header. */
if ((size_t) __cmsg->cmsg_len < sizeof (struct cmsghdr))
- /* The kernel header does this so there may be a reason. */
return (struct cmsghdr *) 0;
+ /* There isn't enough space between __cmsg and the end of the buffer to
+ hold the current cmsg *and* the next one. */
+ if (((size_t)
+ (__msg_control_ptr + __mhdr->msg_controllen - __cmsg_ptr)
+ < __size_needed)
+ || ((size_t)
+ (__msg_control_ptr + __mhdr->msg_controllen - __cmsg_ptr
+ - __size_needed)
+ < __cmsg->cmsg_len))
+
+ return (struct cmsghdr *) 0;
+
+ /* Now, we trust cmsg_len and can use it to find the next header. */
__cmsg = (struct cmsghdr *) ((unsigned char *) __cmsg
+ CMSG_ALIGN (__cmsg->cmsg_len));
- if ((unsigned char *) (__cmsg + 1) > ((unsigned char *) __mhdr->msg_control
- + __mhdr->msg_controllen)
- || ((unsigned char *) __cmsg + CMSG_ALIGN (__cmsg->cmsg_len)
- > ((unsigned char *) __mhdr->msg_control + __mhdr->msg_controllen)))
- /* No more entries. */
- return (struct cmsghdr *) 0;
return __cmsg;
}
#endif /* Use `extern inline'. */
{
void go (intptr_t *argdata)
{
+ char *orig_argv0;
char **p;
/* Cache the information in various global variables. */
_environ = &_dl_argv[_dl_argc + 1];
for (p = _environ; *p++;); /* Skip environ pointers and terminator. */
+ orig_argv0 = _dl_argv[0];
+
if ((void *) p == _dl_argv[0])
{
static struct hurd_startup_data nodata;
/* The call above might screw a few things up.
- First of all, if _dl_skip_args is nonzero, we are ignoring
- the first few arguments. However, if we have no Hurd startup
- data, it is the magical convention that ARGV[0] == P. The
+ P is the location after the terminating NULL of the list of
+ environment variables. It has to point to the Hurd startup
+ data or if that's missing then P == ARGV[0] must hold. The
startup code in init-first.c will get confused if this is not
the case, so we must rearrange things to make it so. We'll
- overwrite the origional ARGV[0] at P with ARGV[_dl_skip_args].
+ recompute P and move the Hurd data or the new ARGV[0] there.
- Secondly, if we need to be secure, it removes some dangerous
- environment variables. If we have no Hurd startup date this
- changes P (since that's the location after the terminating
- NULL in the list of environment variables). We do the same
- thing as in the first case but make sure we recalculate P.
- If we do have Hurd startup data, we have to move the data
- such that it starts just after the terminating NULL in the
- environment list.
+ Note: directly invoked ld.so can move arguments and env vars.
We use memmove, since the locations might overlap. */
- if (__libc_enable_secure || _dl_skip_args)
- {
- char **newp;
- for (newp = _environ; *newp++;);
+ char **newp;
+ for (newp = _environ; *newp++;);
- if (_dl_argv[-_dl_skip_args] == (char *) p)
+ if (newp != p || _dl_argv[0] != orig_argv0)
+ {
+ if (orig_argv0 == (char *) p)
{
if ((char *) newp != _dl_argv[0])
{
+++ /dev/null
-/* Define and initialize the `__libc_enable_secure' flag. Hurd version.
- Copyright (C) 1998-2021 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-/* There is no need for this file in the Hurd; it is just a placeholder
- to prevent inclusion of the sysdeps/generic version.
- In the shared library, the `__libc_enable_secure' variable is defined
- by the dynamic linker in dl-sysdep.c and set there.
- In the static library, it is defined in init-first.c and set there. */
-
-#include <libc-internal.h>
-
-void
-__libc_init_secure (void)
-{
-}
unsigned long int __hurd_threadvar_stack_offset;
unsigned long int __hurd_threadvar_stack_mask;
-#ifndef SHARED
-int __libc_enable_secure;
-#endif
-
extern int __libc_argc attribute_hidden;
extern char **__libc_argv attribute_hidden;
extern char **_dl_argv;
GLIBC_2.2.6 abort F
GLIBC_2.3 ___tls_get_addr F
GLIBC_2.3 __tls_get_addr F
+GLIBC_2.34 __rtld_version_placeholder F
__if_nametoindex (const char *ifname)
{
struct ifreq ifr;
- int fd = __opensock ();
+ int fd = __socket (AF_INET, SOCK_DGRAM, 0);
if (fd < 0)
return 0;
error_t err = 0;
char data[2048];
file_t server;
- int fd = __opensock ();
+ int fd = __socket (AF_INET, SOCK_DGRAM, 0);
struct ifconf ifc;
unsigned int nifs, i;
struct if_nameindex *idx = NULL;
__if_indextoname (unsigned int ifindex, char ifname[IF_NAMESIZE])
{
struct ifreq ifr;
- int fd = __opensock ();
+ int fd = __socket (AF_INET, SOCK_DGRAM, 0);
if (fd < 0)
return NULL;
#include <sys/param.h>
#include <tls.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
/* Return nonzero iff ELF header is compatible with the running host. */
static inline int
entries will jump to the on-demand fixup code in dl-runtime.c. */
static inline int __attribute__ ((always_inline))
-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ int lazy, int profile)
{
extern void _dl_runtime_resolve (Elf32_Word);
extern void _dl_runtime_profile (Elf32_Word);
/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
#define ELF_MACHINE_JMP_SLOT R_MICROBLAZE_JUMP_SLOT
-/* The microblaze never uses Elf32_Rel relocations. */
-#define ELF_MACHINE_NO_REL 1
-#define ELF_MACHINE_NO_RELA 0
-
static inline Elf32_Addr
elf_machine_fixup_plt (struct link_map *map, lookup_t t,
const ElfW(Sym) *refsym, const ElfW(Sym) *sym,
((unsigned short *)(rel_addr))[3] = (val) & 0xffff; \
} while (0)
-auto inline void __attribute__ ((always_inline))
-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
- const Elf32_Sym *sym, const struct r_found_version *version,
+static inline void __attribute__ ((always_inline))
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
+ const Elf32_Rela *reloc, const Elf32_Sym *sym,
+ const struct r_found_version *version,
void *const reloc_addr_arg, int skip_ifunc)
{
Elf32_Addr *const reloc_addr = reloc_addr_arg;
else
{
const Elf32_Sym *const refsym = sym;
- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
+ r_type);
Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true);
value += reloc->r_addend;
}
}
-auto inline void
+static inline void
elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc,
void *const reloc_addr_arg)
{
PUT_REL_64 (reloc_addr, l_addr + reloc->r_addend);
}
-auto inline void
-elf_machine_lazy_rel (struct link_map *map,
+static inline void
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
Elf32_Addr l_addr, const Elf32_Rela *reloc,
int skip_ifunc)
{
--- /dev/null
+/* ELF dynamic relocation type supported by the architecture. MIPS version.
+ Copyright (C) 2001-2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _DL_MACHINE_REL_H
+#define _DL_MACHINE_REL_H
+
+#define ELF_MACHINE_PLT_REL 1
+#define ELF_MACHINE_NO_REL 0
+#define ELF_MACHINE_NO_RELA 0
+#define PLTREL ElfW(Rel)
+
+#endif
#include <sysdep.h>
#include <sys/asm.h>
#include <dl-tls.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
/* The offset of gp from GOT might be system-dependent. It's set by
ld. The same value is also */
((((type) == ELF_MACHINE_JMP_SLOT) * ELF_RTYPE_CLASS_PLT) \
| (((type) == R_MIPS_COPY) * ELF_RTYPE_CLASS_COPY))
-#define ELF_MACHINE_PLT_REL 1
-#define ELF_MACHINE_NO_REL 0
-#define ELF_MACHINE_NO_RELA 0
-
/* Translate a processor specific dynamic tag to the index
in l_info array. */
#define DT_MIPS(x) (DT_MIPS_##x - DT_LOPROC + DT_NUM)
/* We can't rely on elf_machine_got_rel because _dl_object_relocation_scope
fiddles with global data. */
-#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) \
+#define ELF_MACHINE_BEFORE_RTLD_RELOC(bootstrap_map, dynamic_info) \
do { \
- struct link_map *map = BOOTSTRAP_MAP; \
+ struct link_map *map = bootstrap_map; \
ElfW(Sym) *sym; \
ElfW(Addr) *got; \
int i, n; \
by RELOC_ADDR. SYM is the relocation symbol specified by R_INFO and
MAP is the object containing the reloc. */
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_reloc (struct link_map *map, ElfW(Addr) r_info,
- const ElfW(Sym) *sym, const struct r_found_version *version,
- void *reloc_addr, ElfW(Addr) r_addend, int inplace_p)
+elf_machine_reloc (struct link_map *map, struct r_scope_elem *scope[],
+ ElfW(Addr) r_info, const ElfW(Sym) *sym,
+ const struct r_found_version *version, void *reloc_addr,
+ ElfW(Addr) r_addend, int inplace_p)
{
const unsigned long int r_type = ELFW(R_TYPE) (r_info);
ElfW(Addr) *addr_field = (ElfW(Addr) *) reloc_addr;
case R_MIPS_TLS_TPREL32:
# endif
{
- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
+ r_type);
switch (r_type)
{
_dl_signal_error (0, map->l_name, NULL,
"found jump slot relocation with non-zero addend");
- sym_map = RESOLVE_MAP (&sym, version, r_type);
+ sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type);
value = SYMBOL_ADDRESS (sym_map, sym, true);
*addr_field = value;
ElfW(Addr) value;
/* Calculate the address of the symbol. */
- sym_map = RESOLVE_MAP (&sym, version, r_type);
+ sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type);
value = SYMBOL_ADDRESS (sym_map, sym, true);
if (__builtin_expect (sym == NULL, 0))
/* Perform the relocation specified by RELOC and SYM (which is fully resolved).
MAP is the object containing the reloc. */
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_rel (struct link_map *map, const ElfW(Rel) *reloc,
- const ElfW(Sym) *sym, const struct r_found_version *version,
- void *const reloc_addr, int skip_ifunc)
+elf_machine_rel (struct link_map *map, struct r_scope_elem *scope[],
+ const ElfW(Rel) *reloc, const ElfW(Sym) *sym,
+ const struct r_found_version *version, void *const reloc_addr,
+ int skip_ifunc)
{
- elf_machine_reloc (map, reloc->r_info, sym, version, reloc_addr, 0, 1);
+ elf_machine_reloc (map, scope, reloc->r_info, sym, version, reloc_addr, 0, 1);
}
-auto inline void
+static inline void
__attribute__((always_inline))
elf_machine_rel_relative (ElfW(Addr) l_addr, const ElfW(Rel) *reloc,
void *const reloc_addr)
/* XXX Nothing to do. There is no relative relocation, right? */
}
-auto inline void
+static inline void
__attribute__((always_inline))
-elf_machine_lazy_rel (struct link_map *map,
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
ElfW(Addr) l_addr, const ElfW(Rel) *reloc,
int skip_ifunc)
{
_dl_reloc_bad_type (map, r_type, 1);
}
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[], const ElfW(Rela) *reloc,
const ElfW(Sym) *sym, const struct r_found_version *version,
void *const reloc_addr, int skip_ifunc)
{
- elf_machine_reloc (map, reloc->r_info, sym, version, reloc_addr,
+ elf_machine_reloc (map, scope, reloc->r_info, sym, version, reloc_addr,
reloc->r_addend, 0);
}
-auto inline void
+static inline void
__attribute__((always_inline))
elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc,
void *const reloc_addr)
#ifndef RTLD_BOOTSTRAP
/* Relocate GOT. */
-auto inline void
+static inline void
__attribute__((always_inline))
-elf_machine_got_rel (struct link_map *map, int lazy)
+elf_machine_got_rel (struct link_map *map, struct r_scope_elem *scope[], int lazy)
{
ElfW(Addr) *got;
ElfW(Sym) *sym;
const struct r_found_version *version __attribute__ ((unused)) \
= vernum ? &map->l_versions[vernum[sym_index] & 0x7fff] : NULL; \
struct link_map *sym_map; \
- sym_map = RESOLVE_MAP (&ref, version, reloc); \
+ sym_map = RESOLVE_MAP (map, scope, &ref, version, reloc); \
SYMBOL_ADDRESS (sym_map, ref, true); \
})
/* Set up the loaded object described by L so its stub function
will jump to the on-demand fixup code __dl_runtime_resolve. */
-auto inline int
+static inline int
__attribute__((always_inline))
-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ int lazy, int profile)
{
# ifndef RTLD_BOOTSTRAP
ElfW(Addr) *got;
}
/* Relocate global offset table. */
- elf_machine_got_rel (l, lazy);
+ elf_machine_got_rel (l, scope, lazy);
/* If using PLTs, fill in the first two entries of .got.plt. */
if (l->l_info[DT_JMPREL] && lazy)
--- /dev/null
+/* Check if dynamic section should be relocated. MIPS version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _DL_RELOCATE_LD_H
+#define _DL_RELOCATE_LD_H
+
+/* The dynamic section is readonly. */
+#define DL_RO_DYN_SECTION 1
+
+#endif /* _DL_RELOCATE_LD_H */
struct La_mips_64_retval *, \
const char *);
-/* The MIPS ABI specifies that the dynamic section has to be read-only. */
-
-#define DL_RO_DYN_SECTION 1
-
#include_next <ldsodefs.h>
/* The 64-bit MIPS ELF ABI uses an unusual reloc format. Each
#include <string.h>
#include <link.h>
#include <dl-tls.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
/* Return nonzero iff ELF header is compatible with the running host. */
static inline int
entries will jump to the on-demand fixup code in dl-runtime.c. */
static inline int __attribute__ ((always_inline))
-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ int lazy, int profile)
{
extern void _dl_runtime_resolve (Elf32_Word);
ldw r8, %call(_dl_nios2_get_gp_value)(r22)\n\
callr r8\n\
mov gp, r2\n\
-\n\
- /* Find the number of arguments to skip. */\n\
- ldw r8, %got(_dl_skip_args)(r22)\n\
- ldw r8, 0(r8)\n\
\n\
/* Find the main_map from the GOT. */\n\
ldw r4, %got(_rtld_local)(r22)\n\
ldw r4, 0(r4)\n\
\n\
- /* Find argc. */\n\
- ldw r5, 0(sp)\n\
- sub r5, r5, r8\n\
- stw r5, 0(sp)\n\
-\n\
- /* Find the first unskipped argument. */\n\
- slli r8, r8, 2\n\
- addi r6, sp, 4\n\
- add r9, r6, r8\n\
- mov r10, r6\n\
-\n\
- /* Shuffle argv down. */\n\
-3: ldw r11, 0(r9)\n\
- stw r11, 0(r10)\n\
- addi r9, r9, 4\n\
- addi r10, r10, 4\n\
- bne r11, zero, 3b\n\
-\n\
- /* Shuffle envp down. */\n\
- mov r7, r10\n\
-4: ldw r11, 0(r9)\n\
- stw r11, 0(r10)\n\
- addi r9, r9, 4\n\
- addi r10, r10, 4\n\
- bne r11, zero, 4b\n\
+ /* Load adjusted argc. */\n\
+ ldw r2, %got(_dl_argc)(r22)\n\
+ ldw r5, 0(r2)\n\
\n\
- /* Shuffle auxv down. */\n\
-5: ldw r11, 4(r9)\n\
- stw r11, 4(r10)\n\
- ldw r11, 0(r9)\n\
- stw r11, 0(r10)\n\
- addi r9, r9, 8\n\
- addi r10, r10, 8\n\
- bne r11, zero, 5b\n\
-\n\
- /* Update _dl_argv. */\n\
+ /* Load adjusted argv. */\n\
ldw r2, %got(_dl_argv)(r22)\n\
- stw r6, 0(r2)\n\
+ ldw r6, 0(r2)\n\
+\n\
+ /* envp = argv + argc + 1 */\n\
+ addi r7, r5, 1\n\
+ slli r7, r7, 2\n\
+ add r7, r7, r6\n\
\n\
/* Call _dl_init through the PLT. */\n\
ldw r8, %call(_dl_init)(r22)\n\
/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
#define ELF_MACHINE_JMP_SLOT R_NIOS2_JUMP_SLOT
-/* The Nios II never uses Elf32_Rel relocations. */
-#define ELF_MACHINE_NO_REL 1
-#define ELF_MACHINE_NO_RELA 0
-
/* Fixup a PLT entry to bounce directly to the function at VALUE. */
static inline Elf32_Addr
LOADADDR is the load address of the object; INFO is an array indexed
by DT_* of the .dynamic section info. */
-auto inline void __attribute__ ((always_inline))
-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
- const ElfW(Sym) *sym, const struct r_found_version *version,
- void *const reloc_addr_arg, int skip_ifunc)
+static inline void __attribute__ ((always_inline))
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
+ const ElfW(Rela) *reloc, const ElfW(Sym) *sym,
+ const struct r_found_version *version,
+ void *const reloc_addr_arg, int skip_ifunc)
{
Elf32_Addr *const reloc_addr = reloc_addr_arg;
const unsigned int r_type = ELF32_R_TYPE (reloc->r_info);
else
{
const Elf32_Sym *const refsym = sym;
- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
+ r_type);
Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true);
switch (r_type)
}
}
-auto inline void __attribute__((always_inline))
+static inline void __attribute__((always_inline))
elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc,
void *const reloc_addr_arg)
{
*reloc_addr = l_addr + reloc->r_addend;
}
-auto inline void __attribute__((always_inline))
-elf_machine_lazy_rel (struct link_map *map,
+static inline void __attribute__((always_inline))
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
ElfW(Addr) l_addr, const ElfW(Rela) *reloc,
int skip_ifunc)
{
It will be bigger than it actually is, but for unwind.c/pt-longjmp.c
purposes this is good enough. */
THREAD_SETMEM (pd, stackblock_size, (size_t) __libc_stack_end);
-
- THREAD_SETMEM (pd, cancelstate, PTHREAD_CANCEL_ENABLE);
- THREAD_SETMEM (pd, canceltype, PTHREAD_CANCEL_DEFERRED);
}
# ifdef __USE_GNU
# ifdef __REDIRECT_NTH
extern int __REDIRECT_NTH (pthread_mutexattr_getrobust_np,
- (pthread_mutex_t *, int *),
+ (pthread_mutexattr_t *, int *),
pthread_mutexattr_getrobust) __nonnull ((1))
__attribute_deprecated_msg__ ("\
pthread_mutexattr_getrobust_np is deprecated, use pthread_mutexattr_getrobust");
# ifdef __USE_GNU
# ifdef __REDIRECT_NTH
extern int __REDIRECT_NTH (pthread_mutexattr_setrobust_np,
- (pthread_mutex_t *, int),
+ (pthread_mutexattr_t *, int),
pthread_mutexattr_setrobust) __nonnull ((1))
__attribute_deprecated_msg__ ("\
pthread_mutexattr_setrobust_np is deprecated, use pthread_mutexattr_setrobust");
struct pthread *self = THREAD_SELF;
/* Make sure we get no more cancellations. */
- THREAD_ATOMIC_BIT_SET (self, cancelhandling, EXITING_BIT);
+ atomic_bit_set (&self->cancelhandling, EXITING_BIT);
__pthread_unwind ((__pthread_unwind_buf_t *)
THREAD_GETMEM (self, cleanup_jmp_buf));
#ifdef _POSIX_ASYNC_IO
{
/* AIO is only allowed on regular files and block devices. */
- struct stat64 st;
+ struct __stat64_t64 st;
- if (__fstat64 (fd, &st) < 0
+ if (__fstat64_time64 (fd, &st) < 0
|| (! S_ISREG (st.st_mode) && ! S_ISBLK (st.st_mode)))
return -1;
else
l = l->next;
}
free_prefixlist (labellist);
+ labellist = NULL;
/* Sort the entries so that the most specific ones are at
the beginning. */
l = l->next;
}
free_prefixlist (precedencelist);
+ precedencelist = NULL;
/* Sort the entries so that the most specific ones are at
the beginning. */
size_t allocated = size;
size_t used;
+ /* A size of 1 byte is never useful. */
+ if (allocated == 1)
+ {
+ __set_errno (ERANGE);
+ return NULL;
+ }
+
#if HAVE_MINIMALLY_WORKING_GETCWD
/* If AT_FDCWD is not defined, the algorithm below is O(N**2) and
this is much slower than the system getcwd (at least on
int
isfdtype (int fildes, int fdtype)
{
- struct stat64 st;
+ struct __stat64_t64 st;
int result;
{
int save_error = errno;
- result = __fstat64 (fildes, &st);
+ result = __fstat64_time64 (fildes, &st);
__set_errno (save_error);
}
int
posix_fallocate (int fd, __off_t offset, __off_t len)
{
- struct stat64 st;
+ struct __stat64_t64 st;
if (offset < 0 || len < 0)
return EINVAL;
}
/* We have to make sure that this is really a regular file. */
- if (__fstat64 (fd, &st) != 0)
+ if (__fstat64_time64 (fd, &st) != 0)
return EBADF;
if (S_ISFIFO (st.st_mode))
return ESPIPE;
int
__posix_fallocate64_l64 (int fd, __off64_t offset, __off64_t len)
{
- struct stat64 st;
+ struct __stat64_t64 st;
if (offset < 0 || len < 0)
return EINVAL;
}
/* We have to make sure that this is really a regular file. */
- if (__fstat64 (fd, &st) != 0)
+ if (__fstat64_time64 (fd, &st) != 0)
return EBADF;
if (S_ISFIFO (st.st_mode))
return ESPIPE;
sysdep_headers += sys/platform/ppc.h
tests += test-gettimebase
tests += tst-set_ppr
-
-# This test is expected to run and exit with EXIT_UNSUPPORTED on
-# processors that do not implement the Power ISA 2.06 or greater.
-# But the test makes use of instructions from Power ISA 2.06 and 2.07.
-CFLAGS-tst-set_ppr.c += -Wa,-many
endif
ifeq ($(subdir),wcsmbs)
--- /dev/null
+/* Configuration of lookup functions. PowerPC version.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define DL_FIXUP_VALUE_TYPE ElfW(Addr)
+#define DL_FIXUP_MAKE_VALUE(map, addr) (addr)
+#define DL_FIXUP_VALUE_CODE_ADDR(value) (value)
+#define DL_FIXUP_VALUE_ADDR(value) (value)
+#define DL_FIXUP_ADDR_VALUE(addr) (addr)
+#if __WORDSIZE == 64 && _CALL_ELF == 1
+/* We need to correctly set the audit modules value for bind-now. */
+# define DL_FIXUP_BINDNOW_ADDR_VALUE(addr) \
+ (((Elf64_FuncDesc *)(addr))->fd_func)
+# define DL_FIXUP_BINDNOW_RELOC(value, new_value, st_value) \
+ ({ \
+ Elf64_FuncDesc *opd = (Elf64_FuncDesc *) (value); \
+ opd->fd_func = (st_value); \
+ if ((new_value) != (uintptr_t) (st_value)) \
+ opd->fd_toc = ((Elf64_FuncDesc *)(new_value))->fd_toc; \
+ })
+#else
+# define DL_FIXUP_BINDNOW_ADDR_VALUE(addr) (addr)
+# define DL_FIXUP_BINDNOW_RELOC(value, new_value, st_value) \
+ (*value) = st_value;
+#endif
#include <dl-tls.h>
#include <dl-irel.h>
#include <hwcapinfo.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
/* Translate a processor specific dynamic tag to the index
in l_info array. */
return runtime_dynamic - elf_machine_dynamic ();
}
-#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) /* nothing */
-
/* The PLT uses Elf32_Rela relocs. */
#define elf_machine_relplt elf_machine_rela
/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
#define ELF_MACHINE_JMP_SLOT R_PPC_JMP_SLOT
-/* The PowerPC never uses REL relocations. */
-#define ELF_MACHINE_NO_REL 1
-#define ELF_MACHINE_NO_RELA 0
-
/* We define an initialization function to initialize HWCAP/HWCAP2 and
platform data so it can be copied into the TCB later. This is called
very early in _dl_sysdep_start for dynamically linked binaries. */
int lazy, int profile);
static inline int
-elf_machine_runtime_setup (struct link_map *map,
+elf_machine_runtime_setup (struct link_map *map, struct r_scope_elem *scope[],
int lazy, int profile)
{
if (map->l_info[DT_JMPREL] == 0)
LOADADDR is the load address of the object; INFO is an array indexed
by DT_* of the .dynamic section info. */
-auto inline void __attribute__ ((always_inline))
-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
- const Elf32_Sym *sym, const struct r_found_version *version,
+static inline void __attribute__ ((always_inline))
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
+ const Elf32_Rela *reloc, const Elf32_Sym *sym,
+ const struct r_found_version *version,
void *const reloc_addr_arg, int skip_ifunc)
{
Elf32_Addr *const reloc_addr = reloc_addr_arg;
}
else
{
- sym_map = RESOLVE_MAP (&sym, version, r_type);
+ sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type);
value = SYMBOL_ADDRESS (sym_map, sym, true);
}
value += reloc->r_addend;
}
}
-auto inline void __attribute__ ((always_inline))
+static inline void __attribute__ ((always_inline))
elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc,
void *const reloc_addr_arg)
{
*reloc_addr = l_addr + reloc->r_addend;
}
-auto inline void __attribute__ ((always_inline))
-elf_machine_lazy_rel (struct link_map *map,
+static inline void __attribute__ ((always_inline))
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
Elf32_Addr l_addr, const Elf32_Rela *reloc,
int skip_ifunc)
{
--- /dev/null
+/* PowerPC ELFv1 function descriptor definition.
+ Copyright (C) 2009-2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _DL_FUNCDESC_H
+#define _DL_FUNCDESC_H
+
+#if _CALL_ELF != 2
+/* A PowerPC64 function descriptor. The .plt (procedure linkage
+ table) and .opd (official procedure descriptor) sections are
+ arrays of these. */
+typedef struct
+{
+ Elf64_Addr fd_func;
+ Elf64_Addr fd_toc;
+ Elf64_Addr fd_aux;
+} Elf64_FuncDesc;
+#endif
+
+#endif
#include <stdio.h>
#include <unistd.h>
#include <ldsodefs.h>
-#include <dl-machine.h>
+#include <dl-funcdesc.h>
#define ELF_MACHINE_IRELA 1
#include <sysdep.h>
#include <hwcapinfo.h>
#include <cpu-features.c>
+#include <dl-static-tls.h>
+#include <dl-funcdesc.h>
+#include <dl-machine-rel.h>
/* Translate a processor specific dynamic tag to the index
in l_info array. */
#define DT_PPC64(x) (DT_PPC64_##x - DT_LOPROC + DT_NUM)
-#if _CALL_ELF != 2
-/* A PowerPC64 function descriptor. The .plt (procedure linkage
- table) and .opd (official procedure descriptor) sections are
- arrays of these. */
-typedef struct
-{
- Elf64_Addr fd_func;
- Elf64_Addr fd_toc;
- Elf64_Addr fd_aux;
-} Elf64_FuncDesc;
-#endif
-
#define ELF_MULT_MACHINES_SUPPORTED
/* Return nonzero iff ELF header is compatible with the running host. */
return runtime_dynamic - elf_machine_load_address() ;
}
-#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) /* nothing */
-
/* The PLT uses Elf64_Rela relocs. */
#define elf_machine_relplt elf_machine_rela
/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
#define ELF_MACHINE_JMP_SLOT R_PPC64_JMP_SLOT
-/* The PowerPC never uses REL relocations. */
-#define ELF_MACHINE_NO_REL 1
-#define ELF_MACHINE_NO_RELA 0
-
/* We define an initialization function to initialize HWCAP/HWCAP2 and
platform data so it can be copied into the TCB later. This is called
very early in _dl_sysdep_start for dynamically linked binaries. */
/* Set up the loaded object described by MAP so its unrelocated PLT
entries will jump to the on-demand fixup code in dl-runtime.c. */
static inline int __attribute__ ((always_inline))
-elf_machine_runtime_setup (struct link_map *map, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *map, struct r_scope_elem *scope[],
+ int lazy, int profile)
{
if (map->l_info[DT_JMPREL])
{
Elf64_Addr *const reloc_addr,
const Elf64_Sym *refsym);
-auto inline void __attribute__ ((always_inline))
+static inline void __attribute__ ((always_inline))
elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc,
void *const reloc_addr_arg)
{
}
/* This computes the value used by TPREL* relocs. */
-auto inline Elf64_Addr __attribute__ ((always_inline, const))
+static inline Elf64_Addr __attribute__ ((always_inline, const))
elf_machine_tprel (struct link_map *map,
struct link_map *sym_map,
const Elf64_Sym *sym,
}
/* Call function at address VALUE (an OPD entry) to resolve ifunc relocs. */
-auto inline Elf64_Addr __attribute__ ((always_inline))
+static inline Elf64_Addr __attribute__ ((always_inline))
resolve_ifunc (Elf64_Addr value,
const struct link_map *map, const struct link_map *sym_map)
{
/* Perform the relocation specified by RELOC and SYM (which is fully
resolved). MAP is the object containing the reloc. */
-auto inline void __attribute__ ((always_inline))
-elf_machine_rela (struct link_map *map,
+static inline void __attribute__ ((always_inline))
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
const Elf64_Rela *reloc,
const Elf64_Sym *sym,
const struct r_found_version *version,
/* We need SYM_MAP even in the absence of TLS, for elf_machine_fixup_plt
and STT_GNU_IFUNC. */
- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type);
Elf64_Addr value = SYMBOL_ADDRESS (sym_map, sym, true) + reloc->r_addend;
if (sym != NULL
MODIFIED_CODE_NOQUEUE (reloc_addr);
}
-auto inline void __attribute__ ((always_inline))
-elf_machine_lazy_rel (struct link_map *map,
+static inline void __attribute__ ((always_inline))
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
Elf64_Addr l_addr, const Elf64_Rela *reloc,
int skip_ifunc)
{
#ifndef PROF
ENTRY (_dl_profile_resolve, 4)
/* Spill r30, r31 to preserve the link_map* and reloc_addr, in case we
- need to call _dl_call_pltexit. */
+ need to call _dl_audit_pltexit. */
std r31,-8(r1)
std r30,-16(r1)
/* We need to save the registers used to pass parameters, ie. r3 thru
L(callpltexit):
addi r5,r1,INT_PARMS
addi r6,r1,INT_RTN
- bl JUMPTARGET(_dl_call_pltexit)
+ bl JUMPTARGET(_dl_audit_pltexit)
#ifndef SHARED
nop
#endif
cmpldi cr6,r5,16 /* Check if length was reached. */
ble cr6,L(zero_padding_end)
- stxv v18,0(r11)
+ stxv 32+v18,0(r11)
addi r11,r11,16
addi r5,r5,-16
L(zero_padding_end):
sldi r10,r5,56 /* stxvl wants size in top 8 bits */
- stxvl v18,r11,r10 /* Partial store */
+ stxvl 32+v18,r11,r10 /* Partial store */
blr
.align 4
/* Allocate frame and save register */
#define NVOLREG_SAVE \
stdu r1,-SCV_FRAME_SIZE(r1); \
+ cfi_adjust_cfa_offset(SCV_FRAME_SIZE); \
std r31,SCV_FRAME_NVOLREG_SAVE(r1); \
- cfi_adjust_cfa_offset(SCV_FRAME_SIZE);
+ cfi_rel_offset(r31,SCV_FRAME_NVOLREG_SAVE);
/* Restore register and destroy frame */
#define NVOLREG_RESTORE \
ld r31,SCV_FRAME_NVOLREG_SAVE(r1); \
+ cfi_restore(r31); \
addi r1,r1,SCV_FRAME_SIZE; \
cfi_adjust_cfa_offset(-SCV_FRAME_SIZE);
#define DO_CALL_SCV \
mflr r9; \
- std r9,FRAME_LR_SAVE(r1); \
- cfi_offset(lr,FRAME_LR_SAVE); \
+ std r9,SCV_FRAME_SIZE+FRAME_LR_SAVE(r1); \
+ cfi_rel_offset(lr,SCV_FRAME_SIZE+FRAME_LR_SAVE); \
.machine "push"; \
.machine "power9"; \
scv 0; \
.machine "pop"; \
- ld r9,FRAME_LR_SAVE(r1); \
+ ld r9,SCV_FRAME_SIZE+FRAME_LR_SAVE(r1); \
mtlr r9; \
cfi_restore(lr);
/* Set SAT bit in VSCR register. */
asm volatile (".machine push;\n"
".machine \"power5\";\n"
+ ".machine altivec;\n"
"vspltisb %0,0;\n"
"vspltisb %1,-1;\n"
"vpkuwus %0,%0,%1;\n"
{
/* Read the PPR. */
ppr_t ppr;
- asm volatile (MFPPR" %0" : "=r"(ppr));
+ asm volatile (".machine push; .machine power7; "MFPPR" %0; .machine pop"
+ : "=r"(ppr));
/* Return the thread priority value. */
return EXTRACT_THREAD_PRIORITY (ppr);
}
tst-cancel12 tst-cancel13 tst-cancel14 tst-cancel15 tst-cancel16 \
tst-cancel18 tst-cancel19 tst-cancel20 tst-cancel21 \
tst-cancel22 tst-cancel23 tst-cancel26 tst-cancel27 tst-cancel28 \
+ tst-cancel29 \
tst-cleanup0 tst-cleanup1 tst-cleanup2 tst-cleanup3 \
tst-clock1 \
tst-cond-except \
tst-join8 tst-join9 tst-join10 tst-join11 tst-join12 tst-join13 \
tst-join14 tst-join15 \
tst-key1 tst-key2 tst-key3 tst-key4 \
- tst-kill1 tst-kill2 tst-kill3 tst-kill4 tst-kill5 tst-kill6 \
+ tst-kill1 tst-kill2 tst-kill3 tst-kill5 tst-kill6 \
tst-locale1 tst-locale2 \
tst-memstream \
tst-mutex-errorcheck tst-mutex1 tst-mutex2 tst-mutex3 tst-mutex4 \
tst-unload \
tst-unwind-thread \
tst-pt-vfork1 tst-pt-vfork2 tst-vfork1x tst-vfork2x \
+ tst-pthread-exit-signal \
+ tst-pthread-setuid-loop \
+ tst-pthread_cancel-exited \
+ tst-pthread_cancel-select-loop \
+ tst-pthread-raise-blocked-self \
+ tst-pthread_kill-exited \
+ tst-pthread_kill-exiting \
+ tst-cancel30 \
+ # tests
tests-time64 := \
tst-abstime-time64 \
tst-cleanupx0 tst-cleanupx1 tst-cleanupx2 tst-cleanupx3
ifeq ($(build-shared),yes)
-tests += tst-atfork2 tst-pt-tls4 tst-_res1 tst-fini1
+tests += \
+ tst-atfork2 \
+ tst-pt-tls4 \
+ tst-_res1 \
+ tst-fini1 \
+ tst-create1 \
+ tst-atfork3 \
+ tst-atfork4 \
+# tests
+
tests-nolibpthread += tst-fini1
endif
-modules-names += tst-atfork2mod tst-tls4moda tst-tls4modb \
- tst-_res1mod1 tst-_res1mod2 tst-fini1mod
+modules-names += \
+ tst-atfork2mod \
+ tst-tls4moda \
+ tst-tls4modb \
+ tst-_res1mod1 \
+ tst-_res1mod2 \
+ tst-fini1mod \
+ tst-create1mod \
+ tst-atfork3mod \
+ tst-atfork4mod \
+# module-names
+
test-modules = $(addprefix $(objpfx),$(addsuffix .so,$(modules-names)))
tst-atfork2mod.so-no-z-defs = yes
+tst-atfork3mod.so-no-z-defs = yes
+tst-atfork4mod.so-no-z-defs = yes
+tst-create1mod.so-no-z-defs = yes
ifeq ($(build-shared),yes)
# Build all the modules even when not actually running test programs.
LD_PRELOAD=$(common-objpfx)/malloc/libc_malloc_debug.so
$(objpfx)tst-atfork2mod.so: $(shared-thread-library)
+$(objpfx)tst-atfork3: $(shared-thread-library)
+LDFLAGS-tst-atfork3 = -rdynamic
+$(objpfx)tst-atfork3mod.so: $(shared-thread-library)
+
+$(objpfx)tst-atfork4: $(shared-thread-library)
+LDFLAGS-tst-atfork4 = -rdynamic
+$(objpfx)tst-atfork4mod.so: $(shared-thread-library)
+
ifeq ($(build-shared),yes)
$(objpfx)tst-atfork2.out: $(objpfx)tst-atfork2mod.so
+$(objpfx)tst-atfork3.out: $(objpfx)tst-atfork3mod.so
+$(objpfx)tst-atfork4.out: $(objpfx)tst-atfork4mod.so
endif
ifeq ($(build-shared),yes)
CFLAGS-tst-unwind-thread.c += -funwind-tables
+LDFLAGS-tst-create1 = -Wl,-export-dynamic
+$(objpfx)tst-create1: $(shared-thread-library)
+$(objpfx)tst-create1.out: $(objpfx)tst-create1mod.so
+
endif
--- /dev/null
+/* Check if pthread_atfork handler can call dlclose (BZ#24595).
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include <support/check.h>
+#include <support/xthread.h>
+#include <support/capture_subprocess.h>
+#include <support/xdlfcn.h>
+
+/* Check that pthread_atfork handlers do not deadlock when calling a function
+ that might alter the internal fork handler list, such as dlclose.
+
+ The test registers a callback set with pthread_atfork(), dlopens a shared
+ library (nptl/tst-atfork3mod.c), calls an exported symbol from the library
+ (which in turn also registers atfork handlers), and calls fork to trigger
+ the callbacks. */
+
+static void *handler;
+static bool run_dlclose_prepare;
+static bool run_dlclose_parent;
+static bool run_dlclose_child;
+
+static void
+prepare (void)
+{
+ if (run_dlclose_prepare)
+ xdlclose (handler);
+}
+
+static void
+parent (void)
+{
+ if (run_dlclose_parent)
+ xdlclose (handler);
+}
+
+static void
+child (void)
+{
+ if (run_dlclose_child)
+ xdlclose (handler);
+}
+
+static void
+proc_func (void *closure)
+{
+}
+
+static void
+do_test_generic (bool dlclose_prepare, bool dlclose_parent, bool dlclose_child)
+{
+ run_dlclose_prepare = dlclose_prepare;
+ run_dlclose_parent = dlclose_parent;
+ run_dlclose_child = dlclose_child;
+
+ handler = xdlopen ("tst-atfork3mod.so", RTLD_NOW);
+
+ int (*atfork3mod_func)(void);
+ atfork3mod_func = xdlsym (handler, "atfork3mod_func");
+
+ atfork3mod_func ();
+
+ struct support_capture_subprocess proc
+ = support_capture_subprocess (proc_func, NULL);
+ support_capture_subprocess_check (&proc, "tst-atfork3", 0, sc_allow_none);
+
+ handler = atfork3mod_func = NULL;
+
+ support_capture_subprocess_free (&proc);
+}
+
+static void *
+thread_func (void *closure)
+{
+ return NULL;
+}
+
+static int
+do_test (void)
+{
+ {
+ /* Make the process act as multithreaded. */
+ pthread_attr_t attr;
+ xpthread_attr_init (&attr);
+ xpthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
+ xpthread_create (&attr, thread_func, NULL);
+ }
+
+ TEST_COMPARE (pthread_atfork (prepare, parent, child), 0);
+
+ do_test_generic (true /* prepare */, false /* parent */, false /* child */);
+ do_test_generic (false /* prepare */, true /* parent */, false /* child */);
+ do_test_generic (false /* prepare */, false /* parent */, true /* child */);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include <support/check.h>
+
+static void
+mod_prepare (void)
+{
+}
+
+static void
+mod_parent (void)
+{
+}
+
+static void
+mod_child (void)
+{
+}
+
+int atfork3mod_func (void)
+{
+ TEST_COMPARE (pthread_atfork (mod_prepare, mod_parent, mod_child), 0);
+
+ return 0;
+}
--- /dev/null
+/* pthread_atfork supports handlers that call pthread_atfork or dlclose.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <support/xdlfcn.h>
+#include <stdio.h>
+#include <support/xthread.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <support/xunistd.h>
+#include <support/check.h>
+#include <stdlib.h>
+
+static void *
+thread_func (void *x)
+{
+ return NULL;
+}
+
+static unsigned int second_atfork_handler_runcount = 0;
+
+static void
+second_atfork_handler (void)
+{
+ second_atfork_handler_runcount++;
+}
+
+static void *h = NULL;
+
+static unsigned int atfork_handler_runcount = 0;
+
+static void
+prepare (void)
+{
+ /* These atfork handlers are registered while atfork handlers are being
+ executed and thus will not be executed during the corresponding
+ fork. */
+ TEST_VERIFY_EXIT (pthread_atfork (second_atfork_handler,
+ second_atfork_handler,
+ second_atfork_handler) == 0);
+
+ /* This will de-register the atfork handlers registered by the dlopen'd
+ library and so they will not be executed. */
+ if (h != NULL)
+ {
+ xdlclose (h);
+ h = NULL;
+ }
+
+ atfork_handler_runcount++;
+}
+
+static void
+after (void)
+{
+ atfork_handler_runcount++;
+}
+
+static int
+do_test (void)
+{
+ /* Make sure __libc_single_threaded is 0. */
+ pthread_attr_t attr;
+ xpthread_attr_init (&attr);
+ xpthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
+ xpthread_create (&attr, thread_func, NULL);
+
+ void (*reg_atfork_handlers) (void);
+
+ h = xdlopen ("tst-atfork4mod.so", RTLD_LAZY);
+
+ reg_atfork_handlers = xdlsym (h, "reg_atfork_handlers");
+
+ reg_atfork_handlers ();
+
+ /* We register our atfork handlers *after* loading the module so that our
+ prepare handler is called first at fork, where we then dlclose the
+ module before its prepare handler has a chance to be called. */
+ TEST_VERIFY_EXIT (pthread_atfork (prepare, after, after) == 0);
+
+ pid_t pid = xfork ();
+
+ /* Both the parent and the child processes should observe this. */
+ TEST_VERIFY_EXIT (atfork_handler_runcount == 2);
+ TEST_VERIFY_EXIT (second_atfork_handler_runcount == 0);
+
+ if (pid > 0)
+ {
+ int childstat;
+
+ xwaitpid (-1, &childstat, 0);
+ TEST_VERIFY_EXIT (WIFEXITED (childstat)
+ && WEXITSTATUS (childstat) == 0);
+
+ /* This time, the second set of atfork handlers should also be called
+ since the handlers are already in place before fork is called. */
+
+ pid = xfork ();
+
+ TEST_VERIFY_EXIT (atfork_handler_runcount == 4);
+ TEST_VERIFY_EXIT (second_atfork_handler_runcount == 2);
+
+ if (pid > 0)
+ {
+ xwaitpid (-1, &childstat, 0);
+ TEST_VERIFY_EXIT (WIFEXITED (childstat)
+ && WEXITSTATUS (childstat) == 0);
+ }
+ }
+
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* pthread_atfork supports handlers that call pthread_atfork or dlclose.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <pthread.h>
+#include <stdlib.h>
+
+/* This dynamically loaded library simply registers its atfork handlers when
+ asked to. The atfork handlers should never be executed because the
+ library is unloaded before fork is called by the test program. */
+
+static void
+prepare (void)
+{
+ abort ();
+}
+
+static void
+parent (void)
+{
+ abort ();
+}
+
+static void
+child (void)
+{
+ abort ();
+}
+
+void
+reg_atfork_handlers (void)
+{
+ pthread_atfork (prepare, parent, child);
+}
xpthread_cancel (timer_thread);
- xpthread_barrier_init (&barrier, NULL, 2);
xpthread_barrier_wait (&barrier);
return 0;
--- /dev/null
+/* Check that a thread that disables cancellation and calls functions that
+ might be interrupted by a signal does not see the internal SIGCANCEL.
+
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <array_length.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <poll.h>
+#include <support/check.h>
+#include <support/support.h>
+#include <support/temp_file.h>
+#include <support/xthread.h>
+#include <sys/socket.h>
+#include <signal.h>
+#include <stdio.h>
+#include <unistd.h>
+
+/* On Linux some interfaces are never restarted after being interrupted by
+ a signal handler, regardless of the use of SA_RESTART. It means that
+ if asynchronous cancellation is not enabled, pthread_cancel must not
+ send the internal SIGCANCEL; otherwise the interface might see a spurious
+ EINTR failure. */
+
+static pthread_barrier_t b;
+
+/* Cleanup handling test. */
+static int cl_called;
+static void
+cl (void *arg)
+{
+ ++cl_called;
+}
+
+static void *
+tf_sigtimedwait (void *arg)
+{
+ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL);
+ xpthread_barrier_wait (&b);
+
+ int r;
+ pthread_cleanup_push (cl, NULL);
+
+ sigset_t mask;
+ sigemptyset (&mask);
+ r = sigtimedwait (&mask, NULL, &(struct timespec) { 0, 250000000 });
+ if (r != -1)
+ return (void*) -1;
+ if (errno != EAGAIN)
+ return (void*) -2;
+
+ pthread_cleanup_pop (0);
+ return NULL;
+}
+
+static void *
+tf_poll (void *arg)
+{
+ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL);
+ xpthread_barrier_wait (&b);
+
+ int r;
+ pthread_cleanup_push (cl, NULL);
+
+ r = poll (NULL, 0, 250);
+ if (r != 0)
+ return (void*) -1;
+
+ pthread_cleanup_pop (0);
+ return NULL;
+}
+
+static void *
+tf_ppoll (void *arg)
+{
+ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL);
+
+ xpthread_barrier_wait (&b);
+
+ int r;
+ pthread_cleanup_push (cl, NULL);
+
+ r = ppoll (NULL, 0, &(struct timespec) { 0, 250000000 }, NULL);
+ if (r != 0)
+ return (void*) -1;
+
+ pthread_cleanup_pop (0);
+ return NULL;
+}
+
+static void *
+tf_select (void *arg)
+{
+ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL);
+ xpthread_barrier_wait (&b);
+
+ int r;
+ pthread_cleanup_push (cl, NULL);
+
+ r = select (0, NULL, NULL, NULL, &(struct timeval) { 0, 250000 });
+ if (r != 0)
+ return (void*) -1;
+
+ pthread_cleanup_pop (0);
+ return NULL;
+}
+
+static void *
+tf_pselect (void *arg)
+{
+ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL);
+ xpthread_barrier_wait (&b);
+
+ int r;
+ pthread_cleanup_push (cl, NULL);
+
+ r = pselect (0, NULL, NULL, NULL, &(struct timespec) { 0, 250000000 }, NULL);
+ if (r != 0)
+ return (void*) -1;
+
+ pthread_cleanup_pop (0);
+ return NULL;
+}
+
+static void *
+tf_clock_nanosleep (void *arg)
+{
+ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL);
+ xpthread_barrier_wait (&b);
+
+ int r;
+ pthread_cleanup_push (cl, NULL);
+
+ r = clock_nanosleep (CLOCK_REALTIME, 0, &(struct timespec) { 0, 250000000 },
+ NULL);
+ if (r != 0)
+ return (void*) -1;
+
+ pthread_cleanup_pop (0);
+ return NULL;
+}
+
+struct cancel_test_t
+{
+ const char *name;
+ void * (*cf) (void *);
+} tests[] =
+{
+ { "sigtimedwait", tf_sigtimedwait, },
+ { "poll", tf_poll, },
+ { "ppoll", tf_ppoll, },
+ { "select", tf_select, },
+ { "pselect", tf_pselect , },
+ { "clock_nanosleep", tf_clock_nanosleep, },
+};
+
+static int
+do_test (void)
+{
+ for (int i = 0; i < array_length (tests); i++)
+ {
+ xpthread_barrier_init (&b, NULL, 2);
+
+ cl_called = 0;
+
+ pthread_t th = xpthread_create (NULL, tests[i].cf, NULL);
+
+ xpthread_barrier_wait (&b);
+
+ struct timespec ts = { .tv_sec = 0, .tv_nsec = 100000000 };
+ while (nanosleep (&ts, &ts) != 0)
+ continue;
+
+ xpthread_cancel (th);
+
+ void *status = xpthread_join (th);
+ if (status != NULL)
+ printf ("test '%s' failed: %" PRIdPTR "\n", tests[i].name,
+ (intptr_t) status);
+ TEST_VERIFY (status == NULL);
+
+ xpthread_barrier_destroy (&b);
+
+ TEST_COMPARE (cl_called, 0);
+
+ printf ("in-time cancel test of '%s' successful\n", tests[i].name);
+ }
+
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Check that printf-like functions do not disable asynchronous cancellation
+ mode (BZ#29214).
+
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <support/check.h>
+#include <support/xstdio.h>
+#include <support/xthread.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+static pthread_barrier_t b;
+
+static void *
+tf (void *arg)
+{
+ int old;
+
+ TEST_COMPARE (pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS, NULL), 0);
+
+ TEST_COMPARE (pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS, &old), 0);
+ TEST_COMPARE (old, PTHREAD_CANCEL_ASYNCHRONOUS);
+
+ /* Check if internal lock cleanup routines restore the cancellation type
+ correctly. */
+ printf ("...\n");
+ TEST_COMPARE (pthread_setcanceltype (PTHREAD_CANCEL_ASYNCHRONOUS, &old), 0);
+ TEST_COMPARE (old, PTHREAD_CANCEL_ASYNCHRONOUS);
+
+ xpthread_barrier_wait (&b);
+
+ /* Wait indefinitely for cancellation, which only works if asynchronous
+ cancellation is enabled. */
+#ifdef SYS_pause
+ syscall (SYS_pause);
+#elif defined SYS_ppoll || defined SYS_ppoll_time64
+# ifndef SYS_ppoll_time64
+# define SYS_ppoll_time64 SYS_ppoll
+# endif
+ syscall (SYS_ppoll_time64, NULL, 0, NULL, NULL);
+#else
+ for (;;);
+#endif
+
+ return 0;
+}
+
+static int
+do_test (void)
+{
+ xpthread_barrier_init (&b, NULL, 2);
+
+ pthread_t th = xpthread_create (NULL, tf, NULL);
+
+ xpthread_barrier_wait (&b);
+
+ xpthread_cancel (th);
+
+ void *status = xpthread_join (th);
+ TEST_VERIFY (status == PTHREAD_CANCELED);
+
+ return 0;
+}
+
+/* No need to wait the full timeout if asynchronous cancellation is broken. */
+#define TIMEOUT 3
+#include <support/test-driver.c>
--- /dev/null
+/* Verify that pthread_create does not deadlock when ctors take locks.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+#include <support/xdlfcn.h>
+#include <support/xthread.h>
+
+/*
+Check if ctor and pthread_create deadlocks in
+
+thread 1: dlopen -> ctor -> lock(user_lock)
+thread 2: lock(user_lock) -> pthread_create
+
+or in
+
+thread 1: dlclose -> dtor -> lock(user_lock)
+thread 2: lock(user_lock) -> pthread_create
+*/
+
+static pthread_barrier_t bar_ctor;
+static pthread_barrier_t bar_ctor_finish;
+static pthread_barrier_t bar_dtor;
+static pthread_mutex_t user_lock = PTHREAD_MUTEX_INITIALIZER;
+
+void
+ctor (void)
+{
+ xpthread_barrier_wait (&bar_ctor);
+ dprintf (1, "thread 1: in ctor: started.\n");
+ xpthread_mutex_lock (&user_lock);
+ dprintf (1, "thread 1: in ctor: locked user_lock.\n");
+ xpthread_mutex_unlock (&user_lock);
+ dprintf (1, "thread 1: in ctor: unlocked user_lock.\n");
+ dprintf (1, "thread 1: in ctor: done.\n");
+ xpthread_barrier_wait (&bar_ctor_finish);
+}
+
+void
+dtor (void)
+{
+ xpthread_barrier_wait (&bar_dtor);
+ dprintf (1, "thread 1: in dtor: started.\n");
+ xpthread_mutex_lock (&user_lock);
+ dprintf (1, "thread 1: in dtor: locked user_lock.\n");
+ xpthread_mutex_unlock (&user_lock);
+ dprintf (1, "thread 1: in dtor: unlocked user_lock.\n");
+ dprintf (1, "thread 1: in dtor: done.\n");
+}
+
+static void *
+thread3 (void *a)
+{
+ dprintf (1, "thread 3: started.\n");
+ dprintf (1, "thread 3: done.\n");
+ return 0;
+}
+
+static void *
+thread2 (void *a)
+{
+ pthread_t t3;
+ dprintf (1, "thread 2: started.\n");
+
+ xpthread_mutex_lock (&user_lock);
+ dprintf (1, "thread 2: locked user_lock.\n");
+ xpthread_barrier_wait (&bar_ctor);
+ t3 = xpthread_create (0, thread3, 0);
+ xpthread_mutex_unlock (&user_lock);
+ dprintf (1, "thread 2: unlocked user_lock.\n");
+ xpthread_join (t3);
+ xpthread_barrier_wait (&bar_ctor_finish);
+
+ xpthread_mutex_lock (&user_lock);
+ dprintf (1, "thread 2: locked user_lock.\n");
+ xpthread_barrier_wait (&bar_dtor);
+ t3 = xpthread_create (0, thread3, 0);
+ xpthread_mutex_unlock (&user_lock);
+ dprintf (1, "thread 2: unlocked user_lock.\n");
+ xpthread_join (t3);
+
+ dprintf (1, "thread 2: done.\n");
+ return 0;
+}
+
+static void
+thread1 (void)
+{
+ dprintf (1, "thread 1: started.\n");
+ xpthread_barrier_init (&bar_ctor, NULL, 2);
+ xpthread_barrier_init (&bar_ctor_finish, NULL, 2);
+ xpthread_barrier_init (&bar_dtor, NULL, 2);
+ pthread_t t2 = xpthread_create (0, thread2, 0);
+ void *p = xdlopen ("tst-create1mod.so", RTLD_NOW | RTLD_GLOBAL);
+ dprintf (1, "thread 1: dlopen done.\n");
+ xdlclose (p);
+ dprintf (1, "thread 1: dlclose done.\n");
+ xpthread_join (t2);
+ dprintf (1, "thread 1: done.\n");
+}
+
+static int
+do_test (void)
+{
+ thread1 ();
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Verify that pthread_create does not deadlock when ctors take locks.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+
+/* Require TLS setup for the module. */
+__thread int tlsvar;
+
+void ctor (void);
+void dtor (void);
+
+static void __attribute__ ((constructor))
+do_init (void)
+{
+ dprintf (1, "constructor started: %d.\n", tlsvar++);
+ ctor ();
+ dprintf (1, "constructor done: %d.\n", tlsvar++);
+}
+
+static void __attribute__ ((destructor))
+do_end (void)
+{
+ dprintf (1, "destructor started: %d.\n", tlsvar++);
+ dtor ();
+ dprintf (1, "destructor done: %d.\n", tlsvar++);
+}
+++ /dev/null
-/* Copyright (C) 2003-2021 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@redhat.com>, 2003.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <errno.h>
-#include <pthread.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-
-static void *
-tf (void *a)
-{
- return NULL;
-}
-
-
-int
-do_test (void)
-{
- pthread_attr_t at;
- if (pthread_attr_init (&at) != 0)
- {
- puts ("attr_create failed");
- exit (1);
- }
-
- /* Limit thread stack size, because if it is too large, pthread_join
- will free it immediately rather than put it into stack cache. */
- if (pthread_attr_setstacksize (&at, 2 * 1024 * 1024) != 0)
- {
- puts ("setstacksize failed");
- exit (1);
- }
-
- pthread_t th;
- if (pthread_create (&th, &at, tf, NULL) != 0)
- {
- puts ("create failed");
- exit (1);
- }
-
- pthread_attr_destroy (&at);
-
- if (pthread_join (th, NULL) != 0)
- {
- puts ("join failed");
- exit (1);
- }
-
- /* The following only works because we assume here something about
- the implementation. Namely, that the memory allocated for the
- thread descriptor is not going away, that the TID field is
- cleared and therefore the signal is sent to process 0, and that
- we can savely assume there is no other process with this ID at
- that time. */
- int e = pthread_kill (th, 0);
- if (e == 0)
- {
- puts ("pthread_kill succeeded");
- exit (1);
- }
- if (e != ESRCH)
- {
- puts ("pthread_kill didn't return ESRCH");
- exit (1);
- }
-
- return 0;
-}
-
-
-#define TEST_FUNCTION do_test ()
-#include "../test-skeleton.c"
--- /dev/null
+/* Test that pending signals are not delivered on thread exit (bug 28607).
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Due to bug 28607, pthread_kill (or pthread_cancel) restored the
+ signal mask during thread exit, triggering the delivery of a
+ blocked pending signal (SIGUSR1 in this test). */
+
+#include <support/xthread.h>
+#include <support/xsignal.h>
+
+static void *
+threadfunc (void *closure)
+{
+ sigset_t sigmask;
+ sigfillset (&sigmask);
+ xpthread_sigmask (SIG_SETMASK, &sigmask, NULL);
+ xpthread_kill (pthread_self (), SIGUSR1);
+ pthread_exit (NULL);
+ return NULL;
+}
+
+static int
+do_test (void)
+{
+ pthread_t thr = xpthread_create (NULL, threadfunc, NULL);
+ xpthread_join (thr);
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Test that raise sends signal to current thread even if blocked.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <signal.h>
+#include <support/check.h>
+#include <support/xsignal.h>
+#include <support/xthread.h>
+#include <pthread.h>
+#include <unistd.h>
+
+/* Used to create a dummy thread ID distinct from all other thread
+ IDs. */
+static void *
+noop (void *ignored)
+{
+ return NULL;
+}
+
+static volatile pthread_t signal_thread;
+
+static void
+signal_handler (int signo)
+{
+ signal_thread = pthread_self ();
+}
+
+/* Used to ensure that waiting_thread has launched and can accept
+ signals. */
+static pthread_barrier_t barrier;
+
+/* Helper thread: announce via the barrier that it is running, then
+ block in pause. do_test cancels this thread at the end of the
+ test. IGNORED is unused. */
+static void *
+waiting_thread (void *ignored)
+{
+ xpthread_barrier_wait (&barrier);
+ pause ();
+ return NULL;
+}
+
+/* Test entry point: verify that raise targets the calling thread even
+ while the signal is blocked there, i.e. that the handler does not
+ run on another thread and runs on this thread once the signal is
+ unblocked. Returns 0; failures are reported via TEST_VERIFY. */
+static int
+do_test (void)
+{
+ xsignal (SIGUSR1, signal_handler);
+ xpthread_barrier_init (&barrier, NULL, 2);
+
+ /* Distinct thread ID value used to detect whether signal_handler
+ has run (the handler overwrites signal_thread). */
+ pthread_t dummy = xpthread_create (NULL, noop, NULL);
+ signal_thread = dummy;
+
+ pthread_t helper = xpthread_create (NULL, waiting_thread, NULL);
+
+ /* Make sure that the thread is running. */
+ xpthread_barrier_wait (&barrier);
+
+ /* Block signals on this thread. */
+ sigset_t set;
+ sigfillset (&set);
+ xpthread_sigmask (SIG_BLOCK, &set, NULL);
+
+ /* Send the signal to this thread. It must not be delivered. */
+ raise (SIGUSR1);
+ TEST_VERIFY (signal_thread == dummy);
+
+ /* Wait a bit to give a chance for signal delivery (increases
+ chances of failure with bug 28407). */
+ usleep (50 * 1000);
+
+ /* Unblocking should cause synchronous delivery of the signal. */
+ xpthread_sigmask (SIG_UNBLOCK, &set, NULL);
+ TEST_VERIFY (signal_thread == pthread_self ());
+
+ /* Clean up: the helper is still blocked in pause, so cancel it
+ before joining. */
+ xpthread_cancel (helper);
+ xpthread_join (helper);
+ xpthread_join (dummy);
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Test that setuid, pthread_create, thread exit do not deadlock (bug 28361).
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <support/check.h>
+#include <support/xthread.h>
+#include <unistd.h>
+
+/* How many threads to launch during each iteration. */
+enum { threads = 4 };
+
+/* How many iterations to perform. This value seems to reproduce
+ bug 28361 in about one in three runs. */
+enum { iterations = 5000 };
+
+/* Cache of the real user ID used by setuid_thread. */
+static uid_t uid;
+
+/* Start routine for the threads. Calls setuid with the cached UID
+ and checks that the call returns 0. CLOSURE is unused. */
+static void *
+setuid_thread (void *closure)
+{
+ TEST_COMPARE (setuid (uid), 0);
+ return NULL;
+}
+
+/* Test entry point: for each iteration, launch a batch of threads
+ that call setuid and join the whole batch before starting the next
+ one. Returns 0. */
+static int
+do_test (void)
+{
+ /* The setxid machinery is still invoked even if the UID is
+ unchanged. (The kernel might reset other credentials as part of
+ the system call.) */
+ uid = getuid ();
+
+ for (int i = 0; i < iterations; ++i)
+ {
+ pthread_t thread_ids[threads];
+ for (int j = 0; j < threads; ++j)
+ thread_ids[j] = xpthread_create (NULL, setuid_thread, NULL);
+ for (int j = 0; j < threads; ++j)
+ xpthread_join (thread_ids[j]);
+ }
+
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Test that pthread_cancel succeeds for an exited thread.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* This test verifies that pthread_cancel returns 0 (and not ESRCH) for
+ a thread that has exited on the kernel side. */
+
+#include <stddef.h>
+#include <support/support.h>
+#include <support/xthread.h>
+
+/* Thread start routine that returns immediately. CLOSURE is
+ unused. */
+static void *
+noop_thread (void *closure)
+{
+ return NULL;
+}
+
+/* Test entry point: create a thread that exits right away, wait for
+ its exit, and then cancel and join it. Returns 0. */
+static int
+do_test (void)
+{
+ pthread_t thr = xpthread_create (NULL, noop_thread, NULL);
+
+ /* NOTE(review): presumably waits until the thread has exited on the
+ kernel side; confirm against the support library. */
+ support_wait_for_thread_exit ();
+
+ /* The thread has not been joined yet, so its ID is still valid. */
+ xpthread_cancel (thr);
+ xpthread_join (thr);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Test that pthread_cancel succeeds during thread exit.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* This test tries to trigger an internal race condition in
+ pthread_cancel, where the cancellation signal is sent after the
+ thread has begun the cancellation process. This can result in a
+ spurious ESRCH error. For the original bug 12889, the window is
+ quite small, so the bug was not reproduced in every run. */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <support/check.h>
+#include <support/xthread.h>
+#include <support/xunistd.h>
+#include <sys/select.h>
+#include <unistd.h>
+
+/* Set to true by timeout_thread_function when the test should
+ terminate. Accessed with relaxed atomic loads and stores. */
+static bool timeout;
+
+/* Timer thread: sleep for five seconds, then set the timeout flag.
+ UNUSED is unused. */
+static void *
+timeout_thread_function (void *unused)
+{
+ usleep (5 * 1000 * 1000);
+ __atomic_store_n (&timeout, true, __ATOMIC_RELAXED);
+ return NULL;
+}
+
+/* Used for blocking the select function below. */
+static int pipe_fds[2];
+
+/* Thread to be canceled: loop forever calling select on the read end
+ of the pipe. The loop has no exit condition of its own; do_test
+ cancels the thread. UNUSED is unused. */
+static void *
+canceled_thread_function (void *unused)
+{
+ while (true)
+ {
+ fd_set rfs;
+ fd_set wfs;
+ fd_set efs;
+ FD_ZERO (&rfs);
+ FD_ZERO (&wfs);
+ FD_ZERO (&efs);
+ FD_SET (pipe_fds[0], &rfs);
+
+ /* If the cancellation request is recognized early, the thread
+ begins exiting while the cancellation signal arrives. */
+ select (FD_SETSIZE, &rfs, &wfs, &efs, NULL);
+ }
+ return NULL;
+}
+
+/* Test entry point: until the timeout flag is set, repeatedly create
+ a thread running canceled_thread_function, cancel it at once, and
+ check that joining it yields PTHREAD_CANCELED. Returns 0. */
+static int
+do_test (void)
+{
+ xpipe (pipe_fds);
+ pthread_t thr_timeout = xpthread_create (NULL, timeout_thread_function, NULL);
+
+ while (!__atomic_load_n (&timeout, __ATOMIC_RELAXED))
+ {
+ pthread_t thr = xpthread_create (NULL, canceled_thread_function, NULL);
+ xpthread_cancel (thr);
+ TEST_VERIFY (xpthread_join (thr) == PTHREAD_CANCELED);
+ }
+
+ xpthread_join (thr_timeout);
+ xclose (pipe_fds[0]);
+ xclose (pipe_fds[1]);
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Test that pthread_kill succeeds for an exited thread.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* This test verifies that the default pthread_kill returns 0 (and not
+ ESRCH) for a thread that has exited on the kernel side. */
+
+#include <errno.h>
+#include <pthread.h>
+#include <shlib-compat.h>
+#include <signal.h>
+#include <stddef.h>
+#include <support/check.h>
+#include <support/support.h>
+#include <support/xthread.h>
+
+/* Thread start routine that returns immediately. CLOSURE is
+ unused. */
+static void *
+noop_thread (void *closure)
+{
+ return NULL;
+}
+
+#if TEST_COMPAT (libpthread, GLIBC_2_0, GLIBC_2_34) && PTHREAD_IN_LIBC
+extern __typeof (pthread_kill) compat_pthread_kill;
+compat_symbol_reference (libpthread, compat_pthread_kill, pthread_kill,
+ GLIBC_2_0);
+#endif
+
+/* Test entry point: create a thread that exits right away, wait for
+ its exit, and then send SIGUSR1 to it with pthread_kill before
+ joining it. When the compat symbol is available, also check that
+ the GLIBC_2.0 pthread_kill reports ESRCH. Returns 0. */
+static int
+do_test (void)
+{
+ pthread_t thr = xpthread_create (NULL, noop_thread, NULL);
+
+ /* NOTE(review): presumably waits until the thread has exited on the
+ kernel side; confirm against the support library. */
+ support_wait_for_thread_exit ();
+
+ /* NB: Always uses the default symbol due to separate compilation. */
+ xpthread_kill (thr, SIGUSR1);
+
+#if TEST_COMPAT (libpthread, GLIBC_2_0, GLIBC_2_34) && PTHREAD_IN_LIBC
+ /* Old binaries need the non-conforming ESRCH error code. */
+ TEST_COMPARE (compat_pthread_kill (thr, SIGUSR1), ESRCH);
+#endif
+
+ xpthread_join (thr);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Test that pthread_kill succeeds during thread exit.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* This test verifies that pthread_kill for a thread that is exiting
+ succeeds (with or without actually delivering the signal). */
+
+#include <array_length.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <support/xsignal.h>
+#include <support/xthread.h>
+#include <unistd.h>
+
+/* Set to true by timeout_thread_function when the test should
+ terminate. Accessed with relaxed atomic loads and stores. */
+static bool timeout;
+
+/* Timer thread: sleep for one second, then set the timeout flag.
+ UNUSED is unused. */
+static void *
+timeout_thread_function (void *unused)
+{
+ usleep (1000 * 1000);
+ __atomic_store_n (&timeout, true, __ATOMIC_RELAXED);
+ return NULL;
+}
+
+/* Used to synchronize the sending threads with the target thread and
+ main thread. */
+static pthread_barrier_t barrier_1;
+static pthread_barrier_t barrier_2;
+
+/* The target thread to which signals are to be sent. */
+static pthread_t target_thread;
+
+/* Set by the main thread to true after timeout has been set to
+ true. */
+static bool exiting;
+
+/* Sender thread: in each round, wait on barrier_1, send SIGUSR1 to
+ target_thread, and then wait on barrier_2. The loop ends when
+ exiting is found set after the barrier_1 wait. UNUSED is unused. */
+static void *
+sender_thread_function (void *unused)
+{
+ while (true)
+ {
+ /* Wait until target_thread has been initialized. The target
+ thread and main thread participate in this barrier. */
+ xpthread_barrier_wait (&barrier_1);
+
+ if (exiting)
+ break;
+
+ xpthread_kill (target_thread, SIGUSR1);
+
+ /* Communicate that the signal has been sent. The main thread
+ participates in this barrier. */
+ xpthread_barrier_wait (&barrier_2);
+ }
+ return NULL;
+}
+
+/* Target thread: publish its own ID in target_thread, wait on
+ barrier_1, and then return, so that it is exiting while the sender
+ threads call pthread_kill on it. UNUSED is unused. */
+static void *
+target_thread_function (void *unused)
+{
+ target_thread = pthread_self ();
+ xpthread_barrier_wait (&barrier_1);
+ return NULL;
+}
+
+/* Test entry point: start a timer thread and four sender threads,
+ then, until the timeout flag is set, repeatedly create a
+ short-lived target thread for the senders to signal. Finally tell
+ the senders to exit and join them. Returns 0. */
+static int
+do_test (void)
+{
+ /* SIGUSR1 is ignored, so a delivered signal has no effect. */
+ xsignal (SIGUSR1, SIG_IGN);
+
+ pthread_t thr_timeout = xpthread_create (NULL, timeout_thread_function, NULL);
+
+ pthread_t threads[4];
+ /* barrier_1: the sender threads, the target thread and this thread.
+ barrier_2: the sender threads and this thread. */
+ xpthread_barrier_init (&barrier_1, NULL, array_length (threads) + 2);
+ xpthread_barrier_init (&barrier_2, NULL, array_length (threads) + 1);
+
+ for (int i = 0; i < array_length (threads); ++i)
+ threads[i] = xpthread_create (NULL, sender_thread_function, NULL);
+
+ while (!__atomic_load_n (&timeout, __ATOMIC_RELAXED))
+ {
+ xpthread_create (NULL, target_thread_function, NULL);
+
+ /* Wait for the target thread to be set up and signal sending to
+ start. */
+ xpthread_barrier_wait (&barrier_1);
+
+ /* Wait for signal sending to complete. */
+ xpthread_barrier_wait (&barrier_2);
+
+ xpthread_join (target_thread);
+ }
+
+ exiting = true;
+
+ /* Signal the sending threads to exit. */
+ /* One more target thread is needed so that barrier_1 reaches its
+ participant count. This last target thread is not joined. */
+ xpthread_create (NULL, target_thread_function, NULL);
+ xpthread_barrier_wait (&barrier_1);
+
+ for (int i = 0; i < array_length (threads); ++i)
+ xpthread_join (threads[i]);
+ xpthread_join (thr_timeout);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
#include <sys/asm.h>
#include <dl-tls.h>
#include <dl-irel.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
#ifndef _RTLD_PROLOGUE
# define _RTLD_PROLOGUE(entry) \
|| (__WORDSIZE == 64 && (type) == R_RISCV_TLS_TPREL64))) \
| (ELF_RTYPE_CLASS_COPY * ((type) == R_RISCV_COPY)))
-#define ELF_MACHINE_NO_REL 1
-#define ELF_MACHINE_NO_RELA 0
-
/* Return nonzero iff ELF header is compatible with the running host. */
static inline int __attribute_used__
elf_machine_matches_host (const ElfW(Ehdr) *ehdr)
sll a3, a1, " STRINGXP (PTRLOG) "\n\
add a3, a3, a2\n\
add a3, a3, " STRINGXP (SZREG) "\n\
+ # Stash the stack pointer in s1.\n\
+ mv s1, sp\n\
+ # Align stack to 128 bits for the _dl_init call.\n\
+ andi sp, sp,-16\n\
# Call the function to run the initializers.\n\
jal _dl_init\n\
+ # Restore the stack pointer for _start.\n\
+ mv sp, s1\n\
# Pass our finalizer function to _start.\n\
lla a0, _dl_fini\n\
# Jump to the user entry point.\n\
by RELOC_ADDR. SYM is the relocation symbol specified by R_INFO and
MAP is the object containing the reloc. */
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
- const ElfW(Sym) *sym, const struct r_found_version *version,
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
+ const ElfW(Rela) *reloc, const ElfW(Sym) *sym,
+ const struct r_found_version *version,
void *const reloc_addr, int skip_ifunc)
{
ElfW(Addr) r_info = reloc->r_info;
const unsigned long int r_type = ELFW (R_TYPE) (r_info);
ElfW(Addr) *addr_field = (ElfW(Addr) *) reloc_addr;
const ElfW(Sym) *const __attribute__ ((unused)) refsym = sym;
- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type);
ElfW(Addr) value = 0;
if (sym_map != NULL)
value = SYMBOL_ADDRESS (sym_map, sym, true) + reloc->r_addend;
}
}
-auto inline void
+static inline void
__attribute__ ((always_inline))
elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc,
void *const reloc_addr)
*(ElfW(Addr) *) reloc_addr = l_addr + reloc->r_addend;
}
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_lazy_rel (struct link_map *map, ElfW(Addr) l_addr,
- const ElfW(Rela) *reloc, int skip_ifunc)
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
+ ElfW(Addr) l_addr, const ElfW(Rela) *reloc,
+ int skip_ifunc)
{
ElfW(Addr) *const reloc_addr = (void *) (l_addr + reloc->r_offset);
const unsigned int r_type = ELFW (R_TYPE) (reloc->r_info);
/* Set up the loaded object described by L so its stub function
will jump to the on-demand fixup code __dl_runtime_resolve. */
-auto inline int
+static inline int
__attribute__ ((always_inline))
-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ int lazy, int profile)
{
#ifndef RTLD_BOOTSTRAP
/* If using PLTs, fill in the first two entries of .got.plt. */
--- /dev/null
+/* Check if dynamic section should be relocated. RISC-V version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _DL_RELOCATE_LD_H
+#define _DL_RELOCATE_LD_H
+
+/* The dynamic section is readonly for ABI compatibility. */
+#define DL_RO_DYN_SECTION 1
+
+#endif /* _DL_RELOCATE_LD_H */
struct La_riscv_retval *, \
const char *);
-/* Although the RISC-V ABI does not specify that the dynamic section has
- to be read-only, it needs to be kept for ABI compatibility. */
-
-#define DL_RO_DYN_SECTION 1
-
#include_next <ldsodefs.h>
#endif
#if !defined PROCINFO_DECL && defined SHARED
._dl_s390_cap_flags
#else
-PROCINFO_CLASS const char _dl_s390_cap_flags[21][9]
+PROCINFO_CLASS const char _dl_s390_cap_flags[23][9]
#endif
#ifndef PROCINFO_DECL
= {
"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", "edat", "etf3eh",
"highgprs", "te", "vx", "vxd", "vxe", "gs", "vxe2", "vxp", "sort", "dflt",
- "vxp2", "nnpa"
+ "vxp2", "nnpa", "pcimio", "sie"
}
#endif
#if !defined SHARED || defined PROCINFO_DECL
#if !defined PROCINFO_DECL && defined SHARED
._dl_s390_platforms
#else
-PROCINFO_CLASS const char _dl_s390_platforms[10][7]
+PROCINFO_CLASS const char _dl_s390_platforms[11][7]
#endif
#ifndef PROCINFO_DECL
= {
- "g5", "z900", "z990", "z9-109", "z10", "z196", "zEC12", "z13", "z14", "z15"
+ "g5", "z900", "z990", "z9-109", "z10", "z196", "zEC12", "z13", "z14", "z15",
+ "z16"
}
#endif
#if !defined SHARED || defined PROCINFO_DECL
#define _DL_PROCINFO_H 1
#include <ldsodefs.h>
-#define _DL_HWCAP_COUNT 21
+#define _DL_HWCAP_COUNT 23
-#define _DL_PLATFORMS_COUNT 10
+#define _DL_PLATFORMS_COUNT 11
/* The kernel provides up to 32 capability bits with elf_hwcap. */
#define _DL_FIRST_PLATFORM 32
HWCAP_S390_DFLT = 1 << 18,
HWCAP_S390_VXRS_PDE2 = 1 << 19,
HWCAP_S390_NNPA = 1 << 20,
+ HWCAP_S390_PCI_MIO = 1 << 21,
+ HWCAP_S390_SIE = 1 << 22,
};
#define HWCAP_IMPORTANT (HWCAP_S390_ZARCH | HWCAP_S390_LDISP \
# error The arch13 variant of memmem needs the z13 variant of memmem!
# endif
clgfi %r5,9
- jh MEMMEM_Z13
+ jgh MEMMEM_Z13
aghik %r0,%r5,-1 /* vll needs highest index. */
bc 4,0(%r14) /* cc==1: return if needle-len == 0. */
#include <link.h>
#include <sysdeps/s390/dl-procinfo.h>
#include <dl-irel.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
/* This is an older, now obsolete value. */
#define EM_S390_OLD 0xA390
entries will jump to the on-demand fixup code in dl-runtime.c. */
static inline int __attribute__ ((unused))
-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ int lazy, int profile)
{
extern void _dl_runtime_resolve (Elf32_Word);
extern void _dl_runtime_profile (Elf32_Word);
/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
#define ELF_MACHINE_JMP_SLOT R_390_JMP_SLOT
-/* The S390 never uses Elf32_Rel relocations. */
-#define ELF_MACHINE_NO_REL 1
-#define ELF_MACHINE_NO_RELA 0
-
/* We define an initialization functions. This is called very early in
_dl_sysdep_start. */
#define DL_PLATFORM_INIT dl_platform_init ()
/* Perform the relocation specified by RELOC and SYM (which is fully resolved).
MAP is the object containing the reloc. */
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
- const Elf32_Sym *sym, const struct r_found_version *version,
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
+ const Elf32_Rela *reloc, const Elf32_Sym *sym,
+ const struct r_found_version *version,
void *const reloc_addr_arg, int skip_ifunc)
{
Elf32_Addr *const reloc_addr = reloc_addr_arg;
/* Only needed for R_390_COPY below. */
const Elf32_Sym *const refsym = sym;
#endif
- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
+ r_type);
Elf32_Addr value = SYMBOL_ADDRESS (sym_map, sym, true);
if (sym != NULL
}
}
-auto inline void
+static inline void
__attribute__ ((always_inline))
elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc,
void *const reloc_addr_arg)
*reloc_addr = l_addr + reloc->r_addend;
}
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_lazy_rel (struct link_map *map,
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
Elf32_Addr l_addr, const Elf32_Rela *reloc,
int skip_ifunc)
{
basr %r1,0
5: l %r14,7f-5b(%r1)
la %r5,CFA_OFF+RETVAL_OFF(%r12) # struct La_s390_32_retval *
- bas %r14,0(%r14,%r1) # call _dl_call_pltexit
+ bas %r14,0(%r14,%r1) # call _dl_audit_pltexit
lr %r15,%r12 # remove stack frame
# undef FRAME_SIZE
br %r14
6: .long _dl_profile_fixup - 0b
-7: .long _dl_call_pltexit - 5b
+7: .long _dl_audit_pltexit - 5b
cfi_endproc
.size _dl_runtime_profile, .-_dl_runtime_profile
# undef SIZEOF_STRUCT_LA_S390_32_REGS
CFLAGS-dl-load.c += -Wno-unused
CFLAGS-dl-reloc.c += -Wno-unused
-$(objpfx)tst-glibc-hwcaps: $(objpfx)libmarkermod2-1.so \
- $(objpfx)libmarkermod3-1.so $(objpfx)libmarkermod4-1.so
+$(objpfx)tst-glibc-hwcaps: \
+ $(objpfx)libmarkermod2-1.so \
+ $(objpfx)libmarkermod3-1.so \
+ $(objpfx)libmarkermod4-1.so \
+ $(objpfx)libmarkermod5-1.so
$(objpfx)tst-glibc-hwcaps.out: \
$(objpfx)libmarkermod2.so \
$(objpfx)glibc-hwcaps/z13/libmarkermod2.so \
$(objpfx)glibc-hwcaps/z13/libmarkermod4.so \
$(objpfx)glibc-hwcaps/z14/libmarkermod4.so \
$(objpfx)glibc-hwcaps/z15/libmarkermod4.so \
+ $(objpfx)libmarkermod5.so \
+ $(objpfx)glibc-hwcaps/z13/libmarkermod5.so \
+ $(objpfx)glibc-hwcaps/z14/libmarkermod5.so \
+ $(objpfx)glibc-hwcaps/z15/libmarkermod5.so \
+ $(objpfx)glibc-hwcaps/z16/libmarkermod5.so
$(objpfx)glibc-hwcaps/z13/libmarkermod2.so: $(objpfx)libmarkermod2-2.so
$(make-target-directory)
$(objpfx)glibc-hwcaps/z15/libmarkermod4.so: $(objpfx)libmarkermod4-4.so
$(make-target-directory)
cp $< $@
+$(objpfx)glibc-hwcaps/z13/libmarkermod5.so: $(objpfx)libmarkermod5-2.so
+ $(make-target-directory)
+ cp $< $@
+$(objpfx)glibc-hwcaps/z14/libmarkermod5.so: $(objpfx)libmarkermod5-3.so
+ $(make-target-directory)
+ cp $< $@
+$(objpfx)glibc-hwcaps/z15/libmarkermod5.so: $(objpfx)libmarkermod5-4.so
+ $(make-target-directory)
+ cp $< $@
+$(objpfx)glibc-hwcaps/z16/libmarkermod5.so: $(objpfx)libmarkermod5-5.so
+ $(make-target-directory)
+ cp $< $@
+
ifeq (no,$(build-hardcoded-path-in-tests))
# This is an ld.so.cache test, and RPATH/RUNPATH in the executable
--- /dev/null
+# This file is generated from configure.ac by Autoconf. DO NOT EDIT!
+ # Local configure fragment for sysdeps/s390/s390-64.
+
+# Minimal checking for static PIE support in ld.
+# Compare to ld testcase/bugzilla:
+# <binutils-source>/ld/testsuite/ld-elf/pr22263-1.rd
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for s390-specific static PIE requirements" >&5
+$as_echo_n "checking for s390-specific static PIE requirements... " >&6; }
+if { as_var=\
+libc_cv_s390x_staticpie_req; eval \${$as_var+:} false; }; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat > conftest1.c <<EOF
+__thread int * foo;
+
+void
+bar (void)
+{
+ *foo = 1;
+}
+EOF
+ cat > conftest2.c <<EOF
+extern __thread int *foo;
+extern void bar (void);
+static int x;
+
+int
+main ()
+{
+ foo = &x;
+ return 0;
+}
+EOF
+ libc_cv_s390x_staticpie_req=no
+ if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -fPIE -c conftest1.c -o conftest1.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } \
+ && { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -fPIE -c conftest2.c -o conftest2.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } \
+ && { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -pie -o conftest conftest1.o conftest2.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } \
+ && { ac_try='! readelf -Wr conftest | grep R_390_TLS_TPOFF'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }
+ then
+ libc_cv_s390x_staticpie_req=yes
+ fi
+ rm -rf conftest.*
+fi
+eval ac_res=\$\
+libc_cv_s390x_staticpie_req
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+if test $libc_cv_s390x_staticpie_req = yes; then
+ # Static PIE is supported only on 64bit.
+ # Ensure you also have those patches for:
+ # - binutils (ld)
+ # - "[PR ld/22263] s390: Avoid dynamic TLS relocs in PIE"
+ # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=26b1426577b5dcb32d149c64cca3e603b81948a9
+ # (Tested by configure check above)
+ # Otherwise there will be a R_390_TLS_TPOFF relocation, which fails to
+ # be processed in _dl_relocate_static_pie() as static TLS map is not setup.
+ # - "s390: Add DT_JMPREL pointing to .rela.[i]plt with static-pie"
+ # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=d942d8db12adf4c9e5c7d9ed6496a779ece7149e
+ # (We can't test it in configure as we are not able to link a static PIE
+ # executable if the system glibc lacks static PIE support)
+ # Otherwise there won't be DT_JMPREL, DT_PLTRELA, DT_PLTRELASZ entries
+ # and the IFUNC symbols are not processed, which leads to crashes.
+ #
+ # - kernel (the mentioned links to the commits belong to 5.19 merge window):
+ # - "s390/mmap: increase stack/mmap gap to 128MB"
+ # https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=features&id=f2f47d0ef72c30622e62471903ea19446ea79ee2
+ # - "s390/vdso: move vdso mapping to its own function"
+ # https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=features&id=57761da4dc5cd60bed2c81ba0edb7495c3c740b8
+ # - "s390/vdso: map vdso above stack"
+ # https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=features&id=9e37a2e8546f9e48ea76c839116fa5174d14e033
+ # - "s390/vdso: add vdso randomization"
+ # https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=features&id=41cd81abafdc4e58a93fcb677712a76885e3ca25
+ # (We can't test the kernel of the target system)
+ # Otherwise if /proc/sys/kernel/randomize_va_space is turned off (0),
+ # static PIE executables like ldconfig will crash. During startup, sbrk is
+ # used to enlarge the HEAP. Unfortunately the underlying brk syscall fails
+ # as there is not enough space after the HEAP. Then the address of the TLS
+ # image is invalid and the following memcpy in __libc_setup_tls() leads
+ # to a segfault.
+ # If /proc/sys/kernel/randomize_va_space is activated (default: 2), there
+ # is enough space after HEAP.
+ #
+ # - glibc
+ # - "Linux: Define MMAP_CALL_INTERNAL"
+ # https://sourceware.org/git/?p=glibc.git;a=commit;h=c1b68685d438373efe64e5f076f4215723004dfb
+ # - "i386: Remove OPTIMIZE_FOR_GCC_5 from Linux libc-do-syscall.S"
+ # https://sourceware.org/git/?p=glibc.git;a=commit;h=6e5c7a1e262961adb52443ab91bd2c9b72316402
+ # - "i386: Honor I386_USE_SYSENTER for 6-argument Linux system calls"
+ # https://sourceware.org/git/?p=glibc.git;a=commit;h=60f0f2130d30cfd008ca39743027f1e200592dff
+ # - "ia64: Always define IA64_USE_NEW_STUB as a flag macro"
+ # https://sourceware.org/git/?p=glibc.git;a=commit;h=18bd9c3d3b1b6a9182698c85354578d1d58e9d64
+ # - "Linux: Implement a useful version of _startup_fatal"
+ # https://sourceware.org/git/?p=glibc.git;a=commit;h=a2a6bce7d7e52c1c34369a7da62c501cc350bc31
+ # - "Linux: Introduce __brk_call for invoking the brk system call"
+ # https://sourceware.org/git/?p=glibc.git;a=commit;h=b57ab258c1140bc45464b4b9908713e3e0ee35aa
+ # - "csu: Implement and use _dl_early_allocate during static startup"
+ # https://sourceware.org/git/?p=glibc.git;a=commit;h=f787e138aa0bf677bf74fa2a08595c446292f3d7
+ # The mentioned patch series by Florian Weimer avoids the mentioned failing
+ # sbrk syscall by falling back to mmap.
+ $as_echo "#define SUPPORT_STATIC_PIE 1" >>confdefs.h
+
+fi
--- /dev/null
+GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
+# Local configure fragment for sysdeps/s390/s390-64.
+
+# Minimal checking for static PIE support in ld.
+# Compare to ld testcase/bugzilla:
+# <binutils-source>/ld/testsuite/ld-elf/pr22263-1.rd
+AC_CACHE_CHECK([for s390-specific static PIE requirements], \
+[libc_cv_s390x_staticpie_req], [dnl
+ cat > conftest1.c <<EOF
+__thread int * foo;
+
+void
+bar (void)
+{
+ *foo = 1;
+}
+EOF
+ cat > conftest2.c <<EOF
+extern __thread int *foo;
+extern void bar (void);
+static int x;
+
+int
+main ()
+{
+ foo = &x;
+ return 0;
+}
+EOF
+ libc_cv_s390x_staticpie_req=no
+ if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -fPIE -c conftest1.c -o conftest1.o]) \
+ && AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -fPIE -c conftest2.c -o conftest2.o]) \
+ && AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -pie -o conftest conftest1.o conftest2.o]) \
+ && AC_TRY_COMMAND([! readelf -Wr conftest | grep R_390_TLS_TPOFF])
+ then
+ libc_cv_s390x_staticpie_req=yes
+ fi
+ rm -rf conftest.*])
+if test $libc_cv_s390x_staticpie_req = yes; then
+ # Static PIE is supported only on 64bit.
+ # Ensure you also have those patches for:
+ # - binutils (ld)
+ # - "[PR ld/22263] s390: Avoid dynamic TLS relocs in PIE"
+ # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=26b1426577b5dcb32d149c64cca3e603b81948a9
+ # (Tested by configure check above)
+ # Otherwise there will be a R_390_TLS_TPOFF relocation, which fails to
+ # be processed in _dl_relocate_static_pie() as static TLS map is not setup.
+ # - "s390: Add DT_JMPREL pointing to .rela.[i]plt with static-pie"
+ # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=d942d8db12adf4c9e5c7d9ed6496a779ece7149e
+ # (We can't test it in configure as we are not able to link a static PIE
+ # executable if the system glibc lacks static PIE support)
+ # Otherwise there won't be DT_JMPREL, DT_PLTRELA, DT_PLTRELASZ entries
+ # and the IFUNC symbols are not processed, which leads to crashes.
+ #
+ # - kernel (the mentioned links to the commits belong to 5.19 merge window):
+ # - "s390/mmap: increase stack/mmap gap to 128MB"
+ # https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=features&id=f2f47d0ef72c30622e62471903ea19446ea79ee2
+ # - "s390/vdso: move vdso mapping to its own function"
+ # https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=features&id=57761da4dc5cd60bed2c81ba0edb7495c3c740b8
+ # - "s390/vdso: map vdso above stack"
+ # https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=features&id=9e37a2e8546f9e48ea76c839116fa5174d14e033
+ # - "s390/vdso: add vdso randomization"
+ # https://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git/commit/?h=features&id=41cd81abafdc4e58a93fcb677712a76885e3ca25
+ # (We can't test the kernel of the target system)
+ # Otherwise if /proc/sys/kernel/randomize_va_space is turned off (0),
+ # static PIE executables like ldconfig will crash. During startup, sbrk is
+ # used to enlarge the HEAP. Unfortunately the underlying brk syscall fails
+ # as there is not enough space after the HEAP. Then the address of the TLS
+ # image is invalid and the following memcpy in __libc_setup_tls() leads
+ # to a segfault.
+ # If /proc/sys/kernel/randomize_va_space is activated (default: 2), there
+ # is enough space after HEAP.
+ #
+ # - glibc
+ # - "Linux: Define MMAP_CALL_INTERNAL"
+ # https://sourceware.org/git/?p=glibc.git;a=commit;h=c1b68685d438373efe64e5f076f4215723004dfb
+ # - "i386: Remove OPTIMIZE_FOR_GCC_5 from Linux libc-do-syscall.S"
+ # https://sourceware.org/git/?p=glibc.git;a=commit;h=6e5c7a1e262961adb52443ab91bd2c9b72316402
+ # - "i386: Honor I386_USE_SYSENTER for 6-argument Linux system calls"
+ # https://sourceware.org/git/?p=glibc.git;a=commit;h=60f0f2130d30cfd008ca39743027f1e200592dff
+ # - "ia64: Always define IA64_USE_NEW_STUB as a flag macro"
+ # https://sourceware.org/git/?p=glibc.git;a=commit;h=18bd9c3d3b1b6a9182698c85354578d1d58e9d64
+ # - "Linux: Implement a useful version of _startup_fatal"
+ # https://sourceware.org/git/?p=glibc.git;a=commit;h=a2a6bce7d7e52c1c34369a7da62c501cc350bc31
+ # - "Linux: Introduce __brk_call for invoking the brk system call"
+ # https://sourceware.org/git/?p=glibc.git;a=commit;h=b57ab258c1140bc45464b4b9908713e3e0ee35aa
+ # - "csu: Implement and use _dl_early_allocate during static startup"
+ # https://sourceware.org/git/?p=glibc.git;a=commit;h=f787e138aa0bf677bf74fa2a08595c446292f3d7
+ # The mentioned patch series by Florian Weimer avoids the mentioned failing
+ # sbrk syscall by falling back to mmap.
+ AC_DEFINE(SUPPORT_STATIC_PIE)
+fi
dl_hwcap_check (void)
{
#if defined __ARCH__
-# if __ARCH__ >= 13
+# if __ARCH__ >= 14
+ if (!(GLRO(dl_hwcap) & HWCAP_S390_VXRS_PDE2))
+ _dl_fatal_printf ("\
+Fatal glibc error: CPU lacks VXRS_PDE2 support (z16 or later required)\n");
+# elif __ARCH__ >= 13
if (!(GLRO(dl_hwcap) & HWCAP_S390_VXRS_EXT2))
_dl_fatal_printf ("\
Fatal glibc error: CPU lacks VXRS_EXT2 support (z15 or later required)\n");
#include <dl-hwcaps.h>
#include <ldsodefs.h>
-const char _dl_hwcaps_subdirs[] = "z15:z14:z13";
-enum { subdirs_count = 3 }; /* Number of components in _dl_hwcaps_subdirs. */
+const char _dl_hwcaps_subdirs[] = "z16:z15:z14:z13";
+enum { subdirs_count = 4 }; /* Number of components in _dl_hwcaps_subdirs. */
uint32_t
_dl_hwcaps_subdirs_active (void)
return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
++active;
+ /* z16.
+ Note: We do not list HWCAP_S390_NNPA here as, according to the Principles of
+ Operation, those instructions may be replaced or removed in future. */
+ if (!(GLRO (dl_hwcap) & HWCAP_S390_VXRS_PDE2))
+ return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
+ ++active;
+
return _dl_hwcaps_subdirs_build_bitmask (subdirs_count, active);
}
#include <link.h>
#include <sysdeps/s390/dl-procinfo.h>
#include <dl-irel.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
#define ELF_MACHINE_IRELATIVE R_390_IRELATIVE
entries will jump to the on-demand fixup code in dl-runtime.c. */
static inline int __attribute__ ((unused))
-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ int lazy, int profile)
{
extern void _dl_runtime_resolve (Elf64_Word);
extern void _dl_runtime_profile (Elf64_Word);
/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
#define ELF_MACHINE_JMP_SLOT R_390_JMP_SLOT
-/* The 64 bit S/390 never uses Elf64_Rel relocations. */
-#define ELF_MACHINE_NO_REL 1
-#define ELF_MACHINE_NO_RELA 0
-
/* We define an initialization functions. This is called very early in
_dl_sysdep_start. */
#define DL_PLATFORM_INIT dl_platform_init ()
/* Perform the relocation specified by RELOC and SYM (which is fully resolved).
MAP is the object containing the reloc. */
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc,
- const Elf64_Sym *sym, const struct r_found_version *version,
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
+ const Elf64_Rela *reloc, const Elf64_Sym *sym,
+ const struct r_found_version *version,
void *const reloc_addr_arg, int skip_ifunc)
{
Elf64_Addr *const reloc_addr = reloc_addr_arg;
/* Only needed for R_390_COPY below. */
const Elf64_Sym *const refsym = sym;
#endif
- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
+ r_type);
Elf64_Addr value = SYMBOL_ADDRESS (sym_map, sym, true);
if (sym != NULL
}
}
-auto inline void
+static inline void
__attribute__ ((always_inline))
elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc,
void *const reloc_addr_arg)
*reloc_addr = l_addr + reloc->r_addend;
}
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_lazy_rel (struct link_map *map,
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
Elf64_Addr l_addr, const Elf64_Rela *reloc,
int skip_ifunc)
{
lmg %r2,%r4,CFA_OFF+PLT1_OFF(%r12) # r2, r3: args saved by PLT
# r4: struct La_s390_64_regs *
la %r5,CFA_OFF+RETVAL_OFF(%r12) # struct La_s390_64_retval *
- brasl %r14,_dl_call_pltexit
+ brasl %r14,_dl_audit_pltexit
lgr %r15,%r12 # remove stack frame
# undef FRAME_SIZE
/* Ok, now branch to the libc main routine. */
#ifdef PIC
+# ifdef SHARED
+ /* Used for dynamic linked position independent executable.
+ => Scrt1.o */
larl %r2,main@GOTENT # load pointer to main
lg %r2,0(%r2)
+# else
+ /* Used for dynamic linked position dependent executable.
+ => crt1.o (glibc configured without --disable-default-pie:
+ PIC is defined)
+ Or for static linked position independent executable.
+ => rcrt1.o (only available if glibc configured without
+ --disable-default-pie: PIC is defined) */
+ larl %r2,__wrap_main
+# endif
brasl %r14,__libc_start_main@plt
#else
+ /* Used for dynamic/static linked position dependent executable.
+ => crt1.o (glibc configured with --disable-default-pie:
+ PIC and SHARED are not defined) */
larl %r2,main # load pointer to main
brasl %r14,__libc_start_main
#endif
cfi_endproc
+#if defined PIC && !defined SHARED
+ /* When main is not defined in the executable but in a shared library
+ then a wrapper is needed in crt1.o of the static-pie enabled libc,
+ because crt1.o and rcrt1.o share code and the latter must avoid the
+ use of GOT relocations before __libc_start_main is called. */
+__wrap_main:
+ cfi_startproc
+ larl %r1,main@GOTENT # load pointer to main
+ lg %r1,0(%r1)
+ br %r1
+ cfi_endproc
+#endif
+
/* Define a symbol for the first piece of initialized data. */
.data
.globl __data_start
extern int marker2 (void);
extern int marker3 (void);
extern int marker4 (void);
+extern int marker5 (void);
/* Return the arch level, 10 for the baseline libmarkermod*.so's. */
static int
return 12;
if (strcmp (platform, "z15") == 0)
return 13;
+ if (strcmp (platform, "z16") == 0)
+ return 14;
printf ("warning: unrecognized AT_PLATFORM value: %s\n", platform);
- /* Assume that the new platform supports z15. */
- return 13;
+ /* Assume that the new platform supports z16. */
+ return 14;
}
static int
TEST_COMPARE (marker2 (), MIN (level - 9, 2));
TEST_COMPARE (marker3 (), MIN (level - 9, 3));
TEST_COMPARE (marker4 (), MIN (level - 9, 4));
+ TEST_COMPARE (marker5 (), MIN (level - 9, 5));
return 0;
}
# error The arch13 variant of strstr needs the z13 variant of strstr!
# endif
clgfi %r4,9
- jh STRSTR_Z13
+ jgh STRSTR_Z13
/* In case of a partial match, the vstrs instruction returns the index
of the partial match in a vector-register. Then we have to
#include <sys/param.h>
#include <sysdep.h>
#include <assert.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
/* Return nonzero iff ELF header is compatible with the running host. */
static inline int __attribute__ ((unused))
entries will jump to the on-demand fixup code in dl-runtime.c. */
static inline int __attribute__ ((unused, always_inline))
-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ int lazy, int profile)
{
Elf32_Addr *got;
extern void _dl_runtime_resolve (Elf32_Word);
#endif /* !dl_machine_h */
-/* SH never uses Elf32_Rel relocations. */
-#define ELF_MACHINE_NO_REL 1
-#define ELF_MACHINE_NO_RELA 0
-
#ifdef RESOLVE_MAP
/* Perform the relocation specified by RELOC and SYM (which is fully resolved).
MAP is the object containing the reloc. */
-auto inline void
+static inline void
__attribute ((always_inline))
-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
- const Elf32_Sym *sym, const struct r_found_version *version,
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
+ const Elf32_Rela *reloc, const Elf32_Sym *sym,
+ const struct r_found_version *version,
void *const reloc_addr_arg, int skip_ifunc)
{
Elf32_Addr *const reloc_addr = reloc_addr_arg;
else
{
const Elf32_Sym *const refsym = sym;
- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
+ r_type);
value = SYMBOL_ADDRESS (sym_map, sym, true);
value += reloc->r_addend;
}
}
-auto inline void
+static inline void
__attribute__ ((always_inline))
elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc,
void *const reloc_addr_arg)
#undef COPY_UNALIGNED_WORD
}
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_lazy_rel (struct link_map *map,
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
Elf32_Addr l_addr, const Elf32_Rela *reloc,
int skip_ifunc)
{
.align 2
#ifdef SHARED
7: .long _GLOBAL_OFFSET_TABLE_
-8: .long _dl_call_pltexit@GOTOFF
+8: .long _dl_audit_pltexit@GOTOFF
#else
-8: .long _dl_call_pltexit
+8: .long _dl_audit_pltexit
#endif
.size _dl_runtime_profile, .-_dl_runtime_profile
ldouble: 4
Function: "tgamma_downward":
-double: 8
+double: 9
float: 7
ldouble: 5
#include <tls.h>
#include <dl-plt.h>
#include <elf/dl-hwcaps.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
/* Return nonzero iff ELF header is compatible with the running host. */
static inline int
entries will jump to the on-demand fixup code in dl-runtime.c. */
static inline int
-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ int lazy, int profile)
{
Elf32_Addr *plt;
extern void _dl_runtime_resolve (Elf32_Word);
/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
#define ELF_MACHINE_JMP_SLOT R_SPARC_JMP_SLOT
-/* The SPARC never uses Elf32_Rel relocations. */
-#define ELF_MACHINE_NO_REL 1
-#define ELF_MACHINE_NO_RELA 0
-
/* Undo the sub %sp, 6*4, %sp; add %sp, 22*4, %o0 below to get at the
value we want in __libc_stack_end. */
#define DL_STACK_END(cookie) \
/* Perform the relocation specified by RELOC and SYM (which is fully resolved).
MAP is the object containing the reloc. */
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
- const Elf32_Sym *sym, const struct r_found_version *version,
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
+ const Elf32_Rela *reloc, const Elf32_Sym *sym,
+ const struct r_found_version *version,
void *const reloc_addr_arg, int skip_ifunc)
{
Elf32_Addr *const reloc_addr = reloc_addr_arg;
}
else
{
- sym_map = RESOLVE_MAP (&sym, version, r_type);
+ sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type);
value = SYMBOL_ADDRESS (sym_map, sym, true);
}
#else
}
}
-auto inline void
+static inline void
__attribute__ ((always_inline))
elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc,
void *const reloc_addr_arg)
*reloc_addr += l_addr + reloc->r_addend;
}
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_lazy_rel (struct link_map *map,
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
Elf32_Addr l_addr, const Elf32_Rela *reloc,
int skip_ifunc)
{
mov %l5, %o0
mov %l6, %o1
add %sp, (11 * 8), %o2
- call _dl_call_pltexit
+ call _dl_audit_pltexit
add %sp, ( 9 * 8), %o3
ldd [%sp + ( 9 * 8)], %i0
#include <ldsodefs.h>
#include <sysdep.h>
#include <dl-plt.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
#define ELF64_R_TYPE_ID(info) ((info) & 0xff)
#define ELF64_R_TYPE_DATA(info) ((info) >> 8)
/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
#define ELF_MACHINE_JMP_SLOT R_SPARC_JMP_SLOT
-/* The SPARC never uses Elf64_Rel relocations. */
-#define ELF_MACHINE_NO_REL 1
-#define ELF_MACHINE_NO_RELA 0
-
/* Set up the loaded object described by L so its unrelocated PLT
entries will jump to the on-demand fixup code in dl-runtime.c. */
static inline int
-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ int lazy, int profile)
{
if (l->l_info[DT_JMPREL] && lazy)
{
/* Perform the relocation specified by RELOC and SYM (which is fully resolved).
MAP is the object containing the reloc. */
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc,
- const Elf64_Sym *sym, const struct r_found_version *version,
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
+ const Elf64_Rela *reloc, const Elf64_Sym *sym,
+ const struct r_found_version *version,
void *const reloc_addr_arg, int skip_ifunc)
{
Elf64_Addr *const reloc_addr = reloc_addr_arg;
}
else
{
- sym_map = RESOLVE_MAP (&sym, version, r_type);
+ sym_map = RESOLVE_MAP (map, scope, &sym, version, r_type);
value = SYMBOL_ADDRESS (sym_map, sym, true);
}
#else
}
}
-auto inline void
+static inline void
__attribute__ ((always_inline))
elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc,
void *const reloc_addr_arg)
*reloc_addr = l_addr + reloc->r_addend;
}
-auto inline void
+static inline void
__attribute__ ((always_inline))
-elf_machine_lazy_rel (struct link_map *map,
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
Elf64_Addr l_addr, const Elf64_Rela *reloc,
int skip_ifunc)
{
mov %l5, %o0
mov %l6, %o1
add %sp, STACK_BIAS + (24 * 8), %o2
- call _dl_call_pltexit
+ call _dl_audit_pltexit
add %sp, STACK_BIAS + (16 * 8), %o3
ldx [%sp + STACK_BIAS + (16 * 8)], %i0
tst-close_range \
tst-prctl \
tst-scm_rights \
+ tst-getauxval \
# tests
# Test for the symbol version of fcntl that was replaced in glibc 2.28.
CFLAGS-recvmmsg.c = -fexceptions -fasynchronous-unwind-tables
CFLAGS-sendmmsg.c = -fexceptions -fasynchronous-unwind-tables
+tests += \
+ tst-socket-timestamp \
+ tst-socket-timestamp-compat \
+ # tests
+tests-time64 += \
+ tst-socket-timestamp-time64 \
+ tst-socket-timestamp-compat-time64 \
+ # tests-time64
+
tests-special += $(objpfx)tst-socket-consts.out
$(objpfx)tst-socket-consts.out: ../sysdeps/unix/sysv/linux/tst-socket-consts.py
PYTHONPATH=../scripts \
sysdep_headers += bits/fcntl-linux.h
-tests += tst-fallocate tst-fallocate64 tst-o_path-locks
+tests += \
+ tst-fallocate \
+ tst-fallocate64 \
+ tst-getcwd-smallbuff \
+ tst-o_path-locks \
+# tests
endif
ifeq ($(subdir),elf)
#define __NR_fsync 82
#define __NR_ftruncate 46
#define __NR_futex 98
+#define __NR_futex_waitv 449
#define __NR_get_mempolicy 236
#define __NR_get_robust_list 100
#define __NR_getcpu 168
#define __NR_mbind 235
#define __NR_membarrier 283
#define __NR_memfd_create 279
+#define __NR_memfd_secret 447
#define __NR_migrate_pages 238
#define __NR_mincore 232
#define __NR_mkdirat 34
#define __NR_preadv2 286
#define __NR_prlimit64 261
#define __NR_process_madvise 440
+#define __NR_process_mrelease 448
#define __NR_process_vm_readv 270
#define __NR_process_vm_writev 271
#define __NR_pselect6 72
#define __NR_pwritev 70
#define __NR_pwritev2 287
#define __NR_quotactl 60
+#define __NR_quotactl_fd 443
#define __NR_read 63
#define __NR_readahead 213
#define __NR_readlinkat 78
#define __NR_sendmsg 211
#define __NR_sendto 206
#define __NR_set_mempolicy 237
+#define __NR_set_mempolicy_home_node 450
#define __NR_set_robust_list 99
#define __NR_set_tid_address 96
#define __NR_setdomainname 162
#define HWCAP2_RNG (1 << 16)
#define HWCAP2_BTI (1 << 17)
#define HWCAP2_MTE (1 << 18)
+#define HWCAP2_ECV (1 << 19)
+#define HWCAP2_AFP (1 << 20)
+#define HWCAP2_RPRES (1 << 21)
GLIBC_2.17 __tls_get_addr F
GLIBC_2.17 _dl_mcount F
GLIBC_2.17 _r_debug D 0x28
+GLIBC_2.34 __rtld_version_placeholder F
#define __NR_fsync 95
#define __NR_ftruncate 130
#define __NR_futex 394
+#define __NR_futex_waitv 559
#define __NR_futimesat 454
#define __NR_get_kernel_syms 309
#define __NR_get_mempolicy 430
#define __NR_preadv2 520
#define __NR_prlimit64 496
#define __NR_process_madvise 550
+#define __NR_process_mrelease 558
#define __NR_process_vm_readv 504
#define __NR_process_vm_writev 505
#define __NR_pselect6 463
#define __NR_pwritev2 521
#define __NR_query_module 347
#define __NR_quotactl 148
+#define __NR_quotactl_fd 553
#define __NR_read 3
#define __NR_readahead 379
#define __NR_readlink 58
#define __NR_sendmsg 114
#define __NR_sendto 133
#define __NR_set_mempolicy 431
+#define __NR_set_mempolicy_home_node 560
#define __NR_set_robust_list 466
#define __NR_set_tid_address 411
#define __NR_setdomainname 166
+++ /dev/null
-/* Change data segment size. Linux/Alpha.
- Copyright (C) 2020-2021 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library. If not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <errno.h>
-#include <unistd.h>
-#include <sysdep.h>
-
-void *__curbrk = 0;
-
-int
-__brk (void *addr)
-{
- /* Alpha brk returns -ENOMEM in case of failure. */
- __curbrk = (void *) INTERNAL_SYSCALL_CALL (brk, addr);
- if ((unsigned long) __curbrk == -ENOMEM)
- {
- __set_errno (ENOMEM);
- return -1;
- }
-
- return 0;
-}
-weak_alias (__brk, brk)
--- /dev/null
+/* Invoke the brk system call. Alpha version.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+static inline void *
+__brk_call (void *addr)
+{
+ unsigned long int result = INTERNAL_SYSCALL_CALL (brk, addr);
+ if (result == -ENOMEM)
+ /* Alpha returns -ENOMEM on failure; mimic the generic unchanged break. */
+ result = INTERNAL_SYSCALL_CALL (brk, 0);
+ return (void *) result;
+}
extern long __libc_alpha_cache_shape[4];
-#define DL_PLATFORM_AUXV \
- case AT_L1I_CACHESHAPE: \
- __libc_alpha_cache_shape[0] = av->a_un.a_val; \
- break; \
- case AT_L1D_CACHESHAPE: \
- __libc_alpha_cache_shape[1] = av->a_un.a_val; \
- break; \
- case AT_L2_CACHESHAPE: \
- __libc_alpha_cache_shape[2] = av->a_un.a_val; \
- break; \
- case AT_L3_CACHESHAPE: \
- __libc_alpha_cache_shape[3] = av->a_un.a_val; \
- break;
+#define DL_PLATFORM_AUXV \
+ __libc_alpha_cache_shape[0] = auxv_values[AT_L1I_CACHESHAPE]; \
+ __libc_alpha_cache_shape[1] = auxv_values[AT_L1D_CACHESHAPE]; \
+ __libc_alpha_cache_shape[2] = auxv_values[AT_L2_CACHESHAPE]; \
+ __libc_alpha_cache_shape[3] = auxv_values[AT_L3_CACHESHAPE];
GLIBC_2.1 __libc_stack_end D 0x8
GLIBC_2.1 _dl_mcount F
GLIBC_2.3 __tls_get_addr F
+GLIBC_2.34 __rtld_version_placeholder F
GLIBC_2.4 __stack_chk_guard D 0x8
#define __NR_fsync 82
#define __NR_ftruncate64 46
#define __NR_futex_time64 422
+#define __NR_futex_waitv 449
#define __NR_get_mempolicy 236
#define __NR_get_robust_list 100
#define __NR_getcpu 168
#define __NR_preadv2 286
#define __NR_prlimit64 261
#define __NR_process_madvise 440
+#define __NR_process_mrelease 448
#define __NR_process_vm_readv 270
#define __NR_process_vm_writev 271
#define __NR_pselect6_time64 413
#define __NR_pwritev 70
#define __NR_pwritev2 287
#define __NR_quotactl 60
+#define __NR_quotactl_fd 443
#define __NR_read 63
#define __NR_readahead 213
#define __NR_readlinkat 78
#define __NR_sendmsg 211
#define __NR_sendto 206
#define __NR_set_mempolicy 237
+#define __NR_set_mempolicy_home_node 450
#define __NR_set_robust_list 99
#define __NR_set_tid_address 96
#define __NR_setdomainname 162
GLIBC_2.32 __tls_get_addr F
GLIBC_2.32 _dl_mcount F
GLIBC_2.32 _r_debug D 0x14
+GLIBC_2.34 __rtld_version_placeholder F
#define __NR_ftruncate64 194
#define __NR_futex 240
#define __NR_futex_time64 422
+#define __NR_futex_waitv 449
#define __NR_futimesat 326
#define __NR_get_mempolicy 320
#define __NR_get_robust_list 339
#define __NR_preadv2 392
#define __NR_prlimit64 369
#define __NR_process_madvise 440
+#define __NR_process_mrelease 448
#define __NR_process_vm_readv 376
#define __NR_process_vm_writev 377
#define __NR_pselect6 335
#define __NR_pwritev 362
#define __NR_pwritev2 393
#define __NR_quotactl 131
+#define __NR_quotactl_fd 443
#define __NR_read 3
#define __NR_readahead 225
#define __NR_readlink 85
#define __NR_sendmsg 296
#define __NR_sendto 290
#define __NR_set_mempolicy 321
+#define __NR_set_mempolicy_home_node 450
#define __NR_set_robust_list 338
#define __NR_set_tid_address 256
#define __NR_set_tls 983045
+GLIBC_2.34 __rtld_version_placeholder F
GLIBC_2.4 __libc_stack_end D 0x4
GLIBC_2.4 __stack_chk_guard D 0x4
GLIBC_2.4 __tls_get_addr F
/* Store FP regs. Much of the FP code is copied from arm/setjmp.S. */
-#ifdef PIC
+#ifdef SHARED
ldr r2, 1f
ldr r1, .Lrtld_global_ro
0: add r2, pc, r2
END(__getcontext)
-#ifdef PIC
+#ifdef SHARED
1: .long _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
.Lrtld_global_ro:
.long C_SYMBOL_NAME(_rtld_global_ro)(GOT)
+GLIBC_2.34 __rtld_version_placeholder F
GLIBC_2.4 __libc_stack_end D 0x4
GLIBC_2.4 __stack_chk_guard D 0x4
GLIBC_2.4 __tls_get_addr F
add r0, r0, #UCONTEXT_REGSPACE
/* Restore the VFP registers. Copied from arm/__longjmp.S. */
-#ifdef PIC
+#ifdef SHARED
ldr r2, 1f
ldr r1, .Lrtld_global_ro
0: add r2, pc, r2
.fnend
END(__startcontext)
-#ifdef PIC
+#ifdef SHARED
1: .long _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS
.Lrtld_global_ro:
.long C_SYMBOL_NAME(_rtld_global_ro)(GOT)
# define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK. */
# define MADV_COLD 20 /* Deactivate these pages. */
# define MADV_PAGEOUT 21 /* Reclaim these pages. */
+# define MADV_POPULATE_READ 22 /* Populate (prefault) page tables
+ readable. */
+# define MADV_POPULATE_WRITE 23 /* Populate (prefault) page tables
+ writable. */
# define MADV_HWPOISON 100 /* Poison a page for testing. */
#endif
#define PF_QIPCRTR 42 /* Qualcomm IPC Router. */
#define PF_SMC 43 /* SMC sockets. */
#define PF_XDP 44 /* XDP sockets. */
-#define PF_MAX 45 /* For now.. */
+#define PF_MCTP 45 /* Management component transport protocol. */
+#define PF_MAX 46 /* For now.. */
/* Address families. */
#define AF_UNSPEC PF_UNSPEC
#define AF_QIPCRTR PF_QIPCRTR
#define AF_SMC PF_SMC
#define AF_XDP PF_XDP
+#define AF_MCTP PF_MCTP
#define AF_MAX PF_MAX
/* Socket level values. Others are defined in the appropriate headers.
#define SOL_KCM 281
#define SOL_TLS 282
#define SOL_XDP 283
+#define SOL_MPTCP 284
+#define SOL_MCTP 285
/* Maximum queue length specifiable by listen. */
#define SOMAXCONN 4096
+ CMSG_ALIGN (sizeof (struct cmsghdr)))
#define CMSG_LEN(len) (CMSG_ALIGN (sizeof (struct cmsghdr)) + (len))
+/* Given a length, return the additional padding necessary such that
+ len + __CMSG_PADDING(len) == CMSG_ALIGN (len). */
+#define __CMSG_PADDING(len) ((sizeof (size_t) \
+ - ((len) & (sizeof (size_t) - 1))) \
+ & (sizeof (size_t) - 1))
+
extern struct cmsghdr *__cmsg_nxthdr (struct msghdr *__mhdr,
struct cmsghdr *__cmsg) __THROW;
#ifdef __USE_EXTERN_INLINES
_EXTERN_INLINE struct cmsghdr *
__NTH (__cmsg_nxthdr (struct msghdr *__mhdr, struct cmsghdr *__cmsg))
{
+ /* We may safely assume that __cmsg lies within the msg_controllen bytes at
+ __mhdr->msg_control because the user is required to obtain the first
+ cmsg via CMSG_FIRSTHDR, set its length, then obtain subsequent cmsgs
+ via CMSG_NXTHDR, setting lengths along the way. However, we don't yet
+ trust the value of __cmsg->cmsg_len and therefore do not use it in any
+ pointer arithmetic until we check its value. */
+
+ unsigned char * __msg_control_ptr = (unsigned char *) __mhdr->msg_control;
+ unsigned char * __cmsg_ptr = (unsigned char *) __cmsg;
+
+ size_t __size_needed = sizeof (struct cmsghdr)
+ + __CMSG_PADDING (__cmsg->cmsg_len);
+
+ /* The current header is malformed, too small to be a full header. */
if ((size_t) __cmsg->cmsg_len < sizeof (struct cmsghdr))
- /* The kernel header does this so there may be a reason. */
return (struct cmsghdr *) 0;
+ /* There isn't enough space between __cmsg and the end of the buffer to
+ hold the current cmsg *and* the next one. */
+ if (((size_t)
+ (__msg_control_ptr + __mhdr->msg_controllen - __cmsg_ptr)
+ < __size_needed)
+ || ((size_t)
+ (__msg_control_ptr + __mhdr->msg_controllen - __cmsg_ptr
+ - __size_needed)
+ < __cmsg->cmsg_len))
+
+ return (struct cmsghdr *) 0;
+
+ /* Now, we trust cmsg_len and can use it to find the next header. */
__cmsg = (struct cmsghdr *) ((unsigned char *) __cmsg
+ CMSG_ALIGN (__cmsg->cmsg_len));
- if ((unsigned char *) (__cmsg + 1) > ((unsigned char *) __mhdr->msg_control
- + __mhdr->msg_controllen)
- || ((unsigned char *) __cmsg + CMSG_ALIGN (__cmsg->cmsg_len)
- > ((unsigned char *) __mhdr->msg_control + __mhdr->msg_controllen)))
- /* No more entries. */
- return (struct cmsghdr *) 0;
return __cmsg;
}
#endif /* Use `extern inline'. */
struct timex
{
-# ifdef __USE_TIME_BITS64
+# if defined __USE_TIME_BITS64 || (__TIMESIZE == 64 && __WORDSIZE == 32)
unsigned int modes; /* mode selector */
int :32; /* pad */
long long offset; /* time offset (usec) */
#include <errno.h>
#include <unistd.h>
#include <sysdep.h>
+#include <brk_call.h>
/* This must be initialized data because commons can't have aliases. */
void *__curbrk = 0;
int
__brk (void *addr)
{
- __curbrk = (void *) INTERNAL_SYSCALL_CALL (brk, addr);
+ __curbrk = __brk_call (addr);
if (__curbrk < addr)
{
__set_errno (ENOMEM);
--- /dev/null
+/* Invoke the brk system call. Generic Linux version.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+static inline void *
+__brk_call (void *addr)
+{
+ /* The default implementation reports errors through an unchanged
+ break, so the result is returned as-is for the caller to check. */
+ return (void *) INTERNAL_SYSCALL_CALL (brk, addr);
+}
{
struct timespec tp32;
r = INTERNAL_VSYSCALL_CALL (vdso_time, 2, clock_id, &tp32);
- if (r == 0 && tp32.tv_sec > 0)
+ if (r == 0 && tp32.tv_sec >= 0)
{
*tp = valid_timespec_to_timespec64 (tp32);
return 0;
struct cmsghdr *
__cmsg_nxthdr (struct msghdr *mhdr, struct cmsghdr *cmsg)
{
+ /* We may safely assume that cmsg lies within the msg_controllen bytes at
+ mhdr->msg_control because the user is required to obtain the first
+ cmsg via CMSG_FIRSTHDR, set its length, then obtain subsequent cmsgs
+ via CMSG_NXTHDR, setting lengths along the way. However, we don't yet
+ trust the value of cmsg->cmsg_len and therefore do not use it in any
+ pointer arithmetic until we check its value. */
+
+ unsigned char * msg_control_ptr = (unsigned char *) mhdr->msg_control;
+ unsigned char * cmsg_ptr = (unsigned char *) cmsg;
+
+ size_t size_needed = sizeof (struct cmsghdr)
+ + __CMSG_PADDING (cmsg->cmsg_len);
+
+ /* The current header is malformed, too small to be a full header. */
if ((size_t) cmsg->cmsg_len < sizeof (struct cmsghdr))
- /* The kernel header does this so there may be a reason. */
- return NULL;
+ return (struct cmsghdr *) 0;
+
+ /* There isn't enough space between cmsg and the end of the buffer to
+ hold the current cmsg *and* the next one. */
+ if (((size_t)
+ (msg_control_ptr + mhdr->msg_controllen - cmsg_ptr)
+ < size_needed)
+ || ((size_t)
+ (msg_control_ptr + mhdr->msg_controllen - cmsg_ptr
+ - size_needed)
+ < cmsg->cmsg_len))
+
+ return (struct cmsghdr *) 0;
+ /* Now, we trust cmsg_len and can use it to find the next header. */
cmsg = (struct cmsghdr *) ((unsigned char *) cmsg
+ CMSG_ALIGN (cmsg->cmsg_len));
- if ((unsigned char *) (cmsg + 1) > ((unsigned char *) mhdr->msg_control
- + mhdr->msg_controllen)
- || ((unsigned char *) cmsg + CMSG_ALIGN (cmsg->cmsg_len)
- > ((unsigned char *) mhdr->msg_control + mhdr->msg_controllen)))
- /* No more entries. */
- return NULL;
return cmsg;
}
libc_hidden_def (__cmsg_nxthdr)
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include <kernel-features.h>
+#include <bits/wordsize.h>
+#include <bits/timesize.h>
-#ifndef __ASSUME_TIME64_SYSCALLS
+#if __TIMESIZE != 64
# include <stdint.h>
# include <string.h>
# include <sys/socket.h>
cmsg != NULL;
cmsg = CMSG_NXTHDR (msg, cmsg))
{
+ last = cmsg;
+
if (cmsg->cmsg_level != SOL_SOCKET)
continue;
tvts[1] = tmp[1];
break;
}
-
- last = cmsg;
}
- if (last == NULL || type == 0)
+ if (type == 0)
return;
if (CMSG_SPACE (sizeof tvts) > msgsize - msg->msg_controllen)
return;
}
+ /* Zero memory for the new cmsghdr, so reading cmsg_len field
+ by CMSG_NXTHDR does not trigger UB. */
+ memset (msg->msg_control + msg->msg_controllen, 0,
+ CMSG_SPACE (sizeof tvts));
msg->msg_controllen += CMSG_SPACE (sizeof tvts);
- cmsg = CMSG_NXTHDR(msg, last);
- if (cmsg == NULL)
- return;
+ cmsg = CMSG_NXTHDR (msg, last);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = type;
cmsg->cmsg_len = CMSG_LEN (sizeof tvts);
#define __NR_ftruncate64 46
#define __NR_futex 98
#define __NR_futex_time64 422
+#define __NR_futex_waitv 449
#define __NR_get_mempolicy 236
#define __NR_get_robust_list 100
#define __NR_getcpu 168
#define __NR_preadv2 286
#define __NR_prlimit64 261
#define __NR_process_madvise 440
+#define __NR_process_mrelease 448
#define __NR_process_vm_readv 270
#define __NR_process_vm_writev 271
#define __NR_pselect6 72
#define __NR_pwritev 70
#define __NR_pwritev2 287
#define __NR_quotactl 60
+#define __NR_quotactl_fd 443
#define __NR_read 63
#define __NR_readahead 213
#define __NR_readlinkat 78
#define __NR_sendmsg 211
#define __NR_sendto 206
#define __NR_set_mempolicy 237
+#define __NR_set_mempolicy_home_node 450
#define __NR_set_robust_list 99
#define __NR_set_thread_area 244
#define __NR_set_tid_address 96
GLIBC_2.29 __tls_get_addr F
GLIBC_2.29 _dl_mcount F
GLIBC_2.29 _r_debug D 0x14
+GLIBC_2.34 __rtld_version_placeholder F
--- /dev/null
+/* Early memory allocation for the dynamic loader. Generic version.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Mark symbols hidden in static PIE for early self relocation to work. */
+#if BUILD_PIE_DEFAULT
+# pragma GCC visibility push(hidden)
+#endif
+#include <startup.h>
+
+#include <ldsodefs.h>
+#include <stddef.h>
+#include <string.h>
+#include <sysdep.h>
+#include <unistd.h>
+
+#include <brk_call.h>
+#include <mmap_call.h>
+
+/* Defined in brk.c. */
+extern void *__curbrk;
+
+void *
+_dl_early_allocate (size_t size)
+{
+ void *result;
+
+ if (__curbrk != NULL)
+ /* A non-NULL __curbrk means the break has been initialized, so brk
+ must have run before; just extend the break by SIZE bytes with
+ __sbrk. Its (void *) -1 failure value is mapped to NULL so that
+ both branches report failure to the code below in the same way. */
+ {
+ result = __sbrk (size);
+ if (result == (void *) -1)
+ result = NULL;
+ }
+ else
+ {
+ /* If brk has not been invoked, there is no need to update
+ __curbrk. The first call to brk will take care of that.
+ The value returned by __brk_call (0) is taken as the current
+ break (PREVIOUS), and the break is then requested to move to
+ PREVIOUS + SIZE. If the second call returns PREVIOUS again the
+ break did not move, which is treated as failure; otherwise the
+ allocation starts at PREVIOUS. */
+ void *previous = __brk_call (0);
+ result = __brk_call (previous + size);
+ if (result == previous)
+ result = NULL;
+ else
+ result = previous;
+ }
+
+ /* If brk fails, fall back to mmap. This can happen due to
+ unfortunate ASLR layout decisions and kernel bugs, particularly
+ for static PIE. The fallback requests SIZE bytes as a private,
+ anonymous, readable and writable mapping, using the mmap2 system
+ call where __NR_mmap2 is defined and mmap otherwise. If the
+ system call reports an error as well, NULL is returned. */
+ if (result == NULL)
+ {
+ long int ret;
+ int prot = PROT_READ | PROT_WRITE;
+ int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+#ifdef __NR_mmap2
+ ret = MMAP_CALL_INTERNAL (mmap2, 0, size, prot, flags, -1, 0);
+#else
+ ret = MMAP_CALL_INTERNAL (mmap, 0, size, prot, flags, -1, 0);
+#endif
+ if (INTERNAL_SYSCALL_ERROR_P (ret))
+ result = NULL;
+ else
+ result = (void *) ret;
+ }
+
+ return result;
+}
--- /dev/null
+/* Parse the Linux auxiliary vector.
+ Copyright (C) 1995-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <elf.h>
+#include <entry.h>
+#include <fpu_control.h>
+#include <ldsodefs.h>
+#include <link.h>
+
+typedef ElfW(Addr) dl_parse_auxv_t[AT_MINSIGSTKSZ + 1];
+
+/* Copy the auxiliary vector AV into AUXV_VALUES and set up GLRO
+ variables.
+
+ The slots for AT_ENTRY, AT_PAGESZ, AT_FPUCW and AT_MINSIGSTKSZ are
+ first set to built-in defaults. Then every entry of AV up to the
+ AT_NULL terminator whose type is at most AT_MINSIGSTKSZ overwrites
+ the slot indexed by its type; entries with a larger type are
+ skipped. Slots touched by neither step keep whatever the caller
+ stored in AUXV_VALUES beforehand.
+
+ Afterwards the page size, secure-mode flag, platform string,
+ hardware capabilities, clock tick, FPU control word, random bytes
+ pointer, minimum signal stack size and vDSO address are copied from
+ AUXV_VALUES into the corresponding global variables, followed by
+ whatever DL_PLATFORM_AUXV expands to. */
+static inline
+void _dl_parse_auxv (ElfW(auxv_t) *av, dl_parse_auxv_t auxv_values)
+{
+ auxv_values[AT_ENTRY] = (ElfW(Addr)) ENTRY_POINT;
+ auxv_values[AT_PAGESZ] = EXEC_PAGESIZE;
+ auxv_values[AT_FPUCW] = _FPU_DEFAULT;
+
+ /* NB: Default to a constant CONSTANT_MINSIGSTKSZ. The static
+ assertion below rejects a definition that is not a compile-time
+ constant. */
+ _Static_assert (__builtin_constant_p (CONSTANT_MINSIGSTKSZ),
+ "CONSTANT_MINSIGSTKSZ is constant");
+ auxv_values[AT_MINSIGSTKSZ] = CONSTANT_MINSIGSTKSZ;
+
+ for (; av->a_type != AT_NULL; av++)
+ if (av->a_type <= AT_MINSIGSTKSZ)
+ auxv_values[av->a_type] = av->a_un.a_val;
+
+ GLRO(dl_pagesize) = auxv_values[AT_PAGESZ];
+ __libc_enable_secure = auxv_values[AT_SECURE];
+ GLRO(dl_platform) = (void *) auxv_values[AT_PLATFORM];
+ GLRO(dl_hwcap) = auxv_values[AT_HWCAP];
+ GLRO(dl_hwcap2) = auxv_values[AT_HWCAP2];
+ GLRO(dl_clktck) = auxv_values[AT_CLKTCK];
+ GLRO(dl_fpu_control) = auxv_values[AT_FPUCW];
+ _dl_random = (void *) auxv_values[AT_RANDOM];
+ GLRO(dl_minsigstacksize) = auxv_values[AT_MINSIGSTKSZ];
+ GLRO(dl_sysinfo_dso) = (void *) auxv_values[AT_SYSINFO_EHDR];
+#ifdef NEED_DL_SYSINFO
+ if (GLRO(dl_sysinfo_dso) != NULL)
+ GLRO(dl_sysinfo) = auxv_values[AT_SYSINFO];
+#endif
+
+ DL_PLATFORM_AUXV
+}
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-/* Linux needs some special initialization, but otherwise uses
- the generic dynamic linker system interface code. */
-
-#include <string.h>
+#include <_itoa.h>
+#include <assert.h>
+#include <dl-auxv.h>
+#include <dl-osinfo.h>
+#include <dl-parse_auxv.h>
+#include <dl-procinfo.h>
+#include <dl-tunables.h>
+#include <elf.h>
+#include <errno.h>
#include <fcntl.h>
-#include <unistd.h>
-#include <sys/param.h>
-#include <sys/utsname.h>
#include <ldsodefs.h>
+#include <libc-internal.h>
+#include <libintl.h>
#include <not-cancel.h>
+#include <stdlib.h>
+#include <string.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <tls.h>
+#include <unistd.h>
+
+#include <dl-machine.h>
+#include <dl-hwcap-check.h>
#ifdef SHARED
-# define DL_SYSDEP_INIT frob_brk ()
+extern char **_environ attribute_hidden;
+extern char _end[] attribute_hidden;
+
+/* Protect SUID program against misuse of file descriptors. */
+extern void __libc_check_standard_fds (void);
+
+int __libc_enable_secure attribute_relro = 0;
+rtld_hidden_data_def (__libc_enable_secure)
+/* This variable contains the lowest stack address ever used. */
+void *__libc_stack_end attribute_relro = NULL;
+rtld_hidden_data_def(__libc_stack_end)
+void *_dl_random attribute_relro = NULL;
+
+#ifndef DL_STACK_END
+# define DL_STACK_END(cookie) ((void *) (cookie))
+#endif
-static inline void
-frob_brk (void)
+/* Arguments passed to dl_main. */
+struct dl_main_arguments
{
- __brk (0); /* Initialize the break. */
+ const ElfW(Phdr) *phdr;
+ ElfW(Word) phnum;
+ ElfW(Addr) user_entry;
+};
+
+/* Separate function, so that dl_main can be called without the large
+ array on the stack. */
+static void
+_dl_sysdep_parse_arguments (void **start_argptr,
+ struct dl_main_arguments *args)
+{
+ _dl_argc = (intptr_t) *start_argptr;
+ _dl_argv = (char **) (start_argptr + 1); /* Necessary aliasing violation. */
+ _environ = _dl_argv + _dl_argc + 1;
+ for (char **tmp = _environ; ; ++tmp)
+ if (*tmp == NULL)
+ {
+ /* Another necessary aliasing violation. */
+ GLRO(dl_auxv) = (ElfW(auxv_t) *) (tmp + 1);
+ break;
+ }
+
+ dl_parse_auxv_t auxv_values = { 0, };
+ _dl_parse_auxv (GLRO(dl_auxv), auxv_values);
+
+ args->phdr = (const ElfW(Phdr) *) auxv_values[AT_PHDR];
+ args->phnum = auxv_values[AT_PHNUM];
+ args->user_entry = auxv_values[AT_ENTRY];
}
-# include <elf/dl-sysdep.c>
+ElfW(Addr)
+_dl_sysdep_start (void **start_argptr,
+ void (*dl_main) (const ElfW(Phdr) *phdr, ElfW(Word) phnum,
+ ElfW(Addr) *user_entry, ElfW(auxv_t) *auxv))
+{
+ __libc_stack_end = DL_STACK_END (start_argptr);
+
+ struct dl_main_arguments dl_main_args;
+ _dl_sysdep_parse_arguments (start_argptr, &dl_main_args);
+
+ dl_hwcap_check ();
+
+ __tunables_init (_environ);
+
+ __brk (0); /* Initialize the break. */
+
+#ifdef DL_PLATFORM_INIT
+ DL_PLATFORM_INIT;
#endif
+ /* Determine the length of the platform name. */
+ if (GLRO(dl_platform) != NULL)
+ GLRO(dl_platformlen) = strlen (GLRO(dl_platform));
+
+ if (__sbrk (0) == _end)
+ /* The dynamic linker was run as a program, and so the initial break
+ starts just after our bss, at &_end. The malloc in dl-minimal.c
+ will consume the rest of this page, so tell the kernel to move the
+ break up that far. When the user program examines its break, it
+ will see this new value and not clobber our data. */
+ __sbrk (GLRO(dl_pagesize)
+ - ((_end - (char *) 0) & (GLRO(dl_pagesize) - 1)));
+
+ /* If this is a SUID program we make sure that FDs 0, 1, and 2 are
+ allocated. If necessary we do it ourselves. If that is not
+ possible we stop the program. */
+ if (__builtin_expect (__libc_enable_secure, 0))
+ __libc_check_standard_fds ();
+
+ (*dl_main) (dl_main_args.phdr, dl_main_args.phnum,
+ &dl_main_args.user_entry, GLRO(dl_auxv));
+ return dl_main_args.user_entry;
+}
+
+void
+_dl_sysdep_start_cleanup (void)
+{
+}
+
+void
+_dl_show_auxv (void)
+{
+ char buf[64];
+ ElfW(auxv_t) *av;
+
+ /* Terminate string. The _itoa calls below are handed a pointer to
+ this last byte and their return value is printed as a string, so
+ the terminator has to be in place before the loop starts. */
+ buf[63] = '\0';
+
+ /* The following code assumes that the AT_* values are encoded
+ starting from 0 with AT_NULL, 1 for AT_IGNORE, and all other values
+ close by (otherwise the array will be too large). In case we have
+ to support a platform where these requirements are not fulfilled
+ some alternative implementation has to be used.
+
+ The table is indexed by a_type - 2 because types 0 and 1 are never
+ printed. Each element gives the label to print and how to format
+ the value: as decimal, as hexadecimal, as a string, or not at all
+ (ignore). Elements without an initializer have form unknown. */
+ for (av = GLRO(dl_auxv); av->a_type != AT_NULL; ++av)
+ {
+ static const struct
+ {
+ const char label[22];
+ enum { unknown = 0, dec, hex, str, ignore } form : 8;
+ } auxvars[] =
+ {
+ [AT_EXECFD - 2] = { "EXECFD: ", dec },
+ [AT_EXECFN - 2] = { "EXECFN: ", str },
+ [AT_PHDR - 2] = { "PHDR: 0x", hex },
+ [AT_PHENT - 2] = { "PHENT: ", dec },
+ [AT_PHNUM - 2] = { "PHNUM: ", dec },
+ [AT_PAGESZ - 2] = { "PAGESZ: ", dec },
+ [AT_BASE - 2] = { "BASE: 0x", hex },
+ [AT_FLAGS - 2] = { "FLAGS: 0x", hex },
+ [AT_ENTRY - 2] = { "ENTRY: 0x", hex },
+ [AT_NOTELF - 2] = { "NOTELF: ", hex },
+ [AT_UID - 2] = { "UID: ", dec },
+ [AT_EUID - 2] = { "EUID: ", dec },
+ [AT_GID - 2] = { "GID: ", dec },
+ [AT_EGID - 2] = { "EGID: ", dec },
+ [AT_PLATFORM - 2] = { "PLATFORM: ", str },
+ [AT_HWCAP - 2] = { "HWCAP: ", hex },
+ [AT_CLKTCK - 2] = { "CLKTCK: ", dec },
+ [AT_FPUCW - 2] = { "FPUCW: ", hex },
+ [AT_DCACHEBSIZE - 2] = { "DCACHEBSIZE: 0x", hex },
+ [AT_ICACHEBSIZE - 2] = { "ICACHEBSIZE: 0x", hex },
+ [AT_UCACHEBSIZE - 2] = { "UCACHEBSIZE: 0x", hex },
+ [AT_IGNOREPPC - 2] = { "IGNOREPPC", ignore },
+ [AT_SECURE - 2] = { "SECURE: ", dec },
+ [AT_BASE_PLATFORM - 2] = { "BASE_PLATFORM: ", str },
+ [AT_SYSINFO - 2] = { "SYSINFO: 0x", hex },
+ [AT_SYSINFO_EHDR - 2] = { "SYSINFO_EHDR: 0x", hex },
+ [AT_RANDOM - 2] = { "RANDOM: 0x", hex },
+ [AT_HWCAP2 - 2] = { "HWCAP2: 0x", hex },
+ [AT_MINSIGSTKSZ - 2] = { "MINSIGSTKSZ: ", dec },
+ [AT_L1I_CACHESIZE - 2] = { "L1I_CACHESIZE: ", dec },
+ [AT_L1I_CACHEGEOMETRY - 2] = { "L1I_CACHEGEOMETRY: 0x", hex },
+ [AT_L1D_CACHESIZE - 2] = { "L1D_CACHESIZE: ", dec },
+ [AT_L1D_CACHEGEOMETRY - 2] = { "L1D_CACHEGEOMETRY: 0x", hex },
+ [AT_L2_CACHESIZE - 2] = { "L2_CACHESIZE: ", dec },
+ [AT_L2_CACHEGEOMETRY - 2] = { "L2_CACHEGEOMETRY: 0x", hex },
+ [AT_L3_CACHESIZE - 2] = { "L3_CACHESIZE: ", dec },
+ [AT_L3_CACHEGEOMETRY - 2] = { "L3_CACHEGEOMETRY: 0x", hex },
+ };
+ unsigned int idx = (unsigned int) (av->a_type - 2);
+
+ if ((unsigned int) av->a_type < 2u
+ || (idx < sizeof (auxvars) / sizeof (auxvars[0])
+ && auxvars[idx].form == ignore))
+ continue;
+
+ assert (AT_NULL == 0);
+ assert (AT_IGNORE == 1);
+
+ /* Some entries are handled in a special way per platform. A zero
+ return value from _dl_procinfo is taken to mean that nothing
+ further has to be printed for this entry. */
+ if (_dl_procinfo (av->a_type, av->a_un.a_val) == 0)
+ continue;
+
+ if (idx < sizeof (auxvars) / sizeof (auxvars[0])
+ && auxvars[idx].form != unknown)
+ {
+ const char *val = (char *) av->a_un.a_val;
+
+ if (__builtin_expect (auxvars[idx].form, dec) == dec)
+ val = _itoa ((unsigned long int) av->a_un.a_val,
+ buf + sizeof buf - 1, 10, 0);
+ else if (__builtin_expect (auxvars[idx].form, hex) == hex)
+ val = _itoa ((unsigned long int) av->a_un.a_val,
+ buf + sizeof buf - 1, 16, 0);
+
+ _dl_printf ("AT_%s%s\n", auxvars[idx].label, val);
+
+ continue;
+ }
+
+ /* Unknown value: print a generic line. The entry's value is
+ formatted in hexadecimal into BUF2 (room for 16 digits plus the
+ terminator) and its type into BUF, so that both strings are
+ available for the single _dl_printf call. */
+ char buf2[17];
+ buf2[sizeof (buf2) - 1] = '\0';
+ const char *val2 = _itoa ((unsigned long int) av->a_un.a_val,
+ buf2 + sizeof buf2 - 1, 16, 0);
+ const char *val = _itoa ((unsigned long int) av->a_type,
+ buf + sizeof buf - 1, 16, 0);
+ _dl_printf ("AT_??? (0x%s): 0x%s\n", val, val2);
+ }
+}
+
+#endif /* SHARED */
+
int
attribute_hidden
_dl_discover_osversion (void)
{
-#if defined NEED_DL_SYSINFO_DSO && defined SHARED
+#ifdef SHARED
if (GLRO(dl_sysinfo_map) != NULL)
{
/* If the kernel-supplied DSO contains a note indicating the kernel's
}
}
}
-#endif
+#endif /* SHARED */
char bufmem[64];
char *buf = bufmem;
if ((flag == 0 || ((flag & ~AT_EACCESS) == 0 && ! __libc_enable_secure)))
return INLINE_SYSCALL (faccessat, 3, fd, file, mode);
- struct stat64 stats;
- if (__fstatat64 (fd, file, &stats, flag & AT_SYMLINK_NOFOLLOW))
+ struct __stat64_t64 stats;
+ if (__fstatat64_time64 (fd, file, &stats, flag & AT_SYMLINK_NOFOLLOW))
return -1;
mode &= (X_OK | W_OK | R_OK); /* Clear any bogus bits. */
/* Use fstatat because fstat does not work on O_PATH descriptors
before Linux 3.6. */
- struct stat64 st;
- if (__fstatat64 (pathfd, "", &st, AT_EMPTY_PATH) != 0)
+ struct __stat64_t64 st;
+ if (__fstatat64_time64 (pathfd, "", &st, AT_EMPTY_PATH) != 0)
{
__close_nocancel (pathfd);
return -1;
}
# Skip pseudo-system calls which describe ranges.
-/^#define __NR_(syscalls|arch_specific_syscall|(OABI_)?SYSCALL_BASE) / {
+/^#define __NR_(syscalls|arch_specific_syscall|(OABI_)?SYSCALL_BASE|SYSCALL_MASK) / {
next;
}
/^#define __NR_(|64_|[NO]32_)Linux(_syscalls)? / {
<https://www.gnu.org/licenses/>. */
#include <array_length.h>
+#include <assert.h>
+#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <ldsodefs.h>
#include <sys/sysinfo.h>
#include <sysdep.h>
-/* Compute the population count of the entire array. */
-static int
-__get_nprocs_count (const unsigned long int *array, size_t length)
+int
+__get_nprocs_sched (void)
{
- int count = 0;
- for (size_t i = 0; i < length; ++i)
- if (__builtin_add_overflow (count, __builtin_popcountl (array[i]),
- &count))
- return INT_MAX;
- return count;
-}
+ enum
+ {
+ max_num_cpus = 32768,
+ cpu_bits_size = CPU_ALLOC_SIZE (32768)
+ };
-/* __get_nprocs with a large buffer. */
-static int
-__get_nprocs_large (void)
-{
- /* This code cannot use scratch_buffer because it is used during
- malloc initialization. */
- size_t pagesize = GLRO (dl_pagesize);
- unsigned long int *page = __mmap (0, pagesize, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- if (page == MAP_FAILED)
- return 2;
- int r = INTERNAL_SYSCALL_CALL (sched_getaffinity, 0, pagesize, page);
- int count;
+ /* This cannot use malloc because it is used on malloc initialization. */
+ __cpu_mask cpu_bits[cpu_bits_size / sizeof (__cpu_mask)];
+ int r = INTERNAL_SYSCALL_CALL (sched_getaffinity, 0, cpu_bits_size,
+ cpu_bits);
if (r > 0)
- count = __get_nprocs_count (page, pagesize / sizeof (unsigned long int));
+ return CPU_COUNT_S (r, (cpu_set_t*) cpu_bits);
else if (r == -EINVAL)
- /* One page is still not enough to store the bits. A more-or-less
- arbitrary value. This assumes t hat such large systems never
- happen in practice. */
- count = GLRO (dl_pagesize) * CHAR_BIT;
- else
- count = 2;
- __munmap (page, GLRO (dl_pagesize));
- return count;
+ /* The input buffer is still not large enough to store the CPU mask.
+ Return an arbitrary value, assuming that such systems are rare and
+ that there are no offline CPUs. */
+ return max_num_cpus;
+ /* Some other error. */
+ return 0;
}
-int
-__get_nprocs (void)
+static char *
+next_line (int fd, char *const buffer, char **cp, char **re,
+ char *const buffer_end)
{
- /* Fast path for most systems. The kernel expects a buffer size
- that is a multiple of 8. */
- unsigned long int small_buffer[1024 / CHAR_BIT / sizeof (unsigned long int)];
- int r = INTERNAL_SYSCALL_CALL (sched_getaffinity, 0,
- sizeof (small_buffer), small_buffer);
- if (r > 0)
- return __get_nprocs_count (small_buffer, r / sizeof (unsigned long int));
- else if (r == -EINVAL)
- /* The kernel requests a larger buffer to store the data. */
- return __get_nprocs_large ();
- else
- /* Some other error. 2 is conservative (not a uniprocessor
- system, so atomics are needed). */
- return 2;
+ char *res = *cp;
+ char *nl = memchr (*cp, '\n', *re - *cp);
+ if (nl == NULL)
+ {
+ if (*cp != buffer)
+ {
+ if (*re == buffer_end)
+ {
+ memmove (buffer, *cp, *re - *cp);
+ *re = buffer + (*re - *cp);
+ *cp = buffer;
+
+ ssize_t n = __read_nocancel (fd, *re, buffer_end - *re);
+ if (n < 0)
+ return NULL;
+
+ *re += n;
+
+ nl = memchr (*cp, '\n', *re - *cp);
+ while (nl == NULL && *re == buffer_end)
+ {
+ /* Truncate too long lines. */
+ *re = buffer + 3 * (buffer_end - buffer) / 4;
+ n = __read_nocancel (fd, *re, buffer_end - *re);
+ if (n < 0)
+ return NULL;
+
+ nl = memchr (*re, '\n', n);
+ **re = '\n';
+ *re += n;
+ }
+ }
+ else
+ nl = memchr (*cp, '\n', *re - *cp);
+
+ res = *cp;
+ }
+
+ if (nl == NULL)
+ nl = *re - 1;
+ }
+
+ *cp = nl + 1;
+ assert (*cp <= *re);
+
+ return res == *re ? NULL : res;
}
-libc_hidden_def (__get_nprocs)
-weak_alias (__get_nprocs, get_nprocs)
+static int
+get_nproc_stat (void)
+{
+ enum { buffer_size = 1024 };
+ char buffer[buffer_size];
+ char *buffer_end = buffer + buffer_size;
+ char *cp = buffer_end;
+ char *re = buffer_end;
+ int result = 0;
-/* On some architectures it is possible to distinguish between configured
- and active cpus. */
-int
-__get_nprocs_conf (void)
+ const int flags = O_RDONLY | O_CLOEXEC;
+ int fd = __open_nocancel ("/proc/stat", flags);
+ if (fd != -1)
+ {
+ char *l;
+ while ((l = next_line (fd, buffer, &cp, &re, buffer_end)) != NULL)
+ /* The current format of /proc/stat has all the cpu* entries
+ at the front. We assume here that it stays this way. */
+ if (strncmp (l, "cpu", 3) != 0)
+ break;
+ else if (isdigit (l[3]))
+ ++result;
+
+ __close_nocancel_nostatus (fd);
+ }
+
+ return result;
+}
+
+static int
+get_nprocs_cpu_online (void)
{
- /* Try to use the sysfs filesystem. It has actual information about
- online processors. */
+ enum { buffer_size = 1024 };
+ char buffer[buffer_size];
+ char *buffer_end = buffer + buffer_size;
+ char *cp = buffer_end;
+ char *re = buffer_end;
+
+ const int flags = O_RDONLY | O_CLOEXEC;
+ /* This file contains comma-separated ranges. */
+ int fd = __open_nocancel ("/sys/devices/system/cpu/online", flags);
+ char *l;
+ int result = 0;
+ if (fd != -1)
+ {
+ l = next_line (fd, buffer, &cp, &re, buffer_end);
+ if (l != NULL)
+ do
+ {
+ char *endp;
+ unsigned long int n = strtoul (l, &endp, 10);
+ if (l == endp)
+ {
+ result = 0;
+ break;
+ }
+
+ unsigned long int m = n;
+ if (*endp == '-')
+ {
+ l = endp + 1;
+ m = strtoul (l, &endp, 10);
+ if (l == endp)
+ {
+ result = 0;
+ break;
+ }
+ }
+
+ if (m >= n)
+ result += m - n + 1;
+
+ l = endp;
+ if (l < re && *l == ',')
+ ++l;
+ }
+ while (l < re && *l != '\n');
+
+ __close_nocancel_nostatus (fd);
+ }
+
+ return result;
+}
+
+static int
+get_nprocs_cpu (void)
+{
+ int count = 0;
DIR *dir = __opendir ("/sys/devices/system/cpu");
if (dir != NULL)
{
- int count = 0;
struct dirent64 *d;
while ((d = __readdir64 (dir)) != NULL)
__closedir (dir);
- return count;
}
+ return count;
+}
+
+static int
+get_nprocs_fallback (void)
+{
+ int result;
+
+ /* Try /proc/stat first. A result of zero means that no processor
+ count could be obtained this way, so the next source is tried. */
+ result = get_nproc_stat ();
+ if (result != 0)
+ return result;
+
+ /* Try sched_getaffinity. Again, zero means no usable answer. */
+ result = __get_nprocs_sched ();
+ if (result != 0)
+ return result;
+
+ /* We failed to obtain an accurate number. Be conservative: return
+ the smallest number meaning that this is not a uniprocessor system,
+ so atomics are needed. */
+ return 2;
+}
+
+int
+__get_nprocs (void)
+{
+ /* Try /sys/devices/system/cpu/online first. A result of zero means
+ that no count could be derived from that file. */
+ int result = get_nprocs_cpu_online ();
+ if (result != 0)
+ return result;
+
+ /* Fall back to /proc/stat and sched_getaffinity. The fallback
+ always returns a nonzero value. */
+ return get_nprocs_fallback ();
+}
+libc_hidden_def (__get_nprocs)
+weak_alias (__get_nprocs, get_nprocs)
+
+/* On some architectures it is possible to distinguish between configured
+ and active cpus. */
+int
+__get_nprocs_conf (void)
+{
+ /* Try /sys/devices/system/cpu/ first. */
+ int result = get_nprocs_cpu ();
+ if (result != 0)
+ return result;
- return 1;
+ /* Fall back to /proc/stat and sched_getaffinity. */
+ return get_nprocs_fallback ();
}
libc_hidden_def (__get_nprocs_conf)
weak_alias (__get_nprocs_conf, get_nprocs_conf)
(unused|reserved)[0-9]+
# Pseudo-system call which describes a range.
- |(syscalls|arch_specific_syscall|(OABI_)?SYSCALL_BASE)
+ |(syscalls|arch_specific_syscall|(OABI_)?SYSCALL_BASE|SYSCALL_MASK)
|(|64_|[NO]32_)Linux(_syscalls)?
)""", re.X)
# define GLOB_LSTAT gl_lstat
# define GLOB_STAT64 __stat64_time64
# define GLOB_LSTAT64 __lstat64_time64
+# define GLOB_FSTATAT64 __fstatat64_time64
# define COMPILE_GLOB64 1
#define __NR_ftruncate64 200
#define __NR_futex 210
#define __NR_futex_time64 422
+#define __NR_futex_waitv 449
#define __NR_futimesat 279
#define __NR_get_mempolicy 261
#define __NR_get_robust_list 290
#define __NR_preadv2 347
#define __NR_prlimit64 321
#define __NR_process_madvise 440
+#define __NR_process_mrelease 448
#define __NR_process_vm_readv 330
#define __NR_process_vm_writev 331
#define __NR_pselect6 273
#define __NR_pwritev 316
#define __NR_pwritev2 348
#define __NR_quotactl 131
+#define __NR_quotactl_fd 443
#define __NR_read 3
#define __NR_readahead 207
#define __NR_readlink 85
#define __NR_sendmsg 183
#define __NR_sendto 82
#define __NR_set_mempolicy 262
+#define __NR_set_mempolicy_home_node 450
#define __NR_set_robust_list 289
#define __NR_set_tid_address 237
#define __NR_setdomainname 121
#include "ucontext_i.h"
- /* Trampoline function. Non-standard calling ABI. */
+ /* Trampoline function. Non-standard calling ABI. */
/* Can not use ENTRY(__getcontext_ret) here. */
.type __getcontext_ret, @function
.hidden __getcontext_ret
__getcontext_ret:
.proc
.callinfo FRAME=0,NO_CALLS
- /* r26-r23 contain original r3-r6, but because setcontext
- does not reload r3-r6 (it's using them as temporaries)
- we must save them elsewhere and swap them back in. */
- copy %r23, %r3
- copy %r24, %r4
- copy %r25, %r5
- copy %r26, %r6
- /* r20 contains original return pointer. */
- bv 0(%r20)
+ /* Because setcontext does not reload r3-r6 (it's using them
+ as temporaries), we must load them ourselves. */
+ ldw oR3(%r26), %r3
+ ldw oR4(%r26), %r4
+ ldw oR5(%r26), %r5
+ ldw oR6(%r26), %r6
+
+ /* Also reload registers clobbered by $$dyncall. */
+ ldw oR21(%r26), %r21
+ ldw oR22(%r26), %r22
+ ldw oR31(%r26), %r31
+
+ /* oR0 contains original return pointer. */
+ ldw oR0(%r26), %rp
+ bv 0(%rp)
copy %r0, %ret0
.procend
.size __getcontext_ret, .-__getcontext_ret
stw %r17, oR17(%r26)
stw %r18, oR18(%r26)
stw %r19, oR19(%r26)
- /* stw %r20, oR20(%r26) - used for trampoline. */
+ stw %r20, oR20(%r26)
stw %r21, oR21(%r26)
stw %r22, oR22(%r26)
- /* stw %r23, oR23(%r26) - used for trampoline. */
- /* stw %r24, oR24(%r26) - used for trampoline. */
- /* stw %r25, oR25(%r26) - used for trampoline. */
- /* stw %r26, oR26(%r26) - used for trampoline. */
+ stw %r23, oR23(%r26)
+ stw %r24, oR24(%r26)
+ stw %r25, oR25(%r26)
+ stw %r26, oR26(%r26)
stw %r27, oR27(%r26)
stw %r28, oR28(%r26)
stw %r29, oR29(%r26)
stw %r0, oIASQ1(%r26)
stw %r0, oIAOQ0(%r26)
stw %r0, oIAOQ1(%r26)
- stw %r0, oSAR(%r26) /* used as flag in swapcontext(). */
+
+ /* Save SAR register. */
+ mfctl %sar, %r1
+ stw %r1, oSAR(%r26) /* MSB used as flag in swapcontext(). */
/* Store floating-point regs. */
stw %r19, -32(%sp)
.cfi_offset 19, 32
#endif
+ stw %ret1, -60(%sp)
+ .cfi_offset 29, 4
/* Set up the trampoline registers.
- r20, r23, r24, r25, r26 and r2 are clobbered
- by call to getcontext() anyway. Reuse them. */
- stw %r2, oR20(%r26)
- stw %r3, oR23(%r26)
- stw %r4, oR24(%r26)
- stw %r5, oR25(%r26)
- stw %r6, oR26(%r26)
+ Use oR0 context slot to save the return pointer. */
+ stw %r2, oR0(%r26)
#ifdef PIC
addil LT%__getcontext_ret, %r19
ldw RT%__getcontext_ret(%r1), %r1
#ifdef PIC
ldw -32(%sp), %r19
#endif
+ ldw -60(%sp), %ret1
bv %r0(%r2)
ldwm -64(%sp), %r4
END(__getcontext)
GLIBC_2.2 _dl_mcount F
GLIBC_2.2 _r_debug D 0x14
GLIBC_2.3 __tls_get_addr F
+GLIBC_2.34 __rtld_version_placeholder F
GLIBC_2.4 __stack_chk_guard D 0x4
stw %r19, -32(%sp)
.cfi_offset 19, 32
#endif
+ stw %ret1, -60(%sp)
+ .cfi_offset 29, 4
/* Save ucp. */
copy %r26, %r3
ldw oR18(%r3), %r18
ldw oR19(%r3), %r19
ldw oR20(%r3), %r20
- ldw oR21(%r3), %r21
+ ldw oR21(%r3), %r21 /* maybe clobbered by dyncall */
/* ldw oR22(%r3), %r22 - dyncall arg. */
ldw oR23(%r3), %r23
ldw oR24(%r3), %r24
ldw oR30(%r3), %sp
/* ldw oR31(%r3), %r31 - dyncall scratch register */
+ /* Restore SAR register. */
+ ldw oSAR(%r3), %r22
+ mtsar %r22
+
/* Restore floating-point registers. */
ldo oFPREGS31(%r3), %r22
fldds 0(%r22), %fr31
#ifdef PIC
ldw -32(%r30), %r19
#endif
+ ldw -60(%r30), %ret1
bv %r0(%r2)
ldwm -64(%r30), %r3
L(pseudo_end):
--- /dev/null
+/* Swap to new context.
+ Copyright (C) 2008-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "ucontext_i.h"
+
+ .text
+ENTRY(__swapcontext)
+
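+ /* Copy rp to ret0 (r28). __getcontext stores r28 in the oR28 slot
+ of oucp, which is where the return pointer is reloaded from
+ below. */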
+ copy %rp,%ret0
+
+ /* Create a frame. */
+ ldo 64(%sp),%sp
+ .cfi_def_cfa_offset -64
+
+ /* Save the current machine context to oucp. */
+ bl __getcontext,%rp
+
+ /* Copy oucp to register ret1 (r29). __getcontext saves and
+ restores it on a normal return. It is restored from oR29
+ on reactivation. */
+ copy %r26,%ret1
+
+ /* Pop frame. */
+ ldo -64(%sp),%sp
+ .cfi_def_cfa_offset 0
+
+ /* Load return pointer from oR28. */
+ ldw oR28(%ret1),%rp
+
+ /* Return if error. */
+ or,= %r0,%ret0,%r0
+ bv,n %r0(%rp)
+
+ /* Load sc_sar flag. */
+ ldb oSAR(%ret1),%r20
+
+ /* Return if oucp context has been reactivated. */
+ or,= %r0,%r20,%r0
+ bv,n %r0(%rp)
+
+ /* Mark sc_sar flag. */
+ ldi 1,%r20
+ stb %r20,oSAR(%ret1)
+
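+ /* Activate the machine context in ucp. ucp was passed in r25 and
+ is fetched back from the oR25 slot that __getcontext saved in
+ oucp; the load that follows the branch (presumably executing in
+ its delay slot) places it in r26 as the argument. */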
+ bl __setcontext,%rp
+ ldw oR25(%ret1),%r26
+
+ /* Load return pointer. */
+ ldw oR28(%ret1),%rp
+ bv,n %r0(%rp)
+
+END(__swapcontext)
+
+weak_alias (__swapcontext, swapcontext)
+++ /dev/null
-/* Swap to new context.
- Copyright (C) 2008-2021 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Helge Deller <deller@gmx.de>, 2008.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library. If not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <ucontext.h>
-
-extern int __getcontext (ucontext_t *ucp);
-extern int __setcontext (const ucontext_t *ucp);
-
-int
-__swapcontext (ucontext_t *oucp, const ucontext_t *ucp)
-{
- /* Save the current machine context to oucp. */
- __getcontext (oucp);
-
- /* mark sc_sar flag to skip the setcontext call on reactivation. */
- if (oucp->uc_mcontext.sc_sar == 0) {
- oucp->uc_mcontext.sc_sar++;
-
- /* Restore the machine context in ucp. */
- __setcontext (ucp);
- }
-
- return 0;
-}
-
-weak_alias (__swapcontext, swapcontext)
endif
ifeq ($(subdir),io)
-sysdep_routines += libc-do-syscall
+sysdep_routines += libc-do-syscall libc-do-syscall-int80
endif
ifeq ($(subdir),stdlib)
#define __NR_ftruncate64 194
#define __NR_futex 240
#define __NR_futex_time64 422
+#define __NR_futex_waitv 449
#define __NR_futimesat 299
#define __NR_get_kernel_syms 130
#define __NR_get_mempolicy 275
#define __NR_mbind 274
#define __NR_membarrier 375
#define __NR_memfd_create 356
+#define __NR_memfd_secret 447
#define __NR_migrate_pages 294
#define __NR_mincore 218
#define __NR_mkdir 39
#define __NR_preadv2 378
#define __NR_prlimit64 340
#define __NR_process_madvise 440
+#define __NR_process_mrelease 448
#define __NR_process_vm_readv 347
#define __NR_process_vm_writev 348
#define __NR_prof 44
#define __NR_pwritev2 379
#define __NR_query_module 167
#define __NR_quotactl 131
+#define __NR_quotactl_fd 443
#define __NR_read 3
#define __NR_readahead 225
#define __NR_readdir 89
#define __NR_sendmsg 370
#define __NR_sendto 369
#define __NR_set_mempolicy 276
+#define __NR_set_mempolicy_home_node 450
#define __NR_set_robust_list 311
#define __NR_set_thread_area 243
#define __NR_set_tid_address 258
# This file is generated from configure.ac by Autoconf. DO NOT EDIT!
# Local configure fragment for sysdeps/unix/sysv/linux/i386.
-# Check if CFLAGS allows compiler to use ebp register in inline assembly.
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if compiler flags allows ebp in inline assembly" >&5
-$as_echo_n "checking if compiler flags allows ebp in inline assembly... " >&6; }
-if ${libc_cv_can_use_register_asm_ebp+:} false; then :
- $as_echo_n "(cached) " >&6
-else
-
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
- void foo (int i)
- {
- register int reg asm ("ebp") = i;
- asm ("# %0" : : "r" (reg));
- }
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- libc_cv_can_use_register_asm_ebp=yes
-else
- libc_cv_can_use_register_asm_ebp=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_can_use_register_asm_ebp" >&5
-$as_echo "$libc_cv_can_use_register_asm_ebp" >&6; }
-if test $libc_cv_can_use_register_asm_ebp = yes; then
- $as_echo "#define CAN_USE_REGISTER_ASM_EBP 1" >>confdefs.h
-
-fi
-
libc_cv_gcc_unwind_find_fde=yes
ldd_rewrite_script=sysdeps/unix/sysv/linux/ldd-rewrite.sed
GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
# Local configure fragment for sysdeps/unix/sysv/linux/i386.
-# Check if CFLAGS allows compiler to use ebp register in inline assembly.
-AC_CACHE_CHECK([if compiler flags allows ebp in inline assembly],
- libc_cv_can_use_register_asm_ebp, [
-AC_COMPILE_IFELSE(
- [AC_LANG_PROGRAM([
- void foo (int i)
- {
- register int reg asm ("ebp") = i;
- asm ("# %0" : : "r" (reg));
- }])],
- [libc_cv_can_use_register_asm_ebp=yes],
- [libc_cv_can_use_register_asm_ebp=no])
-])
-if test $libc_cv_can_use_register_asm_ebp = yes; then
- AC_DEFINE(CAN_USE_REGISTER_ASM_EBP)
-fi
-
libc_cv_gcc_unwind_find_fde=yes
ldd_rewrite_script=sysdeps/unix/sysv/linux/ldd-rewrite.sed
GLIBC_2.1 _dl_mcount F
GLIBC_2.3 ___tls_get_addr F
GLIBC_2.3 __tls_get_addr F
+GLIBC_2.34 __rtld_version_placeholder F
--- /dev/null
+/* Out-of-line syscall stub for six-argument syscalls from C. For static PIE.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef SHARED
+# define I386_USE_SYSENTER 0
+# include <sysdep.h>
+
+# define __libc_do_syscall __libc_do_syscall_int80
+# include "libc-do-syscall.S"
+#endif
#include <sysdep.h>
-#ifndef OPTIMIZE_FOR_GCC_5
-
/* %eax, %ecx, %edx and %esi contain the values expected by the kernel.
%edi points to a structure with the values of %ebx, %edi and %ebp. */
cfi_restore (ebx)
ret
END (__libc_do_syscall)
-#endif
/* Linux/i386 definitions of functions used by static libc main startup.
- Copyright (C) 2017-2021 Free Software Foundation, Inc.
+ Copyright (C) 2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#if BUILD_PIE_DEFAULT
-/* Can't use "call *%gs:SYSINFO_OFFSET" during statup in static PIE. */
-# define I386_USE_SYSENTER 0
+/* Can't use "call *%gs:SYSINFO_OFFSET" during startup. */
+#define I386_USE_SYSENTER 0
-# include <sysdep.h>
-# include <abort-instr.h>
-
-__attribute__ ((__noreturn__))
-static inline void
-_startup_fatal (const char *message __attribute__ ((unused)))
-{
- /* This is only called very early during startup in static PIE.
- FIXME: How can it be improved? */
- ABORT_INSTRUCTION;
- __builtin_unreachable ();
-}
-
-static inline uid_t
-startup_getuid (void)
-{
- return (uid_t) INTERNAL_SYSCALL_CALL (getuid32);
-}
-
-static inline uid_t
-startup_geteuid (void)
-{
- return (uid_t) INTERNAL_SYSCALL_CALL (geteuid32);
-}
-
-static inline gid_t
-startup_getgid (void)
-{
- return (gid_t) INTERNAL_SYSCALL_CALL (getgid32);
-}
-
-static inline gid_t
-startup_getegid (void)
-{
- return (gid_t) INTERNAL_SYSCALL_CALL (getegid32);
-}
-#else
-# include_next <startup.h>
-#endif
+#include_next <startup.h>
# endif
#endif
-/* Since GCC 5 and above can properly spill %ebx with PIC when needed,
- we can inline syscalls with 6 arguments if GCC 5 or above is used
- to compile glibc. Disable GCC 5 optimization when compiling for
- profiling or when -fno-omit-frame-pointer is used since asm ("ebp")
- can't be used to put the 6th argument in %ebp for syscall. */
-#if !defined PROF && CAN_USE_REGISTER_ASM_EBP
-# define OPTIMIZE_FOR_GCC_5
+#if !I386_USE_SYSENTER && IS_IN (libc) && !defined SHARED
+/* Inside static libc, we have two versions of the out-of-line helper
+ used for six-argument syscalls: __libc_do_syscall and
+ __libc_do_syscall_int80. For compilation units
+ with !I386_USE_SYSENTER, the vDSO entry mechanism cannot be
+ used, so the six-argument macros below call the int80 variant.
+ I386_DO_SYSCALL_STRING is the symbol name pasted into their
+ "call" instruction. */
+# define I386_DO_SYSCALL_STRING "__libc_do_syscall_int80"
+#else
+# define I386_DO_SYSCALL_STRING "__libc_do_syscall"
#endif
#ifdef __ASSEMBLER__
extern int __syscall_error (int)
attribute_hidden __attribute__ ((__regparm__ (1)));
-#ifndef OPTIMIZE_FOR_GCC_5
-/* We need some help from the assembler to generate optimal code. We
- define some macros here which later will be used. */
-asm (".L__X'%ebx = 1\n\t"
- ".L__X'%ecx = 2\n\t"
- ".L__X'%edx = 2\n\t"
- ".L__X'%eax = 3\n\t"
- ".L__X'%esi = 3\n\t"
- ".L__X'%edi = 3\n\t"
- ".L__X'%ebp = 3\n\t"
- ".L__X'%esp = 3\n\t"
- ".macro bpushl name reg\n\t"
- ".if 1 - \\name\n\t"
- ".if 2 - \\name\n\t"
- "error\n\t"
- ".else\n\t"
- "xchgl \\reg, %ebx\n\t"
- ".endif\n\t"
- ".endif\n\t"
- ".endm\n\t"
- ".macro bpopl name reg\n\t"
- ".if 1 - \\name\n\t"
- ".if 2 - \\name\n\t"
- "error\n\t"
- ".else\n\t"
- "xchgl \\reg, %ebx\n\t"
- ".endif\n\t"
- ".endif\n\t"
- ".endm\n\t");
-
/* Six-argument syscalls use an out-of-line helper, because an inline
asm using all registers apart from %esp cannot work reliably and
the assembler does not support describing an asm that saves and
{
int ebx, edi, ebp;
};
-#endif
# define VDSO_NAME "LINUX_2.6"
# define VDSO_HASH 61765110
/* Each object using 6-argument inline syscalls must include a
definition of __libc_do_syscall. */
-#ifdef OPTIMIZE_FOR_GCC_5
-# define INTERNAL_SYSCALL_MAIN_6(name, args...) \
- INTERNAL_SYSCALL_MAIN_INLINE(name, 6, args)
-# define INTERNAL_SYSCALL_MAIN_NCS_6(name, args...) \
- INTERNAL_SYSCALL_MAIN_NCS(name, 6, args)
-#else /* GCC 5 */
-# define INTERNAL_SYSCALL_MAIN_6(name, arg1, arg2, arg3, \
- arg4, arg5, arg6) \
+#define INTERNAL_SYSCALL_MAIN_6(name, arg1, arg2, arg3, \
+ arg4, arg5, arg6) \
struct libc_do_syscall_args _xv = \
{ \
(int) (arg1), \
}; \
asm volatile ( \
"movl %1, %%eax\n\t" \
- "call __libc_do_syscall" \
+ "call " I386_DO_SYSCALL_STRING \
: "=a" (resultvar) \
: "i" (__NR_##name), "c" (arg2), "d" (arg3), "S" (arg4), "D" (&_xv) \
: "memory", "cc")
-# define INTERNAL_SYSCALL_MAIN_NCS_6(name, arg1, arg2, arg3, \
- arg4, arg5, arg6) \
+#define INTERNAL_SYSCALL_MAIN_NCS_6(name, arg1, arg2, arg3, \
+ arg4, arg5, arg6) \
struct libc_do_syscall_args _xv = \
{ \
(int) (arg1), \
}; \
asm volatile ( \
"movl %1, %%eax\n\t" \
- "call __libc_do_syscall" \
+ "call " I386_DO_SYSCALL_STRING \
: "=a" (resultvar) \
: "a" (name), "c" (arg2), "d" (arg3), "S" (arg4), "D" (&_xv) \
: "memory", "cc")
-#endif /* GCC 5 */
#define INTERNAL_SYSCALL(name, nr, args...) \
({ \
(int) resultvar; })
#if I386_USE_SYSENTER
-# ifdef OPTIMIZE_FOR_GCC_5
-# ifdef PIC
-# define INTERNAL_SYSCALL_MAIN_INLINE(name, nr, args...) \
+# ifdef PIC
+# define INTERNAL_SYSCALL_MAIN_INLINE(name, nr, args...) \
LOADREGS_##nr(args) \
asm volatile ( \
"call *%%gs:%P2" \
: "=a" (resultvar) \
: "a" (__NR_##name), "i" (offsetof (tcbhead_t, sysinfo)) \
ASMARGS_##nr(args) : "memory", "cc")
-# define INTERNAL_SYSCALL_MAIN_NCS(name, nr, args...) \
+# define INTERNAL_SYSCALL_MAIN_NCS(name, nr, args...) \
LOADREGS_##nr(args) \
asm volatile ( \
"call *%%gs:%P2" \
: "=a" (resultvar) \
: "a" (name), "i" (offsetof (tcbhead_t, sysinfo)) \
ASMARGS_##nr(args) : "memory", "cc")
-# else
-# define INTERNAL_SYSCALL_MAIN_INLINE(name, nr, args...) \
+# else /* I386_USE_SYSENTER && !PIC */
+# define INTERNAL_SYSCALL_MAIN_INLINE(name, nr, args...) \
LOADREGS_##nr(args) \
asm volatile ( \
"call *_dl_sysinfo" \
: "=a" (resultvar) \
: "a" (__NR_##name) ASMARGS_##nr(args) : "memory", "cc")
-# define INTERNAL_SYSCALL_MAIN_NCS(name, nr, args...) \
+# define INTERNAL_SYSCALL_MAIN_NCS(name, nr, args...) \
LOADREGS_##nr(args) \
asm volatile ( \
"call *_dl_sysinfo" \
: "=a" (resultvar) \
: "a" (name) ASMARGS_##nr(args) : "memory", "cc")
-# endif
-# else /* GCC 5 */
-# ifdef PIC
-# define INTERNAL_SYSCALL_MAIN_INLINE(name, nr, args...) \
- EXTRAVAR_##nr \
- asm volatile ( \
- LOADARGS_##nr \
- "movl %1, %%eax\n\t" \
- "call *%%gs:%P2\n\t" \
- RESTOREARGS_##nr \
- : "=a" (resultvar) \
- : "i" (__NR_##name), "i" (offsetof (tcbhead_t, sysinfo)) \
- ASMFMT_##nr(args) : "memory", "cc")
-# define INTERNAL_SYSCALL_MAIN_NCS(name, nr, args...) \
- EXTRAVAR_##nr \
- asm volatile ( \
- LOADARGS_##nr \
- "call *%%gs:%P2\n\t" \
- RESTOREARGS_##nr \
- : "=a" (resultvar) \
- : "0" (name), "i" (offsetof (tcbhead_t, sysinfo)) \
- ASMFMT_##nr(args) : "memory", "cc")
-# else
-# define INTERNAL_SYSCALL_MAIN_INLINE(name, nr, args...) \
- EXTRAVAR_##nr \
- asm volatile ( \
- LOADARGS_##nr \
- "movl %1, %%eax\n\t" \
- "call *_dl_sysinfo\n\t" \
- RESTOREARGS_##nr \
- : "=a" (resultvar) \
- : "i" (__NR_##name) ASMFMT_##nr(args) : "memory", "cc")
-# define INTERNAL_SYSCALL_MAIN_NCS(name, nr, args...) \
- EXTRAVAR_##nr \
- asm volatile ( \
- LOADARGS_##nr \
- "call *_dl_sysinfo\n\t" \
- RESTOREARGS_##nr \
- : "=a" (resultvar) \
- : "0" (name) ASMFMT_##nr(args) : "memory", "cc")
-# endif
-# endif /* GCC 5 */
-#else
-# ifdef OPTIMIZE_FOR_GCC_5
-# define INTERNAL_SYSCALL_MAIN_INLINE(name, nr, args...) \
+# endif /* I386_USE_SYSENTER && !PIC */
+#else /* !I386_USE_SYSENTER */
+# define INTERNAL_SYSCALL_MAIN_INLINE(name, nr, args...) \
LOADREGS_##nr(args) \
asm volatile ( \
"int $0x80" \
: "=a" (resultvar) \
: "a" (__NR_##name) ASMARGS_##nr(args) : "memory", "cc")
-# define INTERNAL_SYSCALL_MAIN_NCS(name, nr, args...) \
+# define INTERNAL_SYSCALL_MAIN_NCS(name, nr, args...) \
LOADREGS_##nr(args) \
asm volatile ( \
"int $0x80" \
: "=a" (resultvar) \
: "a" (name) ASMARGS_##nr(args) : "memory", "cc")
-# else /* GCC 5 */
-# define INTERNAL_SYSCALL_MAIN_INLINE(name, nr, args...) \
- EXTRAVAR_##nr \
- asm volatile ( \
- LOADARGS_##nr \
- "movl %1, %%eax\n\t" \
- "int $0x80\n\t" \
- RESTOREARGS_##nr \
- : "=a" (resultvar) \
- : "i" (__NR_##name) ASMFMT_##nr(args) : "memory", "cc")
-# define INTERNAL_SYSCALL_MAIN_NCS(name, nr, args...) \
- EXTRAVAR_##nr \
- asm volatile ( \
- LOADARGS_##nr \
- "int $0x80\n\t" \
- RESTOREARGS_##nr \
- : "=a" (resultvar) \
- : "0" (name) ASMFMT_##nr(args) : "memory", "cc")
-# endif /* GCC 5 */
-#endif
-
-#define LOADARGS_0
-#ifdef __PIC__
-# if I386_USE_SYSENTER && defined PIC
-# define LOADARGS_1 \
- "bpushl .L__X'%k3, %k3\n\t"
-# define LOADARGS_5 \
- "movl %%ebx, %4\n\t" \
- "movl %3, %%ebx\n\t"
-# else
-# define LOADARGS_1 \
- "bpushl .L__X'%k2, %k2\n\t"
-# define LOADARGS_5 \
- "movl %%ebx, %3\n\t" \
- "movl %2, %%ebx\n\t"
-# endif
-# define LOADARGS_2 LOADARGS_1
-# define LOADARGS_3 \
- "xchgl %%ebx, %%edi\n\t"
-# define LOADARGS_4 LOADARGS_3
-#else
-# define LOADARGS_1
-# define LOADARGS_2
-# define LOADARGS_3
-# define LOADARGS_4
-# define LOADARGS_5
-#endif
-
-#define RESTOREARGS_0
-#ifdef __PIC__
-# if I386_USE_SYSENTER && defined PIC
-# define RESTOREARGS_1 \
- "bpopl .L__X'%k3, %k3\n\t"
-# define RESTOREARGS_5 \
- "movl %4, %%ebx"
-# else
-# define RESTOREARGS_1 \
- "bpopl .L__X'%k2, %k2\n\t"
-# define RESTOREARGS_5 \
- "movl %3, %%ebx"
-# endif
-# define RESTOREARGS_2 RESTOREARGS_1
-# define RESTOREARGS_3 \
- "xchgl %%edi, %%ebx\n\t"
-# define RESTOREARGS_4 RESTOREARGS_3
-#else
-# define RESTOREARGS_1
-# define RESTOREARGS_2
-# define RESTOREARGS_3
-# define RESTOREARGS_4
-# define RESTOREARGS_5
-#endif
+#endif /* !I386_USE_SYSENTER */
-#ifdef OPTIMIZE_FOR_GCC_5
-# define LOADREGS_0()
-# define ASMARGS_0()
-# define LOADREGS_1(arg1) \
+#define LOADREGS_0()
+#define ASMARGS_0()
+#define LOADREGS_1(arg1) \
LOADREGS_0 ()
-# define ASMARGS_1(arg1) \
+#define ASMARGS_1(arg1) \
ASMARGS_0 (), "b" ((unsigned int) (arg1))
-# define LOADREGS_2(arg1, arg2) \
+#define LOADREGS_2(arg1, arg2) \
LOADREGS_1 (arg1)
-# define ASMARGS_2(arg1, arg2) \
+#define ASMARGS_2(arg1, arg2) \
ASMARGS_1 (arg1), "c" ((unsigned int) (arg2))
-# define LOADREGS_3(arg1, arg2, arg3) \
+#define LOADREGS_3(arg1, arg2, arg3) \
LOADREGS_2 (arg1, arg2)
-# define ASMARGS_3(arg1, arg2, arg3) \
+#define ASMARGS_3(arg1, arg2, arg3) \
ASMARGS_2 (arg1, arg2), "d" ((unsigned int) (arg3))
-# define LOADREGS_4(arg1, arg2, arg3, arg4) \
+#define LOADREGS_4(arg1, arg2, arg3, arg4) \
LOADREGS_3 (arg1, arg2, arg3)
-# define ASMARGS_4(arg1, arg2, arg3, arg4) \
+#define ASMARGS_4(arg1, arg2, arg3, arg4) \
ASMARGS_3 (arg1, arg2, arg3), "S" ((unsigned int) (arg4))
-# define LOADREGS_5(arg1, arg2, arg3, arg4, arg5) \
+#define LOADREGS_5(arg1, arg2, arg3, arg4, arg5) \
LOADREGS_4 (arg1, arg2, arg3, arg4)
-# define ASMARGS_5(arg1, arg2, arg3, arg4, arg5) \
+#define ASMARGS_5(arg1, arg2, arg3, arg4, arg5) \
ASMARGS_4 (arg1, arg2, arg3, arg4), "D" ((unsigned int) (arg5))
-# define LOADREGS_6(arg1, arg2, arg3, arg4, arg5, arg6) \
- register unsigned int _a6 asm ("ebp") = (unsigned int) (arg6); \
- LOADREGS_5 (arg1, arg2, arg3, arg4, arg5)
-# define ASMARGS_6(arg1, arg2, arg3, arg4, arg5, arg6) \
- ASMARGS_5 (arg1, arg2, arg3, arg4, arg5), "r" (_a6)
-#endif /* GCC 5 */
#define ASMFMT_0()
#ifdef __PIC__
+ifeq ($(subdir),elf)
+# ia64 does not support PT_GNU_RELRO.
+test-xfail-tst-relro-ldso = yes
+test-xfail-tst-relro-libc = yes
+endif
+
ifeq ($(subdir),misc)
sysdep_headers += sys/rse.h
endif
#define __NR_fsync 1051
#define __NR_ftruncate 1098
#define __NR_futex 1230
+#define __NR_futex_waitv 1473
#define __NR_futimesat 1285
#define __NR_get_mempolicy 1260
#define __NR_get_robust_list 1299
#define __NR_preadv2 1348
#define __NR_prlimit64 1325
#define __NR_process_madvise 1464
+#define __NR_process_mrelease 1472
#define __NR_process_vm_readv 1332
#define __NR_process_vm_writev 1333
#define __NR_pselect6 1294
#define __NR_pwritev 1320
#define __NR_pwritev2 1349
#define __NR_quotactl 1137
+#define __NR_quotactl_fd 1467
#define __NR_read 1026
#define __NR_readahead 1216
#define __NR_readlink 1092
#define __NR_sendmsg 1205
#define __NR_sendto 1199
#define __NR_set_mempolicy 1261
+#define __NR_set_mempolicy_home_node 1474
#define __NR_set_robust_list 1298
#define __NR_set_tid_address 1233
#define __NR_setdomainname 1129
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include <dl-sysdep.h>
-/* brk is used by statup before TCB is properly set. */
-#undef USE_DL_SYSINFO
+/* brk is used by startup before TCB is properly set up. */
+#define IA64_USE_NEW_STUB 0
#include <sysdeps/unix/sysv/linux/brk.c>
GLIBC_2.2 _dl_mcount F
GLIBC_2.2 _r_debug D 0x28
GLIBC_2.3 __tls_get_addr F
+GLIBC_2.34 __rtld_version_placeholder F
--- /dev/null
+/* Linux/ia64 definitions of functions used by static libc main startup.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* This code is used before the TCB is set up. */
+#define IA64_USE_NEW_STUB 0
+
+#include_next <startup.h>
#undef SYS_ify
#define SYS_ify(syscall_name) __NR_##syscall_name
-#if defined USE_DL_SYSINFO \
- && (IS_IN (libc) \
- || IS_IN (libpthread) || IS_IN (librt))
-# define IA64_USE_NEW_STUB
-#else
-# undef IA64_USE_NEW_STUB
+#ifndef IA64_USE_NEW_STUB /* Keep any value already chosen before this point (early-startup code predefines 0). */
+# if defined USE_DL_SYSINFO && IS_IN (libc)
+# define IA64_USE_NEW_STUB 1
+# else
+# define IA64_USE_NEW_STUB 0
+# endif
+#endif /* !IA64_USE_NEW_STUB */
+#if IA64_USE_NEW_STUB && !USE_DL_SYSINFO
+# error IA64_USE_NEW_STUB needs USE_DL_SYSINFO
#endif
#ifdef __ASSEMBLER__
mov r15=num; \
break __IA64_BREAK_SYSCALL
-#ifdef IA64_USE_NEW_STUB
+#if IA64_USE_NEW_STUB
# ifdef SHARED
# define DO_CALL(num) \
.prologue; \
(non-negative) errno on error or the return value on success.
*/
-#ifdef IA64_USE_NEW_STUB
+#if IA64_USE_NEW_STUB
# define INTERNAL_SYSCALL_NCS(name, nr, args...) \
({ \
#define ASM_OUTARGS_5 ASM_OUTARGS_4, "=r" (_out4)
#define ASM_OUTARGS_6 ASM_OUTARGS_5, "=r" (_out5)
-#ifdef IA64_USE_NEW_STUB
+#if IA64_USE_NEW_STUB
#define ASM_ARGS_0
#define ASM_ARGS_1 ASM_ARGS_0, "4" (_out0)
#define ASM_ARGS_2 ASM_ARGS_1, "5" (_out1)
/* Branch registers. */ \
"b6"
-#ifdef IA64_USE_NEW_STUB
+#if IA64_USE_NEW_STUB
# define ASM_CLOBBERS_6 ASM_CLOBBERS_6_COMMON
#else
# define ASM_CLOBBERS_6 ASM_CLOBBERS_6_COMMON , "b7"
/* Get the real definitions. */
#include_next <ldsodefs.h>
-/* We can assume that the kernel always provides the AT_UID, AT_EUID,
- AT_GID, and AT_EGID values in the auxiliary vector from 2.4.0 or so on. */
-#define HAVE_AUX_XID
-
-/* We can assume that the kernel always provides the AT_SECURE value
- in the auxiliary vector from 2.5.74 or so on. */
-#define HAVE_AUX_SECURE
-
-/* Starting with one of the 2.4.0 pre-releases the Linux kernel passes
- up the page size information. */
-#define HAVE_AUX_PAGESIZE
-
#endif /* ldsodefs.h */
#define __NR_ftruncate64 194
#define __NR_futex 235
#define __NR_futex_time64 422
+#define __NR_futex_waitv 449
#define __NR_futimesat 292
#define __NR_get_kernel_syms 130
#define __NR_get_mempolicy 269
#define __NR_preadv2 377
#define __NR_prlimit64 339
#define __NR_process_madvise 440
+#define __NR_process_mrelease 448
#define __NR_process_vm_readv 345
#define __NR_process_vm_writev 346
#define __NR_pselect6 301
#define __NR_pwritev2 378
#define __NR_query_module 167
#define __NR_quotactl 131
+#define __NR_quotactl_fd 443
#define __NR_read 3
#define __NR_readahead 240
#define __NR_readdir 89
#define __NR_sendmsg 367
#define __NR_sendto 366
#define __NR_set_mempolicy 270
+#define __NR_set_mempolicy_home_node 450
#define __NR_set_robust_list 304
#define __NR_set_thread_area 334
#define __NR_set_tid_address 253
+GLIBC_2.34 __rtld_version_placeholder F
GLIBC_2.4 __libc_stack_end D 0x4
GLIBC_2.4 __stack_chk_guard D 0x4
GLIBC_2.4 __tls_get_addr F
GLIBC_2.1 __libc_stack_end D 0x4
GLIBC_2.1 _dl_mcount F
GLIBC_2.3 __tls_get_addr F
+GLIBC_2.34 __rtld_version_placeholder F
GLIBC_2.4 __stack_chk_guard D 0x4
#define PTR_MANGLE(var) (void) (var)
#define PTR_DEMANGLE(var) (void) (var)
-#if defined NEED_DL_SYSINFO || defined NEED_DL_SYSINFO_DSO
/* M68K needs system-supplied DSO to access TLS helpers
even when statically linked. */
-# define NEED_STATIC_SYSINFO_DSO 1
-#endif
+#define NEED_STATIC_SYSINFO_DSO 1
#define __NR_ftruncate64 194
#define __NR_futex 240
#define __NR_futex_time64 422
+#define __NR_futex_waitv 449
#define __NR_futimesat 299
#define __NR_get_kernel_syms 130
#define __NR_get_mempolicy 275
#define __NR_preadv2 393
#define __NR_prlimit64 370
#define __NR_process_madvise 440
+#define __NR_process_mrelease 448
#define __NR_process_vm_readv 377
#define __NR_process_vm_writev 378
#define __NR_prof 44
#define __NR_pwritev2 394
#define __NR_query_module 167
#define __NR_quotactl 131
+#define __NR_quotactl_fd 443
#define __NR_read 3
#define __NR_readahead 225
#define __NR_readdir 89
#define __NR_sendmsg 360
#define __NR_sendto 353
#define __NR_set_mempolicy 276
+#define __NR_set_mempolicy_home_node 450
#define __NR_set_robust_list 311
#define __NR_set_thread_area 243
#define __NR_set_tid_address 258
GLIBC_2.18 __tls_get_addr F
GLIBC_2.18 _dl_mcount F
GLIBC_2.18 _r_debug D 0x14
+GLIBC_2.34 __rtld_version_placeholder F
struct stat
{
+# ifdef __USE_TIME_BITS64
+# include <bits/struct_stat_time64_helper.h>
+# else
__dev_t st_dev;
int st_pad1[3]; /* Reserved for st_dev expansion */
-# ifndef __USE_FILE_OFFSET64
+# ifndef __USE_FILE_OFFSET64
__ino_t st_ino;
-# else
+# else
__ino64_t st_ino;
-# endif
+# endif
__mode_t st_mode;
__nlink_t st_nlink;
__uid_t st_uid;
__gid_t st_gid;
__dev_t st_rdev;
-# if !defined __USE_FILE_OFFSET64
+# if !defined __USE_FILE_OFFSET64
unsigned int st_pad2[2]; /* Reserved for st_rdev expansion */
__off_t st_size;
int st_pad3;
-# else
+# else
unsigned int st_pad2[3]; /* Reserved for st_rdev expansion */
__off64_t st_size;
-# endif
-# ifdef __USE_XOPEN2K8
+# endif
+# ifdef __USE_XOPEN2K8
/* Nanosecond resolution timestamps are stored in a format
equivalent to 'struct timespec'. This is the type used
whenever possible but the Unix namespace rules do not allow the
struct timespec st_atim; /* Time of last access. */
struct timespec st_mtim; /* Time of last modification. */
struct timespec st_ctim; /* Time of last status change. */
-# define st_atime st_atim.tv_sec /* Backward compatibility. */
-# define st_mtime st_mtim.tv_sec
-# define st_ctime st_ctim.tv_sec
-# else
+# define st_atime st_atim.tv_sec /* Backward compatibility. */
+# define st_mtime st_mtim.tv_sec
+# define st_ctime st_ctim.tv_sec
+# else
__time_t st_atime; /* Time of last access. */
unsigned long int st_atimensec; /* Nsecs of last access. */
__time_t st_mtime; /* Time of last modification. */
unsigned long int st_mtimensec; /* Nsecs of last modification. */
__time_t st_ctime; /* Time of last status change. */
unsigned long int st_ctimensec; /* Nsecs of last status change. */
-# endif
+# endif
__blksize_t st_blksize;
unsigned int st_pad4;
-# ifndef __USE_FILE_OFFSET64
+# ifndef __USE_FILE_OFFSET64
__blkcnt_t st_blocks;
-# else
+# else
__blkcnt64_t st_blocks;
-# endif
+# endif
int st_pad5[14];
+# endif
};
#ifdef __USE_LARGEFILE64
struct stat64
{
+# ifdef __USE_TIME_BITS64
+# include <bits/struct_stat_time64_helper.h>
+# else
__dev_t st_dev;
unsigned int st_pad1[3]; /* Reserved for st_dev expansion */
__ino64_t st_ino;
unsigned int st_pad3;
__blkcnt64_t st_blocks;
int st_pad4[14];
+# endif /* __USE_TIME_BITS64 */
};
#endif
.set at
#endif
+ /* Align stack to 8/16 bytes per the ABI. */
+#if _MIPS_SIM == _ABIO32
+ li t0,-8
+#else
+ li t0,-16
+#endif
+ and a1,a1,t0
/* Sanity check arguments. */
li v0,EINVAL
{
struct kernel_stat kbuf;
int r = INTERNAL_SYSCALL_CALL (fstat, fd, &kbuf);
- return r ?: __xstat_conv (vers, &kbuf, buf);
+ if (r == 0)
+ return __xstat_conv (vers, &kbuf, buf);
+ return INLINE_SYSCALL_ERROR_RETURN_VALUE (-r);
}
}
}
{
struct kernel_stat kbuf;
int r = INTERNAL_SYSCALL_CALL (lstat, name, &kbuf);
- return r ?: __xstat_conv (vers, &kbuf, buf);
+ if (r == 0)
+ return __xstat_conv (vers, &kbuf, buf);
+ return INLINE_SYSCALL_ERROR_RETURN_VALUE (-r);
}
}
}
#define __NR_ftruncate64 4212
#define __NR_futex 4238
#define __NR_futex_time64 4422
+#define __NR_futex_waitv 4449
#define __NR_futimesat 4292
#define __NR_get_kernel_syms 4130
#define __NR_get_mempolicy 4269
#define __NR_preadv2 4361
#define __NR_prlimit64 4338
#define __NR_process_madvise 4440
+#define __NR_process_mrelease 4448
#define __NR_process_vm_readv 4345
#define __NR_process_vm_writev 4346
#define __NR_prof 4044
#define __NR_pwritev2 4362
#define __NR_query_module 4187
#define __NR_quotactl 4131
+#define __NR_quotactl_fd 4443
#define __NR_read 4003
#define __NR_readahead 4223
#define __NR_readdir 4089
#define __NR_sendmsg 4179
#define __NR_sendto 4180
#define __NR_set_mempolicy 4270
+#define __NR_set_mempolicy_home_node 4450
#define __NR_set_robust_list 4309
#define __NR_set_thread_area 4283
#define __NR_set_tid_address 4252
GLIBC_2.2 __libc_stack_end D 0x4
GLIBC_2.2 _dl_mcount F
GLIBC_2.3 __tls_get_addr F
+GLIBC_2.34 __rtld_version_placeholder F
GLIBC_2.4 __stack_chk_guard D 0x4
#define __NR_ftruncate 6075
#define __NR_futex 6194
#define __NR_futex_time64 6422
+#define __NR_futex_waitv 6449
#define __NR_futimesat 6255
#define __NR_get_kernel_syms 6170
#define __NR_get_mempolicy 6232
#define __NR_preadv2 6325
#define __NR_prlimit64 6302
#define __NR_process_madvise 6440
+#define __NR_process_mrelease 6448
#define __NR_process_vm_readv 6309
#define __NR_process_vm_writev 6310
#define __NR_pselect6 6264
#define __NR_pwritev2 6326
#define __NR_query_module 6171
#define __NR_quotactl 6172
+#define __NR_quotactl_fd 6443
#define __NR_read 6000
#define __NR_readahead 6179
#define __NR_readlink 6087
#define __NR_sendmsg 6045
#define __NR_sendto 6043
#define __NR_set_mempolicy 6233
+#define __NR_set_mempolicy_home_node 6450
#define __NR_set_robust_list 6272
#define __NR_set_thread_area 6246
#define __NR_set_tid_address 6213
GLIBC_2.2 __libc_stack_end D 0x4
GLIBC_2.2 _dl_mcount F
GLIBC_2.3 __tls_get_addr F
+GLIBC_2.34 __rtld_version_placeholder F
GLIBC_2.4 __stack_chk_guard D 0x4
#define __NR_fsync 5072
#define __NR_ftruncate 5075
#define __NR_futex 5194
+#define __NR_futex_waitv 5449
#define __NR_futimesat 5251
#define __NR_get_kernel_syms 5170
#define __NR_get_mempolicy 5228
#define __NR_preadv2 5321
#define __NR_prlimit64 5297
#define __NR_process_madvise 5440
+#define __NR_process_mrelease 5448
#define __NR_process_vm_readv 5304
#define __NR_process_vm_writev 5305
#define __NR_pselect6 5260
#define __NR_pwritev2 5322
#define __NR_query_module 5171
#define __NR_quotactl 5172
+#define __NR_quotactl_fd 5443
#define __NR_read 5000
#define __NR_readahead 5179
#define __NR_readlink 5087
#define __NR_sendmsg 5045
#define __NR_sendto 5043
#define __NR_set_mempolicy 5229
+#define __NR_set_mempolicy_home_node 5450
#define __NR_set_robust_list 5268
#define __NR_set_thread_area 5242
#define __NR_set_tid_address 5212
GLIBC_2.2 __libc_stack_end D 0x8
GLIBC_2.2 _dl_mcount F
GLIBC_2.3 __tls_get_addr F
+GLIBC_2.34 __rtld_version_placeholder F
GLIBC_2.4 __stack_chk_guard D 0x8
{
struct kernel_stat kbuf;
int r = INTERNAL_SYSCALL_CALL (stat, name, &kbuf);
- return r ?: __xstat_conv (vers, &kbuf, buf);
+ if (r == 0)
+ return __xstat_conv (vers, &kbuf, buf);
+ return INLINE_SYSCALL_ERROR_RETURN_VALUE (-r);
}
}
}
--- /dev/null
+/* Generic definition of MMAP_CALL and MMAP_CALL_INTERNAL.
+ Copyright (C) 2017-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define MMAP_CALL(__nr, __addr, __len, __prot, __flags, __fd, __offset) \
+ INLINE_SYSCALL_CALL (__nr, __addr, __len, __prot, __flags, __fd, __offset)
+#define MMAP_CALL_INTERNAL(__nr, __addr, __len, __prot, __flags, __fd, __offset) \
+ INTERNAL_SYSCALL_CALL (__nr, __addr, __len, __prot, __flags, __fd, __offset)
/* Do not accept offset not multiple of page size. */
#define MMAP_OFF_LOW_MASK (MMAP2_PAGE_UNIT - 1)
-/* An architecture may override this. */
-#ifndef MMAP_CALL
-# define MMAP_CALL(__nr, __addr, __len, __prot, __flags, __fd, __offset) \
- INLINE_SYSCALL_CALL (__nr, __addr, __len, __prot, __flags, __fd, __offset)
-#endif
+#include <mmap_call.h>
#endif /* MMAP_INTERNAL_LINUX_H */
to wait until it is done with it. */
(void) __pthread_barrier_wait (&notify_barrier);
}
- else if (data.raw[NOTIFY_COOKIE_LEN - 1] == NOTIFY_REMOVED)
+ else if (data.raw[NOTIFY_COOKIE_LEN - 1] == NOTIFY_REMOVED && data.attr != NULL)
{
/* The only state we keep is the copy of the thread attributes. */
__pthread_attr_destroy (data.attr);
{
int r = SYSCALL_CANCEL (mq_timedreceive_time64, mqdes, msg_ptr, msg_len,
msg_prio, abs_timeout);
- if (r == 0 || errno != ENOSYS)
+ if (r >= 0 || errno != ENOSYS)
return r;
__set_errno (EOVERFLOW);
return -1;
#define ARPHRD_ROSE 270
#define ARPHRD_X25 271 /* CCITT X.25. */
#define ARPHRD_HWX25 272 /* Boards with X.25 in firmware. */
+#define ARPHRD_CAN 280 /* Controller Area Network. */
+#define ARPHRD_MCTP 290
#define ARPHRD_PPP 512
#define ARPHRD_CISCO 513 /* Cisco HDLC. */
#define ARPHRD_HDLC ARPHRD_CISCO
#define __NR_ftruncate64 46
#define __NR_futex 98
#define __NR_futex_time64 422
+#define __NR_futex_waitv 449
#define __NR_get_mempolicy 236
#define __NR_get_robust_list 100
#define __NR_getcpu 168
#define __NR_preadv2 286
#define __NR_prlimit64 261
#define __NR_process_madvise 440
+#define __NR_process_mrelease 448
#define __NR_process_vm_readv 270
#define __NR_process_vm_writev 271
#define __NR_pselect6 72
#define __NR_pwritev 70
#define __NR_pwritev2 287
#define __NR_quotactl 60
+#define __NR_quotactl_fd 443
#define __NR_read 63
#define __NR_readahead 213
#define __NR_readlinkat 78
#define __NR_sendmsg 211
#define __NR_sendto 206
#define __NR_set_mempolicy 237
+#define __NR_set_mempolicy_home_node 450
#define __NR_set_robust_list 99
#define __NR_set_tid_address 96
#define __NR_setdomainname 162
GLIBC_2.21 __tls_get_addr F
GLIBC_2.21 _dl_mcount F
GLIBC_2.21 _r_debug D 0x14
+GLIBC_2.34 __rtld_version_placeholder F
+++ /dev/null
-/* Copyright (C) 1999-2021 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <assert.h>
-#include <errno.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/socket.h>
-
-/* Return a socket of any type. The socket can be used in subsequent
- ioctl calls to talk to the kernel. */
-int
-__opensock (void)
-{
- static int last_family; /* Available socket family we will use. */
- static int last_type;
- static const struct
- {
- int family;
- const char procname[15];
- } afs[] =
- {
- { AF_UNIX, "net/unix" },
- { AF_INET, "" },
- { AF_INET6, "net/if_inet6" },
- { AF_AX25, "net/ax25" },
- { AF_NETROM, "net/nr" },
- { AF_ROSE, "net/rose" },
- { AF_IPX, "net/ipx" },
- { AF_APPLETALK, "net/appletalk" },
- { AF_ECONET, "sys/net/econet" },
- { AF_ASH, "sys/net/ash" },
- { AF_X25, "net/x25" },
-#ifdef NEED_AF_IUCV
- { AF_IUCV, "net/iucv" }
-#endif
- };
-#define nafs (sizeof (afs) / sizeof (afs[0]))
- char fname[sizeof "/proc/" + 14];
- int result;
- int has_proc;
- size_t cnt;
-
- /* We already know which family to use from the last call. Use it
- again. */
- if (last_family != 0)
- {
- assert (last_type != 0);
-
- result = __socket (last_family, last_type | SOCK_CLOEXEC, 0);
- if (result != -1 || errno != EAFNOSUPPORT)
- /* Maybe the socket type isn't supported anymore (module is
- unloaded). In this case again try to find the type. */
- return result;
-
- /* Reset the values. They seem not valid anymore. */
- last_family = 0;
- last_type = 0;
- }
-
- /* Check whether the /proc filesystem is available. */
- has_proc = __access ("/proc/net", R_OK) != -1;
- strcpy (fname, "/proc/");
-
- /* Iterate over the interface families and find one which is
- available. */
- for (cnt = 0; cnt < nafs; ++cnt)
- {
- int type = SOCK_DGRAM;
-
- if (has_proc && afs[cnt].procname[0] != '\0')
- {
- strcpy (fname + 6, afs[cnt].procname);
- if (__access (fname, R_OK) == -1)
- /* The /proc entry is not available. I.e., we cannot
- create a socket of this type (without loading the
- module). Don't look for it since this might trigger
- loading the module. */
- continue;
- }
-
- if (afs[cnt].family == AF_NETROM || afs[cnt].family == AF_X25)
- type = SOCK_SEQPACKET;
-
- result = __socket (afs[cnt].family, type | SOCK_CLOEXEC, 0);
- if (result != -1)
- {
- /* Found an available family. */
- last_type = type;
- last_family = afs[cnt].family;
- return result;
- }
- }
-
- /* None of the protocol families is available. It is unclear what kind
- of error is returned. ENOENT seems like a reasonable choice. */
- __set_errno (ENOENT);
- return -1;
-}
&& strcmp (mntbuf.mnt_type, "ext4") != 0)
continue;
- struct stat64 fsst;
- if (__stat64 (mntbuf.mnt_dir, &fsst) >= 0
+ struct __stat64_t64 fsst;
+ if (__stat64_time64 (mntbuf.mnt_dir, &fsst) >= 0
&& st.st_dev == fsst.st_dev)
{
if (strcmp (mntbuf.mnt_type, "ext4") == 0)
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include <ldsodefs.h>
-
-#if IS_IN (libc) && !defined SHARED
-int GLRO(dl_cache_line_size);
-#endif
-
-/* Scan the Aux Vector for the "Data Cache Block Size" entry and assign it
- to dl_cache_line_size. */
-#define DL_PLATFORM_AUXV \
- case AT_DCACHEBSIZE: \
- GLRO(dl_cache_line_size) = av->a_un.a_val; \
- break;
+#define DL_PLATFORM_AUXV \
+ GLRO(dl_cache_line_size) = auxv_values[AT_DCACHEBSIZE]; /* Data cache block size, taken from the auxiliary vector. */
--- /dev/null
+#include <elf/dl-support.c>
+
+/* Populated from the auxiliary vector. */
+int _dl_cache_line_size;
#include <sysdep-vdso.h>
#if (defined(__PPC64__) || defined(__powerpc64__)) && _CALL_ELF != 2
-# include <dl-machine.h>
+# include <dl-funcdesc.h>
/* The correct solution is for _dl_vdso_vsym to return the address of the OPD
for the kernel VDSO function. That address would then be stored in the
__vdso_* variables and returned as the result of the IFUNC resolver function.
#define __NR_ftruncate64 194
#define __NR_futex 221
#define __NR_futex_time64 422
+#define __NR_futex_waitv 449
#define __NR_futimesat 290
#define __NR_get_kernel_syms 130
#define __NR_get_mempolicy 260
#define __NR_preadv2 380
#define __NR_prlimit64 325
#define __NR_process_madvise 440
+#define __NR_process_mrelease 448
#define __NR_process_vm_readv 351
#define __NR_process_vm_writev 352
#define __NR_prof 44
#define __NR_pwritev2 381
#define __NR_query_module 166
#define __NR_quotactl 131
+#define __NR_quotactl_fd 443
#define __NR_read 3
#define __NR_readahead 191
#define __NR_readdir 89
#define __NR_sendmsg 341
#define __NR_sendto 335
#define __NR_set_mempolicy 261
+#define __NR_set_mempolicy_home_node 450
#define __NR_set_robust_list 300
#define __NR_set_tid_address 232
#define __NR_setdomainname 121
GLIBC_2.22 __tls_get_addr_opt F
GLIBC_2.23 __parse_hwcap_and_convert_at_platform F
GLIBC_2.3 __tls_get_addr F
+GLIBC_2.34 __rtld_version_placeholder F
#define __NR_ftime 35
#define __NR_ftruncate 93
#define __NR_futex 221
+#define __NR_futex_waitv 449
#define __NR_futimesat 290
#define __NR_get_kernel_syms 130
#define __NR_get_mempolicy 260
#define __NR_preadv2 380
#define __NR_prlimit64 325
#define __NR_process_madvise 440
+#define __NR_process_mrelease 448
#define __NR_process_vm_readv 351
#define __NR_process_vm_writev 352
#define __NR_prof 44
#define __NR_pwritev2 381
#define __NR_query_module 166
#define __NR_quotactl 131
+#define __NR_quotactl_fd 443
#define __NR_read 3
#define __NR_readahead 191
#define __NR_readdir 89
#define __NR_sendmsg 341
#define __NR_sendto 335
#define __NR_set_mempolicy 261
+#define __NR_set_mempolicy_home_node 450
#define __NR_set_robust_list 300
#define __NR_set_tid_address 232
#define __NR_setdomainname 121
GLIBC_2.3 __tls_get_addr F
GLIBC_2.3 _dl_mcount F
GLIBC_2.3 _r_debug D 0x28
+GLIBC_2.34 __rtld_version_placeholder F
GLIBC_2.17 _r_debug D 0x28
GLIBC_2.22 __tls_get_addr_opt F
GLIBC_2.23 __parse_hwcap_and_convert_at_platform F
+GLIBC_2.34 __rtld_version_placeholder F
mr r8,r9
#if defined(USE_PPC_SCV) && !IS_IN(rtld) && (defined(__PPC64__) || defined(__powerpc64__))
CHECK_SCV_SUPPORT r9 0f
+ stdu r1,-SCV_FRAME_SIZE(r1)
+ cfi_adjust_cfa_offset(SCV_FRAME_SIZE)
DO_CALL_SCV
+ addi r1,r1,SCV_FRAME_SIZE
+ cfi_adjust_cfa_offset(-SCV_FRAME_SIZE)
RET_SCV
b 1f
#endif
#include <sysdep.h>
#include <socketcall.h>
-int
-__recvmmsg64 (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags,
- struct __timespec64 *timeout)
+static int
+recvmmsg_syscall (int fd, struct mmsghdr *vmessages, unsigned int vlen,
+ int flags, struct __timespec64 *timeout)
{
#ifndef __NR_recvmmsg_time64
# define __NR_recvmmsg_time64 __NR_recvmmsg
pts32 = &ts32;
}
- socklen_t csize[IOV_MAX];
- if (vlen > IOV_MAX)
- vlen = IOV_MAX;
- for (int i = 0; i < vlen; i++)
- csize[i] = vmessages[i].msg_hdr.msg_controllen;
-
# ifdef __ASSUME_RECVMMSG_SYSCALL
r = SYSCALL_CANCEL (recvmmsg, fd, vmessages, vlen, flags, pts32);
# else
{
if (timeout != NULL)
*timeout = valid_timespec_to_timespec64 (ts32);
+ }
+#endif
+ return r;
+}
+
+int
+__recvmmsg64 (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags,
+ struct __timespec64 *timeout)
+{
+#if __TIMESIZE != 64
+ socklen_t csize[IOV_MAX];
+ if (vlen > IOV_MAX)
+ vlen = IOV_MAX;
+ for (int i = 0; i < vlen; i++)
+ csize[i] = vmessages[i].msg_hdr.msg_controllen;
+#endif
+ int r = recvmmsg_syscall (fd, vmessages, vlen, flags, timeout);
+#if __TIMESIZE != 64
+ if (r > 0)
+ {
for (int i=0; i < r; i++)
__convert_scm_timestamps (&vmessages[i].msg_hdr, csize[i]);
}
-#endif /* __ASSUME_TIME64_SYSCALLS */
+#endif
return r;
}
#if __TIMESIZE != 64
ts64 = valid_timespec_to_timespec64 (*timeout);
pts64 = &ts64;
}
- int r = __recvmmsg64 (fd, vmessages, vlen, flags, pts64);
+ int r = recvmmsg_syscall (fd, vmessages, vlen, flags, pts64);
if (r >= 0 && timeout != NULL)
/* The remanining timeout will be always less the input TIMEOUT. */
*timeout = valid_timespec64_to_timespec (ts64);
#include <sysdep-cancel.h>
#include <socketcall.h>
+static int
+__recvmsg_syscall (int fd, struct msghdr *msg, int flags)
+{
+#ifdef __ASSUME_RECVMSG_SYSCALL
+ return SYSCALL_CANCEL (recvmsg, fd, msg, flags);
+#else /* !__ASSUME_RECVMSG_SYSCALL */
+ return SOCKETCALL_CANCEL (recvmsg, fd, msg, flags);
+#endif /* __ASSUME_RECVMSG_SYSCALL */
+}
+
ssize_t
-__libc_recvmsg (int fd, struct msghdr *msg, int flags)
+__libc_recvmsg64 (int fd, struct msghdr *msg, int flags)
{
ssize_t r;
-#ifndef __ASSUME_TIME64_SYSCALLS
+#if __TIMESIZE != 64
socklen_t orig_controllen = msg != NULL ? msg->msg_controllen : 0;
#endif
-#ifdef __ASSUME_RECVMSG_SYSCALL
- r = SYSCALL_CANCEL (recvmsg, fd, msg, flags);
-#else
- r = SOCKETCALL_CANCEL (recvmsg, fd, msg, flags);
-#endif
+ r = __recvmsg_syscall (fd, msg, flags);
-#ifndef __ASSUME_TIME64_SYSCALLS
+#if __TIMESIZE != 64
if (r >= 0 && orig_controllen != 0)
__convert_scm_timestamps (msg, orig_controllen);
#endif
return r;
}
-weak_alias (__libc_recvmsg, recvmsg)
-weak_alias (__libc_recvmsg, __recvmsg)
#if __TIMESIZE != 64
-weak_alias (__recvmsg, __recvmsg64)
+weak_alias (__libc_recvmsg64, __recvmsg64)
+/* Variant compiled only when __TIMESIZE != 64.  It forwards FD, MSG and
+   FLAGS to __recvmsg_syscall and returns the result unchanged; unlike
+   __libc_recvmsg64 it does not call __convert_scm_timestamps.  */
+ssize_t
+__libc_recvmsg (int fd, struct msghdr *msg, int flags)
+{
+ return __recvmsg_syscall (fd, msg, flags);
+}
#endif
+weak_alias (__libc_recvmsg, recvmsg)
+weak_alias (__libc_recvmsg, __recvmsg)
.text
LEAF (__clone)
+ /* Align stack to a 128-bit boundary as per RISC-V ABI. */
+ andi a1,a1,ALMASK
+
/* Sanity check arguments. */
beqz a0,L (invalid) /* No NULL function pointers. */
beqz a1,L (invalid) /* No NULL stack pointers. */
#define __NR_clock_nanosleep_time64 407
#define __NR_clock_settime64 404
#define __NR_clone 220
+#define __NR_clone3 435
#define __NR_close 57
#define __NR_close_range 436
#define __NR_connect 203
#define __NR_fsync 82
#define __NR_ftruncate64 46
#define __NR_futex_time64 422
+#define __NR_futex_waitv 449
#define __NR_get_mempolicy 236
#define __NR_get_robust_list 100
#define __NR_getcpu 168
#define __NR_mbind 235
#define __NR_membarrier 283
#define __NR_memfd_create 279
+#define __NR_memfd_secret 447
#define __NR_migrate_pages 238
#define __NR_mincore 232
#define __NR_mkdirat 34
#define __NR_preadv2 286
#define __NR_prlimit64 261
#define __NR_process_madvise 440
+#define __NR_process_mrelease 448
#define __NR_process_vm_readv 270
#define __NR_process_vm_writev 271
#define __NR_pselect6_time64 413
#define __NR_pwritev 70
#define __NR_pwritev2 287
#define __NR_quotactl 60
+#define __NR_quotactl_fd 443
#define __NR_read 63
#define __NR_readahead 213
#define __NR_readlinkat 78
#define __NR_sendmsg 211
#define __NR_sendto 206
#define __NR_set_mempolicy 237
+#define __NR_set_mempolicy_home_node 450
#define __NR_set_robust_list 99
#define __NR_set_tid_address 96
#define __NR_setdomainname 162
GLIBC_2.33 __tls_get_addr F
GLIBC_2.33 _dl_mcount F
GLIBC_2.33 _r_debug D 0x14
+GLIBC_2.34 __rtld_version_placeholder F
#define __NR_fsync 82
#define __NR_ftruncate 46
#define __NR_futex 98
+#define __NR_futex_waitv 449
#define __NR_get_mempolicy 236
#define __NR_get_robust_list 100
#define __NR_getcpu 168
#define __NR_mbind 235
#define __NR_membarrier 283
#define __NR_memfd_create 279
+#define __NR_memfd_secret 447
#define __NR_migrate_pages 238
#define __NR_mincore 232
#define __NR_mkdirat 34
#define __NR_preadv2 286
#define __NR_prlimit64 261
#define __NR_process_madvise 440
+#define __NR_process_mrelease 448
#define __NR_process_vm_readv 270
#define __NR_process_vm_writev 271
#define __NR_pselect6 72
#define __NR_pwritev 70
#define __NR_pwritev2 287
#define __NR_quotactl 60
+#define __NR_quotactl_fd 443
#define __NR_read 63
#define __NR_readahead 213
#define __NR_readlinkat 78
#define __NR_sendmsg 211
#define __NR_sendto 206
#define __NR_set_mempolicy 237
+#define __NR_set_mempolicy_home_node 450
#define __NR_set_robust_list 99
#define __NR_set_tid_address 96
#define __NR_setdomainname 162
GLIBC_2.27 __tls_get_addr F
GLIBC_2.27 _dl_mcount F
GLIBC_2.27 _r_debug D 0x28
+GLIBC_2.34 __rtld_version_placeholder F
/*
* The following must match the kernels asm/elf.h.
+ * Note: The kernel commit 511ad531afd4090625def4d9aba1f5227bd44b8e
+ * "s390/hwcaps: shorten HWCAP defines" has shortened the prefix of the macros
+ * from "HWCAP_S390_" to "HWCAP_". For compatibility reasons, we do not
+ * change the prefix in the public glibc header file.
+ *
* Note that these are *not* the same as the STORE FACILITY LIST bits.
*/
#define HWCAP_S390_ESAN3 1
#define HWCAP_S390_DFLT 262144
#define HWCAP_S390_VXRS_PDE2 524288
#define HWCAP_S390_NNPA 1048576
+#define HWCAP_S390_PCI_MIO 2097152
+#define HWCAP_S390_SIE 4194304
--- /dev/null
+/* mmap - map files or devices into memory. Linux/s390 version.
+ Copyright (C) 2017-2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define MMAP_CALL(__nr, __addr, __len, __prot, __flags, __fd, __offset) \
+ ({ \
+ long int __args[6] = { (long int) (__addr), (long int) (__len), \
+ (long int) (__prot), (long int) (__flags), \
+ (long int) (__fd), (long int) (__offset) }; \
+ INLINE_SYSCALL_CALL (__nr, __args); \
+ })
+#define MMAP_CALL_INTERNAL(__nr, __addr, __len, __prot, __flags, __fd, __offset) \
+ ({ \
+ long int __args[6] = { (long int) (__addr), (long int) (__len), \
+ (long int) (__prot), (long int) (__flags), \
+ (long int) (__fd), (long int) (__offset) }; \
+ INTERNAL_SYSCALL_CALL (__nr, __args); \
+ })
+++ /dev/null
-/* mmap - map files or devices into memory. Linux/s390 version.
- Copyright (C) 2017-2021 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#ifndef MMAP_S390_INTERNAL_H
-# define MMAP_S390_INTERNAL_H
-
-#define MMAP_CALL(__nr, __addr, __len, __prot, __flags, __fd, __offset) \
- ({ \
- long int __args[6] = { (long int) (__addr), (long int) (__len), \
- (long int) (__prot), (long int) (__flags), \
- (long int) (__fd), (long int) (__offset) }; \
- INLINE_SYSCALL_CALL (__nr, __args); \
- })
-
-#include_next <mmap_internal.h>
-
-#endif
+++ /dev/null
-#define NEED_AF_IUCV 1
-#include "../opensock.c"
#define __NR_ftruncate64 194
#define __NR_futex 238
#define __NR_futex_time64 422
+#define __NR_futex_waitv 449
#define __NR_futimesat 292
#define __NR_get_kernel_syms 130
#define __NR_get_mempolicy 269
#define __NR_preadv2 376
#define __NR_prlimit64 334
#define __NR_process_madvise 440
+#define __NR_process_mrelease 448
#define __NR_process_vm_readv 340
#define __NR_process_vm_writev 341
#define __NR_pselect6 301
#define __NR_pwritev2 377
#define __NR_query_module 167
#define __NR_quotactl 131
+#define __NR_quotactl_fd 443
#define __NR_read 3
#define __NR_readahead 222
#define __NR_readdir 89
#define __NR_sendmsg 370
#define __NR_sendto 369
#define __NR_set_mempolicy 270
+#define __NR_set_mempolicy_home_node 450
#define __NR_set_robust_list 304
#define __NR_set_tid_address 252
#define __NR_setdomainname 121
GLIBC_2.1 __libc_stack_end D 0x4
GLIBC_2.1 _dl_mcount F
GLIBC_2.3 __tls_get_offset F
+GLIBC_2.34 __rtld_version_placeholder F
#define __NR_fsync 118
#define __NR_ftruncate 93
#define __NR_futex 238
+#define __NR_futex_waitv 449
#define __NR_futimesat 292
#define __NR_get_kernel_syms 130
#define __NR_get_mempolicy 269
#define __NR_preadv2 376
#define __NR_prlimit64 334
#define __NR_process_madvise 440
+#define __NR_process_mrelease 448
#define __NR_process_vm_readv 340
#define __NR_process_vm_writev 341
#define __NR_pselect6 301
#define __NR_pwritev2 377
#define __NR_query_module 167
#define __NR_quotactl 131
+#define __NR_quotactl_fd 443
#define __NR_read 3
#define __NR_readahead 222
#define __NR_readdir 89
#define __NR_sendmsg 370
#define __NR_sendto 369
#define __NR_set_mempolicy 270
+#define __NR_set_mempolicy_home_node 450
#define __NR_set_robust_list 304
#define __NR_set_tid_address 252
#define __NR_setdomainname 121
GLIBC_2.2 _dl_mcount F
GLIBC_2.2 _r_debug D 0x28
GLIBC_2.3 __tls_get_offset F
+GLIBC_2.34 __rtld_version_placeholder F
#define __NR_ftruncate64 194
#define __NR_futex 240
#define __NR_futex_time64 422
+#define __NR_futex_waitv 449
#define __NR_futimesat 299
#define __NR_get_mempolicy 275
#define __NR_get_robust_list 312
#define __NR_preadv2 381
#define __NR_prlimit64 339
#define __NR_process_madvise 440
+#define __NR_process_mrelease 448
#define __NR_process_vm_readv 365
#define __NR_process_vm_writev 366
#define __NR_pselect6 308
#define __NR_pwritev 334
#define __NR_pwritev2 382
#define __NR_quotactl 131
+#define __NR_quotactl_fd 443
#define __NR_read 3
#define __NR_readahead 225
#define __NR_readdir 89
#define __NR_sendmsg 355
#define __NR_sendto 349
#define __NR_set_mempolicy 276
+#define __NR_set_mempolicy_home_node 450
#define __NR_set_robust_list 311
#define __NR_set_tid_address 258
#define __NR_setdomainname 121
GLIBC_2.2 _dl_mcount F
GLIBC_2.2 _r_debug D 0x14
GLIBC_2.3 __tls_get_addr F
+GLIBC_2.34 __rtld_version_placeholder F
GLIBC_2.4 __stack_chk_guard D 0x4
GLIBC_2.2 _dl_mcount F
GLIBC_2.2 _r_debug D 0x14
GLIBC_2.3 __tls_get_addr F
+GLIBC_2.34 __rtld_version_placeholder F
GLIBC_2.4 __stack_chk_guard D 0x4
struct stat
{
+#ifdef __USE_TIME_BITS64
+# include <bits/struct_stat_time64_helper.h>
+#else
__dev_t st_dev; /* Device. */
-#if __WORDSIZE == 64 || !defined __USE_FILE_OFFSET64
+# if __WORDSIZE == 64 || !defined __USE_FILE_OFFSET64
unsigned short int __pad1;
__ino_t st_ino; /* File serial number. */
-#else
+# else
__ino64_t st_ino; /* File serial number. */
-#endif
+# endif
__mode_t st_mode; /* File mode. */
__nlink_t st_nlink; /* Link count. */
__uid_t st_uid; /* User ID of the file's owner. */
__gid_t st_gid; /* Group ID of the file's group.*/
__dev_t st_rdev; /* Device number, if device. */
unsigned short int __pad2;
-#ifndef __USE_FILE_OFFSET64
+# ifndef __USE_FILE_OFFSET64
__off_t st_size; /* Size of file, in bytes. */
-#else
+# else
__off64_t st_size; /* Size of file, in bytes. */
-#endif
+# endif
__blksize_t st_blksize; /* Optimal block size for I/O. */
-#ifndef __USE_FILE_OFFSET64
+# ifndef __USE_FILE_OFFSET64
__blkcnt_t st_blocks; /* Number 512-byte blocks allocated. */
-#else
+# else
__blkcnt64_t st_blocks; /* Number 512-byte blocks allocated. */
-#endif
-#ifdef __USE_XOPEN2K8
+# endif
+# ifdef __USE_XOPEN2K8
/* Nanosecond resolution timestamps are stored in a format
equivalent to 'struct timespec'. This is the type used
whenever possible but the Unix namespace rules do not allow the
struct timespec st_atim; /* Time of last access. */
struct timespec st_mtim; /* Time of last modification. */
struct timespec st_ctim; /* Time of last status change. */
-# define st_atime st_atim.tv_sec /* Backward compatibility. */
-# define st_mtime st_mtim.tv_sec
-# define st_ctime st_ctim.tv_sec
-#else
+# define st_atime st_atim.tv_sec /* Backward compatibility. */
+# define st_mtime st_mtim.tv_sec
+# define st_ctime st_ctim.tv_sec
+# else
__time_t st_atime; /* Time of last access. */
unsigned long int st_atimensec; /* Nscecs of last access. */
__time_t st_mtime; /* Time of last modification. */
unsigned long int st_mtimensec; /* Nsecs of last modification. */
__time_t st_ctime; /* Time of last status change. */
unsigned long int st_ctimensec; /* Nsecs of last status change. */
-#endif
+# endif
unsigned long int __glibc_reserved4;
unsigned long int __glibc_reserved5;
+#endif /* __USE_TIME_BITS64 */
};
#ifdef __USE_LARGEFILE64
struct stat64
{
+# ifdef __USE_TIME_BITS64
+# include <bits/struct_stat_time64_helper.h>
+# else
__dev_t st_dev; /* Device. */
-# if __WORDSIZE == 64
+# if __WORDSIZE == 64
unsigned short int __pad1;
-# endif
+# endif
__ino64_t st_ino; /* File serial number. */
__mode_t st_mode; /* File mode. */
__nlink_t st_nlink; /* Link count. */
__blksize_t st_blksize; /* Optimal block size for I/O. */
__blkcnt64_t st_blocks; /* Number 512-byte blocks allocated. */
-# ifdef __USE_XOPEN2K8
+# ifdef __USE_XOPEN2K8
/* Nanosecond resolution timestamps are stored in a format
equivalent to 'struct timespec'. This is the type used
whenever possible but the Unix namespace rules do not allow the
struct timespec st_atim; /* Time of last access. */
struct timespec st_mtim; /* Time of last modification. */
struct timespec st_ctim; /* Time of last status change. */
-# define st_atime st_atim.tv_sec /* Backward compatibility. */
-# define st_mtime st_mtim.tv_sec
-# define st_ctime st_ctim.tv_sec
-# else
+# define st_atime st_atim.tv_sec /* Backward compatibility. */
+# define st_mtime st_mtim.tv_sec
+# define st_ctime st_ctim.tv_sec
+# else
__time_t st_atime; /* Time of last access. */
unsigned long int st_atimensec; /* Nscecs of last access. */
__time_t st_mtime; /* Time of last modification. */
unsigned long int st_mtimensec; /* Nsecs of last modification. */
__time_t st_ctime; /* Time of last status change. */
unsigned long int st_ctimensec; /* Nsecs of last status change. */
-# endif
+# endif
unsigned long int __glibc_reserved4;
unsigned long int __glibc_reserved5;
+# endif /* __USE_TIME_BITS64 */
};
#endif
--- /dev/null
+/* Change data segment. Linux SPARC version.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <unistd.h>
+#include <sysdep.h>
+
+/* This must be initialized data because commons can't have aliases. */
+void *__curbrk = 0;
+
+#if HAVE_INTERNAL_BRK_ADDR_SYMBOL
+/* Old braindamage in GCC's crtstuff.c requires this symbol in an attempt
+ to work around different old braindamage in the old Linux ELF dynamic
+ linker. */
+weak_alias (__curbrk, ___brk_addr)
+#endif
+
+#ifdef __arch64__
+# define SYSCALL_NUM "0x6d"
+#else
+# define SYSCALL_NUM "0x10"
+#endif
+/* Request that the program break be set to ADDR.  __NR_brk is passed in
+   %g1 and ADDR in %o0 to the "ta SYSCALL_NUM" trap; the value found in
+   %o0 afterwards is recorded in __curbrk.  Returns 0 unless the recorded
+   break is below ADDR, in which case errno is set to ENOMEM and -1 is
+   returned.  */
+int
+__brk (void *addr)
+{
+ register long int g1 asm ("g1") = __NR_brk;
+ register long int o0 asm ("o0") = (long int) addr;
+ asm volatile ("ta " SYSCALL_NUM
+ : "=r"(o0)
+ : "r"(g1), "0"(o0)
+ : "cc");
+ __curbrk = (void *) o0;
+
+ if (__curbrk < addr)
+ {
+ __set_errno (ENOMEM);
+ return -1;
+ }
+
+ return 0;
+}
+weak_alias (__brk, brk)
--- /dev/null
+/* Invoke the brk system call. Sparc version.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifdef __arch64__
+# define SYSCALL_NUM "0x6d"
+#else
+# define SYSCALL_NUM "0x10"
+#endif
+/* Issue the brk request through the "ta SYSCALL_NUM" trap, with
+   __NR_brk in %g1 and ADDR in %o0, and return whatever is in %o0
+   afterwards converted to a pointer.  No error checking is done here.  */
+static inline void *
+__brk_call (void *addr)
+{
+ register long int g1 asm ("g1") = __NR_brk;
+ register long int o0 asm ("o0") = (long int) addr;
+ asm volatile ("ta " SYSCALL_NUM
+ : "=r"(o0)
+ : "r"(g1), "0"(o0)
+ : "cc");
+ return (void *) o0;
+}
#define __NR_ftruncate64 84
#define __NR_futex 142
#define __NR_futex_time64 422
+#define __NR_futex_waitv 449
#define __NR_futimesat 288
#define __NR_get_kernel_syms 223
#define __NR_get_mempolicy 304
#define __NR_preadv2 358
#define __NR_prlimit64 331
#define __NR_process_madvise 440
+#define __NR_process_mrelease 448
#define __NR_process_vm_readv 338
#define __NR_process_vm_writev 339
#define __NR_pselect6 297
#define __NR_pwritev2 359
#define __NR_query_module 184
#define __NR_quotactl 165
+#define __NR_quotactl_fd 443
#define __NR_read 3
#define __NR_readahead 205
#define __NR_readdir 204
#define __NR_sendmsg 114
#define __NR_sendto 133
#define __NR_set_mempolicy 305
+#define __NR_set_mempolicy_home_node 450
#define __NR_set_robust_list 300
#define __NR_set_tid_address 166
#define __NR_setdomainname 163
GLIBC_2.1 __libc_stack_end D 0x4
GLIBC_2.1 _dl_mcount F
GLIBC_2.3 __tls_get_addr F
+GLIBC_2.34 __rtld_version_placeholder F
#define __NR_fsync 95
#define __NR_ftruncate 130
#define __NR_futex 142
+#define __NR_futex_waitv 449
#define __NR_futimesat 288
#define __NR_get_kernel_syms 223
#define __NR_get_mempolicy 304
#define __NR_preadv2 358
#define __NR_prlimit64 331
#define __NR_process_madvise 440
+#define __NR_process_mrelease 448
#define __NR_process_vm_readv 338
#define __NR_process_vm_writev 339
#define __NR_pselect6 297
#define __NR_pwritev2 359
#define __NR_query_module 184
#define __NR_quotactl 165
+#define __NR_quotactl_fd 443
#define __NR_read 3
#define __NR_readahead 205
#define __NR_readdir 204
#define __NR_sendmsg 114
#define __NR_sendto 133
#define __NR_set_mempolicy 305
+#define __NR_set_mempolicy_home_node 450
#define __NR_set_robust_list 300
#define __NR_set_tid_address 166
#define __NR_setdomainname 163
GLIBC_2.2 _dl_mcount F
GLIBC_2.2 _r_debug D 0x28
GLIBC_2.3 __tls_get_addr F
+GLIBC_2.34 __rtld_version_placeholder F
--- /dev/null
+/* Linux definitions of functions used by static libc main startup.
+ Copyright (C) 2017-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifdef SHARED
+# include_next <startup.h>
+#else
+# include <sysdep.h>
+
+/* Write MESSAGE to STDERR_FILENO with the write system call and then
+   invoke exit_group with status 127.  Avoid a run-time invocation of
+   strlen: the length is taken from __builtin_strlen, and when it is not
+   a compile-time constant the expansion calls
+   _startup_fatal_not_constant, which is only declared here (presumably
+   left undefined so that such a use fails at link time).  */
+#define _startup_fatal(message) \
+ do \
+ { \
+ size_t __message_length = __builtin_strlen (message); \
+ if (! __builtin_constant_p (__message_length)) \
+ { \
+ extern void _startup_fatal_not_constant (void); \
+ _startup_fatal_not_constant (); \
+ } \
+ INTERNAL_SYSCALL_CALL (write, STDERR_FILENO, (message), \
+ __message_length); \
+ INTERNAL_SYSCALL_CALL (exit_group, 127); \
+ } \
+ while (0)
+#endif /* !SHARED */
extern int prctl (int __option, ...) __THROW;
#else
# ifdef __REDIRECT
-extern int __REDIRECT (prctl, (int __option, ...), __prctl_time64) __THROW;
+extern int __REDIRECT_NTH (prctl, (int __option, ...), __prctl_time64);
# else
extern int __prctl_time64 (int __option,d ...) __THROW;
# define ioctl __prctl_time64
# This file can list all potential system calls. The names are only
# used if the installed kernel headers also provide them.
-# The list of system calls is current as of Linux 5.13.
-kernel 5.13
+# The list of system calls is current as of Linux 5.19.
+kernel 5.19
FAST_atomic_update
FAST_cmpxchg
ftruncate64
futex
futex_time64
+futex_waitv
futimesat
get_kernel_syms
get_mempolicy
mbind
membarrier
memfd_create
+memfd_secret
memory_ordering
migrate_pages
mincore
preadv2
prlimit64
process_madvise
+process_mrelease
process_vm_readv
process_vm_writev
prof
pwritev2
query_module
quotactl
+quotactl_fd
read
readahead
readdir
sendmsg
sendto
set_mempolicy
+set_mempolicy_home_node
set_robust_list
set_thread_area
set_tid_address
#define NFDS 100
-static int
-open_multiple_temp_files (void)
-{
- /* Check if the temporary file descriptor has no no gaps. */
- int lowfd = xopen ("/dev/null", O_RDONLY, 0600);
- for (int i = 1; i <= NFDS; i++)
- TEST_COMPARE (xopen ("/dev/null", O_RDONLY, 0600),
- lowfd + i);
- return lowfd;
-}
-
static void
close_range_test_max_upper_limit (void)
{
struct support_descriptors *descrs = support_descriptors_list ();
- int lowfd = open_multiple_temp_files ();
+ int lowfd = support_open_dev_null_range (NFDS, O_RDONLY, 0600);
{
int r = close_range (lowfd, ~0U, 0);
static void
close_range_test_common (int lowfd, unsigned int flags)
{
- const int maximum_fd = lowfd + NFDS;
+ const int maximum_fd = lowfd + NFDS - 1;
const int half_fd = lowfd + NFDS / 2;
const int gap_1 = maximum_fd - 8;
struct support_descriptors *descrs = support_descriptors_list ();
/* Check if the temporary file descriptor has no no gaps. */
- int lowfd = open_multiple_temp_files ();
+ int lowfd = support_open_dev_null_range (NFDS, O_RDONLY, 0600);
close_range_test_common (lowfd, 0);
struct support_descriptors *descrs = support_descriptors_list ();
/* Check if the temporary file descriptor has no no gaps. */
- int lowfd = open_multiple_temp_files ();
+ int lowfd = support_open_dev_null_range (NFDS, O_RDONLY, 0600);
struct support_stack stack = support_stack_alloc (4096);
struct support_descriptors *descrs1 = support_descriptors_list ();
/* Check if the temporary file descriptor has no no gaps. */
- int lowfd = open_multiple_temp_files ();
+ int lowfd = support_open_dev_null_range (NFDS, O_RDONLY, 0600);
struct support_descriptors *descrs2 = support_descriptors_list ();
support_stack_free (&stack);
- for (int i = 0; i < NFDS; i++)
+ for (int i = lowfd; i < lowfd + NFDS; i++)
TEST_VERIFY (fcntl (i, F_GETFL) > -1);
support_descriptors_check (descrs2);
close_range_cloexec_test (void)
{
/* Check if the temporary file descriptor has no no gaps. */
- const int lowfd = open_multiple_temp_files ();
+ int lowfd = support_open_dev_null_range (NFDS, O_RDONLY, 0600);
- const int maximum_fd = lowfd + NFDS;
+ const int maximum_fd = lowfd + NFDS - 1;
const int half_fd = lowfd + NFDS / 2;
const int gap_1 = maximum_fd - 8;
/* Create some gaps, close up to a threshold, and check result. */
static int gap_close[] = { 57, 78, 81, 82, 84, 90 };
for (int i = 0; i < array_length (gap_close); i++)
- xclose (gap_close[i]);
+ xclose (lowfd + gap_close[i]);
TEST_COMPARE (close_range (half_fd + 1, gap_1, CLOSE_RANGE_CLOEXEC), 0);
for (int i = half_fd + 1; i < gap_1; i++)
{
int flags = fcntl (i, F_GETFD);
- if (is_in_array (gap_close, array_length (gap_close), i))
+ if (is_in_array (gap_close, array_length (gap_close), i - lowfd))
TEST_COMPARE (flags, -1);
else
{
--- /dev/null
+/* Basic test for getauxval.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <support/check.h>
+#include <sys/auxv.h>
+
+static int missing;
+static int mismatch;
+
+/* Print the value getauxval returns for auxv type T under the label S
+   and bump the file-scope 'missing' counter when that value is 0.  */
+static void
+check_nonzero (unsigned long t, const char *s)
+{
+ unsigned long v = getauxval (t);
+ printf ("%s: %lu (0x%lx)\n", s, v, v);
+ if (v == 0)
+ missing++;
+}
+
+/* Print the value getauxval returns for auxv type T under the label S
+   next to the expected value WANT, and bump the file-scope 'mismatch'
+   counter when the two differ.  */
+static void
+check_eq (unsigned long t, const char *s, unsigned long want)
+{
+ unsigned long v = getauxval (t);
+ printf ("%s: %lu want: %lu\n", s, v, want);
+ if (v != want)
+ mismatch++;
+}
+
+#define NZ(x) check_nonzero (x, #x)
+#define EQ(x, want) check_eq (x, #x, want)
+
+/* Test entry point.  Checks a set of auxv entries for non-zero values
+   and then compares the credential entries with the corresponding
+   get*id calls.  Either stage ends the test through FAIL_EXIT1 when its
+   counter is non-zero; otherwise 0 is returned.  */
+static int
+do_test (void)
+{
+ /* These auxv entries should be non-zero on Linux. */
+ NZ (AT_PHDR);
+ NZ (AT_PHENT);
+ NZ (AT_PHNUM);
+ NZ (AT_PAGESZ);
+ NZ (AT_ENTRY);
+ NZ (AT_CLKTCK);
+ NZ (AT_RANDOM);
+ NZ (AT_EXECFN);
+ /* All entries are printed first; fail only once the whole list ran. */
+ if (missing)
+ FAIL_EXIT1 ("Found %d missing auxv entries.\n", missing);
+
+ /* Check against syscalls. */
+ EQ (AT_UID, getuid ());
+ EQ (AT_EUID, geteuid ());
+ EQ (AT_GID, getgid ());
+ EQ (AT_EGID, getegid ());
+ if (mismatch)
+ FAIL_EXIT1 ("Found %d mismatching auxv entries.\n", mismatch);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Verify that getcwd returns ERANGE for size 1 byte and does not underflow
+ buffer when the CWD is too long and is also a mount target of /. See bug
+ #28769 or CVE-2021-3999 for more context.
+ Copyright The GNU Toolchain Authors.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <intprops.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <support/check.h>
+#include <support/temp_file.h>
+#include <support/test-driver.h>
+#include <support/xsched.h>
+#include <support/xunistd.h>
+
+static char *base;
+#define BASENAME "tst-getcwd-smallbuff"
+#define MOUNT_NAME "mpoint"
+static int sockfd[2];
+
+/* Cleanup hook: registered with atexit in do_test and also installed
+   as CLEANUP_HANDLER.  Removes the MOUNT_NAME directory and releases
+   BASE.  */
+static void
+do_cleanup (void)
+{
+ /* NOTE(review): presumably changes back into the over-long temporary
+ directory named by BASE so that the relative rmdir below resolves --
+ confirm against the support library. */
+ support_chdir_toolong_temp_directory (base);
+ TEST_VERIFY_EXIT (rmdir (MOUNT_NAME) == 0);
+ free (base);
+}
+
+/* Send file descriptor FD over the socket SOCK as SCM_RIGHTS ancillary
+   data, accompanied by the single payload byte 'A'.  Terminates the
+   test unless exactly one byte was sent.  */
+static void
+send_fd (const int sock, const int fd)
+{
+ struct msghdr msg = {0};
+ /* The hdr member is never accessed; the union is the usual idiom for
+ giving buf an alignment suitable for struct cmsghdr. */
+ union
+ {
+ struct cmsghdr hdr;
+ char buf[CMSG_SPACE (sizeof (int))];
+ } cmsgbuf = {0};
+ struct cmsghdr *cmsg;
+ struct iovec vec;
+ char ch = 'A';
+ ssize_t n;
+
+ msg.msg_control = &cmsgbuf.buf;
+ msg.msg_controllen = sizeof (cmsgbuf.buf);
+
+ /* One control message carrying exactly one descriptor. */
+ cmsg = CMSG_FIRSTHDR (&msg);
+ cmsg->cmsg_len = CMSG_LEN (sizeof (int));
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ memcpy (CMSG_DATA (cmsg), &fd, sizeof (fd));
+
+ /* The one-byte payload that recv_fd checks for. */
+ vec.iov_base = &ch;
+ vec.iov_len = 1;
+ msg.msg_iov = &vec;
+ msg.msg_iovlen = 1;
+
+ /* Retry while the call is interrupted by a signal. */
+ while ((n = sendmsg (sock, &msg, 0)) == -1 && errno == EINTR);
+
+ TEST_VERIFY_EXIT (n == 1);
+}
+
+/* Receive a file descriptor sent by send_fd over the socket SOCK.
+   Returns the descriptor, or -1 if the payload is not the single byte
+   'A', if the ancillary data is missing, truncated, or not exactly one
+   SOL_SOCKET/SCM_RIGHTS descriptor, or if the received descriptor is
+   negative.  */
+static int
+recv_fd (const int sock)
+{
+  struct msghdr msg = {0};
+  /* The hdr member is never accessed; the union is the usual idiom for
+     giving buf an alignment suitable for struct cmsghdr.  */
+  union
+  {
+    struct cmsghdr hdr;
+    char buf[CMSG_SPACE (sizeof (int))];
+  } cmsgbuf = {0};
+  struct cmsghdr *cmsg;
+  struct iovec vec;
+  ssize_t n;
+  char ch = '\0';
+  int fd = -1;
+
+  vec.iov_base = &ch;
+  vec.iov_len = 1;
+  msg.msg_iov = &vec;
+  msg.msg_iovlen = 1;
+
+  msg.msg_control = &cmsgbuf.buf;
+  msg.msg_controllen = sizeof (cmsgbuf.buf);
+
+  /* Retry while the call is interrupted by a signal.  */
+  while ((n = recvmsg (sock, &msg, 0)) == -1 && errno == EINTR);
+  if (n != 1 || ch != 'A')
+    return -1;
+
+  /* A truncated control message cannot carry a usable descriptor.  */
+  if (msg.msg_flags & MSG_CTRUNC)
+    return -1;
+
+  cmsg = CMSG_FIRSTHDR (&msg);
+  if (cmsg == NULL)
+    return -1;
+  /* Accept only what send_fd produces: one SCM_RIGHTS message at
+     SOL_SOCKET level holding exactly one int.  */
+  if (cmsg->cmsg_level != SOL_SOCKET
+      || cmsg->cmsg_type != SCM_RIGHTS
+      || cmsg->cmsg_len != CMSG_LEN (sizeof (fd)))
+    return -1;
+  memcpy (&fd, CMSG_DATA (cmsg), sizeof (fd));
+  if (fd < 0)
+    return -1;
+  return fd;
+}
+
+/* Entry point of the child created by xclone in do_test.  Waits for a
+   '1' byte from the parent, bind-mounts / recursively onto MOUNT_NAME,
+   sends the parent an O_PATH descriptor for that mount point, and then
+   waits for an 'a' byte before returning 0.  ARG is unused.  */
+static int
+child_func (void * const arg)
+{
+  /* The child talks over sockfd[1]; the parent keeps sockfd[0].  */
+  xclose (sockfd[0]);
+  const int sock = sockfd[1];
+  char ch;
+
+  /* The parent sends '1' once it has written the uid/gid maps.  */
+  TEST_VERIFY_EXIT (read (sock, &ch, 1) == 1);
+  TEST_VERIFY_EXIT (ch == '1');
+
+  if (mount ("/", MOUNT_NAME, NULL, MS_BIND | MS_REC, NULL))
+    FAIL_EXIT1 ("mount failed: %m\n");
+  /* Open the directory that was just mounted on.  Use MOUNT_NAME so
+     that the mount target and the opened path cannot drift apart.  */
+  const int fd = xopen (MOUNT_NAME,
+                        O_RDONLY | O_PATH | O_DIRECTORY | O_NOFOLLOW, 0);
+
+  send_fd (sock, fd);
+  xclose (fd);
+
+  /* Stay alive until the parent has finished its getcwd check.  */
+  TEST_VERIFY_EXIT (read (sock, &ch, 1) == 1);
+  TEST_VERIFY_EXIT (ch == 'a');
+
+  xclose (sock);
+  return 0;
+}
+
+/* Write the string MAPPING (without its terminating NUL) to the
+   existing file MAP_FILE.  do_test uses it for the child's
+   /proc/PID/uid_map and /proc/PID/gid_map files.  */
+static void
+update_map (char * const mapping, const char * const map_file)
+{
+ const size_t map_len = strlen (mapping);
+
+ const int fd = xopen (map_file, O_WRONLY, 0);
+ xwrite (fd, mapping, map_len);
+ xclose (fd);
+}
+
+/* Write STR to /proc/CHILD_PID/setgroups.  If the file cannot be
+   opened, errno is required to be ENOENT and the test then stops
+   through FAIL_UNSUPPORTED.  */
+static void
+proc_setgroups_write (const long child_pid, const char * const str)
+{
+ const size_t str_len = strlen(str);
+
+ /* Fixed part of the path (including the NUL) plus the widest decimal
+ long. */
+ char setgroups_path[sizeof ("/proc//setgroups") + INT_STRLEN_BOUND (long)];
+
+ snprintf (setgroups_path, sizeof (setgroups_path),
+ "/proc/%ld/setgroups", child_pid);
+
+ /* Plain open rather than xopen, so that a missing file can be
+ handled below. */
+ const int fd = open (setgroups_path, O_WRONLY);
+
+ if (fd < 0)
+ {
+ TEST_VERIFY_EXIT (errno == ENOENT);
+ FAIL_UNSUPPORTED ("/proc/%ld/setgroups not found\n", child_pid);
+ }
+
+ xwrite (fd, str, str_len);
+ xclose(fd);
+}
+
+static char child_stack[1024 * 1024];
+
+/* Test entry point.  Creates an over-long working directory with a
+   mount point in it, has a child in new user and mount namespaces
+   bind-mount / there, switches to that directory through a descriptor
+   passed back by the child, and checks that getcwd with a 1-byte
+   buffer fails with ERANGE without writing outside that buffer.
+   Returns the probe child's exit status (EXIT_UNSUPPORTED) when the
+   namespaces cannot be unshared, and 0 otherwise.  */
+int
+do_test (void)
+{
+  base = support_create_and_chdir_toolong_temp_directory (BASENAME);
+
+  xmkdir (MOUNT_NAME, S_IRWXU);
+  atexit (do_cleanup);
+
+  /* Check whether user namespaces are supported.  */
+  {
+    pid_t pid = xfork ();
+    if (pid == 0)
+      {
+        if (unshare (CLONE_NEWUSER | CLONE_NEWNS) != 0)
+          _exit (EXIT_UNSUPPORTED);
+        else
+          _exit (0);
+      }
+    int status;
+    xwaitpid (pid, &status, 0);
+    TEST_VERIFY_EXIT (WIFEXITED (status));
+    if (WEXITSTATUS (status) != 0)
+      return WEXITSTATUS (status);
+  }
+
+  TEST_VERIFY_EXIT (socketpair (AF_UNIX, SOCK_STREAM, 0, sockfd) == 0);
+  pid_t child_pid = xclone (child_func, NULL, child_stack,
+                            sizeof (child_stack),
+                            CLONE_NEWUSER | CLONE_NEWNS | SIGCHLD);
+
+  /* The parent talks over sockfd[0]; the child keeps sockfd[1].  */
+  xclose (sockfd[1]);
+  const int sock = sockfd[0];
+
+  char map_path[sizeof ("/proc//uid_map") + INT_STRLEN_BOUND (long)];
+  /* Room for "0 %ld 1": the fixed characters '0', ' ', ' ', '1' and the
+     NUL, plus the widest decimal long.  */
+  char map_buf[sizeof ("0  1") + INT_STRLEN_BOUND (long)];
+
+  /* Map ID 0 inside the child's namespace to our own uid and gid.  */
+  snprintf (map_path, sizeof (map_path), "/proc/%ld/uid_map",
+            (long) child_pid);
+  snprintf (map_buf, sizeof (map_buf), "0 %ld 1", (long) getuid ());
+  update_map (map_buf, map_path);
+
+  proc_setgroups_write ((long) child_pid, "deny");
+  snprintf (map_path, sizeof (map_path), "/proc/%ld/gid_map",
+            (long) child_pid);
+  snprintf (map_buf, sizeof (map_buf), "0 %ld 1", (long) getgid ());
+  update_map (map_buf, map_path);
+
+  /* Tell the child that the maps are in place, then switch into the
+     directory it mounted.  */
+  TEST_VERIFY_EXIT (send (sock, "1", 1, MSG_NOSIGNAL) == 1);
+  const int fd = recv_fd (sock);
+  TEST_VERIFY_EXIT (fd >= 0);
+  TEST_VERIFY_EXIT (fchdir (fd) == 0);
+
+  static char buf[2 * 10 + 1];
+  memset (buf, 'A', sizeof (buf));
+
+  /* Finally, call getcwd and check if it resulted in a buffer underflow.
+     The 1-byte destination sits in the middle of BUF so that a write
+     on either side of it is detected below.  */
+  char *cwd = getcwd (buf + sizeof (buf) / 2, 1);
+  /* Save errno before any check gets a chance to report and thereby
+     overwrite it.  */
+  const int getcwd_errno = errno;
+  TEST_VERIFY (cwd == NULL);
+  TEST_VERIFY (getcwd_errno == ERANGE);
+
+  for (int i = 0; i < sizeof (buf); i++)
+    if (buf[i] != 'A')
+      {
+        printf ("buf[%d] = %02x\n", i, (unsigned int) buf[i]);
+        support_record_failure ();
+      }
+
+  /* Release the child and collect it.  */
+  TEST_VERIFY_EXIT (send (sock, "a", 1, MSG_NOSIGNAL) == 1);
+  xclose (sock);
+  TEST_VERIFY_EXIT (xwaitpid (child_pid, NULL, 0) == child_pid);
+
+  return 0;
+}
+
+#define CLEANUP_HANDLER do_cleanup
+#include <support/test-driver.c>
help='C compiler (including options) to use')
args = parser.parse_args()
linux_version_headers = glibcsyscalls.linux_kernel_version(args.cc)
- linux_version_glibc = (5, 13)
+ linux_version_glibc = (5, 17)
sys.exit(glibcextract.compare_macro_consts(
'#define _GNU_SOURCE 1\n'
'#include <sys/mman.h>\n',
--- /dev/null
+#include "tst-socket-timestamp-compat.c"
--- /dev/null
+/* Check recvmsg/recvmmsg 64-bit timestamp support.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <arpa/inet.h>
+#include <string.h>
+#include <support/check.h>
+#include <support/xsocket.h>
+#include <support/xunistd.h>
+#include <stdbool.h>
+
+/* AF_INET socket and address used to receive data. */
+static int srv;
+static struct sockaddr_in srv_addr;
+
+/* Send the int PAYLOAD as one datagram to ADDR from a freshly created
+   AF_INET datagram socket, which is closed again before returning.
+   Always returns 0.  */
+static int
+do_sendto (const struct sockaddr_in *addr, int payload)
+{
+ int s = xsocket (AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
+ xconnect (s, (const struct sockaddr *) addr, sizeof (*addr));
+
+ xsendto (s, &payload, sizeof (payload), 0, (const struct sockaddr *) addr,
+ sizeof (*addr));
+
+ xclose (s);
+
+ return 0;
+}
+
+/* Receive one datagram on the file-scope socket SRV into MMHDR, using
+   recvmmsg when USE_MULTI_CALL is true and recvmsg otherwise, with
+   MSGBUF/MSGBUFLEN as the ancillary data buffer.  Checks that sizeof
+   (int) bytes were received and that the payload equals EXP_PAYLOAD.
+   MMHDR->msg_hdr stays filled in so that the caller can walk the
+   control messages; its msg_iov, however, points at a local of this
+   function and must not be used after the return.  */
+static void
+do_recvmsg_ancillary (bool use_multi_call, struct mmsghdr *mmhdr,
+ void *msgbuf, size_t msgbuflen, int exp_payload)
+{
+ int payload;
+ struct iovec iov =
+ {
+ .iov_base = &payload,
+ .iov_len = sizeof (payload)
+ };
+ mmhdr->msg_hdr.msg_name = NULL;
+ mmhdr->msg_hdr.msg_iov = &iov;
+ mmhdr->msg_hdr.msg_iovlen = 1;
+ mmhdr->msg_hdr.msg_control = msgbuf;
+ mmhdr->msg_hdr.msg_controllen = msgbuflen;
+
+ int r;
+ if (use_multi_call)
+ {
+ r = recvmmsg (srv, mmhdr, 1, 0, NULL);
+ /* On success compare the length of the received message rather
+ than the number of messages. */
+ if (r >= 0)
+ r = mmhdr->msg_len;
+ }
+ else
+ r = recvmsg (srv, &mmhdr->msg_hdr, 0);
+ TEST_COMPARE (r, sizeof (int));
+ TEST_COMPARE (payload, exp_payload);
+}
+
+/* Check that, with only the 32-bit timestamp option enabled and a large
+   ancillary buffer, the additional 64-bit timestamp message is present
+   exactly when time_t is wider than 4 bytes.  MC selects recvmmsg
+   (true) or recvmsg (false).  */
+static void
+do_test_large_buffer (bool mc)
+{
+ struct mmsghdr mmhdr = { 0 };
+ /* It should be large enough for either timeval/timespec and the
+ 64-bit time type as well. */
+
+ union
+ {
+ struct cmsghdr cmsghdr;
+ char msgbuf[512];
+ } control;
+
+ /* Enable the 32-bit timeval timestamp (SO_TIMESTAMP_OLD) and check
+ whether the 64-bit one (SO_TIMESTAMP_NEW) shows up as expected. */
+ {
+ int r = setsockopt (srv, SOL_SOCKET, SO_TIMESTAMP_OLD, &(int){1},
+ sizeof (int));
+ TEST_VERIFY_EXIT (r != -1);
+
+ do_sendto (&srv_addr, 42);
+ do_recvmsg_ancillary (mc, &mmhdr, &control, sizeof control, 42);
+
+ bool found_timestamp = false;
+ for (struct cmsghdr *cmsg = CMSG_FIRSTHDR (&mmhdr.msg_hdr);
+ cmsg != NULL;
+ cmsg = CMSG_NXTHDR (&mmhdr.msg_hdr, cmsg))
+ {
+ if (cmsg->cmsg_level != SOL_SOCKET)
+ continue;
+
+ /* With a 4-byte time_t the 64-bit message must never appear. */
+ if (sizeof (time_t) > 4 && cmsg->cmsg_type == SO_TIMESTAMP_NEW)
+ found_timestamp = true;
+ else
+ TEST_VERIFY (cmsg->cmsg_type != SO_TIMESTAMP_NEW);
+ }
+
+ TEST_COMPARE (found_timestamp, sizeof (time_t) > 4);
+ }
+
+ /* Same as before, but for timespec. */
+ {
+ int r = setsockopt (srv, SOL_SOCKET, SO_TIMESTAMPNS_OLD, &(int){1},
+ sizeof (int));
+ TEST_VERIFY_EXIT (r != -1);
+
+ do_sendto (&srv_addr, 42);
+ do_recvmsg_ancillary (mc, &mmhdr, &control, sizeof control, 42);
+
+ bool found_timestamp = false;
+ for (struct cmsghdr *cmsg = CMSG_FIRSTHDR (&mmhdr.msg_hdr);
+ cmsg != NULL;
+ cmsg = CMSG_NXTHDR (&mmhdr.msg_hdr, cmsg))
+ {
+ if (cmsg->cmsg_level != SOL_SOCKET)
+ continue;
+
+ if (sizeof (time_t) > 4 && cmsg->cmsg_type == SO_TIMESTAMPNS_NEW)
+ found_timestamp = true;
+ else
+ TEST_VERIFY (cmsg->cmsg_type != SO_TIMESTAMPNS_NEW);
+ }
+
+ TEST_COMPARE (found_timestamp, sizeof (time_t) > 4);
+ }
+}
+
+/* Check that recvmsg does not create the additional 64-bit timestamp
+   when only the 32-bit timestamp is enabled and the ancillary buffer
+   has room for just one timestamp message.  Also check that MSG_CTRUNC
+   is set if and only if time_t is wider than 4 bytes.  MC selects
+   recvmmsg (true) or recvmsg (false).  */
+static void
+do_test_small_buffer (bool mc)
+{
+ struct mmsghdr mmhdr = { 0 };
+
+ /* Enable 32 bit timeval precision and check if no 64 bit timeval stamp
+ is created. */
+ {
+ int r = setsockopt (srv, SOL_SOCKET, SO_TIMESTAMP_OLD, &(int){1},
+ sizeof (int));
+ TEST_VERIFY_EXIT (r != -1);
+
+ /* Space for exactly one struct timeval control message. */
+ union
+ {
+ struct cmsghdr cmsghdr;
+ char msgbuf[CMSG_SPACE (sizeof (struct timeval))];
+ } control;
+
+ do_sendto (&srv_addr, 42);
+ do_recvmsg_ancillary (mc, &mmhdr, &control, sizeof control, 42);
+
+ bool found_timestamp = false;
+ for (struct cmsghdr *cmsg = CMSG_FIRSTHDR (&mmhdr.msg_hdr);
+ cmsg != NULL;
+ cmsg = CMSG_NXTHDR (&mmhdr.msg_hdr, cmsg))
+ {
+ if (cmsg->cmsg_level != SOL_SOCKET)
+ continue;
+
+ if (sizeof (time_t) > 4 && cmsg->cmsg_type == SO_TIMESTAMP_NEW)
+ found_timestamp = true;
+ else
+ TEST_VERIFY (cmsg->cmsg_type != SO_TIMESTAMP_NEW);
+ }
+
+ /* The 64-bit timestamp is never expected here; only the MSG_CTRUNC
+ expectation depends on the size of time_t. */
+ if (sizeof (time_t) > 4)
+ {
+ TEST_VERIFY ((mmhdr.msg_hdr.msg_flags & MSG_CTRUNC));
+ TEST_COMPARE (found_timestamp, 0);
+ }
+ else
+ {
+ TEST_VERIFY (!(mmhdr.msg_hdr.msg_flags & MSG_CTRUNC));
+ TEST_COMPARE (found_timestamp, 0);
+ }
+ }
+
+ /* Same as before, but for timespec. */
+ {
+ int r = setsockopt (srv, SOL_SOCKET, SO_TIMESTAMPNS_OLD, &(int){1},
+ sizeof (int));
+ TEST_VERIFY_EXIT (r != -1);
+
+ /* Space for exactly one struct timespec control message. */
+ union
+ {
+ struct cmsghdr cmsghdr;
+ char msgbuf[CMSG_SPACE (sizeof (struct timespec))];
+ } control;
+
+ do_sendto (&srv_addr, 42);
+ do_recvmsg_ancillary (mc, &mmhdr, &control, sizeof control, 42);
+
+ bool found_timestamp = false;
+ for (struct cmsghdr *cmsg = CMSG_FIRSTHDR (&mmhdr.msg_hdr);
+ cmsg != NULL;
+ cmsg = CMSG_NXTHDR (&mmhdr.msg_hdr, cmsg))
+ {
+ if (cmsg->cmsg_level != SOL_SOCKET)
+ continue;
+
+ if (sizeof (time_t) > 4 && cmsg->cmsg_type == SO_TIMESTAMPNS_NEW)
+ found_timestamp = true;
+ else
+ TEST_VERIFY (cmsg->cmsg_type != SO_TIMESTAMPNS_NEW);
+ }
+
+ if (sizeof (time_t) > 4)
+ {
+ TEST_VERIFY ((mmhdr.msg_hdr.msg_flags & MSG_CTRUNC));
+ TEST_COMPARE (found_timestamp, 0);
+ }
+ else
+ {
+ TEST_VERIFY ((mmhdr.msg_hdr.msg_flags & MSG_CTRUNC) == 0);
+ TEST_COMPARE (found_timestamp, 0);
+ }
+ }
+}
+
+/* Test entry point.  Returns 0 immediately when time_t is wider than 4
+   bytes and __TIMESIZE is 64.  Otherwise binds a loopback datagram
+   socket, runs the large- and small-buffer checks with both recvmsg
+   and recvmmsg, closes the socket and returns 0.  */
+static int
+do_test (void)
+{
+  /* This test only makes sense for ABIs that support 32-bit time_t
+     socket timestamps.  */
+  if (sizeof (time_t) > 4 && __TIMESIZE == 64)
+    return 0;
+
+  srv = xsocket (AF_INET, SOCK_DGRAM, 0);
+  srv_addr = (struct sockaddr_in) {
+    .sin_family = AF_INET,
+    .sin_addr = {.s_addr = htonl (INADDR_LOOPBACK) },
+  };
+  xbind (srv, (struct sockaddr *) &srv_addr, sizeof (srv_addr));
+  {
+    /* Read back the bound address (sin_port was left 0 above) so that
+       do_sendto can address the socket.  */
+    socklen_t sa_len = sizeof (srv_addr);
+    xgetsockname (srv, (struct sockaddr *) &srv_addr, &sa_len);
+    TEST_VERIFY (sa_len == sizeof (srv_addr));
+  }
+
+  /* Check recvmsg.  */
+  do_test_large_buffer (false);
+  do_test_small_buffer (false);
+  /* Check recvmmsg.  */
+  do_test_large_buffer (true);
+  do_test_small_buffer (true);
+
+  /* Release the receiving socket, as tst-socket-timestamp.c does.  */
+  xclose (srv);
+
+  return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+#include "tst-socket-timestamp.c"
--- /dev/null
+/* Check recvmsg/recvmmsg 64-bit timestamp support.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <array_length.h>
+#include <arpa/inet.h>
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <support/check.h>
+#include <support/next_to_fault.h>
+#include <support/support.h>
+#include <support/test-driver.h>
+#include <support/xunistd.h>
+#include <support/xsocket.h>
+#include <sys/mman.h>
+
+/* Extra space added to the ancillary data buffer; it might be used to
+ convert a 32-bit timestamp to 64-bit for _TIME_BITS=64. */
+enum { slack_max_size = 64 };
+static const int slack[] = { 0, 4, 8, 16, 32, slack_max_size };
+
+static bool support_64_timestamp;
+/* AF_INET socket and address used to receive data. */
+static int srv;
+static struct sockaddr_in srv_addr;
+
+/* Send NMSGS datagrams to ADDR from a freshly created AF_INET datagram
+   socket, which is closed again before returning.  The payload of
+   datagram number I is the int I.  Always returns 0.  */
+static int
+do_sendto (const struct sockaddr_in *addr, int nmsgs)
+{
+ int s = xsocket (AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
+ xconnect (s, (const struct sockaddr *) addr, sizeof (*addr));
+
+ for (int i = 0; i < nmsgs; i++)
+ xsendto (s, &i, sizeof (i), 0, (const struct sockaddr *) addr,
+ sizeof (*addr));
+
+ xclose (s);
+
+ return 0;
+}
+
+/* Receive one datagram on S with recvmsg, or with recvmmsg when
+   USE_MULTI_CALL is set, and check that its int payload equals
+   EXP_PAYLOAD.  The control buffer is the CMSG_SPACE (TSIZE) + SLACK
+   bytes that end at CMSG.  If CMSG is NULL no control buffer is
+   supplied and the timestamp check is skipped.  Otherwise verify that
+   an SCM_TIMESTAMP or SCM_TIMESTAMPNS message of the matching size is
+   present exactly when one is expected.  */
+static void
+do_recvmsg_slack_ancillary (bool use_multi_call, int s, void *cmsg,
+                            size_t slack, size_t tsize, int exp_payload)
+{
+  int payload;
+  struct iovec iov =
+    {
+      .iov_base = &payload,
+      .iov_len = sizeof (payload)
+    };
+  /* Derive the control buffer only from a valid CMSG, and do the
+     pointer arithmetic on char * rather than void *.  */
+  size_t msg_controllen = 0;
+  char *msg_control = NULL;
+  if (cmsg != NULL)
+    {
+      msg_controllen = CMSG_SPACE (tsize) + slack;
+      msg_control = (char *) cmsg - msg_controllen;
+      memset (msg_control, 0x55, msg_controllen);
+    }
+  struct mmsghdr mmhdr =
+    {
+      .msg_hdr =
+      {
+        .msg_name = NULL,
+        .msg_namelen = 0,
+        .msg_iov = &iov,
+        .msg_iovlen = 1,
+        .msg_control = msg_control,
+        .msg_controllen = msg_controllen
+      },
+    };
+
+  int r;
+  if (use_multi_call)
+    {
+      r = recvmmsg (s, &mmhdr, 1, 0, NULL);
+      /* On success compare the length of the received message rather
+         than the number of messages.  */
+      if (r >= 0)
+        r = mmhdr.msg_len;
+    }
+  else
+    r = recvmsg (s, &mmhdr.msg_hdr, 0);
+  TEST_COMPARE (r, sizeof (int));
+  TEST_COMPARE (payload, exp_payload);
+
+  if (cmsg == NULL)
+    return;
+
+  /* A timestamp is expected if 32-bit timestamps are used (supported in
+     every configuration) or if the underlying kernel supports 64-bit
+     timestamps.  Otherwise recvmsg needs extra space to add the 64-bit
+     timestamp.  */
+  bool exp_timestamp;
+  if (sizeof (time_t) == 4 || support_64_timestamp)
+    exp_timestamp = true;
+  else
+    exp_timestamp = slack >= CMSG_SPACE (tsize);
+
+  bool timestamp = false;
+  for (struct cmsghdr *hdr = CMSG_FIRSTHDR (&mmhdr.msg_hdr);
+       hdr != NULL;
+       hdr = CMSG_NXTHDR (&mmhdr.msg_hdr, hdr))
+    {
+      if (hdr->cmsg_level != SOL_SOCKET)
+        continue;
+      if (hdr->cmsg_type == SCM_TIMESTAMP
+          && hdr->cmsg_len == CMSG_LEN (sizeof (struct timeval)))
+        {
+          struct timeval tv;
+          memcpy (&tv, CMSG_DATA (hdr), sizeof (tv));
+          if (test_verbose)
+            printf ("SCM_TIMESTAMP: {%jd, %jd}\n", (intmax_t)tv.tv_sec,
+                    (intmax_t)tv.tv_usec);
+          timestamp = true;
+        }
+      else if (hdr->cmsg_type == SCM_TIMESTAMPNS
+               && hdr->cmsg_len == CMSG_LEN (sizeof (struct timespec)))
+        {
+          struct timespec ts;
+          memcpy (&ts, CMSG_DATA (hdr), sizeof (ts));
+          if (test_verbose)
+            printf ("SCM_TIMESTAMPNS: {%jd, %jd}\n", (intmax_t)ts.tv_sec,
+                    (intmax_t)ts.tv_nsec);
+          timestamp = true;
+        }
+    }
+
+  TEST_COMPARE (timestamp, exp_timestamp);
+}
+
+/* Check that the extra ancillary space is handled correctly by recvmsg
+   and recvmmsg, using each entry of the slack array in turn as the
+   amount of extra space in the ancillary buffer.  */
+static void
+do_test_slack_space (void)
+{
+ /* Setup the ancillary data buffer with an extra page with PROT_NONE to
+ check the possible timestamp conversion on some systems. */
+ struct support_next_to_fault nf =
+ support_next_to_fault_allocate (slack_max_size);
+ /* MSGBUF is the end of the allocation; do_recvmsg_slack_ancillary
+ places its control buffer so that it ends here.
+ NOTE(review): that buffer is CMSG_SPACE (tsize) + slack bytes long,
+ which can exceed slack_max_size, so it may start before nf.buffer --
+ confirm that the memory in front of the buffer is accessible. */
+ void *msgbuf = nf.buffer + slack_max_size;
+
+ /* Enable the timestamp using struct timeval precision. */
+ {
+ int r = setsockopt (srv, SOL_SOCKET, SO_TIMESTAMP, &(int){1},
+ sizeof (int));
+ TEST_VERIFY_EXIT (r != -1);
+ }
+ /* Check recvmsg. */
+ /* One datagram is queued per slack value; datagram S carries the
+ payload S, which the receiver checks. */
+ do_sendto (&srv_addr, array_length (slack));
+ for (int s = 0; s < array_length (slack); s++)
+ {
+ memset (nf.buffer, 0x55, nf.length);
+ do_recvmsg_slack_ancillary (false, srv, msgbuf, slack[s],
+ sizeof (struct timeval), s);
+ }
+ /* Check recvmmsg. */
+ do_sendto (&srv_addr, array_length (slack));
+ for (int s = 0; s < array_length (slack); s++)
+ {
+ memset (nf.buffer, 0x55, nf.length);
+ do_recvmsg_slack_ancillary (true, srv, msgbuf, slack[s],
+ sizeof (struct timeval), s);
+ }
+
+ /* Now enable timestamp using a higher precision, it overwrites the previous
+ precision. */
+ {
+ int r = setsockopt (srv, SOL_SOCKET, SO_TIMESTAMPNS, &(int){1},
+ sizeof (int));
+ TEST_VERIFY_EXIT (r != -1);
+ }
+ /* Check recvmsg. */
+ do_sendto (&srv_addr, array_length (slack));
+ for (int s = 0; s < array_length (slack); s++)
+ do_recvmsg_slack_ancillary (false, srv, msgbuf, slack[s],
+ sizeof (struct timespec), s);
+ /* Check recvmmsg. */
+ do_sendto (&srv_addr, array_length (slack));
+ for (int s = 0; s < array_length (slack); s++)
+ do_recvmsg_slack_ancillary (true, srv, msgbuf, slack[s],
+ sizeof (struct timespec), s);
+
+ support_next_to_fault_free (&nf);
+}
+
+/* Check if the converted 64-bit timestamp is correctly appended when there
+ are multiple ancillary messages.  Receives one datagram on S with
+ recvmsg, or with recvmmsg when USE_MULTI_CALL is set, using the
+ CMSGSIZE bytes at CMSG as the control buffer, and checks that the int
+ payload equals EXP_MSG.  Unless CMSG is NULL, it then requires both an
+ SCM_TIMESTAMP message and an IP_ORIGDSTADDR message that matches the
+ file-scope SRV_ADDR. */
+static void
+do_recvmsg_multiple_ancillary (bool use_multi_call, int s, void *cmsg,
+ size_t cmsgsize, int exp_msg)
+{
+ int msg;
+ struct iovec iov =
+ {
+ .iov_base = &msg,
+ .iov_len = sizeof (msg)
+ };
+ size_t msgs = cmsgsize;
+ struct mmsghdr mmhdr =
+ {
+ .msg_hdr =
+ {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_controllen = msgs,
+ .msg_control = cmsg,
+ },
+ };
+
+ int r;
+ if (use_multi_call)
+ {
+ r = recvmmsg (s, &mmhdr, 1, 0, NULL);
+ /* On success compare the length of the received message rather
+ than the number of messages. */
+ if (r >= 0)
+ r = mmhdr.msg_len;
+ }
+ else
+ r = recvmsg (s, &mmhdr.msg_hdr, 0);
+ TEST_COMPARE (r, sizeof (int));
+ TEST_COMPARE (msg, exp_msg);
+
+ if (cmsg == NULL)
+ return;
+
+ bool timestamp = false;
+ bool origdstaddr = false;
+ /* Note that the loop variable shadows the CMSG parameter. */
+ for (struct cmsghdr *cmsg = CMSG_FIRSTHDR (&mmhdr.msg_hdr);
+ cmsg != NULL;
+ cmsg = CMSG_NXTHDR (&mmhdr.msg_hdr, cmsg))
+ {
+ if (cmsg->cmsg_level == SOL_IP
+ && cmsg->cmsg_type == IP_ORIGDSTADDR
+ && cmsg->cmsg_len >= CMSG_LEN (sizeof (struct sockaddr_in)))
+ {
+ struct sockaddr_in sa;
+ memcpy (&sa, CMSG_DATA (cmsg), sizeof (sa));
+ if (test_verbose)
+ {
+ char str[INET_ADDRSTRLEN];
+ inet_ntop (AF_INET, &sa.sin_addr, str, INET_ADDRSTRLEN);
+ printf ("IP_ORIGDSTADDR: %s:%d\n", str, ntohs (sa.sin_port));
+ }
+ /* The reported destination must be the address SRV is bound to. */
+ origdstaddr = sa.sin_addr.s_addr == srv_addr.sin_addr.s_addr
+ && sa.sin_port == srv_addr.sin_port;
+ }
+ if (cmsg->cmsg_level == SOL_SOCKET
+ && cmsg->cmsg_type == SCM_TIMESTAMP
+ && cmsg->cmsg_len >= CMSG_LEN (sizeof (struct timeval)))
+ {
+ struct timeval tv;
+ memcpy (&tv, CMSG_DATA (cmsg), sizeof (tv));
+ if (test_verbose)
+ printf ("SCM_TIMESTAMP: {%jd, %jd}\n", (intmax_t)tv.tv_sec,
+ (intmax_t)tv.tv_usec);
+ timestamp = true;
+ }
+ }
+
+ TEST_COMPARE (timestamp, true);
+ TEST_COMPARE (origdstaddr, true);
+}
+
+/* Enable both SO_TIMESTAMP and IP_RECVORIGDSTADDR on the file-scope
+   socket SRV and check, for nmsgs datagrams each with recvmsg and with
+   recvmmsg, that both kinds of ancillary message are delivered.  */
+static void
+do_test_multiple_ancillary (void)
+{
+ {
+ int r = setsockopt (srv, SOL_SOCKET, SO_TIMESTAMP, &(int){1},
+ sizeof (int));
+ TEST_VERIFY_EXIT (r != -1);
+ }
+ {
+ int r = setsockopt (srv, IPPROTO_IP, IP_RECVORIGDSTADDR, &(int){1},
+ sizeof (int));
+ TEST_VERIFY_EXIT (r != -1);
+ }
+
+ /* Enough data for the default SO_TIMESTAMP, the IP_RECVORIGDSTADDR, and
+ the extra 64-bit SO_TIMESTAMP. */
+ enum { msgbuflen = CMSG_SPACE (2 * sizeof (uint64_t))
+ + CMSG_SPACE (sizeof (struct sockaddr_in))
+ + CMSG_SPACE (2 * sizeof (uint64_t)) };
+ char msgbuf[msgbuflen];
+
+ enum { nmsgs = 8 };
+ /* Check recvmsg. */
+ do_sendto (&srv_addr, nmsgs);
+ for (int s = 0; s < nmsgs; s++)
+ do_recvmsg_multiple_ancillary (false, srv, msgbuf, msgbuflen, s);
+ /* Check recvmmsg. */
+ do_sendto (&srv_addr, nmsgs);
+ for (int s = 0; s < nmsgs; s++)
+ do_recvmsg_multiple_ancillary (true, srv, msgbuf, msgbuflen, s);
+}
+
+/* Test entry point.  Binds a loopback datagram socket, checks that
+   recvmsg and recvmmsg fail with EBADF on an invalid descriptor, then
+   runs the slack-space and multiple-ancillary-message checks, closes
+   the socket and returns 0.  */
+static int
+do_test (void)
+{
+ srv = xsocket (AF_INET, SOCK_DGRAM, 0);
+ srv_addr = (struct sockaddr_in) {
+ .sin_family = AF_INET,
+ .sin_addr = {.s_addr = htonl (INADDR_LOOPBACK) },
+ };
+ xbind (srv, (struct sockaddr *) &srv_addr, sizeof (srv_addr));
+ {
+ /* Read back the bound address (sin_port was left 0 above) so that
+ do_sendto can address the socket. */
+ socklen_t sa_len = sizeof (srv_addr);
+ xgetsockname (srv, (struct sockaddr *) &srv_addr, &sa_len);
+ TEST_VERIFY (sa_len == sizeof (srv_addr));
+ }
+
+ /* An invalid descriptor must be rejected with EBADF even though the
+ message arguments are NULL. */
+ TEST_COMPARE (recvmsg (-1, NULL, 0), -1);
+ TEST_COMPARE (errno, EBADF);
+ TEST_COMPARE (recvmmsg (-1, NULL, 0, 0, NULL), -1);
+ TEST_COMPARE (errno, EBADF);
+
+ /* Record whether 64-bit socket timestamps are supported for SRV;
+ do_recvmsg_slack_ancillary uses this to decide whether a timestamp
+ is expected. */
+ support_64_timestamp = support_socket_so_timestamp_time64 (srv);
+
+ do_test_slack_space ();
+ do_test_multiple_ancillary ();
+
+ xclose (srv);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
#define __NR_fsync 74
#define __NR_ftruncate 77
#define __NR_futex 202
+#define __NR_futex_waitv 449
#define __NR_futimesat 261
#define __NR_get_kernel_syms 177
#define __NR_get_mempolicy 239
#define __NR_mbind 237
#define __NR_membarrier 324
#define __NR_memfd_create 319
+#define __NR_memfd_secret 447
#define __NR_migrate_pages 256
#define __NR_mincore 27
#define __NR_mkdir 83
#define __NR_preadv2 327
#define __NR_prlimit64 302
#define __NR_process_madvise 440
+#define __NR_process_mrelease 448
#define __NR_process_vm_readv 310
#define __NR_process_vm_writev 311
#define __NR_pselect6 270
#define __NR_pwritev2 328
#define __NR_query_module 178
#define __NR_quotactl 179
+#define __NR_quotactl_fd 443
#define __NR_read 0
#define __NR_readahead 187
#define __NR_readlink 89
#define __NR_sendmsg 46
#define __NR_sendto 44
#define __NR_set_mempolicy 238
+#define __NR_set_mempolicy_home_node 450
#define __NR_set_robust_list 273
#define __NR_set_thread_area 205
#define __NR_set_tid_address 218
GLIBC_2.2.5 _dl_mcount F
GLIBC_2.2.5 _r_debug D 0x28
GLIBC_2.3 __tls_get_addr F
+GLIBC_2.34 __rtld_version_placeholder F
#define __NR_fsync 1073741898
#define __NR_ftruncate 1073741901
#define __NR_futex 1073742026
+#define __NR_futex_waitv 1073742273
#define __NR_futimesat 1073742085
#define __NR_get_mempolicy 1073742063
#define __NR_get_robust_list 1073742355
#define __NR_mbind 1073742061
#define __NR_membarrier 1073742148
#define __NR_memfd_create 1073742143
+#define __NR_memfd_secret 1073742271
#define __NR_migrate_pages 1073742080
#define __NR_mincore 1073741851
#define __NR_mkdir 1073741907
#define __NR_preadv2 1073742370
#define __NR_prlimit64 1073742126
#define __NR_process_madvise 1073742264
+#define __NR_process_mrelease 1073742272
#define __NR_process_vm_readv 1073742363
#define __NR_process_vm_writev 1073742364
#define __NR_pselect6 1073742094
#define __NR_pwritev 1073742359
#define __NR_pwritev2 1073742371
#define __NR_quotactl 1073742003
+#define __NR_quotactl_fd 1073742267
#define __NR_read 1073741824
#define __NR_readahead 1073742011
#define __NR_readlink 1073741913
#define __NR_sendmsg 1073742342
#define __NR_sendto 1073741868
#define __NR_set_mempolicy 1073742062
+#define __NR_set_mempolicy_home_node 1073742274
#define __NR_set_robust_list 1073742354
#define __NR_set_thread_area 1073742029
#define __NR_set_tid_address 1073742042
GLIBC_2.16 __tls_get_addr F
GLIBC_2.16 _dl_mcount F
GLIBC_2.16 _r_debug D 0x14
+GLIBC_2.34 __rtld_version_placeholder F
tst-strcpy-rtm \
tst-strlen-rtm \
tst-strncmp-rtm \
- tst-strrchr-rtm
+ tst-strrchr-rtm \
+ tst-wcsncmp-rtm \
+# tests
CFLAGS-tst-memchr-rtm.c += -mrtm
CFLAGS-tst-memcmp-rtm.c += -mrtm
CFLAGS-tst-strchr-rtm.c += -mrtm
CFLAGS-tst-strcpy-rtm.c += -mrtm
CFLAGS-tst-strlen-rtm.c += -mrtm
-CFLAGS-tst-strncmp-rtm.c += -mrtm
+CFLAGS-tst-strncmp-rtm.c += -mrtm -Wno-error
CFLAGS-tst-strrchr-rtm.c += -mrtm
+CFLAGS-tst-wcsncmp-rtm.c += -mrtm -Wno-error
endif
ifneq ($(enable-cet),no)
long int data = cpu_features->data_cache_size;
/* Round data cache size to multiple of 256 bytes. */
data = data & ~255L;
- __x86_data_cache_size_half = data / 2;
- __x86_data_cache_size = data;
+ if (data > 0)
+ {
+ __x86_data_cache_size_half = data / 2;
+ __x86_data_cache_size = data;
+ }
long int shared = cpu_features->shared_cache_size;
/* Round shared cache size to multiple of 256 bytes. */
shared = shared & ~255L;
- __x86_shared_cache_size_half = shared / 2;
- __x86_shared_cache_size = shared;
+ if (shared > 0)
+ {
+ __x86_shared_cache_size_half = shared / 2;
+ __x86_shared_cache_size = shared;
+ }
__x86_shared_non_temporal_threshold
= cpu_features->non_temporal_threshold;
break;
}
- /* Disable TSX on some Haswell processors to avoid TSX on kernels that
- weren't updated with the latest microcode package (which disables
- broken feature by default). */
+ /* Disable TSX on some processors to avoid TSX on kernels that
+ weren't updated with the latest microcode package (which
+ disables broken feature by default). */
switch (model)
{
+ case 0x55:
+ if (stepping <= 5)
+ goto disable_tsx;
+ break;
+ case 0x8e:
+ /* NB: Although the errata documents that for model == 0x8e,
+ only 0xb stepping or lower are impacted, the intention of
+ the errata was to disable TSX on all client processors on
+ all steppings. Include 0xc stepping which is an Intel
+ Core i7-8665U, a client mobile processor. */
+ case 0x9e:
+ if (stepping > 0xc)
+ break;
+ /* Fall through. */
+ case 0x4e:
+ case 0x5e:
+ {
+ /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
+ processors listed in:
+
+https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
+ */
+disable_tsx:
+ CPU_FEATURE_UNSET (cpu_features, HLE);
+ CPU_FEATURE_UNSET (cpu_features, RTM);
+ CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);
+ }
+ break;
case 0x3f:
/* Xeon E7 v3 with stepping >= 4 has working TSX. */
if (stepping >= 4)
|= bit_arch_Prefer_No_VZEROUPPER;
else
{
- cpu_features->preferred[index_arch_Prefer_No_AVX512]
- |= bit_arch_Prefer_No_AVX512;
+ /* Processors with AVX512 and AVX-VNNI won't lower CPU frequency
+ when ZMM load and store instructions are used. */
+ if (!CPU_FEATURES_CPU_P (cpu_features, AVX_VNNI))
+ cpu_features->preferred[index_arch_Prefer_No_AVX512]
+ |= bit_arch_Prefer_No_AVX512;
/* Avoid RTM abort triggered by VZEROUPPER inside a
transactionally executing RTM region. */
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
|= bit_arch_Prefer_No_VZEROUPPER;
-
- /* Since to compare 2 32-byte strings, 256-bit EVEX strcmp
- requires 2 loads, 3 VPCMPs and 2 KORDs while AVX2 strcmp
- requires 1 load, 2 VPCMPEQs, 1 VPMINU and 1 VPMOVMSKB,
- AVX2 strcmp is faster than EVEX strcmp. */
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX2))
- cpu_features->preferred[index_arch_Prefer_AVX2_STRCMP]
- |= bit_arch_Prefer_AVX2_STRCMP;
}
/* Avoid avoid short distance REP MOVSB on processor with FSRM. */
CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features,
Fast_Copy_Backward,
disable, 18);
- CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH
- (n, cpu_features, Prefer_AVX2_STRCMP, AVX2, disable, 18);
}
break;
case 19:
/* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8. */
unsigned int minimum_rep_movsb_threshold;
#endif
- /* NB: The default REP MOVSB threshold is 2048 * (VEC_SIZE / 16). */
+ /* NB: The default REP MOVSB threshold is 4096 * (VEC_SIZE / 16) for
+ VEC_SIZE == 64 or 32. For VEC_SIZE == 16, the default REP MOVSB
+ threshold is 2048 * (VEC_SIZE / 16). */
unsigned int rep_movsb_threshold;
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
&& !CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_No_AVX512))
{
- rep_movsb_threshold = 2048 * (64 / 16);
+ rep_movsb_threshold = 4096 * (64 / 16);
#if HAVE_TUNABLES
minimum_rep_movsb_threshold = 64 * 8;
#endif
else if (CPU_FEATURE_PREFERRED_P (cpu_features,
AVX_Fast_Unaligned_Load))
{
- rep_movsb_threshold = 2048 * (32 / 16);
+ rep_movsb_threshold = 4096 * (32 / 16);
#if HAVE_TUNABLES
minimum_rep_movsb_threshold = 32 * 8;
#endif
if (CPU_FEATURE_USABLE_P (cpu_features, FSRM))
rep_movsb_threshold = 2112;
- unsigned long int rep_movsb_stop_threshold;
- /* ERMS feature is implemented from AMD Zen3 architecture and it is
- performing poorly for data above L2 cache size. Henceforth, adding
- an upper bound threshold parameter to limit the usage of Enhanced
- REP MOVSB operations and setting its value to L2 cache size. */
- if (cpu_features->basic.kind == arch_kind_amd)
- rep_movsb_stop_threshold = core;
- /* Setting the upper bound of ERMS to the computed value of
- non-temporal threshold for architectures other than AMD. */
- else
- rep_movsb_stop_threshold = non_temporal_threshold;
-
/* The default threshold to use Enhanced REP STOSB. */
unsigned long int rep_stosb_threshold = 2048;
TUNABLE_SET_WITH_BOUNDS (x86_data_cache_size, data, 0, SIZE_MAX);
TUNABLE_SET_WITH_BOUNDS (x86_shared_cache_size, shared, 0, SIZE_MAX);
+ /* SIZE_MAX >> 4 because memmove-vec-unaligned-erms right-shifts the value of
+ 'x86_non_temporal_threshold' by `LOG_4X_MEMCPY_THRESH` (4) and it is best
+ if that operation cannot overflow. Minimum of 0x4040 (16448) because the
+ L(large_memset_4x) loops need 64-byte to cache align and enough space for
+ at least 1 iteration of 4x PAGE_SIZE unrolled loop. Both values are
+ reflected in the manual. */
TUNABLE_SET_WITH_BOUNDS (x86_non_temporal_threshold, non_temporal_threshold,
- 0, SIZE_MAX);
+ 0x4040, SIZE_MAX >> 4);
TUNABLE_SET_WITH_BOUNDS (x86_rep_movsb_threshold, rep_movsb_threshold,
minimum_rep_movsb_threshold, SIZE_MAX);
TUNABLE_SET_WITH_BOUNDS (x86_rep_stosb_threshold, rep_stosb_threshold, 1,
SIZE_MAX);
#endif
+ unsigned long int rep_movsb_stop_threshold;
+ /* ERMS feature is implemented from AMD Zen3 architecture and it is
+ performing poorly for data above L2 cache size. Henceforth, adding
+ an upper bound threshold parameter to limit the usage of Enhanced
+ REP MOVSB operations and setting its value to L2 cache size. */
+ if (cpu_features->basic.kind == arch_kind_amd)
+ rep_movsb_stop_threshold = core;
+ /* Setting the upper bound of ERMS to the computed value of
+ non-temporal threshold for architectures other than AMD. */
+ else
+ rep_movsb_stop_threshold = non_temporal_threshold;
+
cpu_features->data_cache_size = data;
cpu_features->shared_cache_size = shared;
cpu_features->non_temporal_threshold = non_temporal_threshold;
}
x86_rep_movsb_threshold {
type: SIZE_T
- # Since there is overhead to set up REP MOVSB operation, REP MOVSB
- # isn't faster on short data. The memcpy micro benchmark in glibc
- # shows that 2KB is the approximate value above which REP MOVSB
- # becomes faster than SSE2 optimization on processors with Enhanced
- # REP MOVSB. Since larger register size can move more data with a
- # single load and store, the threshold is higher with larger register
- # size. Note: Since the REP MOVSB threshold must be greater than 8
- # times of vector size and the default value is 2048 * (vector size
- # / 16), the default value and the minimum value must be updated at
- # run-time. NB: Don't set the default value since we can't tell if
- # the tunable value is set by user or not [BZ #27069].
+ # Since there is overhead to set up REP MOVSB operation, REP
+ # MOVSB isn't faster on short data. The memcpy micro benchmark
+ # in glibc shows that 2KB is the approximate value above which
+ # REP MOVSB becomes faster than SSE2 optimization on processors
+ # with Enhanced REP MOVSB. Since larger register size can move
+ # more data with a single load and store, the threshold is
+ # higher with larger register size. Micro benchmarks show AVX
+ # REP MOVSB becomes faster approximately at 8KB. The AVX512
+ # threshold is extrapolated to 16KB. For machines with FSRM the
+ # threshold is universally set at 2112 bytes. Note: Since the
+ # REP MOVSB threshold must be greater than 8 times of vector
+ # size and the default value is 4096 * (vector size / 16), the
+ # default value and the minimum value must be updated at
+ # run-time. NB: Don't set the default value since we can't tell
+ # if the tunable value is set by user or not [BZ #27069].
minval: 1
}
x86_rep_stosb_threshold {
BIT (Prefer_No_AVX512)
BIT (MathVec_Prefer_No_AVX512)
BIT (Prefer_FSRM)
-BIT (Prefer_AVX2_STRCMP)
BIT (Avoid_Short_Distance_REP_MOVSB)
# endif
# if ISA_V2 && defined __AVX__ && defined __AVX2__ && defined __F16C__ \
- && defined __FMA__ && defined __LZCNT__ && defined HAVE_X86_MOVBE
+ && defined __FMA__ && defined __LZCNT__ && defined HAVE_X86_MOVBE \
+ && defined __BMI__ && defined __BMI2__
/* NB: ISAs in x86-64 ISA level v3 are used. */
# define ISA_V3 GNU_PROPERTY_X86_ISA_1_V3
# else
#define ASM_SIZE_DIRECTIVE(name) .size name,.-name;
/* Define an entry point visible from C. */
-#define ENTRY(name) \
+#define ENTRY_P2ALIGN(name, alignment) \
.globl C_SYMBOL_NAME(name); \
.type C_SYMBOL_NAME(name),@function; \
- .align ALIGNARG(4); \
+ .align ALIGNARG(alignment); \
C_LABEL(name) \
cfi_startproc; \
_CET_ENDBR; \
CALL_MCOUNT
+/* Common entry 16 byte aligns. */
+#define ENTRY(name) ENTRY_P2ALIGN (name, 4)
+
#undef END
#define END(name) \
cfi_endproc; \
/* Local label name for asm code. */
#ifndef L
/* ELF-like local names start with `.L'. */
-# define L(name) .L##name
+# define LOCAL_LABEL(name) .L##name
+# define L(name) LOCAL_LABEL(name)
#endif
#define atom_text_section .section ".text.atom", "ax"
else if (cpu_features->basic.kind == arch_kind_amd)
{
fails += CHECK_PROC (ibpb, AMD_IBPB);
- fails += CHECK_PROC (ibrs, AMD_IBRS);
+
+ /* The IBRS feature on AMD processors is reported using the Intel feature
+ * on KVM guests (synthetic bit). In both cases the cpuinfo entry is the
+ * same. */
+ if (HAS_CPU_FEATURE (IBRS_IBPB))
+ fails += CHECK_PROC (ibrs, IBRS_IBPB);
+ else
+ fails += CHECK_PROC (ibrs, AMD_IBRS);
fails += CHECK_PROC (stibp, AMD_STIBP);
}
fails += CHECK_PROC (ibt, IBT);
fails += CHECK_FEATURE_ACTIVE (gfni, GFNI);
#endif
#if __GNUC_PREREQ (11, 0)
- fails += CHECK_FEATURE_ACTIVE (hle, HLE);
+ fails += CHECK_FEATURE_PRESENT (hle, HLE);
fails += CHECK_FEATURE_PRESENT (ibt, IBT);
fails += CHECK_FEATURE_ACTIVE (lahf_lm, LAHF64_SAHF64);
fails += CHECK_FEATURE_PRESENT (lm, LM);
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
+#include <stdint.h>
#include <tst-string-rtm.h>
+#ifdef WIDE
+# define CHAR wchar_t
+# define MEMSET wmemset
+# define STRNCMP wcsncmp
+# define TEST_NAME "wcsncmp"
+#else /* !WIDE */
+# define CHAR char
+# define MEMSET memset
+# define STRNCMP strncmp
+# define TEST_NAME "strncmp"
+#endif /* !WIDE */
+
+
+
#define LOOP 3000
#define STRING_SIZE 1024
-char string1[STRING_SIZE];
-char string2[STRING_SIZE];
+CHAR string1[STRING_SIZE];
+CHAR string2[STRING_SIZE];
__attribute__ ((noinline, noclone))
static int
prepare (void)
{
- memset (string1, 'a', STRING_SIZE - 1);
- memset (string2, 'a', STRING_SIZE - 1);
- if (strncmp (string1, string2, STRING_SIZE) == 0)
+ MEMSET (string1, 'a', STRING_SIZE - 1);
+ MEMSET (string2, 'a', STRING_SIZE - 1);
+ if (STRNCMP (string1, string2, STRING_SIZE) == 0)
return EXIT_SUCCESS;
else
return EXIT_FAILURE;
static int
function (void)
{
- if (strncmp (string1, string2, STRING_SIZE) == 0)
+ if (STRNCMP (string1, string2, STRING_SIZE) == 0)
+ return 0;
+ else
+ return 1;
+}
+
+__attribute__ ((noinline, noclone))
+static int
+function_overflow (void)
+{
+ if (STRNCMP (string1, string2, SIZE_MAX) == 0)
+ return 0;
+ else
+ return 1;
+}
+
+__attribute__ ((noinline, noclone))
+static int
+function_overflow2 (void)
+{
+ if (STRNCMP (string1, string2, SIZE_MAX >> 4) == 0)
return 0;
else
return 1;
static int
do_test (void)
{
- return do_test_1 ("strncmp", LOOP, prepare, function);
+ int status = do_test_1 (TEST_NAME, LOOP, prepare, function);
+ if (status != EXIT_SUCCESS)
+ return status;
+ status = do_test_1 (TEST_NAME, LOOP, prepare, function_overflow);
+ if (status != EXIT_SUCCESS)
+ return status;
+ status = do_test_1 (TEST_NAME, LOOP, prepare, function_overflow2);
+ if (status != EXIT_SUCCESS)
+ return status;
+ return status;
}
--- /dev/null
+/* Test case for wcsncmp inside a transactionally executing RTM region.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define WIDE 1
+#include <wchar.h>
+#include "tst-strncmp-rtm.c"
+++ /dev/null
-/* Implemented in memset.S. */
#define ELF_MACHINE_NAME "x86_64"
+#include <assert.h>
#include <sys/param.h>
#include <sysdep.h>
#include <tls.h>
#include <dl-tlsdesc.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
/* Return nonzero iff ELF header is compatible with the running host. */
static inline int __attribute__ ((unused))
entries will jump to the on-demand fixup code in dl-runtime.c. */
static inline int __attribute__ ((unused, always_inline))
-elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ int lazy, int profile)
{
Elf64_Addr *got;
extern void _dl_runtime_resolve_fxsave (ElfW(Word)) attribute_hidden;
// XXX This is a work-around for a broken linker. Remove!
#define ELF_MACHINE_IRELATIVE R_X86_64_IRELATIVE
-/* The x86-64 never uses Elf64_Rel/Elf32_Rel relocations. */
-#define ELF_MACHINE_NO_REL 1
-#define ELF_MACHINE_NO_RELA 0
-
/* We define an initialization function. This is called very early in
_dl_sysdep_start. */
#define DL_PLATFORM_INIT dl_platform_init ()
/* Perform the relocation specified by RELOC and SYM (which is fully resolved).
MAP is the object containing the reloc. */
-auto inline void
-__attribute__ ((always_inline))
-elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
- const ElfW(Sym) *sym, const struct r_found_version *version,
- void *const reloc_addr_arg, int skip_ifunc)
-{
+static inline void __attribute__((always_inline))
+elf_machine_rela(struct link_map *map, struct r_scope_elem *scope[],
+ const ElfW(Rela) *reloc, const ElfW(Sym) *sym,
+ const struct r_found_version *version,
+ void *const reloc_addr_arg, int skip_ifunc) {
ElfW(Addr) *const reloc_addr = reloc_addr_arg;
const unsigned long int r_type = ELFW(R_TYPE) (reloc->r_info);
# ifndef RTLD_BOOTSTRAP
const ElfW(Sym) *const refsym = sym;
# endif
- struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
+ r_type);
ElfW(Addr) value = SYMBOL_ADDRESS (sym_map, sym, true);
if (sym != NULL
# endif
/* Set to symbol size plus addend. */
value = sym->st_size;
+ *reloc_addr = value + reloc->r_addend;
+ break;
# endif
- /* Fall through. */
+
case R_X86_64_GLOB_DAT:
case R_X86_64_JUMP_SLOT:
- *reloc_addr = value + reloc->r_addend;
+ *reloc_addr = value;
break;
# ifndef RESOLVE_CONFLICT_FIND_MAP
}
}
-auto inline void
+static inline void
__attribute ((always_inline))
elf_machine_rela_relative (ElfW(Addr) l_addr, const ElfW(Rela) *reloc,
void *const reloc_addr_arg)
}
}
-auto inline void
+static inline void
__attribute ((always_inline))
-elf_machine_lazy_rel (struct link_map *map,
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
ElfW(Addr) l_addr, const ElfW(Rela) *reloc,
int skip_ifunc)
{
/* Always initialize TLS descriptors completely at load time, in
case static TLS is allocated for it that requires locking. */
- elf_machine_rela (map, reloc, sym, version, reloc_addr, skip_ifunc);
+ elf_machine_rela (map, scope, reloc, sym, version, reloc_addr, skip_ifunc);
}
else if (__glibc_unlikely (r_type == R_X86_64_IRELATIVE))
{
02111-1307 USA. */
/* The ABI calls for the PLT stubs to pass the index of the relocation
- and not its offset. In _dl_profile_fixup and _dl_call_pltexit we
+ and not its offset. In _dl_profile_fixup and _dl_audit_pltexit we
also use the index. Therefore it is wasteful to compute the offset
in the trampoline just to reverse the operation immediately
afterwards. */
jns 3f
/* There's nothing in the frame size, so there
- will be no call to the _dl_call_pltexit. */
+ will be no call to the _dl_audit_pltexit. */
/* Get back registers content. */
movq LR_RCX_OFFSET(%rsp), %rcx
mov 24(%rbx), %RSP_LP # Drop the copied stack content
/* Now we have to prepare the La_x86_64_retval structure for the
- _dl_call_pltexit. The La_x86_64_regs is being pointed by rsp now,
+ _dl_audit_pltexit. The La_x86_64_regs is being pointed by rsp now,
so we just need to allocate the sizeof(La_x86_64_retval) space on
the stack, since the alignment has already been taken care of. */
# ifdef RESTORE_AVX
movq 24(%rbx), %rdx # La_x86_64_regs argument to %rdx.
movq 40(%rbx), %rsi # Copy args pushed by PLT in register.
movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index
- call _dl_call_pltexit
+ call _dl_audit_pltexit
/* Restore return registers. */
movq LRV_RAX_OFFSET(%rsp), %rax
vmovaps %zmm0, %zmm8
/* Check for large arguments path */
- vpbroadcastq .L_2il0floatpacket.16(%rip), %zmm2
+ vpternlogd $0xff, %zmm2, %zmm2, %zmm2
/*
ARGUMENT RANGE REDUCTION:
jmp .LBL_2_7
#endif
END (_ZGVeN8v_cos_skx)
-
- .section .rodata, "a"
-.L_2il0floatpacket.16:
- .long 0xffffffff,0xffffffff
- .type .L_2il0floatpacket.16,@object
/* preserve mantissa, set input exponent to 2^(-10) */
vpternlogq $248, _ExpMask(%rax), %zmm3, %zmm2
- vpbroadcastq .L_2il0floatpacket.12(%rip), %zmm1
+ vpternlogd $0xff, %zmm1, %zmm1, %zmm1
vpsrlq $32, %zmm4, %zmm6
/* reciprocal approximation good to at least 11 bits */
jmp .LBL_2_7
#endif
END (_ZGVeN8v_log_skx)
-
- .section .rodata, "a"
-.L_2il0floatpacket.12:
- .long 0xffffffff,0xffffffff
- .type .L_2il0floatpacket.12,@object
andq $-64, %rsp
subq $1280, %rsp
movq __svml_d_trig_data@GOTPCREL(%rip), %rax
- vpbroadcastq .L_2il0floatpacket.14(%rip), %zmm14
+ vpternlogd $0xff, %zmm1, %zmm1, %zmm14
vmovups __dAbsMask(%rax), %zmm7
vmovups __dInvPI(%rax), %zmm2
vmovups __dRShifter(%rax), %zmm1
jmp .LBL_2_7
#endif
END (_ZGVeN8v_sin_skx)
-
- .section .rodata, "a"
-.L_2il0floatpacket.14:
- .long 0xffffffff,0xffffffff
- .type .L_2il0floatpacket.14,@object
/* SinPoly = SinR*SinPoly */
vfmadd213pd %zmm5, %zmm5, %zmm4
- vpbroadcastq .L_2il0floatpacket.15(%rip), %zmm3
+ vpternlogd $0xff, %zmm3, %zmm3, %zmm3
/* Update Cos result's sign */
vxorpd %zmm2, %zmm1, %zmm1
ENTRY (_ZGVeN8vvv_sincos_skx)
WRAPPER_AVX512_vvv_vl8l8 _ZGVeN8vl8l8_sincos_skx
END (_ZGVeN8vvv_sincos_skx)
-
- .section .rodata, "a"
-.L_2il0floatpacket.15:
- .long 0xffffffff,0xffffffff
- .type .L_2il0floatpacket.15,@object
X = X - Y*PI1 - Y*PI2 - Y*PI3
*/
vmovaps %zmm0, %zmm6
- vmovups .L_2il0floatpacket.13(%rip), %zmm12
+ vpternlogd $0xff, %zmm12, %zmm12, %zmm12
vmovups __sRShifter(%rax), %zmm3
vmovups __sPI1_FMA(%rax), %zmm5
vmovups __sA9_FMA(%rax), %zmm9
jmp .LBL_2_7
#endif
END (_ZGVeN16v_cosf_skx)
-
- .section .rodata, "a"
-.L_2il0floatpacket.13:
- .long 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
- .type .L_2il0floatpacket.13,@object
vmovaps %zmm0, %zmm7
/* compare against threshold */
- vmovups .L_2il0floatpacket.13(%rip), %zmm3
+ vpternlogd $0xff, %zmm3, %zmm3, %zmm3
vmovups __sInvLn2(%rax), %zmm4
vmovups __sShifter(%rax), %zmm1
vmovups __sLn2hi(%rax), %zmm6
#endif
END (_ZGVeN16v_expf_skx)
-
- .section .rodata, "a"
-.L_2il0floatpacket.13:
- .long 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
- .type .L_2il0floatpacket.13,@object
andq $-64, %rsp
subq $1280, %rsp
movq __svml_slog_data@GOTPCREL(%rip), %rax
- vmovups .L_2il0floatpacket.7(%rip), %zmm6
+ vpternlogd $0xff, %zmm6, %zmm6, %zmm6
vmovups _iBrkValue(%rax), %zmm4
vmovups _sPoly_7(%rax), %zmm8
#endif
END (_ZGVeN16v_logf_skx)
-
- .section .rodata, "a"
-.L_2il0floatpacket.7:
- .long 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
- .type .L_2il0floatpacket.7,@object
vpsrlq $32, %zmm3, %zmm2
vpmovqd %zmm2, %ymm11
vcvtps2pd %ymm14, %zmm13
- vmovups .L_2il0floatpacket.23(%rip), %zmm14
+ vpternlogd $0xff, %zmm14, %zmm14, %zmm14
vmovaps %zmm14, %zmm26
vpandd _ABSMASK(%rax), %zmm1, %zmm8
vpcmpd $1, _INF(%rax), %zmm8, %k2
vpmovqd %zmm11, %ymm5
vpxord %zmm10, %zmm10, %zmm10
vgatherdpd _Log2Rcp_lookup(%rax,%ymm4), %zmm10{%k3}
- vpbroadcastq .L_2il0floatpacket.24(%rip), %zmm4
+ vpternlogd $0xff, %zmm4, %zmm4, %zmm4
vpxord %zmm11, %zmm11, %zmm11
vcvtdq2pd %ymm7, %zmm7
vgatherdpd _Log2Rcp_lookup(%rax,%ymm5), %zmm11{%k1}
jmp .LBL_2_7
#endif
END (_ZGVeN16vv_powf_skx)
-
- .section .rodata, "a"
-.L_2il0floatpacket.23:
- .long 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
- .type .L_2il0floatpacket.23,@object
-.L_2il0floatpacket.24:
- .long 0xffffffff,0xffffffff
- .type .L_2il0floatpacket.24,@object
/* Result sign calculations */
vpternlogd $150, %zmm0, %zmm14, %zmm1
- vmovups .L_2il0floatpacket.13(%rip), %zmm14
+ vpternlogd $0xff, %zmm14, %zmm14, %zmm14
/* Add correction term 0.5 for cos() part */
vaddps %zmm8, %zmm5, %zmm15
ENTRY (_ZGVeN16vvv_sincosf_skx)
WRAPPER_AVX512_vvv_vl4l4 _ZGVeN16vl4l4_sincosf_skx
END (_ZGVeN16vvv_sincosf_skx)
-
- .section .rodata, "a"
-.L_2il0floatpacket.13:
- .long 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
- .type .L_2il0floatpacket.13,@object
movq __svml_s_trig_data@GOTPCREL(%rip), %rax
/* Check for large and special values */
- vmovups .L_2il0floatpacket.11(%rip), %zmm14
+ vpternlogd $0xff, %zmm14, %zmm14, %zmm14
vmovups __sAbsMask(%rax), %zmm5
vmovups __sInvPI(%rax), %zmm1
vmovups __sRShifter(%rax), %zmm2
jmp .LBL_2_7
#endif
END (_ZGVeN16v_sinf_skx)
-
- .section .rodata, "a"
-.L_2il0floatpacket.11:
- .long 0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
- .type .L_2il0floatpacket.11,@object
/* Use movups and movaps for smaller code sizes. */
#define VMOVU movups
#define VMOVA movaps
-
+#define MOV_SIZE 3
#define SECTION(p) p
#ifdef USE_MULTIARCH
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#define VEC_SIZE 16
+#define PAGE_SIZE 4096
.text
-ENTRY (__memrchr)
- movd %esi, %xmm1
-
- sub $16, %RDX_LP
- jbe L(length_less16)
-
- punpcklbw %xmm1, %xmm1
- punpcklbw %xmm1, %xmm1
-
- add %RDX_LP, %RDI_LP
- pshufd $0, %xmm1, %xmm1
-
- movdqu (%rdi), %xmm0
- pcmpeqb %xmm1, %xmm0
-
-/* Check if there is a match. */
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches0)
-
- sub $64, %rdi
- mov %edi, %ecx
- and $15, %ecx
- jz L(loop_prolog)
-
- add $16, %rdi
- add $16, %rdx
- and $-16, %rdi
- sub %rcx, %rdx
-
- .p2align 4
-L(loop_prolog):
- sub $64, %rdx
- jbe L(exit_loop)
-
- movdqa 48(%rdi), %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches48)
-
- movdqa 32(%rdi), %xmm2
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 16(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa (%rdi), %xmm4
- pcmpeqb %xmm1, %xmm4
- pmovmskb %xmm4, %eax
- test %eax, %eax
- jnz L(matches0)
-
- sub $64, %rdi
- sub $64, %rdx
- jbe L(exit_loop)
-
- movdqa 48(%rdi), %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches48)
-
- movdqa 32(%rdi), %xmm2
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 16(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches16)
-
- movdqa (%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches0)
-
- mov %edi, %ecx
- and $63, %ecx
- jz L(align64_loop)
-
- add $64, %rdi
- add $64, %rdx
- and $-64, %rdi
- sub %rcx, %rdx
-
- .p2align 4
-L(align64_loop):
- sub $64, %rdi
- sub $64, %rdx
- jbe L(exit_loop)
-
- movdqa (%rdi), %xmm0
- movdqa 16(%rdi), %xmm2
- movdqa 32(%rdi), %xmm3
- movdqa 48(%rdi), %xmm4
-
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm1, %xmm2
- pcmpeqb %xmm1, %xmm3
- pcmpeqb %xmm1, %xmm4
-
- pmaxub %xmm3, %xmm0
- pmaxub %xmm4, %xmm2
- pmaxub %xmm0, %xmm2
- pmovmskb %xmm2, %eax
-
- test %eax, %eax
- jz L(align64_loop)
-
- pmovmskb %xmm4, %eax
- test %eax, %eax
- jnz L(matches48)
-
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 16(%rdi), %xmm2
-
- pcmpeqb %xmm1, %xmm2
- pcmpeqb (%rdi), %xmm1
-
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches16)
-
- pmovmskb %xmm1, %eax
- bsr %eax, %eax
-
- add %rdi, %rax
+ENTRY_P2ALIGN(__memrchr, 6)
+#ifdef __ILP32__
+ /* Clear upper bits. */
+ mov %RDX_LP, %RDX_LP
+#endif
+ movd %esi, %xmm0
+
+ /* Get end pointer. */
+ leaq (%rdx, %rdi), %rcx
+
+ punpcklbw %xmm0, %xmm0
+ punpcklwd %xmm0, %xmm0
+ pshufd $0, %xmm0, %xmm0
+
+ /* Check if we can load 1x VEC without cross a page. */
+ testl $(PAGE_SIZE - VEC_SIZE), %ecx
+ jz L(page_cross)
+
+ /* NB: This load happens regardless of whether rdx (len) is zero. Since
+ it doesn't cross a page and the standard guarantees any pointer has
+ at least one-valid byte this load must be safe. For the entire
+ history of the x86 memrchr implementation this has been possible so
+ no code "should" be relying on a zero-length check before this load.
+ The zero-length check is moved to the page cross case because it is
+ 1) pretty cold and including it pushes the hot case len <= VEC_SIZE
+ into 2-cache lines. */
+ movups -(VEC_SIZE)(%rcx), %xmm1
+ pcmpeqb %xmm0, %xmm1
+ pmovmskb %xmm1, %eax
+
+ subq $VEC_SIZE, %rdx
+ ja L(more_1x_vec)
+L(ret_vec_x0_test):
+ /* Zero-flag set if eax (src) is zero. Destination unchanged if src is
+ zero. */
+ bsrl %eax, %eax
+ jz L(ret_0)
+ /* Check if the CHAR match is in bounds. Need to truly zero `eax` here
+ if out of bounds. */
+ addl %edx, %eax
+ jl L(zero_0)
+ /* Since we subtracted VEC_SIZE from rdx earlier we can just add to base
+ ptr. */
+ addq %rdi, %rax
+L(ret_0):
ret
- .p2align 4
-L(exit_loop):
- add $64, %edx
- cmp $32, %edx
- jbe L(exit_loop_32)
-
- movdqa 48(%rdi), %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches48)
-
- movdqa 32(%rdi), %xmm2
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %eax
- test %eax, %eax
- jnz L(matches32)
-
- movdqa 16(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm3
- pmovmskb %xmm3, %eax
- test %eax, %eax
- jnz L(matches16_1)
- cmp $48, %edx
- jbe L(return_null)
-
- pcmpeqb (%rdi), %xmm1
- pmovmskb %xmm1, %eax
- test %eax, %eax
- jnz L(matches0_1)
- xor %eax, %eax
+ .p2align 4,, 5
+L(ret_vec_x0):
+ bsrl %eax, %eax
+ leaq -(VEC_SIZE)(%rcx, %rax), %rax
ret
- .p2align 4
-L(exit_loop_32):
- movdqa 48(%rdi), %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %eax
- test %eax, %eax
- jnz L(matches48_1)
- cmp $16, %edx
- jbe L(return_null)
-
- pcmpeqb 32(%rdi), %xmm1
- pmovmskb %xmm1, %eax
- test %eax, %eax
- jnz L(matches32_1)
- xor %eax, %eax
+ .p2align 4,, 2
+L(zero_0):
+ xorl %eax, %eax
ret
- .p2align 4
-L(matches0):
- bsr %eax, %eax
- add %rdi, %rax
- ret
-
- .p2align 4
-L(matches16):
- bsr %eax, %eax
- lea 16(%rax, %rdi), %rax
- ret
- .p2align 4
-L(matches32):
- bsr %eax, %eax
- lea 32(%rax, %rdi), %rax
+ .p2align 4,, 8
+L(more_1x_vec):
+ testl %eax, %eax
+ jnz L(ret_vec_x0)
+
+ /* Align rcx (pointer to string). */
+ decq %rcx
+ andq $-VEC_SIZE, %rcx
+
+ movq %rcx, %rdx
+ /* NB: We could consistently save 1-byte in this pattern with `movaps
+ %xmm0, %xmm1; pcmpeq IMM8(r), %xmm1; ...`. The reason against it is
+ it adds more frontend uops (even if the moves can be eliminated) and
+ some percentage of the time actual backend uops. */
+ movaps -(VEC_SIZE)(%rcx), %xmm1
+ pcmpeqb %xmm0, %xmm1
+ subq %rdi, %rdx
+ pmovmskb %xmm1, %eax
+
+ cmpq $(VEC_SIZE * 2), %rdx
+ ja L(more_2x_vec)
+L(last_2x_vec):
+ subl $VEC_SIZE, %edx
+ jbe L(ret_vec_x0_test)
+
+ testl %eax, %eax
+ jnz L(ret_vec_x0)
+
+ movaps -(VEC_SIZE * 2)(%rcx), %xmm1
+ pcmpeqb %xmm0, %xmm1
+ pmovmskb %xmm1, %eax
+
+ subl $VEC_SIZE, %edx
+ bsrl %eax, %eax
+ jz L(ret_1)
+ addl %edx, %eax
+ jl L(zero_0)
+ addq %rdi, %rax
+L(ret_1):
ret
- .p2align 4
-L(matches48):
- bsr %eax, %eax
- lea 48(%rax, %rdi), %rax
+ /* Don't align. Otherwise lose 2-byte encoding in jump to L(page_cross)
+ causes the hot path (length <= VEC_SIZE) to span multiple cache
+ lines. Naturally aligned % 16 to 8-bytes. */
+L(page_cross):
+ /* Zero length check. */
+ testq %rdx, %rdx
+ jz L(zero_0)
+
+ leaq -1(%rcx), %r8
+ andq $-(VEC_SIZE), %r8
+
+ movaps (%r8), %xmm1
+ pcmpeqb %xmm0, %xmm1
+ pmovmskb %xmm1, %esi
+ /* Shift out negative alignment (because we are starting from endptr and
+ working backwards). */
+ negl %ecx
+ /* 32-bit shift but VEC_SIZE=16 so need to mask the shift count
+ explicitly. */
+ andl $(VEC_SIZE - 1), %ecx
+ shl %cl, %esi
+ movzwl %si, %eax
+ leaq (%rdi, %rdx), %rcx
+ cmpq %rdi, %r8
+ ja L(more_1x_vec)
+ subl $VEC_SIZE, %edx
+ bsrl %eax, %eax
+ jz L(ret_2)
+ addl %edx, %eax
+ jl L(zero_1)
+ addq %rdi, %rax
+L(ret_2):
ret
- .p2align 4
-L(matches0_1):
- bsr %eax, %eax
- sub $64, %rdx
- add %rax, %rdx
- jl L(return_null)
- add %rdi, %rax
+ /* Fits in aligning bytes. */
+L(zero_1):
+ xorl %eax, %eax
ret
- .p2align 4
-L(matches16_1):
- bsr %eax, %eax
- sub $48, %rdx
- add %rax, %rdx
- jl L(return_null)
- lea 16(%rdi, %rax), %rax
+ .p2align 4,, 5
+L(ret_vec_x1):
+ bsrl %eax, %eax
+ leaq -(VEC_SIZE * 2)(%rcx, %rax), %rax
ret
- .p2align 4
-L(matches32_1):
- bsr %eax, %eax
- sub $32, %rdx
- add %rax, %rdx
- jl L(return_null)
- lea 32(%rdi, %rax), %rax
- ret
+ .p2align 4,, 8
+L(more_2x_vec):
+ testl %eax, %eax
+ jnz L(ret_vec_x0)
- .p2align 4
-L(matches48_1):
- bsr %eax, %eax
- sub $16, %rdx
- add %rax, %rdx
- jl L(return_null)
- lea 48(%rdi, %rax), %rax
- ret
+ movaps -(VEC_SIZE * 2)(%rcx), %xmm1
+ pcmpeqb %xmm0, %xmm1
+ pmovmskb %xmm1, %eax
+ testl %eax, %eax
+ jnz L(ret_vec_x1)
- .p2align 4
-L(return_null):
- xor %eax, %eax
- ret
- .p2align 4
-L(length_less16_offset0):
- test %edx, %edx
- jz L(return_null)
+ movaps -(VEC_SIZE * 3)(%rcx), %xmm1
+ pcmpeqb %xmm0, %xmm1
+ pmovmskb %xmm1, %eax
- mov %dl, %cl
- pcmpeqb (%rdi), %xmm1
+ subq $(VEC_SIZE * 4), %rdx
+ ja L(more_4x_vec)
- mov $1, %edx
- sal %cl, %edx
- sub $1, %edx
+ addl $(VEC_SIZE), %edx
+ jle L(ret_vec_x2_test)
- pmovmskb %xmm1, %eax
+L(last_vec):
+ testl %eax, %eax
+ jnz L(ret_vec_x2)
- and %edx, %eax
- test %eax, %eax
- jz L(return_null)
+ movaps -(VEC_SIZE * 4)(%rcx), %xmm1
+ pcmpeqb %xmm0, %xmm1
+ pmovmskb %xmm1, %eax
- bsr %eax, %eax
- add %rdi, %rax
+ subl $(VEC_SIZE), %edx
+ bsrl %eax, %eax
+ jz L(ret_3)
+ addl %edx, %eax
+ jl L(zero_2)
+ addq %rdi, %rax
+L(ret_3):
ret
- .p2align 4
-L(length_less16):
- punpcklbw %xmm1, %xmm1
- punpcklbw %xmm1, %xmm1
-
- add $16, %edx
-
- pshufd $0, %xmm1, %xmm1
-
- mov %edi, %ecx
- and $15, %ecx
- jz L(length_less16_offset0)
-
- mov %cl, %dh
- mov %ecx, %esi
- add %dl, %dh
- and $-16, %rdi
-
- sub $16, %dh
- ja L(length_less16_part2)
-
- pcmpeqb (%rdi), %xmm1
- pmovmskb %xmm1, %eax
-
- sar %cl, %eax
- mov %dl, %cl
-
- mov $1, %edx
- sal %cl, %edx
- sub $1, %edx
-
- and %edx, %eax
- test %eax, %eax
- jz L(return_null)
-
- bsr %eax, %eax
- add %rdi, %rax
- add %rsi, %rax
+ .p2align 4,, 6
+L(ret_vec_x2_test):
+ bsrl %eax, %eax
+ jz L(zero_2)
+ addl %edx, %eax
+ jl L(zero_2)
+ addq %rdi, %rax
ret
- .p2align 4
-L(length_less16_part2):
- movdqa 16(%rdi), %xmm2
- pcmpeqb %xmm1, %xmm2
- pmovmskb %xmm2, %eax
-
- mov %dh, %cl
- mov $1, %edx
- sal %cl, %edx
- sub $1, %edx
-
- and %edx, %eax
+L(zero_2):
+ xorl %eax, %eax
+ ret
- test %eax, %eax
- jnz L(length_less16_part2_return)
- pcmpeqb (%rdi), %xmm1
- pmovmskb %xmm1, %eax
+ .p2align 4,, 5
+L(ret_vec_x2):
+ bsrl %eax, %eax
+ leaq -(VEC_SIZE * 3)(%rcx, %rax), %rax
+ ret
- mov %esi, %ecx
- sar %cl, %eax
- test %eax, %eax
- jz L(return_null)
+ .p2align 4,, 5
+L(ret_vec_x3):
+ bsrl %eax, %eax
+ leaq -(VEC_SIZE * 4)(%rcx, %rax), %rax
+ ret
- bsr %eax, %eax
- add %rdi, %rax
- add %rsi, %rax
+ .p2align 4,, 8
+L(more_4x_vec):
+ testl %eax, %eax
+ jnz L(ret_vec_x2)
+
+ movaps -(VEC_SIZE * 4)(%rcx), %xmm1
+ pcmpeqb %xmm0, %xmm1
+ pmovmskb %xmm1, %eax
+
+ testl %eax, %eax
+ jnz L(ret_vec_x3)
+
+ addq $-(VEC_SIZE * 4), %rcx
+ cmpq $(VEC_SIZE * 4), %rdx
+ jbe L(last_4x_vec)
+
+ /* Offset everything by 4x VEC_SIZE here to save a few bytes at the end
+ keeping the code from spilling to the next cache line. */
+ addq $(VEC_SIZE * 4 - 1), %rcx
+ andq $-(VEC_SIZE * 4), %rcx
+ leaq (VEC_SIZE * 4)(%rdi), %rdx
+ andq $-(VEC_SIZE * 4), %rdx
+
+ .p2align 4,, 11
+L(loop_4x_vec):
+ movaps (VEC_SIZE * -1)(%rcx), %xmm1
+ movaps (VEC_SIZE * -2)(%rcx), %xmm2
+ movaps (VEC_SIZE * -3)(%rcx), %xmm3
+ movaps (VEC_SIZE * -4)(%rcx), %xmm4
+ pcmpeqb %xmm0, %xmm1
+ pcmpeqb %xmm0, %xmm2
+ pcmpeqb %xmm0, %xmm3
+ pcmpeqb %xmm0, %xmm4
+
+ por %xmm1, %xmm2
+ por %xmm3, %xmm4
+ por %xmm2, %xmm4
+
+ pmovmskb %xmm4, %esi
+ testl %esi, %esi
+ jnz L(loop_end)
+
+ addq $-(VEC_SIZE * 4), %rcx
+ cmpq %rdx, %rcx
+ jne L(loop_4x_vec)
+
+ subl %edi, %edx
+
+ /* Ends up being 1-byte nop. */
+ .p2align 4,, 2
+L(last_4x_vec):
+ movaps -(VEC_SIZE)(%rcx), %xmm1
+ pcmpeqb %xmm0, %xmm1
+ pmovmskb %xmm1, %eax
+
+ cmpl $(VEC_SIZE * 2), %edx
+ jbe L(last_2x_vec)
+
+ testl %eax, %eax
+ jnz L(ret_vec_x0)
+
+
+ movaps -(VEC_SIZE * 2)(%rcx), %xmm1
+ pcmpeqb %xmm0, %xmm1
+ pmovmskb %xmm1, %eax
+
+ testl %eax, %eax
+ jnz L(ret_vec_end)
+
+ movaps -(VEC_SIZE * 3)(%rcx), %xmm1
+ pcmpeqb %xmm0, %xmm1
+ pmovmskb %xmm1, %eax
+
+ subl $(VEC_SIZE * 3), %edx
+ ja L(last_vec)
+ bsrl %eax, %eax
+ jz L(ret_4)
+ addl %edx, %eax
+ jl L(zero_3)
+ addq %rdi, %rax
+L(ret_4):
ret
- .p2align 4
-L(length_less16_part2_return):
- bsr %eax, %eax
- lea 16(%rax, %rdi), %rax
+ /* Ends up being 1-byte nop. */
+ .p2align 4,, 3
+L(loop_end):
+ pmovmskb %xmm1, %eax
+ sall $16, %eax
+ jnz L(ret_vec_end)
+
+ pmovmskb %xmm2, %eax
+ testl %eax, %eax
+ jnz L(ret_vec_end)
+
+ pmovmskb %xmm3, %eax
+ /* Combine last 2 VEC matches. If eax (VEC3) is zero (no CHAR in VEC3)
+ then it won't affect the result in esi (VEC4). If eax is non-zero
+ then CHAR in VEC3 and bsrl will use that position. */
+ sall $16, %eax
+ orl %esi, %eax
+ bsrl %eax, %eax
+ leaq -(VEC_SIZE * 4)(%rcx, %rax), %rax
ret
-END (__memrchr)
+L(ret_vec_end):
+ bsrl %eax, %eax
+ leaq (VEC_SIZE * -2)(%rax, %rcx), %rax
+ ret
+ /* Used in L(last_4x_vec). In the same cache line. This just uses spare
+ aligning bytes. */
+L(zero_3):
+ xorl %eax, %eax
+ ret
+ /* 2-bytes from next cache line. */
+END(__memrchr)
weak_alias (__memrchr, memrchr)
-/* memset/bzero -- set memory area to CH/0
+/* memset -- set memory area to CH/0
Optimized version for x86-64.
Copyright (C) 2002-2021 Free Software Foundation, Inc.
This file is part of the GNU C Library.
<https://www.gnu.org/licenses/>. */
#include <sysdep.h>
+#define USE_WITH_SSE2 1
#define VEC_SIZE 16
+#define MOV_SIZE 3
+#define RET_SIZE 1
+
#define VEC(i) xmm##i
-/* Don't use movups and movaps since it will get larger nop paddings for
- alignment. */
-#define VMOVU movdqu
-#define VMOVA movdqa
+#define VMOVU movups
+#define VMOVA movaps
-#define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
movd d, %xmm0; \
movq r, %rax; \
punpcklbw %xmm0, %xmm0; \
punpcklwd %xmm0, %xmm0; \
pshufd $0, %xmm0, %xmm0
-#define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
movd d, %xmm0; \
- movq r, %rax; \
- pshufd $0, %xmm0, %xmm0
+ pshufd $0, %xmm0, %xmm0; \
+ movq r, %rax
+
+# define MEMSET_VDUP_TO_VEC0_HIGH()
+# define MEMSET_VDUP_TO_VEC0_LOW()
+
+# define WMEMSET_VDUP_TO_VEC0_HIGH()
+# define WMEMSET_VDUP_TO_VEC0_LOW()
#define SECTION(p) p
ifeq ($(subdir),string)
-sysdep_routines += strncat-c stpncpy-c strncpy-c \
- strcmp-sse2 strcmp-sse2-unaligned strcmp-ssse3 \
- strcmp-sse4_2 strcmp-avx2 \
- strncmp-sse2 strncmp-ssse3 strncmp-sse4_2 strncmp-avx2 \
- memchr-sse2 rawmemchr-sse2 memchr-avx2 rawmemchr-avx2 \
- memrchr-sse2 memrchr-avx2 \
- memcmp-sse2 \
- memcmp-avx2-movbe \
- memcmp-sse4 memcpy-ssse3 \
- memmove-ssse3 \
- memcpy-ssse3-back \
- memmove-ssse3-back \
- memmove-avx512-no-vzeroupper \
- strcasecmp_l-sse2 strcasecmp_l-ssse3 \
- strcasecmp_l-sse4_2 strcasecmp_l-avx \
- strncase_l-sse2 strncase_l-ssse3 \
- strncase_l-sse4_2 strncase_l-avx \
- strchr-sse2 strchrnul-sse2 strchr-avx2 strchrnul-avx2 \
- strrchr-sse2 strrchr-avx2 \
- strlen-sse2 strnlen-sse2 strlen-avx2 strnlen-avx2 \
- strcat-avx2 strncat-avx2 \
- strcat-ssse3 strncat-ssse3\
- strcpy-avx2 strncpy-avx2 \
- strcpy-sse2 stpcpy-sse2 \
- strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
- strcpy-sse2-unaligned strncpy-sse2-unaligned \
- stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
- stpcpy-avx2 stpncpy-avx2 \
- strcat-sse2 \
- strcat-sse2-unaligned strncat-sse2-unaligned \
- strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned \
- strcspn-sse2 strpbrk-sse2 strspn-sse2 \
- strcspn-c strpbrk-c strspn-c varshift \
- memset-avx512-no-vzeroupper \
- memmove-sse2-unaligned-erms \
- memmove-avx-unaligned-erms \
- memmove-avx512-unaligned-erms \
- memset-sse2-unaligned-erms \
- memset-avx2-unaligned-erms \
- memset-avx512-unaligned-erms \
- memchr-avx2-rtm \
- memcmp-avx2-movbe-rtm \
- memmove-avx-unaligned-erms-rtm \
- memrchr-avx2-rtm \
- memset-avx2-unaligned-erms-rtm \
- rawmemchr-avx2-rtm \
- strchr-avx2-rtm \
- strcmp-avx2-rtm \
- strchrnul-avx2-rtm \
- stpcpy-avx2-rtm \
- stpncpy-avx2-rtm \
- strcat-avx2-rtm \
- strcpy-avx2-rtm \
- strlen-avx2-rtm \
- strncat-avx2-rtm \
- strncmp-avx2-rtm \
- strncpy-avx2-rtm \
- strnlen-avx2-rtm \
- strrchr-avx2-rtm \
- memchr-evex \
- memcmp-evex-movbe \
- memmove-evex-unaligned-erms \
- memrchr-evex \
- memset-evex-unaligned-erms \
- rawmemchr-evex \
- stpcpy-evex \
- stpncpy-evex \
- strcat-evex \
- strchr-evex \
- strchrnul-evex \
- strcmp-evex \
- strcpy-evex \
- strlen-evex \
- strncat-evex \
- strncmp-evex \
- strncpy-evex \
- strnlen-evex \
- strrchr-evex \
- memchr-evex-rtm \
- rawmemchr-evex-rtm
+sysdep_routines += \
+ memchr-avx2 \
+ memchr-avx2-rtm \
+ memchr-evex \
+ memchr-evex-rtm \
+ memchr-sse2 \
+ memcmp-avx2-movbe \
+ memcmp-avx2-movbe-rtm \
+ memcmp-evex-movbe \
+ memcmp-sse2 \
+ memcmp-ssse3 \
+ memcpy-ssse3 \
+ memcpy-ssse3-back \
+ memmove-avx-unaligned-erms \
+ memmove-avx-unaligned-erms-rtm \
+ memmove-avx512-no-vzeroupper \
+ memmove-avx512-unaligned-erms \
+ memmove-erms \
+ memmove-evex-unaligned-erms \
+ memmove-sse2-unaligned-erms \
+ memmove-ssse3 \
+ memmove-ssse3-back \
+ memrchr-avx2 \
+ memrchr-avx2-rtm \
+ memrchr-evex \
+ memrchr-sse2 \
+ memset-avx2-unaligned-erms \
+ memset-avx2-unaligned-erms-rtm \
+ memset-avx512-no-vzeroupper \
+ memset-avx512-unaligned-erms \
+ memset-erms \
+ memset-evex-unaligned-erms \
+ memset-sse2-unaligned-erms \
+ rawmemchr-avx2 \
+ rawmemchr-avx2-rtm \
+ rawmemchr-evex \
+ rawmemchr-evex-rtm \
+ rawmemchr-sse2 \
+ stpcpy-avx2 \
+ stpcpy-avx2-rtm \
+ stpcpy-evex \
+ stpcpy-sse2 \
+ stpcpy-sse2-unaligned \
+ stpcpy-ssse3 \
+ stpncpy-avx2 \
+ stpncpy-avx2-rtm \
+ stpncpy-c \
+ stpncpy-evex \
+ stpncpy-sse2-unaligned \
+ stpncpy-ssse3 \
+ strcasecmp_l-avx2 \
+ strcasecmp_l-avx2-rtm \
+ strcasecmp_l-evex \
+ strcasecmp_l-sse2 \
+ strcasecmp_l-sse4_2 \
+ strcasecmp_l-ssse3 \
+ strcat-avx2 \
+ strcat-avx2-rtm \
+ strcat-evex \
+ strcat-sse2 \
+ strcat-sse2-unaligned \
+ strcat-ssse3 \
+ strchr-avx2 \
+ strchr-avx2-rtm \
+ strchr-evex \
+ strchr-sse2 \
+ strchr-sse2-no-bsf \
+ strchrnul-avx2 \
+ strchrnul-avx2-rtm \
+ strchrnul-evex \
+ strchrnul-sse2 \
+ strcmp-avx2 \
+ strcmp-avx2-rtm \
+ strcmp-evex \
+ strcmp-sse2 \
+ strcmp-sse2-unaligned \
+ strcmp-sse4_2 \
+ strcmp-ssse3 \
+ strcpy-avx2 \
+ strcpy-avx2-rtm \
+ strcpy-evex \
+ strcpy-sse2 \
+ strcpy-sse2-unaligned \
+ strcpy-ssse3 \
+ strcspn-c \
+ strcspn-sse2 \
+ strlen-avx2 \
+ strlen-avx2-rtm \
+ strlen-evex \
+ strlen-evex512 \
+ strlen-sse2 \
+ strncase_l-avx2 \
+ strncase_l-avx2-rtm \
+ strncase_l-evex \
+ strncase_l-sse2 \
+ strncase_l-sse4_2 \
+ strncase_l-ssse3 \
+ strncat-avx2 \
+ strncat-avx2-rtm \
+ strncat-c \
+ strncat-evex \
+ strncat-sse2-unaligned \
+ strncat-ssse3 \
+ strncmp-avx2 \
+ strncmp-avx2-rtm \
+ strncmp-evex \
+ strncmp-sse2 \
+ strncmp-sse4_2 \
+ strncmp-ssse3 \
+ strncpy-avx2 \
+ strncpy-avx2-rtm \
+ strncpy-c \
+ strncpy-evex \
+ strncpy-sse2-unaligned \
+ strncpy-ssse3 \
+ strnlen-avx2 \
+ strnlen-avx2-rtm \
+ strnlen-evex \
+ strnlen-evex512 \
+ strnlen-sse2 \
+ strpbrk-c \
+ strpbrk-sse2 \
+ strrchr-avx2 \
+ strrchr-avx2-rtm \
+ strrchr-evex \
+ strrchr-sse2 \
+ strspn-c \
+ strspn-sse2 \
+ strstr-avx512 \
+ strstr-sse2-unaligned \
+ varshift \
+# sysdep_routines
CFLAGS-varshift.c += -msse4
CFLAGS-strcspn-c.c += -msse4
CFLAGS-strpbrk-c.c += -msse4
CFLAGS-strspn-c.c += -msse4
+CFLAGS-strstr-avx512.c += -mavx512f -mavx512vl -mavx512dq -mavx512bw -mbmi -mbmi2 -O3
endif
ifeq ($(subdir),wcsmbs)
-sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \
- wmemcmp-avx2-movbe \
- wmemchr-sse2 wmemchr-avx2 \
- wcscmp-sse2 wcscmp-avx2 \
- wcsncmp-sse2 wcsncmp-avx2 \
- wcscpy-ssse3 wcscpy-c \
- wcschr-sse2 wcschr-avx2 \
- wcsrchr-sse2 wcsrchr-avx2 \
- wcslen-sse2 wcslen-sse4_1 wcslen-avx2 \
- wcsnlen-c wcsnlen-sse4_1 wcsnlen-avx2 \
- wcschr-avx2-rtm \
- wcscmp-avx2-rtm \
- wcslen-avx2-rtm \
- wcsncmp-avx2-rtm \
- wcsnlen-avx2-rtm \
- wcsrchr-avx2-rtm \
- wmemchr-avx2-rtm \
- wmemcmp-avx2-movbe-rtm \
- wcschr-evex \
- wcscmp-evex \
- wcslen-evex \
- wcsncmp-evex \
- wcsnlen-evex \
- wcsrchr-evex \
- wmemchr-evex \
- wmemcmp-evex-movbe \
- wmemchr-evex-rtm
+sysdep_routines += \
+ wcschr-avx2 \
+ wcschr-avx2-rtm \
+ wcschr-evex \
+ wcschr-sse2 \
+ wcscmp-avx2 \
+ wcscmp-avx2-rtm \
+ wcscmp-evex \
+ wcscmp-sse2 \
+ wcscpy-c \
+ wcscpy-ssse3 \
+ wcslen-avx2 \
+ wcslen-avx2-rtm \
+ wcslen-evex \
+ wcslen-evex512 \
+ wcslen-sse2 \
+ wcslen-sse4_1 \
+ wcsncmp-avx2 \
+ wcsncmp-avx2-rtm \
+ wcsncmp-evex \
+ wcsncmp-sse2 \
+ wcsnlen-avx2 \
+ wcsnlen-avx2-rtm \
+ wcsnlen-c \
+ wcsnlen-evex \
+ wcsnlen-evex512 \
+ wcsnlen-sse4_1 \
+ wcsrchr-avx2 \
+ wcsrchr-avx2-rtm \
+ wcsrchr-evex \
+ wcsrchr-sse2 \
+ wmemchr-avx2 \
+ wmemchr-avx2-rtm \
+ wmemchr-evex \
+ wmemchr-evex-rtm \
+ wmemchr-sse2 \
+ wmemcmp-avx2-movbe \
+ wmemcmp-avx2-movbe-rtm \
+ wmemcmp-c \
+ wmemcmp-evex-movbe \
+ wmemcmp-ssse3 \
+# sysdep_routines
endif
ifeq ($(subdir),debug)
-sysdep_routines += memcpy_chk-nonshared mempcpy_chk-nonshared \
- memmove_chk-nonshared memset_chk-nonshared \
- wmemset_chk-nonshared
+sysdep_routines += \
+ memcpy_chk-nonshared \
+ memmove_chk-nonshared \
+ mempcpy_chk-nonshared \
+ memset_chk-nonshared \
+ wmemset_chk-nonshared \
+# sysdep_routines
endif
--- /dev/null
+/* Common config for AVX-RTM VECs
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _AVX_RTM_VECS_H
+#define _AVX_RTM_VECS_H 1
+
+#define COND_VZEROUPPER COND_VZEROUPPER_XTEST
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
+ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
+
+#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
+
+#define USE_WITH_RTM 1
+#include "avx-vecs.h"
+
+#undef SECTION
+#define SECTION(p) p##.avx.rtm
+
+#endif
--- /dev/null
+/* Common config for AVX VECs
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _AVX_VECS_H
+#define _AVX_VECS_H 1
+
+#ifdef VEC_SIZE
+# error "Multiple VEC configs included!"
+#endif
+
+#define VEC_SIZE 32
+#include "vec-macros.h"
+
+#define USE_WITH_AVX 1
+#define SECTION(p) p##.avx
+
+/* 4-byte mov instructions with AVX2. */
+#define MOV_SIZE 4
+/* 1 (ret) + 3 (vzeroupper). */
+#define RET_SIZE 4
+#define VZEROUPPER vzeroupper
+
+#define VMOVU vmovdqu
+#define VMOVA vmovdqa
+#define VMOVNT vmovntdq
+
+/* Often need to access xmm portion. */
+#define VEC_xmm VEC_any_xmm
+#define VEC VEC_any_ymm
+
+#endif
+++ /dev/null
-#include <sysdep.h>
-
- .text
-ENTRY(bcopy)
- xchg %rdi, %rsi
- jmp __libc_memmove /* Branch to IFUNC memmove. */
-END(bcopy)
--- /dev/null
+/* Common config for EVEX256 and EVEX512 VECs
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _EVEX_VECS_COMMON_H
+#define _EVEX_VECS_COMMON_H 1
+
+#include "vec-macros.h"
+
+/* 6-byte mov instructions with EVEX. */
+#define MOV_SIZE 6
+/* No vzeroupper needed. */
+#define RET_SIZE 1
+#define VZEROUPPER
+
+#define VMOVU vmovdqu64
+#define VMOVA vmovdqa64
+#define VMOVNT vmovntdq
+
+#define VEC_xmm VEC_hi_xmm
+#define VEC_ymm VEC_hi_ymm
+#define VEC_zmm VEC_hi_zmm
+
+#endif
--- /dev/null
+/* Common config for EVEX256 VECs
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _EVEX256_VECS_H
+#define _EVEX256_VECS_H 1
+
+#ifdef VEC_SIZE
+# error "Multiple VEC configs included!"
+#endif
+
+#define VEC_SIZE 32
+#include "evex-vecs-common.h"
+
+#define USE_WITH_EVEX256 1
+#define SECTION(p) p##.evex
+
+#define VEC VEC_ymm
+
+#endif
--- /dev/null
+/* Common config for EVEX512 VECs
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _EVEX512_VECS_H
+#define _EVEX512_VECS_H 1
+
+#ifdef VEC_SIZE
+# error "Multiple VEC configs included!"
+#endif
+
+#define VEC_SIZE 64
+#include "evex-vecs-common.h"
+
+#define USE_WITH_EVEX512 1
+#define SECTION(p) p##.evex512
+
+#define VEC VEC_zmm
+
+#endif
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (MOVBE)),
__memcmp_evex_movbe)
- IFUNC_IMPL_ADD (array, i, memcmp, CPU_FEATURE_USABLE (SSE4_1),
- __memcmp_sse4_1)
IFUNC_IMPL_ADD (array, i, memcmp, CPU_FEATURE_USABLE (SSSE3),
__memcmp_ssse3)
IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_sse2))
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__strlen_evex)
+ IFUNC_IMPL_ADD (array, i, strlen,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __strlen_evex512)
IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2))
/* Support sysdeps/x86_64/multiarch/strnlen.c. */
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__strnlen_evex)
+ IFUNC_IMPL_ADD (array, i, strnlen,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __strnlen_evex512)
IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2))
/* Support sysdeps/x86_64/multiarch/stpncpy.c. */
/* Support sysdeps/x86_64/multiarch/strcasecmp_l.c. */
IFUNC_IMPL (i, name, strcasecmp,
IFUNC_IMPL_ADD (array, i, strcasecmp,
- CPU_FEATURE_USABLE (AVX),
- __strcasecmp_avx)
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)),
+ __strcasecmp_evex)
+ IFUNC_IMPL_ADD (array, i, strcasecmp,
+ CPU_FEATURE_USABLE (AVX2),
+ __strcasecmp_avx2)
+ IFUNC_IMPL_ADD (array, i, strcasecmp,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __strcasecmp_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strcasecmp,
CPU_FEATURE_USABLE (SSE4_2),
__strcasecmp_sse42)
/* Support sysdeps/x86_64/multiarch/strcasecmp_l.c. */
IFUNC_IMPL (i, name, strcasecmp_l,
- IFUNC_IMPL_ADD (array, i, strcasecmp_l,
- CPU_FEATURE_USABLE (AVX),
- __strcasecmp_l_avx)
+ IFUNC_IMPL_ADD (array, i, strcasecmp_l,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)),
+ __strcasecmp_l_evex)
+ IFUNC_IMPL_ADD (array, i, strcasecmp_l,
+ CPU_FEATURE_USABLE (AVX2),
+ __strcasecmp_l_avx2)
+ IFUNC_IMPL_ADD (array, i, strcasecmp_l,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __strcasecmp_l_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strcasecmp_l,
CPU_FEATURE_USABLE (SSE4_2),
__strcasecmp_l_sse42)
/* Support sysdeps/x86_64/multiarch/strncase_l.c. */
IFUNC_IMPL (i, name, strncasecmp,
IFUNC_IMPL_ADD (array, i, strncasecmp,
- CPU_FEATURE_USABLE (AVX),
- __strncasecmp_avx)
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)),
+ __strncasecmp_evex)
+ IFUNC_IMPL_ADD (array, i, strncasecmp,
+ CPU_FEATURE_USABLE (AVX2),
+ __strncasecmp_avx2)
+ IFUNC_IMPL_ADD (array, i, strncasecmp,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __strncasecmp_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strncasecmp,
CPU_FEATURE_USABLE (SSE4_2),
__strncasecmp_sse42)
/* Support sysdeps/x86_64/multiarch/strncase_l.c. */
IFUNC_IMPL (i, name, strncasecmp_l,
- IFUNC_IMPL_ADD (array, i, strncasecmp_l,
- CPU_FEATURE_USABLE (AVX),
- __strncasecmp_l_avx)
+ IFUNC_IMPL_ADD (array, i, strncasecmp_l,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)),
+ __strncasecmp_l_evex)
+ IFUNC_IMPL_ADD (array, i, strncasecmp_l,
+ CPU_FEATURE_USABLE (AVX2),
+ __strncasecmp_l_avx2)
+ IFUNC_IMPL_ADD (array, i, strncasecmp_l,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __strncasecmp_l_avx2_rtm)
IFUNC_IMPL_ADD (array, i, strncasecmp_l,
CPU_FEATURE_USABLE (SSE4_2),
__strncasecmp_l_sse42)
/* Support sysdeps/x86_64/multiarch/strstr.c. */
IFUNC_IMPL (i, name, strstr,
+ IFUNC_IMPL_ADD (array, i, strstr,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (AVX512DQ)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __strstr_avx512)
IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2))
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcslen_evex)
+ IFUNC_IMPL_ADD (array, i, wcslen,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wcslen_evex512)
IFUNC_IMPL_ADD (array, i, wcslen,
CPU_FEATURE_USABLE (SSE4_1),
__wcslen_sse4_1)
&& CPU_FEATURE_USABLE (AVX512BW)
&& CPU_FEATURE_USABLE (BMI2)),
__wcsnlen_evex)
+ IFUNC_IMPL_ADD (array, i, wcsnlen,
+ (CPU_FEATURE_USABLE (AVX512VL)
+ && CPU_FEATURE_USABLE (AVX512BW)
+ && CPU_FEATURE_USABLE (BMI2)),
+ __wcsnlen_evex512)
IFUNC_IMPL_ADD (array, i, wcsnlen,
CPU_FEATURE_USABLE (SSE4_1),
__wcsnlen_sse4_1)
&& CPU_FEATURE_USABLE (BMI2)
&& CPU_FEATURE_USABLE (MOVBE)),
__wmemcmp_evex_movbe)
- IFUNC_IMPL_ADD (array, i, wmemcmp, CPU_FEATURE_USABLE (SSE4_1),
- __wmemcmp_sse4_1)
IFUNC_IMPL_ADD (array, i, wmemcmp, CPU_FEATURE_USABLE (SSSE3),
__wmemcmp_ssse3)
IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_sse2))
IFUNC_IMPL_ADD (array, i, __wmemset_chk,
CPU_FEATURE_USABLE (AVX2),
__wmemset_chk_avx2_unaligned)
+ IFUNC_IMPL_ADD (array, i, __wmemset_chk,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (RTM)),
+ __wmemset_chk_avx2_unaligned_rtm)
IFUNC_IMPL_ADD (array, i, __wmemset_chk,
(CPU_FEATURE_USABLE (AVX512VL)
&& CPU_FEATURE_USABLE (AVX512BW)
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe_rtm) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_movbe) attribute_hidden;
return OPTIMIZE (avx2_movbe);
}
- if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
- return OPTIMIZE (sse4_1);
-
if (CPU_FEATURE_USABLE_P (cpu_features, SSSE3))
return OPTIMIZE (ssse3);
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
static inline void *
IFUNC_SELECTOR (void)
{
const struct cpu_features* cpu_features = __get_cpu_features ();
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
- return OPTIMIZE (avx);
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ {
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
+ return OPTIMIZE (evex);
+
+ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
+ return OPTIMIZE (avx2_rtm);
+
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ return OPTIMIZE (avx2);
+ }
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
&& !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
# define MEMCHR __memchr_avx2_rtm
#endif
+#define COND_VZEROUPPER COND_VZEROUPPER_XTEST
#define ZERO_UPPER_VEC_REGISTERS_RETURN \
ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
.section SECTION(.text),"ax",@progbits
-ENTRY (MEMCHR)
+ENTRY_P2ALIGN (MEMCHR, 5)
# ifndef USE_AS_RAWMEMCHR
/* Check for zero length. */
# ifdef __ILP32__
# endif
testl %eax, %eax
jz L(aligned_more)
- tzcntl %eax, %eax
+ bsfl %eax, %eax
addq %rdi, %rax
- VZEROUPPER_RETURN
+L(return_vzeroupper):
+ ZERO_UPPER_VEC_REGISTERS_RETURN
+
# ifndef USE_AS_RAWMEMCHR
- .p2align 5
+ .p2align 4
L(first_vec_x0):
/* Check if first match was before length. */
tzcntl %eax, %eax
/* NB: Multiply length by 4 to get byte count. */
sall $2, %edx
# endif
- xorl %ecx, %ecx
+ COND_VZEROUPPER
+ /* Use branch instead of cmovcc so L(first_vec_x0) fits in one fetch
+ block. A branch here as opposed to cmovcc is not that costly. Common
+ usage of memchr is to check if the return was NULL (if string was
+ known to contain CHAR user would use rawmemchr). This branch will be
+ highly correlated with the user branch and can be used by most
+ modern branch predictors to predict the user branch. */
cmpl %eax, %edx
- leaq (%rdi, %rax), %rax
- cmovle %rcx, %rax
- VZEROUPPER_RETURN
-
-L(null):
- xorl %eax, %eax
- ret
-# endif
- .p2align 4
-L(cross_page_boundary):
- /* Save pointer before aligning as its original value is
- necessary for computer return address if byte is found or
- adjusting length if it is not and this is memchr. */
- movq %rdi, %rcx
- /* Align data to VEC_SIZE - 1. ALGN_PTR_REG is rcx for memchr
- and rdi for rawmemchr. */
- orq $(VEC_SIZE - 1), %ALGN_PTR_REG
- VPCMPEQ -(VEC_SIZE - 1)(%ALGN_PTR_REG), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
-# ifndef USE_AS_RAWMEMCHR
- /* Calculate length until end of page (length checked for a
- match). */
- leaq 1(%ALGN_PTR_REG), %rsi
- subq %RRAW_PTR_REG, %rsi
-# ifdef USE_AS_WMEMCHR
- /* NB: Divide bytes by 4 to get wchar_t count. */
- shrl $2, %esi
-# endif
-# endif
- /* Remove the leading bytes. */
- sarxl %ERAW_PTR_REG, %eax, %eax
-# ifndef USE_AS_RAWMEMCHR
- /* Check the end of data. */
- cmpq %rsi, %rdx
- jbe L(first_vec_x0)
+ jle L(null)
+ addq %rdi, %rax
+ ret
# endif
- testl %eax, %eax
- jz L(cross_page_continue)
- tzcntl %eax, %eax
- addq %RRAW_PTR_REG, %rax
-L(return_vzeroupper):
- ZERO_UPPER_VEC_REGISTERS_RETURN
- .p2align 4
+ .p2align 4,, 10
L(first_vec_x1):
- tzcntl %eax, %eax
+ bsfl %eax, %eax
incq %rdi
addq %rdi, %rax
VZEROUPPER_RETURN
-
+# ifndef USE_AS_RAWMEMCHR
+ /* First in aligning bytes here. */
+L(null):
+ xorl %eax, %eax
+ ret
+# endif
.p2align 4
L(first_vec_x2):
tzcntl %eax, %eax
incq %rdi
addq %rdi, %rax
VZEROUPPER_RETURN
- .p2align 4
+ .p2align 4,, 6
L(set_zero_end):
xorl %eax, %eax
VZEROUPPER_RETURN
VZEROUPPER_RETURN
# endif
+ .p2align 4
+L(cross_page_boundary):
+ /* Save pointer before aligning as its original value is necessary for
+ computing the return address if byte is found or adjusting length if it
+ is not and this is memchr. */
+ movq %rdi, %rcx
+ /* Align data to VEC_SIZE - 1. ALGN_PTR_REG is rcx for memchr
+ and rdi for rawmemchr. */
+ orq $(VEC_SIZE - 1), %ALGN_PTR_REG
+ VPCMPEQ -(VEC_SIZE - 1)(%ALGN_PTR_REG), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+# ifndef USE_AS_RAWMEMCHR
+ /* Calculate length until end of page (length checked for a match). */
+ leaq 1(%ALGN_PTR_REG), %rsi
+ subq %RRAW_PTR_REG, %rsi
+# ifdef USE_AS_WMEMCHR
+ /* NB: Divide bytes by 4 to get wchar_t count. */
+ shrl $2, %esi
+# endif
+# endif
+ /* Remove the leading bytes. */
+ sarxl %ERAW_PTR_REG, %eax, %eax
+# ifndef USE_AS_RAWMEMCHR
+ /* Check the end of data. */
+ cmpq %rsi, %rdx
+ jbe L(first_vec_x0)
+# endif
+ testl %eax, %eax
+ jz L(cross_page_continue)
+ bsfl %eax, %eax
+ addq %RRAW_PTR_REG, %rax
+ VZEROUPPER_RETURN
+
+
END (MEMCHR)
#endif
# define PAGE_SIZE 4096
.section SECTION(.text),"ax",@progbits
-ENTRY (MEMCHR)
+ENTRY_P2ALIGN (MEMCHR, 6)
# ifndef USE_AS_RAWMEMCHR
/* Check for zero length. */
test %RDX_LP, %RDX_LP
xorl %eax, %eax
ret
- .p2align 5
+ .p2align 4
L(first_vec_x0):
- /* Check if first match was before length. */
- tzcntl %eax, %eax
- xorl %ecx, %ecx
- cmpl %eax, %edx
- leaq (%rdi, %rax, CHAR_SIZE), %rax
- cmovle %rcx, %rax
+ /* Check if first match was before length. NB: tzcnt has false data-
+ dependency on destination. eax already had a data-dependency on esi
+ so this should have no effect here. */
+ tzcntl %eax, %esi
+# ifdef USE_AS_WMEMCHR
+ leaq (%rdi, %rsi, CHAR_SIZE), %rdi
+# else
+ addq %rsi, %rdi
+# endif
+ xorl %eax, %eax
+ cmpl %esi, %edx
+ cmovg %rdi, %rax
ret
-# else
- /* NB: first_vec_x0 is 17 bytes which will leave
- cross_page_boundary (which is relatively cold) close enough
- to ideal alignment. So only realign L(cross_page_boundary) if
- rawmemchr. */
- .p2align 4
# endif
+
+ .p2align 4
L(cross_page_boundary):
/* Save pointer before aligning as its original value is
necessary for computer return address if byte is found or
L(zero_end):
ret
+L(set_zero_end):
+ xorl %eax, %eax
+ ret
.p2align 4
L(first_vec_x1_check):
- tzcntl %eax, %eax
+ /* eax must be non-zero. Use bsfl to save code size. */
+ bsfl %eax, %eax
/* Adjust length. */
subl $-(CHAR_PER_VEC * 4), %edx
/* Check if match within remaining length. */
/* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax
ret
-L(set_zero_end):
- xorl %eax, %eax
- ret
.p2align 4
L(loop_4x_vec_end):
# endif
ret
- .p2align 4
+ .p2align 4,, 10
L(last_vec_x1_return):
tzcntl %eax, %eax
# if defined USE_AS_WMEMCHR || RET_OFFSET != 0
# endif
# ifndef USE_AS_RAWMEMCHR
+ .p2align 4,, 5
L(last_4x_vec_or_less_cmpeq):
VPCMP $0, (VEC_SIZE * 5)(%rdi), %YMMMATCH, %k0
kmovd %k0, %eax
# endif
andl %ecx, %eax
jz L(zero_end2)
- tzcntl %eax, %eax
+ bsfl %eax, %eax
leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax
L(zero_end2):
ret
leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
ret
# endif
-
+ /* 7 bytes from next cache line. */
END (MEMCHR)
#endif
# ifndef USE_AS_WMEMCMP
cmpl $8, %edx
jae L(between_8_15)
+ /* Fall through for [4, 7]. */
cmpl $4, %edx
- jae L(between_4_7)
+ jb L(between_2_3)
- /* Load as big endian to avoid branches. */
- movzwl (%rdi), %eax
- movzwl (%rsi), %ecx
- shll $8, %eax
- shll $8, %ecx
- bswap %eax
- bswap %ecx
- movzbl -1(%rdi, %rdx), %edi
- movzbl -1(%rsi, %rdx), %esi
- orl %edi, %eax
- orl %esi, %ecx
- /* Subtraction is okay because the upper 8 bits are zero. */
- subl %ecx, %eax
+ movbe (%rdi), %eax
+ movbe (%rsi), %ecx
+ shlq $32, %rax
+ shlq $32, %rcx
+ movbe -4(%rdi, %rdx), %edi
+ movbe -4(%rsi, %rdx), %esi
+ orq %rdi, %rax
+ orq %rsi, %rcx
+ subq %rcx, %rax
+ /* Fast path for return zero. */
+ jnz L(ret_nonzero)
/* No ymm register was touched. */
ret
/* No ymm register was touched. */
ret
+ .p2align 4,, 5
+L(ret_nonzero):
+ sbbl %eax, %eax
+ orl $1, %eax
+ /* No ymm register was touched. */
+ ret
+
+ .p2align 4,, 2
+L(zero):
+ xorl %eax, %eax
+ /* No ymm register was touched. */
+ ret
+
.p2align 4
L(between_8_15):
-# endif
+ movbe (%rdi), %rax
+ movbe (%rsi), %rcx
+ subq %rcx, %rax
+ jnz L(ret_nonzero)
+ movbe -8(%rdi, %rdx), %rax
+ movbe -8(%rsi, %rdx), %rcx
+ subq %rcx, %rax
+ /* Fast path for return zero. */
+ jnz L(ret_nonzero)
+ /* No ymm register was touched. */
+ ret
+# else
/* If USE_AS_WMEMCMP fall through into 8-15 byte case. */
vmovq (%rdi), %xmm1
vmovq (%rsi), %xmm2
VPCMPEQ %xmm1, %xmm2, %xmm2
vpmovmskb %xmm2, %eax
subl $0xffff, %eax
+ /* Fast path for return zero. */
jnz L(return_vec_0)
/* No ymm register was touched. */
ret
+# endif
- .p2align 4
-L(zero):
- xorl %eax, %eax
- ret
-
- .p2align 4
+ .p2align 4,, 10
L(between_16_31):
/* From 16 to 31 bytes. No branch when size == 16. */
vmovdqu (%rsi), %xmm2
VPCMPEQ (%rdi), %xmm2, %xmm2
vpmovmskb %xmm2, %eax
subl $0xffff, %eax
+ /* Fast path for return zero. */
jnz L(return_vec_0)
/* No ymm register was touched. */
ret
# ifdef USE_AS_WMEMCMP
+ .p2align 4,, 2
+L(zero):
+ xorl %eax, %eax
+ ret
+
.p2align 4
L(one_or_less):
jb L(zero)
# else
.p2align 4
-L(between_4_7):
- /* Load as big endian with overlapping movbe to avoid branches.
- */
- movbe (%rdi), %eax
- movbe (%rsi), %ecx
- shlq $32, %rax
- shlq $32, %rcx
- movbe -4(%rdi, %rdx), %edi
- movbe -4(%rsi, %rdx), %esi
- orq %rdi, %rax
- orq %rsi, %rcx
- subq %rcx, %rax
- jz L(zero_4_7)
- sbbl %eax, %eax
- orl $1, %eax
-L(zero_4_7):
+L(between_2_3):
+ /* Load as big endian to avoid branches. */
+ movzwl (%rdi), %eax
+ movzwl (%rsi), %ecx
+ bswap %eax
+ bswap %ecx
+ shrl %eax
+ shrl %ecx
+ movzbl -1(%rdi, %rdx), %edi
+ movzbl -1(%rsi, %rdx), %esi
+ orl %edi, %eax
+ orl %esi, %ecx
+ /* Subtraction is okay because the upper bit is zero. */
+ subl %ecx, %eax
/* No ymm register was touched. */
ret
# endif
area.
7. Use 2 vector compares when size is 2 * CHAR_PER_VEC or less.
8. Use 4 vector compares when size is 4 * CHAR_PER_VEC or less.
- 9. Use 8 vector compares when size is 8 * CHAR_PER_VEC or less. */
+ 9. Use 8 vector compares when size is 8 * CHAR_PER_VEC or less.
+
+When possible the implementation tries to optimize for frontend in the
+following ways:
+Throughput:
+ 1. All code sections that fit are able to run optimally out of the
+ LSD.
+ 2. All code sections that fit are able to run optimally out of the
+ DSB.
+ 3. Basic blocks are contained in minimum number of fetch blocks
+ necessary.
+
+Latency:
+ 1. Logically connected basic blocks are put in the same
+ cache-line.
+ 2. Logically connected basic blocks that do not fit in the same
+ cache-line are put in adjacent lines. This can benefit from
+ L2 spatial prefetching and L1 next-line prefetching. */
# include <sysdep.h>
# define VMOVU vmovdqu64
# ifdef USE_AS_WMEMCMP
+# define VMOVU_MASK vmovdqu32
# define CHAR_SIZE 4
# define VPCMP vpcmpd
+# define VPTEST vptestmd
# else
+# define VMOVU_MASK vmovdqu8
# define CHAR_SIZE 1
# define VPCMP vpcmpub
+# define VPTEST vptestmb
# endif
+
# define VEC_SIZE 32
# define PAGE_SIZE 4096
# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
*/
.section .text.evex,"ax",@progbits
-ENTRY (MEMCMP)
+/* Cache align memcmp entry. This allows for much more thorough
+ frontend optimization. */
+ENTRY_P2ALIGN (MEMCMP, 6)
# ifdef __ILP32__
/* Clear the upper 32 bits. */
movl %edx, %edx
# endif
cmp $CHAR_PER_VEC, %RDX_LP
- jb L(less_vec)
+ /* Fall through for [0, VEC_SIZE] as it is the hottest. */
+ ja L(more_1x_vec)
+
+ /* Create mask for CHAR's we want to compare. This allows us to
+ avoid having to include page cross logic. */
+ movl $-1, %ecx
+ bzhil %edx, %ecx, %ecx
+ kmovd %ecx, %k2
+
+ /* Safe to load full ymm with mask. */
+ VMOVU_MASK (%rsi), %YMM2{%k2}
+ VPCMP $4,(%rdi), %YMM2, %k1{%k2}
+ kmovd %k1, %eax
+ testl %eax, %eax
+ jnz L(return_vec_0)
+ ret
+
+ .p2align 4
+L(return_vec_0):
+ tzcntl %eax, %eax
+# ifdef USE_AS_WMEMCMP
+ movl (%rdi, %rax, CHAR_SIZE), %ecx
+ xorl %edx, %edx
+ cmpl (%rsi, %rax, CHAR_SIZE), %ecx
+ /* NB: no partial register stall here because xorl zero idiom
+ above. */
+ setg %dl
+ leal -1(%rdx, %rdx), %eax
+# else
+ movzbl (%rsi, %rax), %ecx
+ movzbl (%rdi, %rax), %eax
+ subl %ecx, %eax
+# endif
+ ret
+
+ .p2align 4
+L(more_1x_vec):
/* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */
VMOVU (%rsi), %YMM1
/* Use compare not equals to directly check for mismatch. */
- VPCMP $4, (%rdi), %YMM1, %k1
+ VPCMP $4,(%rdi), %YMM1, %k1
kmovd %k1, %eax
/* NB: eax must be destination register if going to
- L(return_vec_[0,2]). For L(return_vec_3 destination register
+ L(return_vec_[0,2]). For L(return_vec_3) destination register
must be ecx. */
testl %eax, %eax
jnz L(return_vec_0)
/* Check third and fourth VEC no matter what. */
VMOVU (VEC_SIZE * 2)(%rsi), %YMM3
- VPCMP $4, (VEC_SIZE * 2)(%rdi), %YMM3, %k1
+ VPCMP $4,(VEC_SIZE * 2)(%rdi), %YMM3, %k1
kmovd %k1, %eax
testl %eax, %eax
jnz L(return_vec_2)
VMOVU (VEC_SIZE * 3)(%rsi), %YMM4
- VPCMP $4, (VEC_SIZE * 3)(%rdi), %YMM4, %k1
+ VPCMP $4,(VEC_SIZE * 3)(%rdi), %YMM4, %k1
kmovd %k1, %ecx
testl %ecx, %ecx
jnz L(return_vec_3)
- /* Zero YMM0. 4x VEC reduction is done with vpxor + vtern so
- compare with zero to get a mask is needed. */
- vpxorq %XMM0, %XMM0, %XMM0
-
/* Go to 4x VEC loop. */
cmpq $(CHAR_PER_VEC * 8), %rdx
ja L(more_8x_vec)
VMOVU (VEC_SIZE * 2)(%rsi), %YMM3
vpxorq (VEC_SIZE * 2)(%rdi), %YMM3, %YMM3
- /* Or together YMM1, YMM2, and YMM3 into YMM3. */
- vpternlogd $0xfe, %YMM1, %YMM2, %YMM3
VMOVU (VEC_SIZE * 3)(%rsi), %YMM4
/* Ternary logic to xor (VEC_SIZE * 3)(%rdi) with YMM4 while
- oring with YMM3. Result is stored in YMM4. */
- vpternlogd $0xde, (VEC_SIZE * 3)(%rdi), %YMM3, %YMM4
- /* Compare YMM4 with 0. If any 1s s1 and s2 don't match. */
- VPCMP $4, %YMM4, %YMM0, %k1
+ oring with YMM1. Result is stored in YMM4. */
+ vpternlogd $0xde,(VEC_SIZE * 3)(%rdi), %YMM1, %YMM4
+
+ /* Or together YMM2, YMM3, and YMM4 into YMM4. */
+ vpternlogd $0xfe, %YMM2, %YMM3, %YMM4
+
+ /* Test YMM4 against itself. Store any CHAR mismatches in k1.
+ */
+ VPTEST %YMM4, %YMM4, %k1
+ /* k1 must go to ecx for L(return_vec_0_1_2_3). */
kmovd %k1, %ecx
testl %ecx, %ecx
jnz L(return_vec_0_1_2_3)
/* NB: eax must be zero to reach here. */
ret
- /* NB: aligning 32 here allows for the rest of the jump targets
- to be tuned for 32 byte alignment. Most important this ensures
- the L(more_8x_vec) loop is 32 byte aligned. */
- .p2align 5
-L(less_vec):
- /* Check if one or less CHAR. This is necessary for size = 0 but
- is also faster for size = CHAR_SIZE. */
- cmpl $1, %edx
- jbe L(one_or_less)
-
- /* Check if loading one VEC from either s1 or s2 could cause a
- page cross. This can have false positives but is by far the
- fastest method. */
- movl %edi, %eax
- orl %esi, %eax
- andl $(PAGE_SIZE - 1), %eax
- cmpl $(PAGE_SIZE - VEC_SIZE), %eax
- jg L(page_cross_less_vec)
-
- /* No page cross possible. */
- VMOVU (%rsi), %YMM2
- VPCMP $4, (%rdi), %YMM2, %k1
- kmovd %k1, %eax
- /* Create mask in ecx for potentially in bound matches. */
- bzhil %edx, %eax, %eax
+
+ .p2align 4,, 8
+L(8x_end_return_vec_0_1_2_3):
+ movq %rdx, %rdi
+L(8x_return_vec_0_1_2_3):
+ addq %rdi, %rsi
+L(return_vec_0_1_2_3):
+ VPTEST %YMM1, %YMM1, %k0
+ kmovd %k0, %eax
+ testl %eax, %eax
jnz L(return_vec_0)
- ret
- .p2align 4
-L(return_vec_0):
- tzcntl %eax, %eax
+ VPTEST %YMM2, %YMM2, %k0
+ kmovd %k0, %eax
+ testl %eax, %eax
+ jnz L(return_vec_1)
+
+ VPTEST %YMM3, %YMM3, %k0
+ kmovd %k0, %eax
+ testl %eax, %eax
+ jnz L(return_vec_2)
+L(return_vec_3):
+ /* bsf saves 1 byte over tzcnt. This keeps L(return_vec_3) in one
+ fetch block and the entire L(*return_vec_0_1_2_3) in 1 cache
+ line. */
+ bsfl %ecx, %ecx
# ifdef USE_AS_WMEMCMP
- movl (%rdi, %rax, CHAR_SIZE), %ecx
+ movl (VEC_SIZE * 3)(%rdi, %rcx, CHAR_SIZE), %eax
xorl %edx, %edx
- cmpl (%rsi, %rax, CHAR_SIZE), %ecx
- /* NB: no partial register stall here because xorl zero idiom
- above. */
+ cmpl (VEC_SIZE * 3)(%rsi, %rcx, CHAR_SIZE), %eax
setg %dl
leal -1(%rdx, %rdx), %eax
# else
- movzbl (%rsi, %rax), %ecx
- movzbl (%rdi, %rax), %eax
+ movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax
+ movzbl (VEC_SIZE * 3)(%rsi, %rcx), %ecx
subl %ecx, %eax
# endif
ret
- /* NB: No p2align necessary. Alignment % 16 is naturally 1
- which is good enough for a target not in a loop. */
+
+ .p2align 4
L(return_vec_1):
- tzcntl %eax, %eax
+ /* bsf saves 1 byte over tzcnt and keeps L(return_vec_1) in one
+ fetch block. */
+ bsfl %eax, %eax
# ifdef USE_AS_WMEMCMP
movl VEC_SIZE(%rdi, %rax, CHAR_SIZE), %ecx
xorl %edx, %edx
# endif
ret
- /* NB: No p2align necessary. Alignment % 16 is naturally 2
- which is good enough for a target not in a loop. */
+ .p2align 4,, 10
L(return_vec_2):
- tzcntl %eax, %eax
+ /* bsf saves 1 byte over tzcnt and keeps L(return_vec_2) in one
+ fetch block. */
+ bsfl %eax, %eax
# ifdef USE_AS_WMEMCMP
movl (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %ecx
xorl %edx, %edx
# endif
ret
- .p2align 4
-L(8x_return_vec_0_1_2_3):
- /* Returning from L(more_8x_vec) requires restoring rsi. */
- addq %rdi, %rsi
-L(return_vec_0_1_2_3):
- VPCMP $4, %YMM1, %YMM0, %k0
- kmovd %k0, %eax
- testl %eax, %eax
- jnz L(return_vec_0)
-
- VPCMP $4, %YMM2, %YMM0, %k0
- kmovd %k0, %eax
- testl %eax, %eax
- jnz L(return_vec_1)
-
- VPCMP $4, %YMM3, %YMM0, %k0
- kmovd %k0, %eax
- testl %eax, %eax
- jnz L(return_vec_2)
-L(return_vec_3):
- tzcntl %ecx, %ecx
-# ifdef USE_AS_WMEMCMP
- movl (VEC_SIZE * 3)(%rdi, %rcx, CHAR_SIZE), %eax
- xorl %edx, %edx
- cmpl (VEC_SIZE * 3)(%rsi, %rcx, CHAR_SIZE), %eax
- setg %dl
- leal -1(%rdx, %rdx), %eax
-# else
- movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax
- movzbl (VEC_SIZE * 3)(%rsi, %rcx), %ecx
- subl %ecx, %eax
-# endif
- ret
-
.p2align 4
L(more_8x_vec):
/* Set end of s1 in rdx. */
andq $-VEC_SIZE, %rdi
/* Adjust because first 4x vec where check already. */
subq $-(VEC_SIZE * 4), %rdi
+
.p2align 4
L(loop_4x_vec):
VMOVU (%rsi, %rdi), %YMM1
vpxorq (%rdi), %YMM1, %YMM1
-
VMOVU VEC_SIZE(%rsi, %rdi), %YMM2
vpxorq VEC_SIZE(%rdi), %YMM2, %YMM2
-
VMOVU (VEC_SIZE * 2)(%rsi, %rdi), %YMM3
vpxorq (VEC_SIZE * 2)(%rdi), %YMM3, %YMM3
- vpternlogd $0xfe, %YMM1, %YMM2, %YMM3
-
VMOVU (VEC_SIZE * 3)(%rsi, %rdi), %YMM4
- vpternlogd $0xde, (VEC_SIZE * 3)(%rdi), %YMM3, %YMM4
- VPCMP $4, %YMM4, %YMM0, %k1
+ vpternlogd $0xde,(VEC_SIZE * 3)(%rdi), %YMM1, %YMM4
+ vpternlogd $0xfe, %YMM2, %YMM3, %YMM4
+ VPTEST %YMM4, %YMM4, %k1
kmovd %k1, %ecx
testl %ecx, %ecx
jnz L(8x_return_vec_0_1_2_3)
cmpl $(VEC_SIZE * 2), %edi
jae L(8x_last_2x_vec)
+ vpxorq (VEC_SIZE * 2)(%rdx), %YMM3, %YMM3
+
VMOVU (%rsi, %rdx), %YMM1
vpxorq (%rdx), %YMM1, %YMM1
VMOVU VEC_SIZE(%rsi, %rdx), %YMM2
vpxorq VEC_SIZE(%rdx), %YMM2, %YMM2
-
- vpxorq (VEC_SIZE * 2)(%rdx), %YMM3, %YMM3
- vpternlogd $0xfe, %YMM1, %YMM2, %YMM3
-
VMOVU (VEC_SIZE * 3)(%rsi, %rdx), %YMM4
- vpternlogd $0xde, (VEC_SIZE * 3)(%rdx), %YMM3, %YMM4
- VPCMP $4, %YMM4, %YMM0, %k1
+ vpternlogd $0xde,(VEC_SIZE * 3)(%rdx), %YMM1, %YMM4
+ vpternlogd $0xfe, %YMM2, %YMM3, %YMM4
+ VPTEST %YMM4, %YMM4, %k1
kmovd %k1, %ecx
- /* Restore s1 pointer to rdi. */
- movq %rdx, %rdi
testl %ecx, %ecx
- jnz L(8x_return_vec_0_1_2_3)
+ jnz L(8x_end_return_vec_0_1_2_3)
/* NB: eax must be zero to reach here. */
ret
/* Only entry is from L(more_8x_vec). */
- .p2align 4
+ .p2align 4,, 10
L(8x_last_2x_vec):
- VPCMP $4, (VEC_SIZE * 2)(%rdx), %YMM3, %k1
+ VPCMP $4,(VEC_SIZE * 2)(%rdx), %YMM3, %k1
kmovd %k1, %eax
testl %eax, %eax
jnz L(8x_return_vec_2)
/* Naturally aligned to 16 bytes. */
L(8x_last_1x_vec):
VMOVU (VEC_SIZE * 3)(%rsi, %rdx), %YMM1
- VPCMP $4, (VEC_SIZE * 3)(%rdx), %YMM1, %k1
+ VPCMP $4,(VEC_SIZE * 3)(%rdx), %YMM1, %k1
kmovd %k1, %eax
testl %eax, %eax
jnz L(8x_return_vec_3)
ret
- .p2align 4
-L(last_2x_vec):
- /* Check second to last VEC. */
- VMOVU -(VEC_SIZE * 2)(%rsi, %rdx, CHAR_SIZE), %YMM1
- VPCMP $4, -(VEC_SIZE * 2)(%rdi, %rdx, CHAR_SIZE), %YMM1, %k1
- kmovd %k1, %eax
- testl %eax, %eax
- jnz L(return_vec_1_end)
-
- /* Check last VEC. */
- .p2align 4
-L(last_1x_vec):
- VMOVU -(VEC_SIZE * 1)(%rsi, %rdx, CHAR_SIZE), %YMM1
- VPCMP $4, -(VEC_SIZE * 1)(%rdi, %rdx, CHAR_SIZE), %YMM1, %k1
- kmovd %k1, %eax
- testl %eax, %eax
- jnz L(return_vec_0_end)
- ret
-
- .p2align 4
+ /* Not ideally aligned (at offset +9 bytes in fetch block) but
+ not aligning keeps it in the same cache line as
+ L(8x_last_1x/2x_vec) so likely worth it. As well, saves code
+ size. */
+ .p2align 4,, 4
L(8x_return_vec_2):
subq $VEC_SIZE, %rdx
L(8x_return_vec_3):
- tzcntl %eax, %eax
+ bsfl %eax, %eax
# ifdef USE_AS_WMEMCMP
leaq (%rdx, %rax, CHAR_SIZE), %rax
movl (VEC_SIZE * 3)(%rax), %ecx
# endif
ret
+ .p2align 4,, 10
+L(last_2x_vec):
+ /* Check second to last VEC. */
+ VMOVU -(VEC_SIZE * 2)(%rsi, %rdx, CHAR_SIZE), %YMM1
+ VPCMP $4, -(VEC_SIZE * 2)(%rdi, %rdx, CHAR_SIZE), %YMM1, %k1
+ kmovd %k1, %eax
+ testl %eax, %eax
+ jnz L(return_vec_1_end)
+
+ /* Check last VEC. */
.p2align 4
-L(return_vec_0_end):
- tzcntl %eax, %eax
- addl %edx, %eax
-# ifdef USE_AS_WMEMCMP
- movl -VEC_SIZE(%rdi, %rax, CHAR_SIZE), %ecx
- xorl %edx, %edx
- cmpl -VEC_SIZE(%rsi, %rax, CHAR_SIZE), %ecx
- setg %dl
- leal -1(%rdx, %rdx), %eax
-# else
- movzbl -VEC_SIZE(%rsi, %rax), %ecx
- movzbl -VEC_SIZE(%rdi, %rax), %eax
- subl %ecx, %eax
-# endif
+L(last_1x_vec):
+ VMOVU -(VEC_SIZE * 1)(%rsi, %rdx, CHAR_SIZE), %YMM1
+ VPCMP $4, -(VEC_SIZE * 1)(%rdi, %rdx, CHAR_SIZE), %YMM1, %k1
+ kmovd %k1, %eax
+ testl %eax, %eax
+ jnz L(return_vec_0_end)
ret
- .p2align 4
+
+ /* Don't align. Takes 2-fetch blocks either way and aligning
+ will cause code to spill into another cacheline. */
L(return_vec_1_end):
- tzcntl %eax, %eax
+ /* Use bsf to save code size. This is necessary to have
+ L(one_or_less) fit in aligning bytes between. */
+ bsfl %eax, %eax
addl %edx, %eax
# ifdef USE_AS_WMEMCMP
movl -(VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %ecx
# endif
ret
-
- .p2align 4
-L(page_cross_less_vec):
- /* if USE_AS_WMEMCMP it can only be 0, 4, 8, 12, 16, 20, 24, 28
- bytes. */
- cmpl $(16 / CHAR_SIZE), %edx
- jae L(between_16_31)
-# ifndef USE_AS_WMEMCMP
- cmpl $8, %edx
- jae L(between_8_15)
- cmpl $4, %edx
- jae L(between_4_7)
-L(between_2_3):
- /* Load as big endian to avoid branches. */
- movzwl (%rdi), %eax
- movzwl (%rsi), %ecx
- shll $8, %eax
- shll $8, %ecx
- bswap %eax
- bswap %ecx
- movzbl -1(%rdi, %rdx), %edi
- movzbl -1(%rsi, %rdx), %esi
- orl %edi, %eax
- orl %esi, %ecx
- /* Subtraction is okay because the upper 8 bits are zero. */
- subl %ecx, %eax
- ret
- .p2align 4
-L(one_or_less):
- jb L(zero)
- movzbl (%rsi), %ecx
- movzbl (%rdi), %eax
- subl %ecx, %eax
- ret
-
- .p2align 4
-L(between_8_15):
-# endif
- /* If USE_AS_WMEMCMP fall through into 8-15 byte case. */
- vmovq (%rdi), %XMM1
- vmovq (%rsi), %XMM2
- VPCMP $4, %XMM1, %XMM2, %k1
- kmovd %k1, %eax
- testl %eax, %eax
- jnz L(return_vec_0)
- /* Use overlapping loads to avoid branches. */
- leaq -8(%rdi, %rdx, CHAR_SIZE), %rdi
- leaq -8(%rsi, %rdx, CHAR_SIZE), %rsi
- vmovq (%rdi), %XMM1
- vmovq (%rsi), %XMM2
- VPCMP $4, %XMM1, %XMM2, %k1
- kmovd %k1, %eax
- testl %eax, %eax
- jnz L(return_vec_0)
- ret
-
- .p2align 4
-L(zero):
- xorl %eax, %eax
- ret
-
- .p2align 4
-L(between_16_31):
- /* From 16 to 31 bytes. No branch when size == 16. */
- VMOVU (%rsi), %XMM2
- VPCMP $4, (%rdi), %XMM2, %k1
- kmovd %k1, %eax
- testl %eax, %eax
- jnz L(return_vec_0)
-
- /* Use overlapping loads to avoid branches. */
-
- VMOVU -16(%rsi, %rdx, CHAR_SIZE), %XMM2
- leaq -16(%rdi, %rdx, CHAR_SIZE), %rdi
- leaq -16(%rsi, %rdx, CHAR_SIZE), %rsi
- VPCMP $4, (%rdi), %XMM2, %k1
- kmovd %k1, %eax
- testl %eax, %eax
- jnz L(return_vec_0)
- ret
-
+ /* Don't align. Takes 2-fetch blocks either way and aligning
+ will cause code to spill into another cacheline. */
+L(return_vec_0_end):
+ tzcntl %eax, %eax
+ addl %edx, %eax
# ifdef USE_AS_WMEMCMP
- .p2align 4
-L(one_or_less):
- jb L(zero)
- movl (%rdi), %ecx
+ movl -VEC_SIZE(%rdi, %rax, CHAR_SIZE), %ecx
xorl %edx, %edx
- cmpl (%rsi), %ecx
- je L(zero)
+ cmpl -VEC_SIZE(%rsi, %rax, CHAR_SIZE), %ecx
setg %dl
leal -1(%rdx, %rdx), %eax
- ret
# else
-
- .p2align 4
-L(between_4_7):
- /* Load as big endian with overlapping movbe to avoid branches.
- */
- movbe (%rdi), %eax
- movbe (%rsi), %ecx
- shlq $32, %rax
- shlq $32, %rcx
- movbe -4(%rdi, %rdx), %edi
- movbe -4(%rsi, %rdx), %esi
- orq %rdi, %rax
- orq %rsi, %rcx
- subq %rcx, %rax
- jz L(zero_4_7)
- sbbl %eax, %eax
- orl $1, %eax
-L(zero_4_7):
- ret
+ movzbl -VEC_SIZE(%rsi, %rax), %ecx
+ movzbl -VEC_SIZE(%rdi, %rax), %eax
+ subl %ecx, %eax
# endif
+ ret
+ /* 1-byte until next cache line. */
END (MEMCMP)
#endif
+++ /dev/null
-/* memcmp with SSE4.1, wmemcmp with SSE4.1
- Copyright (C) 2010-2021 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-
-# ifndef MEMCMP
-# define MEMCMP __memcmp_sse4_1
-# endif
-
-# define JMPTBL(I, B) (I - B)
-
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
- lea TABLE(%rip), %r11; \
- movslq (%r11, INDEX, SCALE), %rcx; \
- add %r11, %rcx; \
- _CET_NOTRACK jmp *%rcx; \
- ud2
-
-/* Warning!
- wmemcmp has to use SIGNED comparison for elements.
- memcmp has to use UNSIGNED comparison for elemnts.
-*/
-
- .section .text.sse4.1,"ax",@progbits
-ENTRY (MEMCMP)
-# ifdef USE_AS_WMEMCMP
- shl $2, %RDX_LP
-# elif defined __ILP32__
- /* Clear the upper 32 bits. */
- mov %edx, %edx
-# endif
- pxor %xmm0, %xmm0
- cmp $79, %RDX_LP
- ja L(79bytesormore)
-# ifndef USE_AS_WMEMCMP
- cmp $1, %RDX_LP
- je L(firstbyte)
-# endif
- add %rdx, %rsi
- add %rdx, %rdi
- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
-
-# ifndef USE_AS_WMEMCMP
- .p2align 4
-L(firstbyte):
- movzbl (%rdi), %eax
- movzbl (%rsi), %ecx
- sub %ecx, %eax
- ret
-# endif
-
- .p2align 4
-L(79bytesormore):
- movdqu (%rsi), %xmm1
- movdqu (%rdi), %xmm2
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(16bytesin256)
- mov %rsi, %rcx
- and $-16, %rsi
- add $16, %rsi
- sub %rsi, %rcx
-
- sub %rcx, %rdi
- add %rcx, %rdx
- test $0xf, %rdi
- jz L(2aligned)
-
- cmp $128, %rdx
- ja L(128bytesormore)
-L(less128bytes):
- sub $64, %rdx
-
- movdqu (%rdi), %xmm2
- pxor (%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(16bytesin256)
-
- movdqu 16(%rdi), %xmm2
- pxor 16(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(32bytesin256)
-
- movdqu 32(%rdi), %xmm2
- pxor 32(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(48bytesin256)
-
- movdqu 48(%rdi), %xmm2
- pxor 48(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(64bytesin256)
- cmp $32, %rdx
- jb L(less32bytesin64)
-
- movdqu 64(%rdi), %xmm2
- pxor 64(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(80bytesin256)
-
- movdqu 80(%rdi), %xmm2
- pxor 80(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(96bytesin256)
- sub $32, %rdx
- add $32, %rdi
- add $32, %rsi
-L(less32bytesin64):
- add $64, %rdi
- add $64, %rsi
- add %rdx, %rsi
- add %rdx, %rdi
- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
-
-L(128bytesormore):
- cmp $512, %rdx
- ja L(512bytesormore)
- cmp $256, %rdx
- ja L(less512bytes)
-L(less256bytes):
- sub $128, %rdx
-
- movdqu (%rdi), %xmm2
- pxor (%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(16bytesin256)
-
- movdqu 16(%rdi), %xmm2
- pxor 16(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(32bytesin256)
-
- movdqu 32(%rdi), %xmm2
- pxor 32(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(48bytesin256)
-
- movdqu 48(%rdi), %xmm2
- pxor 48(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(64bytesin256)
-
- movdqu 64(%rdi), %xmm2
- pxor 64(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(80bytesin256)
-
- movdqu 80(%rdi), %xmm2
- pxor 80(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(96bytesin256)
-
- movdqu 96(%rdi), %xmm2
- pxor 96(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(112bytesin256)
-
- movdqu 112(%rdi), %xmm2
- pxor 112(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(128bytesin256)
-
- add $128, %rsi
- add $128, %rdi
-
- cmp $64, %rdx
- jae L(less128bytes)
-
- cmp $32, %rdx
- jb L(less32bytesin128)
-
- movdqu (%rdi), %xmm2
- pxor (%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(16bytesin256)
-
- movdqu 16(%rdi), %xmm2
- pxor 16(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(32bytesin256)
- sub $32, %rdx
- add $32, %rdi
- add $32, %rsi
-L(less32bytesin128):
- add %rdx, %rsi
- add %rdx, %rdi
- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
-
-L(less512bytes):
- sub $256, %rdx
- movdqu (%rdi), %xmm2
- pxor (%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(16bytesin256)
-
- movdqu 16(%rdi), %xmm2
- pxor 16(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(32bytesin256)
-
- movdqu 32(%rdi), %xmm2
- pxor 32(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(48bytesin256)
-
- movdqu 48(%rdi), %xmm2
- pxor 48(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(64bytesin256)
-
- movdqu 64(%rdi), %xmm2
- pxor 64(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(80bytesin256)
-
- movdqu 80(%rdi), %xmm2
- pxor 80(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(96bytesin256)
-
- movdqu 96(%rdi), %xmm2
- pxor 96(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(112bytesin256)
-
- movdqu 112(%rdi), %xmm2
- pxor 112(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(128bytesin256)
-
- movdqu 128(%rdi), %xmm2
- pxor 128(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(144bytesin256)
-
- movdqu 144(%rdi), %xmm2
- pxor 144(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(160bytesin256)
-
- movdqu 160(%rdi), %xmm2
- pxor 160(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(176bytesin256)
-
- movdqu 176(%rdi), %xmm2
- pxor 176(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(192bytesin256)
-
- movdqu 192(%rdi), %xmm2
- pxor 192(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(208bytesin256)
-
- movdqu 208(%rdi), %xmm2
- pxor 208(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(224bytesin256)
-
- movdqu 224(%rdi), %xmm2
- pxor 224(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(240bytesin256)
-
- movdqu 240(%rdi), %xmm2
- pxor 240(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(256bytesin256)
-
- add $256, %rsi
- add $256, %rdi
-
- cmp $128, %rdx
- jae L(less256bytes)
-
- cmp $64, %rdx
- jae L(less128bytes)
-
- cmp $32, %rdx
- jb L(less32bytesin256)
-
- movdqu (%rdi), %xmm2
- pxor (%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(16bytesin256)
-
- movdqu 16(%rdi), %xmm2
- pxor 16(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(32bytesin256)
- sub $32, %rdx
- add $32, %rdi
- add $32, %rsi
-L(less32bytesin256):
- add %rdx, %rsi
- add %rdx, %rdi
- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
-
- .p2align 4
-L(512bytesormore):
-# ifdef DATA_CACHE_SIZE_HALF
- mov $DATA_CACHE_SIZE_HALF, %R8_LP
-# else
- mov __x86_data_cache_size_half(%rip), %R8_LP
-# endif
- mov %r8, %r9
- shr $1, %r8
- add %r9, %r8
- cmp %r8, %rdx
- ja L(L2_L3_cache_unaglined)
- sub $64, %rdx
- .p2align 4
-L(64bytesormore_loop):
- movdqu (%rdi), %xmm2
- pxor (%rsi), %xmm2
- movdqa %xmm2, %xmm1
-
- movdqu 16(%rdi), %xmm3
- pxor 16(%rsi), %xmm3
- por %xmm3, %xmm1
-
- movdqu 32(%rdi), %xmm4
- pxor 32(%rsi), %xmm4
- por %xmm4, %xmm1
-
- movdqu 48(%rdi), %xmm5
- pxor 48(%rsi), %xmm5
- por %xmm5, %xmm1
-
- ptest %xmm1, %xmm0
- jnc L(64bytesormore_loop_end)
- add $64, %rsi
- add $64, %rdi
- sub $64, %rdx
- jae L(64bytesormore_loop)
-
- add $64, %rdx
- add %rdx, %rsi
- add %rdx, %rdi
- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
-
-L(L2_L3_cache_unaglined):
- sub $64, %rdx
- .p2align 4
-L(L2_L3_unaligned_128bytes_loop):
- prefetchnta 0x1c0(%rdi)
- prefetchnta 0x1c0(%rsi)
- movdqu (%rdi), %xmm2
- pxor (%rsi), %xmm2
- movdqa %xmm2, %xmm1
-
- movdqu 16(%rdi), %xmm3
- pxor 16(%rsi), %xmm3
- por %xmm3, %xmm1
-
- movdqu 32(%rdi), %xmm4
- pxor 32(%rsi), %xmm4
- por %xmm4, %xmm1
-
- movdqu 48(%rdi), %xmm5
- pxor 48(%rsi), %xmm5
- por %xmm5, %xmm1
-
- ptest %xmm1, %xmm0
- jnc L(64bytesormore_loop_end)
- add $64, %rsi
- add $64, %rdi
- sub $64, %rdx
- jae L(L2_L3_unaligned_128bytes_loop)
-
- add $64, %rdx
- add %rdx, %rsi
- add %rdx, %rdi
- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
-
-/*
- * This case is for machines which are sensitive for unaligned instructions.
- */
- .p2align 4
-L(2aligned):
- cmp $128, %rdx
- ja L(128bytesormorein2aligned)
-L(less128bytesin2aligned):
- sub $64, %rdx
-
- movdqa (%rdi), %xmm2
- pxor (%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(16bytesin256)
-
- movdqa 16(%rdi), %xmm2
- pxor 16(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(32bytesin256)
-
- movdqa 32(%rdi), %xmm2
- pxor 32(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(48bytesin256)
-
- movdqa 48(%rdi), %xmm2
- pxor 48(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(64bytesin256)
- cmp $32, %rdx
- jb L(less32bytesin64in2alinged)
-
- movdqa 64(%rdi), %xmm2
- pxor 64(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(80bytesin256)
-
- movdqa 80(%rdi), %xmm2
- pxor 80(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(96bytesin256)
- sub $32, %rdx
- add $32, %rdi
- add $32, %rsi
-L(less32bytesin64in2alinged):
- add $64, %rdi
- add $64, %rsi
- add %rdx, %rsi
- add %rdx, %rdi
- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
-
- .p2align 4
-L(128bytesormorein2aligned):
- cmp $512, %rdx
- ja L(512bytesormorein2aligned)
- cmp $256, %rdx
- ja L(256bytesormorein2aligned)
-L(less256bytesin2alinged):
- sub $128, %rdx
-
- movdqa (%rdi), %xmm2
- pxor (%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(16bytesin256)
-
- movdqa 16(%rdi), %xmm2
- pxor 16(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(32bytesin256)
-
- movdqa 32(%rdi), %xmm2
- pxor 32(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(48bytesin256)
-
- movdqa 48(%rdi), %xmm2
- pxor 48(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(64bytesin256)
-
- movdqa 64(%rdi), %xmm2
- pxor 64(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(80bytesin256)
-
- movdqa 80(%rdi), %xmm2
- pxor 80(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(96bytesin256)
-
- movdqa 96(%rdi), %xmm2
- pxor 96(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(112bytesin256)
-
- movdqa 112(%rdi), %xmm2
- pxor 112(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(128bytesin256)
-
- add $128, %rsi
- add $128, %rdi
-
- cmp $64, %rdx
- jae L(less128bytesin2aligned)
-
- cmp $32, %rdx
- jb L(less32bytesin128in2aligned)
-
- movdqu (%rdi), %xmm2
- pxor (%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(16bytesin256)
-
- movdqu 16(%rdi), %xmm2
- pxor 16(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(32bytesin256)
- sub $32, %rdx
- add $32, %rdi
- add $32, %rsi
-L(less32bytesin128in2aligned):
- add %rdx, %rsi
- add %rdx, %rdi
- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
-
- .p2align 4
-L(256bytesormorein2aligned):
-
- sub $256, %rdx
- movdqa (%rdi), %xmm2
- pxor (%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(16bytesin256)
-
- movdqa 16(%rdi), %xmm2
- pxor 16(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(32bytesin256)
-
- movdqa 32(%rdi), %xmm2
- pxor 32(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(48bytesin256)
-
- movdqa 48(%rdi), %xmm2
- pxor 48(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(64bytesin256)
-
- movdqa 64(%rdi), %xmm2
- pxor 64(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(80bytesin256)
-
- movdqa 80(%rdi), %xmm2
- pxor 80(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(96bytesin256)
-
- movdqa 96(%rdi), %xmm2
- pxor 96(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(112bytesin256)
-
- movdqa 112(%rdi), %xmm2
- pxor 112(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(128bytesin256)
-
- movdqa 128(%rdi), %xmm2
- pxor 128(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(144bytesin256)
-
- movdqa 144(%rdi), %xmm2
- pxor 144(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(160bytesin256)
-
- movdqa 160(%rdi), %xmm2
- pxor 160(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(176bytesin256)
-
- movdqa 176(%rdi), %xmm2
- pxor 176(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(192bytesin256)
-
- movdqa 192(%rdi), %xmm2
- pxor 192(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(208bytesin256)
-
- movdqa 208(%rdi), %xmm2
- pxor 208(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(224bytesin256)
-
- movdqa 224(%rdi), %xmm2
- pxor 224(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(240bytesin256)
-
- movdqa 240(%rdi), %xmm2
- pxor 240(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(256bytesin256)
-
- add $256, %rsi
- add $256, %rdi
-
- cmp $128, %rdx
- jae L(less256bytesin2alinged)
-
- cmp $64, %rdx
- jae L(less128bytesin2aligned)
-
- cmp $32, %rdx
- jb L(less32bytesin256in2alinged)
-
- movdqa (%rdi), %xmm2
- pxor (%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(16bytesin256)
-
- movdqa 16(%rdi), %xmm2
- pxor 16(%rsi), %xmm2
- ptest %xmm2, %xmm0
- jnc L(32bytesin256)
- sub $32, %rdx
- add $32, %rdi
- add $32, %rsi
-L(less32bytesin256in2alinged):
- add %rdx, %rsi
- add %rdx, %rdi
- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
-
- .p2align 4
-L(512bytesormorein2aligned):
-# ifdef DATA_CACHE_SIZE_HALF
- mov $DATA_CACHE_SIZE_HALF, %R8_LP
-# else
- mov __x86_data_cache_size_half(%rip), %R8_LP
-# endif
- mov %r8, %r9
- shr $1, %r8
- add %r9, %r8
- cmp %r8, %rdx
- ja L(L2_L3_cache_aglined)
-
- sub $64, %rdx
- .p2align 4
-L(64bytesormore_loopin2aligned):
- movdqa (%rdi), %xmm2
- pxor (%rsi), %xmm2
- movdqa %xmm2, %xmm1
-
- movdqa 16(%rdi), %xmm3
- pxor 16(%rsi), %xmm3
- por %xmm3, %xmm1
-
- movdqa 32(%rdi), %xmm4
- pxor 32(%rsi), %xmm4
- por %xmm4, %xmm1
-
- movdqa 48(%rdi), %xmm5
- pxor 48(%rsi), %xmm5
- por %xmm5, %xmm1
-
- ptest %xmm1, %xmm0
- jnc L(64bytesormore_loop_end)
- add $64, %rsi
- add $64, %rdi
- sub $64, %rdx
- jae L(64bytesormore_loopin2aligned)
-
- add $64, %rdx
- add %rdx, %rsi
- add %rdx, %rdi
- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
-L(L2_L3_cache_aglined):
- sub $64, %rdx
-
- .p2align 4
-L(L2_L3_aligned_128bytes_loop):
- prefetchnta 0x1c0(%rdi)
- prefetchnta 0x1c0(%rsi)
- movdqa (%rdi), %xmm2
- pxor (%rsi), %xmm2
- movdqa %xmm2, %xmm1
-
- movdqa 16(%rdi), %xmm3
- pxor 16(%rsi), %xmm3
- por %xmm3, %xmm1
-
- movdqa 32(%rdi), %xmm4
- pxor 32(%rsi), %xmm4
- por %xmm4, %xmm1
-
- movdqa 48(%rdi), %xmm5
- pxor 48(%rsi), %xmm5
- por %xmm5, %xmm1
-
- ptest %xmm1, %xmm0
- jnc L(64bytesormore_loop_end)
- add $64, %rsi
- add $64, %rdi
- sub $64, %rdx
- jae L(L2_L3_aligned_128bytes_loop)
-
- add $64, %rdx
- add %rdx, %rsi
- add %rdx, %rdi
- BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4)
-
-
- .p2align 4
-L(64bytesormore_loop_end):
- add $16, %rdi
- add $16, %rsi
- ptest %xmm2, %xmm0
- jnc L(16bytes)
-
- add $16, %rdi
- add $16, %rsi
- ptest %xmm3, %xmm0
- jnc L(16bytes)
-
- add $16, %rdi
- add $16, %rsi
- ptest %xmm4, %xmm0
- jnc L(16bytes)
-
- add $16, %rdi
- add $16, %rsi
- jmp L(16bytes)
-
-L(256bytesin256):
- add $256, %rdi
- add $256, %rsi
- jmp L(16bytes)
-L(240bytesin256):
- add $240, %rdi
- add $240, %rsi
- jmp L(16bytes)
-L(224bytesin256):
- add $224, %rdi
- add $224, %rsi
- jmp L(16bytes)
-L(208bytesin256):
- add $208, %rdi
- add $208, %rsi
- jmp L(16bytes)
-L(192bytesin256):
- add $192, %rdi
- add $192, %rsi
- jmp L(16bytes)
-L(176bytesin256):
- add $176, %rdi
- add $176, %rsi
- jmp L(16bytes)
-L(160bytesin256):
- add $160, %rdi
- add $160, %rsi
- jmp L(16bytes)
-L(144bytesin256):
- add $144, %rdi
- add $144, %rsi
- jmp L(16bytes)
-L(128bytesin256):
- add $128, %rdi
- add $128, %rsi
- jmp L(16bytes)
-L(112bytesin256):
- add $112, %rdi
- add $112, %rsi
- jmp L(16bytes)
-L(96bytesin256):
- add $96, %rdi
- add $96, %rsi
- jmp L(16bytes)
-L(80bytesin256):
- add $80, %rdi
- add $80, %rsi
- jmp L(16bytes)
-L(64bytesin256):
- add $64, %rdi
- add $64, %rsi
- jmp L(16bytes)
-L(48bytesin256):
- add $16, %rdi
- add $16, %rsi
-L(32bytesin256):
- add $16, %rdi
- add $16, %rsi
-L(16bytesin256):
- add $16, %rdi
- add $16, %rsi
-L(16bytes):
- mov -16(%rdi), %rax
- mov -16(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
-L(8bytes):
- mov -8(%rdi), %rax
- mov -8(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- xor %eax, %eax
- ret
-
- .p2align 4
-L(12bytes):
- mov -12(%rdi), %rax
- mov -12(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
-L(4bytes):
- mov -4(%rsi), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -4(%rdi), %eax
- cmp %eax, %ecx
-# else
- cmp -4(%rdi), %ecx
-# endif
- jne L(diffin4bytes)
-L(0bytes):
- xor %eax, %eax
- ret
-
-# ifndef USE_AS_WMEMCMP
-/* unreal case for wmemcmp */
- .p2align 4
-L(65bytes):
- movdqu -65(%rdi), %xmm1
- movdqu -65(%rsi), %xmm2
- mov $-65, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(49bytes):
- movdqu -49(%rdi), %xmm1
- movdqu -49(%rsi), %xmm2
- mov $-49, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(33bytes):
- movdqu -33(%rdi), %xmm1
- movdqu -33(%rsi), %xmm2
- mov $-33, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(17bytes):
- mov -17(%rdi), %rax
- mov -17(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
-L(9bytes):
- mov -9(%rdi), %rax
- mov -9(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- movzbl -1(%rdi), %eax
- movzbl -1(%rsi), %edx
- sub %edx, %eax
- ret
-
- .p2align 4
-L(13bytes):
- mov -13(%rdi), %rax
- mov -13(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- mov -8(%rdi), %rax
- mov -8(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- xor %eax, %eax
- ret
-
- .p2align 4
-L(5bytes):
- mov -5(%rdi), %eax
- mov -5(%rsi), %ecx
- cmp %eax, %ecx
- jne L(diffin4bytes)
- movzbl -1(%rdi), %eax
- movzbl -1(%rsi), %edx
- sub %edx, %eax
- ret
-
- .p2align 4
-L(66bytes):
- movdqu -66(%rdi), %xmm1
- movdqu -66(%rsi), %xmm2
- mov $-66, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(50bytes):
- movdqu -50(%rdi), %xmm1
- movdqu -50(%rsi), %xmm2
- mov $-50, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(34bytes):
- movdqu -34(%rdi), %xmm1
- movdqu -34(%rsi), %xmm2
- mov $-34, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(18bytes):
- mov -18(%rdi), %rax
- mov -18(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
-L(10bytes):
- mov -10(%rdi), %rax
- mov -10(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- movzwl -2(%rdi), %eax
- movzwl -2(%rsi), %ecx
- cmp %cl, %al
- jne L(end)
- and $0xffff, %eax
- and $0xffff, %ecx
- sub %ecx, %eax
- ret
-
- .p2align 4
-L(14bytes):
- mov -14(%rdi), %rax
- mov -14(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- mov -8(%rdi), %rax
- mov -8(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- xor %eax, %eax
- ret
-
- .p2align 4
-L(6bytes):
- mov -6(%rdi), %eax
- mov -6(%rsi), %ecx
- cmp %eax, %ecx
- jne L(diffin4bytes)
-L(2bytes):
- movzwl -2(%rsi), %ecx
- movzwl -2(%rdi), %eax
- cmp %cl, %al
- jne L(end)
- and $0xffff, %eax
- and $0xffff, %ecx
- sub %ecx, %eax
- ret
-
- .p2align 4
-L(67bytes):
- movdqu -67(%rdi), %xmm2
- movdqu -67(%rsi), %xmm1
- mov $-67, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(51bytes):
- movdqu -51(%rdi), %xmm2
- movdqu -51(%rsi), %xmm1
- mov $-51, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(35bytes):
- movdqu -35(%rsi), %xmm1
- movdqu -35(%rdi), %xmm2
- mov $-35, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(19bytes):
- mov -19(%rdi), %rax
- mov -19(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
-L(11bytes):
- mov -11(%rdi), %rax
- mov -11(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- mov -4(%rdi), %eax
- mov -4(%rsi), %ecx
- cmp %eax, %ecx
- jne L(diffin4bytes)
- xor %eax, %eax
- ret
-
- .p2align 4
-L(15bytes):
- mov -15(%rdi), %rax
- mov -15(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- mov -8(%rdi), %rax
- mov -8(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- xor %eax, %eax
- ret
-
- .p2align 4
-L(7bytes):
- mov -7(%rdi), %eax
- mov -7(%rsi), %ecx
- cmp %eax, %ecx
- jne L(diffin4bytes)
- mov -4(%rdi), %eax
- mov -4(%rsi), %ecx
- cmp %eax, %ecx
- jne L(diffin4bytes)
- xor %eax, %eax
- ret
-
- .p2align 4
-L(3bytes):
- movzwl -3(%rdi), %eax
- movzwl -3(%rsi), %ecx
- cmp %eax, %ecx
- jne L(diffin2bytes)
-L(1bytes):
- movzbl -1(%rdi), %eax
- movzbl -1(%rsi), %ecx
- sub %ecx, %eax
- ret
-# endif
-
- .p2align 4
-L(68bytes):
- movdqu -68(%rdi), %xmm2
- movdqu -68(%rsi), %xmm1
- mov $-68, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(52bytes):
- movdqu -52(%rdi), %xmm2
- movdqu -52(%rsi), %xmm1
- mov $-52, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(36bytes):
- movdqu -36(%rdi), %xmm2
- movdqu -36(%rsi), %xmm1
- mov $-36, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(20bytes):
- movdqu -20(%rdi), %xmm2
- movdqu -20(%rsi), %xmm1
- mov $-20, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- mov -4(%rsi), %ecx
-
-# ifndef USE_AS_WMEMCMP
- mov -4(%rdi), %eax
- cmp %eax, %ecx
-# else
- cmp -4(%rdi), %ecx
-# endif
- jne L(diffin4bytes)
- xor %eax, %eax
- ret
-
-# ifndef USE_AS_WMEMCMP
-/* unreal cases for wmemcmp */
- .p2align 4
-L(69bytes):
- movdqu -69(%rsi), %xmm1
- movdqu -69(%rdi), %xmm2
- mov $-69, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(53bytes):
- movdqu -53(%rsi), %xmm1
- movdqu -53(%rdi), %xmm2
- mov $-53, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(37bytes):
- movdqu -37(%rsi), %xmm1
- movdqu -37(%rdi), %xmm2
- mov $-37, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(21bytes):
- movdqu -21(%rsi), %xmm1
- movdqu -21(%rdi), %xmm2
- mov $-21, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- mov -8(%rdi), %rax
- mov -8(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- xor %eax, %eax
- ret
-
- .p2align 4
-L(70bytes):
- movdqu -70(%rsi), %xmm1
- movdqu -70(%rdi), %xmm2
- mov $-70, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(54bytes):
- movdqu -54(%rsi), %xmm1
- movdqu -54(%rdi), %xmm2
- mov $-54, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(38bytes):
- movdqu -38(%rsi), %xmm1
- movdqu -38(%rdi), %xmm2
- mov $-38, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(22bytes):
- movdqu -22(%rsi), %xmm1
- movdqu -22(%rdi), %xmm2
- mov $-22, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- mov -8(%rdi), %rax
- mov -8(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- xor %eax, %eax
- ret
-
- .p2align 4
-L(71bytes):
- movdqu -71(%rsi), %xmm1
- movdqu -71(%rdi), %xmm2
- mov $-71, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(55bytes):
- movdqu -55(%rdi), %xmm2
- movdqu -55(%rsi), %xmm1
- mov $-55, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(39bytes):
- movdqu -39(%rdi), %xmm2
- movdqu -39(%rsi), %xmm1
- mov $-39, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(23bytes):
- movdqu -23(%rdi), %xmm2
- movdqu -23(%rsi), %xmm1
- mov $-23, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- mov -8(%rdi), %rax
- mov -8(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- xor %eax, %eax
- ret
-# endif
-
- .p2align 4
-L(72bytes):
- movdqu -72(%rsi), %xmm1
- movdqu -72(%rdi), %xmm2
- mov $-72, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(56bytes):
- movdqu -56(%rdi), %xmm2
- movdqu -56(%rsi), %xmm1
- mov $-56, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(40bytes):
- movdqu -40(%rdi), %xmm2
- movdqu -40(%rsi), %xmm1
- mov $-40, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(24bytes):
- movdqu -24(%rdi), %xmm2
- movdqu -24(%rsi), %xmm1
- mov $-24, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-
- mov -8(%rsi), %rcx
- mov -8(%rdi), %rax
- cmp %rax, %rcx
- jne L(diffin8bytes)
- xor %eax, %eax
- ret
-
-# ifndef USE_AS_WMEMCMP
-/* unreal cases for wmemcmp */
- .p2align 4
-L(73bytes):
- movdqu -73(%rsi), %xmm1
- movdqu -73(%rdi), %xmm2
- mov $-73, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(57bytes):
- movdqu -57(%rdi), %xmm2
- movdqu -57(%rsi), %xmm1
- mov $-57, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(41bytes):
- movdqu -41(%rdi), %xmm2
- movdqu -41(%rsi), %xmm1
- mov $-41, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(25bytes):
- movdqu -25(%rdi), %xmm2
- movdqu -25(%rsi), %xmm1
- mov $-25, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- mov -9(%rdi), %rax
- mov -9(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- movzbl -1(%rdi), %eax
- movzbl -1(%rsi), %ecx
- sub %ecx, %eax
- ret
-
- .p2align 4
-L(74bytes):
- movdqu -74(%rsi), %xmm1
- movdqu -74(%rdi), %xmm2
- mov $-74, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(58bytes):
- movdqu -58(%rdi), %xmm2
- movdqu -58(%rsi), %xmm1
- mov $-58, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(42bytes):
- movdqu -42(%rdi), %xmm2
- movdqu -42(%rsi), %xmm1
- mov $-42, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(26bytes):
- movdqu -26(%rdi), %xmm2
- movdqu -26(%rsi), %xmm1
- mov $-26, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- mov -10(%rdi), %rax
- mov -10(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- movzwl -2(%rdi), %eax
- movzwl -2(%rsi), %ecx
- jmp L(diffin2bytes)
-
- .p2align 4
-L(75bytes):
- movdqu -75(%rsi), %xmm1
- movdqu -75(%rdi), %xmm2
- mov $-75, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(59bytes):
- movdqu -59(%rdi), %xmm2
- movdqu -59(%rsi), %xmm1
- mov $-59, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(43bytes):
- movdqu -43(%rdi), %xmm2
- movdqu -43(%rsi), %xmm1
- mov $-43, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(27bytes):
- movdqu -27(%rdi), %xmm2
- movdqu -27(%rsi), %xmm1
- mov $-27, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- mov -11(%rdi), %rax
- mov -11(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- mov -4(%rdi), %eax
- mov -4(%rsi), %ecx
- cmp %eax, %ecx
- jne L(diffin4bytes)
- xor %eax, %eax
- ret
-# endif
- .p2align 4
-L(76bytes):
- movdqu -76(%rsi), %xmm1
- movdqu -76(%rdi), %xmm2
- mov $-76, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(60bytes):
- movdqu -60(%rdi), %xmm2
- movdqu -60(%rsi), %xmm1
- mov $-60, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(44bytes):
- movdqu -44(%rdi), %xmm2
- movdqu -44(%rsi), %xmm1
- mov $-44, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(28bytes):
- movdqu -28(%rdi), %xmm2
- movdqu -28(%rsi), %xmm1
- mov $-28, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- mov -12(%rdi), %rax
- mov -12(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- mov -4(%rsi), %ecx
-# ifndef USE_AS_WMEMCMP
- mov -4(%rdi), %eax
- cmp %eax, %ecx
-# else
- cmp -4(%rdi), %ecx
-# endif
- jne L(diffin4bytes)
- xor %eax, %eax
- ret
-
-# ifndef USE_AS_WMEMCMP
-/* unreal cases for wmemcmp */
- .p2align 4
-L(77bytes):
- movdqu -77(%rsi), %xmm1
- movdqu -77(%rdi), %xmm2
- mov $-77, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(61bytes):
- movdqu -61(%rdi), %xmm2
- movdqu -61(%rsi), %xmm1
- mov $-61, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(45bytes):
- movdqu -45(%rdi), %xmm2
- movdqu -45(%rsi), %xmm1
- mov $-45, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(29bytes):
- movdqu -29(%rdi), %xmm2
- movdqu -29(%rsi), %xmm1
- mov $-29, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-
- mov -13(%rdi), %rax
- mov -13(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
-
- mov -8(%rdi), %rax
- mov -8(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- xor %eax, %eax
- ret
-
- .p2align 4
-L(78bytes):
- movdqu -78(%rsi), %xmm1
- movdqu -78(%rdi), %xmm2
- mov $-78, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(62bytes):
- movdqu -62(%rdi), %xmm2
- movdqu -62(%rsi), %xmm1
- mov $-62, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(46bytes):
- movdqu -46(%rdi), %xmm2
- movdqu -46(%rsi), %xmm1
- mov $-46, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(30bytes):
- movdqu -30(%rdi), %xmm2
- movdqu -30(%rsi), %xmm1
- mov $-30, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- mov -14(%rdi), %rax
- mov -14(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- mov -8(%rdi), %rax
- mov -8(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- xor %eax, %eax
- ret
-
- .p2align 4
-L(79bytes):
- movdqu -79(%rsi), %xmm1
- movdqu -79(%rdi), %xmm2
- mov $-79, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(63bytes):
- movdqu -63(%rdi), %xmm2
- movdqu -63(%rsi), %xmm1
- mov $-63, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(47bytes):
- movdqu -47(%rdi), %xmm2
- movdqu -47(%rsi), %xmm1
- mov $-47, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(31bytes):
- movdqu -31(%rdi), %xmm2
- movdqu -31(%rsi), %xmm1
- mov $-31, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
- mov -15(%rdi), %rax
- mov -15(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- mov -8(%rdi), %rax
- mov -8(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- xor %eax, %eax
- ret
-# endif
- .p2align 4
-L(64bytes):
- movdqu -64(%rdi), %xmm2
- movdqu -64(%rsi), %xmm1
- mov $-64, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(48bytes):
- movdqu -48(%rdi), %xmm2
- movdqu -48(%rsi), %xmm1
- mov $-48, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-L(32bytes):
- movdqu -32(%rdi), %xmm2
- movdqu -32(%rsi), %xmm1
- mov $-32, %dl
- pxor %xmm1, %xmm2
- ptest %xmm2, %xmm0
- jnc L(less16bytes)
-
- mov -16(%rdi), %rax
- mov -16(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
-
- mov -8(%rdi), %rax
- mov -8(%rsi), %rcx
- cmp %rax, %rcx
- jne L(diffin8bytes)
- xor %eax, %eax
- ret
-
-/*
- * Aligned 8 bytes to avoid 2 branch "taken" in one 16 alinged code block.
- */
- .p2align 3
-L(less16bytes):
- movsbq %dl, %rdx
- mov (%rsi, %rdx), %rcx
- mov (%rdi, %rdx), %rax
- cmp %rax, %rcx
- jne L(diffin8bytes)
- mov 8(%rsi, %rdx), %rcx
- mov 8(%rdi, %rdx), %rax
-L(diffin8bytes):
- cmp %eax, %ecx
- jne L(diffin4bytes)
- shr $32, %rcx
- shr $32, %rax
-
-# ifdef USE_AS_WMEMCMP
-/* for wmemcmp */
- cmp %eax, %ecx
- jne L(diffin4bytes)
- xor %eax, %eax
- ret
-# endif
-
-L(diffin4bytes):
-# ifndef USE_AS_WMEMCMP
- cmp %cx, %ax
- jne L(diffin2bytes)
- shr $16, %ecx
- shr $16, %eax
-L(diffin2bytes):
- cmp %cl, %al
- jne L(end)
- and $0xffff, %eax
- and $0xffff, %ecx
- sub %ecx, %eax
- ret
-
- .p2align 4
-L(end):
- and $0xff, %eax
- and $0xff, %ecx
- sub %ecx, %eax
- ret
-# else
-
-/* for wmemcmp */
- mov $1, %eax
- jl L(nequal_bigger)
- neg %eax
- ret
-
- .p2align 4
-L(nequal_bigger):
- ret
-
-L(unreal_case):
- xor %eax, %eax
- ret
-# endif
-
-END (MEMCMP)
-
- .section .rodata.sse4.1,"a",@progbits
- .p2align 3
-# ifndef USE_AS_WMEMCMP
-L(table_64bytes):
- .int JMPTBL (L(0bytes), L(table_64bytes))
- .int JMPTBL (L(1bytes), L(table_64bytes))
- .int JMPTBL (L(2bytes), L(table_64bytes))
- .int JMPTBL (L(3bytes), L(table_64bytes))
- .int JMPTBL (L(4bytes), L(table_64bytes))
- .int JMPTBL (L(5bytes), L(table_64bytes))
- .int JMPTBL (L(6bytes), L(table_64bytes))
- .int JMPTBL (L(7bytes), L(table_64bytes))
- .int JMPTBL (L(8bytes), L(table_64bytes))
- .int JMPTBL (L(9bytes), L(table_64bytes))
- .int JMPTBL (L(10bytes), L(table_64bytes))
- .int JMPTBL (L(11bytes), L(table_64bytes))
- .int JMPTBL (L(12bytes), L(table_64bytes))
- .int JMPTBL (L(13bytes), L(table_64bytes))
- .int JMPTBL (L(14bytes), L(table_64bytes))
- .int JMPTBL (L(15bytes), L(table_64bytes))
- .int JMPTBL (L(16bytes), L(table_64bytes))
- .int JMPTBL (L(17bytes), L(table_64bytes))
- .int JMPTBL (L(18bytes), L(table_64bytes))
- .int JMPTBL (L(19bytes), L(table_64bytes))
- .int JMPTBL (L(20bytes), L(table_64bytes))
- .int JMPTBL (L(21bytes), L(table_64bytes))
- .int JMPTBL (L(22bytes), L(table_64bytes))
- .int JMPTBL (L(23bytes), L(table_64bytes))
- .int JMPTBL (L(24bytes), L(table_64bytes))
- .int JMPTBL (L(25bytes), L(table_64bytes))
- .int JMPTBL (L(26bytes), L(table_64bytes))
- .int JMPTBL (L(27bytes), L(table_64bytes))
- .int JMPTBL (L(28bytes), L(table_64bytes))
- .int JMPTBL (L(29bytes), L(table_64bytes))
- .int JMPTBL (L(30bytes), L(table_64bytes))
- .int JMPTBL (L(31bytes), L(table_64bytes))
- .int JMPTBL (L(32bytes), L(table_64bytes))
- .int JMPTBL (L(33bytes), L(table_64bytes))
- .int JMPTBL (L(34bytes), L(table_64bytes))
- .int JMPTBL (L(35bytes), L(table_64bytes))
- .int JMPTBL (L(36bytes), L(table_64bytes))
- .int JMPTBL (L(37bytes), L(table_64bytes))
- .int JMPTBL (L(38bytes), L(table_64bytes))
- .int JMPTBL (L(39bytes), L(table_64bytes))
- .int JMPTBL (L(40bytes), L(table_64bytes))
- .int JMPTBL (L(41bytes), L(table_64bytes))
- .int JMPTBL (L(42bytes), L(table_64bytes))
- .int JMPTBL (L(43bytes), L(table_64bytes))
- .int JMPTBL (L(44bytes), L(table_64bytes))
- .int JMPTBL (L(45bytes), L(table_64bytes))
- .int JMPTBL (L(46bytes), L(table_64bytes))
- .int JMPTBL (L(47bytes), L(table_64bytes))
- .int JMPTBL (L(48bytes), L(table_64bytes))
- .int JMPTBL (L(49bytes), L(table_64bytes))
- .int JMPTBL (L(50bytes), L(table_64bytes))
- .int JMPTBL (L(51bytes), L(table_64bytes))
- .int JMPTBL (L(52bytes), L(table_64bytes))
- .int JMPTBL (L(53bytes), L(table_64bytes))
- .int JMPTBL (L(54bytes), L(table_64bytes))
- .int JMPTBL (L(55bytes), L(table_64bytes))
- .int JMPTBL (L(56bytes), L(table_64bytes))
- .int JMPTBL (L(57bytes), L(table_64bytes))
- .int JMPTBL (L(58bytes), L(table_64bytes))
- .int JMPTBL (L(59bytes), L(table_64bytes))
- .int JMPTBL (L(60bytes), L(table_64bytes))
- .int JMPTBL (L(61bytes), L(table_64bytes))
- .int JMPTBL (L(62bytes), L(table_64bytes))
- .int JMPTBL (L(63bytes), L(table_64bytes))
- .int JMPTBL (L(64bytes), L(table_64bytes))
- .int JMPTBL (L(65bytes), L(table_64bytes))
- .int JMPTBL (L(66bytes), L(table_64bytes))
- .int JMPTBL (L(67bytes), L(table_64bytes))
- .int JMPTBL (L(68bytes), L(table_64bytes))
- .int JMPTBL (L(69bytes), L(table_64bytes))
- .int JMPTBL (L(70bytes), L(table_64bytes))
- .int JMPTBL (L(71bytes), L(table_64bytes))
- .int JMPTBL (L(72bytes), L(table_64bytes))
- .int JMPTBL (L(73bytes), L(table_64bytes))
- .int JMPTBL (L(74bytes), L(table_64bytes))
- .int JMPTBL (L(75bytes), L(table_64bytes))
- .int JMPTBL (L(76bytes), L(table_64bytes))
- .int JMPTBL (L(77bytes), L(table_64bytes))
- .int JMPTBL (L(78bytes), L(table_64bytes))
- .int JMPTBL (L(79bytes), L(table_64bytes))
-# else
-L(table_64bytes):
- .int JMPTBL (L(0bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(4bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(8bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(12bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(16bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(20bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(24bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(28bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(32bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(36bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(40bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(44bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(48bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(52bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(56bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(60bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(64bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(68bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(72bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(76bytes), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
- .int JMPTBL (L(unreal_case), L(table_64bytes))
-# endif
-#endif
# define VMOVNT vmovntdq
# define VMOVU vmovdqu
# define VMOVA vmovdqa
-
+# define MOV_SIZE 4
# define ZERO_UPPER_VEC_REGISTERS_RETURN \
ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
# define VMOVNT vmovntdq
# define VMOVU vmovdqu
# define VMOVA vmovdqa
-
+# define MOV_SIZE 4
# define SECTION(p) p##.avx
# define MEMMOVE_SYMBOL(p,s) p##_avx_##s
# define VMOVU vmovdqu64
# define VMOVA vmovdqa64
# define VZEROUPPER
-
+# define MOV_SIZE 6
# define SECTION(p) p##.evex512
# define MEMMOVE_SYMBOL(p,s) p##_avx512_##s
--- /dev/null
+/* memcpy/mempcpy/memmove implemented with rep movsb
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+
+#include <sysdep.h>
+
+#if defined USE_MULTIARCH && IS_IN (libc)
+ .text
+ENTRY (__mempcpy_chk_erms) /* Checked entry: validates the size pair in RDX/RCX before the copy. */
+ cmp %RDX_LP, %RCX_LP /* Flags from RCX - RDX; presumably RDX = copy length, RCX = destination object size -- confirm against the __mempcpy_chk prototype. */
+ jb HIDDEN_JUMPTARGET (__chk_fail) /* Unsigned RCX < RDX: divert to __chk_fail. */
+END (__mempcpy_chk_erms) /* No ret/jmp precedes END, so the passing case presumably runs on into the entry that follows. */
+
+/* Only used to measure performance of REP MOVSB. */
+ENTRY (__mempcpy_erms)
+ mov %RDI_LP, %RAX_LP /* RAX starts as the destination pointer. */
+ /* Skip zero length. */
+ test %RDX_LP, %RDX_LP
+ jz 2f /* The next "2:" label is the ret inside __memmove_erms. */
+ add %RDX_LP, %RAX_LP /* RAX = destination + length. */
+ jmp L(start_movsb) /* Reuse the copy body that lives in __memmove_erms. */
+END (__mempcpy_erms)
+
+ENTRY (__memmove_chk_erms) /* Checked entry: validates the size pair in RDX/RCX before the copy. */
+ cmp %RDX_LP, %RCX_LP /* Flags from RCX - RDX; presumably RDX = copy length, RCX = destination object size -- confirm against the __memmove_chk prototype. */
+ jb HIDDEN_JUMPTARGET (__chk_fail) /* Unsigned RCX < RDX: divert to __chk_fail. */
+END (__memmove_chk_erms) /* No ret/jmp precedes END, so the passing case presumably runs on into __memmove_erms. */
+
+ENTRY (__memmove_erms) /* Copy RDX bytes from (RSI) to (RDI) with REP MOVSB, choosing the direction so overlap is safe. */
+ movq %rdi, %rax /* RAX = destination pointer; nothing below writes RAX. */
+ /* Skip zero length. */
+ test %RDX_LP, %RDX_LP
+ jz 2f
+L(start_movsb): /* Also entered from __mempcpy_erms with RAX already set. */
+ mov %RDX_LP, %RCX_LP /* REP takes its repeat count from RCX. */
+ cmp %RSI_LP, %RDI_LP
+ jb 1f /* Destination below source: ascending copy. */
+ /* Source == destination is less common. */
+ je 2f /* Same address: nothing to copy. */
+ lea (%rsi,%rcx), %RDX_LP /* RDX = source + length, one past the last source byte. */
+ cmp %RDX_LP, %RDI_LP
+ jb L(movsb_backward) /* Destination starts inside the source range: copy descending. */
+1:
+ rep movsb
+2:
+ ret
+L(movsb_backward):
+ leaq -1(%rdi,%rcx), %rdi /* RDI -> last destination byte. */
+ leaq -1(%rsi,%rcx), %rsi /* RSI -> last source byte. */
+ std /* Set the direction flag so MOVSB steps RSI/RDI downward. */
+ rep movsb
+ cld /* Clear the direction flag again before returning. */
+ ret
+END (__memmove_erms)
+strong_alias (__memmove_erms, __memcpy_erms) /* The memcpy names below reuse the memmove code. */
+strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
+#endif
# define VMOVU vmovdqu64
# define VMOVA vmovdqa64
# define VZEROUPPER
-
+# define MOV_SIZE 6
# define SECTION(p) p##.evex
# define MEMMOVE_SYMBOL(p,s) p##_evex_##s
# endif
#endif
+/* Whether to align before movsb. Ultimately we want 64-byte
+ alignment, and it is not worth loading 4x VEC for VEC_SIZE == 16. */
+#define ALIGN_MOVSB (VEC_SIZE > 16)
+/* Number of bytes to align movsb to. */
+#define MOVSB_ALIGN_TO 64
+
+#define SMALL_MOV_SIZE (MOV_SIZE <= 4) /* 1 when MOV_SIZE is at most 4, else 0. */
+#define LARGE_MOV_SIZE (MOV_SIZE > 4) /* 1 when MOV_SIZE exceeds 4, else 0. */
+
+#if SMALL_MOV_SIZE + LARGE_MOV_SIZE != 1 /* Exactly one of the two classes must hold. */
+# error MOV_SIZE Unknown
+#endif
+
+#if LARGE_MOV_SIZE
+# define SMALL_SIZE_OFFSET (4) /* Matches the 4 that L(less_vec) subtracts from rdx in this configuration. */
+#else
+# define SMALL_SIZE_OFFSET (0) /* L(less_vec) leaves rdx unbiased in this configuration. */
+#endif
+
#ifndef PAGE_SIZE
# define PAGE_SIZE 4096
#endif
# define LARGE_LOAD_SIZE (VEC_SIZE * 4)
#endif
-/* Amount to shift rdx by to compare for memcpy_large_4x. */
+/* Amount to shift __x86_shared_non_temporal_threshold by for
+ bound for memcpy_large_4x. This is essentially used to
+ indicate that the copy is far beyond the scope of L3
+ (assuming no user config x86_non_temporal_threshold) and to
+ use a more aggressively unrolled loop. NB: before
+ increasing the value also update initialization of
+ x86_non_temporal_threshold. */
#ifndef LOG_4X_MEMCPY_THRESH
# define LOG_4X_MEMCPY_THRESH 4
#endif
# endif
cmp $VEC_SIZE, %RDX_LP
jb L(less_vec)
+ /* Load regardless. */
+ VMOVU (%rsi), %VEC(0)
cmp $(VEC_SIZE * 2), %RDX_LP
ja L(more_2x_vec)
-#if !defined USE_MULTIARCH || !IS_IN (libc)
-L(last_2x_vec):
-#endif
/* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */
- VMOVU (%rsi), %VEC(0)
VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(1)
VMOVU %VEC(0), (%rdi)
VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx)
-#if !defined USE_MULTIARCH || !IS_IN (libc)
-L(nop):
- ret
+#if !(defined USE_MULTIARCH && IS_IN (libc))
+ ZERO_UPPER_VEC_REGISTERS_RETURN
#else
VZEROUPPER_RETURN
#endif
#if defined USE_MULTIARCH && IS_IN (libc)
END (MEMMOVE_SYMBOL (__memmove, unaligned))
-# if VEC_SIZE == 16
-ENTRY (__mempcpy_chk_erms)
- cmp %RDX_LP, %RCX_LP
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END (__mempcpy_chk_erms)
-
-/* Only used to measure performance of REP MOVSB. */
-ENTRY (__mempcpy_erms)
- mov %RDI_LP, %RAX_LP
- /* Skip zero length. */
- test %RDX_LP, %RDX_LP
- jz 2f
- add %RDX_LP, %RAX_LP
- jmp L(start_movsb)
-END (__mempcpy_erms)
-
-ENTRY (__memmove_chk_erms)
- cmp %RDX_LP, %RCX_LP
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END (__memmove_chk_erms)
-
-ENTRY (__memmove_erms)
- movq %rdi, %rax
- /* Skip zero length. */
- test %RDX_LP, %RDX_LP
- jz 2f
-L(start_movsb):
- mov %RDX_LP, %RCX_LP
- cmp %RSI_LP, %RDI_LP
- jb 1f
- /* Source == destination is less common. */
- je 2f
- lea (%rsi,%rcx), %RDX_LP
- cmp %RDX_LP, %RDI_LP
- jb L(movsb_backward)
-1:
- rep movsb
-2:
- ret
-L(movsb_backward):
- leaq -1(%rdi,%rcx), %rdi
- leaq -1(%rsi,%rcx), %rsi
- std
- rep movsb
- cld
- ret
-END (__memmove_erms)
-strong_alias (__memmove_erms, __memcpy_erms)
-strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
-# endif
-
# ifdef SHARED
ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
cmp %RDX_LP, %RCX_LP
END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
# endif
-ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
+ENTRY_P2ALIGN (MEMMOVE_SYMBOL (__memmove, unaligned_erms), 6)
movq %rdi, %rax
L(start_erms):
# ifdef __ILP32__
# endif
cmp $VEC_SIZE, %RDX_LP
jb L(less_vec)
+ /* Load regardless. */
+ VMOVU (%rsi), %VEC(0)
cmp $(VEC_SIZE * 2), %RDX_LP
ja L(movsb_more_2x_vec)
-L(last_2x_vec):
- /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */
- VMOVU (%rsi), %VEC(0)
- VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(1)
+ /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE.
+ */
+ VMOVU -VEC_SIZE(%rsi, %rdx), %VEC(1)
VMOVU %VEC(0), (%rdi)
- VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx)
+ VMOVU %VEC(1), -VEC_SIZE(%rdi, %rdx)
L(return):
-#if VEC_SIZE > 16
+# if VEC_SIZE > 16
ZERO_UPPER_VEC_REGISTERS_RETURN
-#else
+# else
ret
+# endif
#endif
-L(movsb):
- cmp __x86_rep_movsb_stop_threshold(%rip), %RDX_LP
- jae L(more_8x_vec)
- cmpq %rsi, %rdi
- jb 1f
- /* Source == destination is less common. */
- je L(nop)
- leaq (%rsi,%rdx), %r9
- cmpq %r9, %rdi
- /* Avoid slow backward REP MOVSB. */
- jb L(more_8x_vec_backward)
-# if AVOID_SHORT_DISTANCE_REP_MOVSB
- andl $X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB, __x86_string_control(%rip)
- jz 3f
- movq %rdi, %rcx
- subq %rsi, %rcx
- jmp 2f
-# endif
-1:
-# if AVOID_SHORT_DISTANCE_REP_MOVSB
- andl $X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB, __x86_string_control(%rip)
- jz 3f
- movq %rsi, %rcx
- subq %rdi, %rcx
-2:
-/* Avoid "rep movsb" if RCX, the distance between source and destination,
- is N*4GB + [1..63] with N >= 0. */
- cmpl $63, %ecx
- jbe L(more_2x_vec) /* Avoid "rep movsb" if ECX <= 63. */
-3:
-# endif
- mov %RDX_LP, %RCX_LP
- rep movsb
-L(nop):
+#if LARGE_MOV_SIZE
+ /* If LARGE_MOV_SIZE this fits in the aligning bytes between the
+ ENTRY block and L(less_vec). */
+ .p2align 4,, 8
+L(between_4_7):
+ /* From 4 to 7. No branch when size == 4. */
+ movl (%rsi), %ecx /* First 4 source bytes. */
+ movl (%rsi, %rdx), %esi /* Last 4 source bytes: L(less_vec) has already subtracted 4 from rdx in this configuration. */
+ movl %ecx, (%rdi)
+ movl %esi, (%rdi, %rdx) /* Both loads precede both stores, so overlapping buffers still copy correctly. */
ret
#endif
+ .p2align 4
L(less_vec):
/* Less than 1 VEC. */
#if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
# error Unsupported VEC_SIZE!
#endif
#if VEC_SIZE > 32
- cmpb $32, %dl
+ cmpl $32, %edx /* Dispatch on the size held in edx, largest range first. */
jae L(between_32_63)
#endif
#if VEC_SIZE > 16
- cmpb $16, %dl
+ cmpl $16, %edx
jae L(between_16_31)
#endif
- cmpb $8, %dl
+ cmpl $8, %edx
jae L(between_8_15)
- cmpb $4, %dl
+#if SMALL_MOV_SIZE
+ cmpl $4, %edx
+#else
+ subq $4, %rdx /* Bias rdx by -4; no borrow (so jae is taken) exactly when size >= 4. */
+#endif
jae L(between_4_7)
- cmpb $1, %dl
- ja L(between_2_3)
- jb 1f
- movzbl (%rsi), %ecx
+ cmpl $(1 - SMALL_SIZE_OFFSET), %edx /* Compare size with 1, allowing for the bias applied to rdx above. */
+ jl L(copy_0) /* Signed less-than (edx may be negative after the bias): size 0. */
+ movb (%rsi), %cl /* First source byte; mov leaves the flags intact for the je below. */
+ je L(copy_1) /* Size 1: only the first byte is stored. */
+ movzwl (-2 + SMALL_SIZE_OFFSET)(%rsi, %rdx), %esi /* Size 2 or 3: last two source bytes. */
+ movw %si, (-2 + SMALL_SIZE_OFFSET)(%rdi, %rdx)
+L(copy_1):
+ movb %cl, (%rdi)
-1:
+L(copy_0):
+ ret
+
+#if SMALL_MOV_SIZE
+ .p2align 4,, 8
+L(between_4_7):
+ /* From 4 to 7. No branch when size == 4. */
+ movl -4(%rsi, %rdx), %ecx /* Last 4 source bytes. */
+ movl (%rsi), %esi /* First 4 source bytes; rsi is not used as a pointer after this. */
+ movl %ecx, -4(%rdi, %rdx)
+ movl %esi, (%rdi) /* The two 4-byte stores overlap when size < 8. */
+ ret
+#endif
+
+#if VEC_SIZE > 16
+ /* From 16 to 31. No branch when size == 16. */
+ .p2align 4,, 8
+L(between_16_31):
+ vmovdqu (%rsi), %xmm0 /* First 16 source bytes. */
+ vmovdqu -16(%rsi, %rdx), %xmm1 /* Last 16 source bytes. */
+ vmovdqu %xmm0, (%rdi)
+ vmovdqu %xmm1, -16(%rdi, %rdx) /* Overlaps the first store when size < 32. */
+ /* No ymm registers have been touched. */
+ ret
+#endif
+
#if VEC_SIZE > 32
+ .p2align 4,, 10
L(between_32_63):
/* From 32 to 63. No branch when size == 32. */
VMOVU (%rsi), %YMM0
- VMOVU -32(%rsi,%rdx), %YMM1
+ VMOVU -32(%rsi, %rdx), %YMM1
VMOVU %YMM0, (%rdi)
- VMOVU %YMM1, -32(%rdi,%rdx)
- VZEROUPPER_RETURN
-#endif
-#if VEC_SIZE > 16
- /* From 16 to 31. No branch when size == 16. */
-L(between_16_31):
- VMOVU (%rsi), %XMM0
- VMOVU -16(%rsi,%rdx), %XMM1
- VMOVU %XMM0, (%rdi)
- VMOVU %XMM1, -16(%rdi,%rdx)
+ VMOVU %YMM1, -32(%rdi, %rdx)
VZEROUPPER_RETURN
#endif
+
+ .p2align 4,, 10
L(between_8_15):
/* From 8 to 15. No branch when size == 8. */
- movq -8(%rsi,%rdx), %rcx
+ movq -8(%rsi, %rdx), %rcx
movq (%rsi), %rsi
- movq %rcx, -8(%rdi,%rdx)
movq %rsi, (%rdi)
- ret
-L(between_4_7):
- /* From 4 to 7. No branch when size == 4. */
- movl -4(%rsi,%rdx), %ecx
- movl (%rsi), %esi
- movl %ecx, -4(%rdi,%rdx)
- movl %esi, (%rdi)
- ret
-L(between_2_3):
- /* From 2 to 3. No branch when size == 2. */
- movzwl -2(%rsi,%rdx), %ecx
- movzwl (%rsi), %esi
- movw %cx, -2(%rdi,%rdx)
- movw %si, (%rdi)
+ movq %rcx, -8(%rdi, %rdx)
ret
+ .p2align 4,, 10
+L(last_4x_vec):
+ /* Copy from 2 * VEC + 1 to 4 * VEC, inclusively. */
+
+ /* VEC(0) and VEC(1) have already been loaded. */
+ VMOVU -VEC_SIZE(%rsi, %rdx), %VEC(2)
+ VMOVU -(VEC_SIZE * 2)(%rsi, %rdx), %VEC(3)
+ VMOVU %VEC(0), (%rdi)
+ VMOVU %VEC(1), VEC_SIZE(%rdi)
+ VMOVU %VEC(2), -VEC_SIZE(%rdi, %rdx)
+ VMOVU %VEC(3), -(VEC_SIZE * 2)(%rdi, %rdx)
+ VZEROUPPER_RETURN
+
+ .p2align 4
#if defined USE_MULTIARCH && IS_IN (libc)
L(movsb_more_2x_vec):
cmp __x86_rep_movsb_threshold(%rip), %RDX_LP
ja L(movsb)
#endif
L(more_2x_vec):
- /* More than 2 * VEC and there may be overlap between destination
- and source. */
+ /* More than 2 * VEC and there may be overlap between
+ destination and source. */
cmpq $(VEC_SIZE * 8), %rdx
ja L(more_8x_vec)
+ /* Load VEC(1) regardless. VEC(0) has already been loaded. */
+ VMOVU VEC_SIZE(%rsi), %VEC(1)
cmpq $(VEC_SIZE * 4), %rdx
jbe L(last_4x_vec)
- /* Copy from 4 * VEC + 1 to 8 * VEC, inclusively. */
- VMOVU (%rsi), %VEC(0)
- VMOVU VEC_SIZE(%rsi), %VEC(1)
+ /* Copy from 4 * VEC + 1 to 8 * VEC, inclusively. */
VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2)
VMOVU (VEC_SIZE * 3)(%rsi), %VEC(3)
- VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(4)
- VMOVU -(VEC_SIZE * 2)(%rsi,%rdx), %VEC(5)
- VMOVU -(VEC_SIZE * 3)(%rsi,%rdx), %VEC(6)
- VMOVU -(VEC_SIZE * 4)(%rsi,%rdx), %VEC(7)
+ VMOVU -VEC_SIZE(%rsi, %rdx), %VEC(4)
+ VMOVU -(VEC_SIZE * 2)(%rsi, %rdx), %VEC(5)
+ VMOVU -(VEC_SIZE * 3)(%rsi, %rdx), %VEC(6)
+ VMOVU -(VEC_SIZE * 4)(%rsi, %rdx), %VEC(7)
VMOVU %VEC(0), (%rdi)
VMOVU %VEC(1), VEC_SIZE(%rdi)
VMOVU %VEC(2), (VEC_SIZE * 2)(%rdi)
VMOVU %VEC(3), (VEC_SIZE * 3)(%rdi)
- VMOVU %VEC(4), -VEC_SIZE(%rdi,%rdx)
- VMOVU %VEC(5), -(VEC_SIZE * 2)(%rdi,%rdx)
- VMOVU %VEC(6), -(VEC_SIZE * 3)(%rdi,%rdx)
- VMOVU %VEC(7), -(VEC_SIZE * 4)(%rdi,%rdx)
- VZEROUPPER_RETURN
-L(last_4x_vec):
- /* Copy from 2 * VEC + 1 to 4 * VEC, inclusively. */
- VMOVU (%rsi), %VEC(0)
- VMOVU VEC_SIZE(%rsi), %VEC(1)
- VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(2)
- VMOVU -(VEC_SIZE * 2)(%rsi,%rdx), %VEC(3)
- VMOVU %VEC(0), (%rdi)
- VMOVU %VEC(1), VEC_SIZE(%rdi)
- VMOVU %VEC(2), -VEC_SIZE(%rdi,%rdx)
- VMOVU %VEC(3), -(VEC_SIZE * 2)(%rdi,%rdx)
+ VMOVU %VEC(4), -VEC_SIZE(%rdi, %rdx)
+ VMOVU %VEC(5), -(VEC_SIZE * 2)(%rdi, %rdx)
+ VMOVU %VEC(6), -(VEC_SIZE * 3)(%rdi, %rdx)
+ VMOVU %VEC(7), -(VEC_SIZE * 4)(%rdi, %rdx)
VZEROUPPER_RETURN
+ .p2align 4,, 4
L(more_8x_vec):
+ movq %rdi, %rcx
+ subq %rsi, %rcx
+ /* Go to backwards temporal copy if overlap no matter what as
+ backward REP MOVSB is slow and we don't want to use NT stores if
+ there is overlap. */
+ cmpq %rdx, %rcx
+ /* L(more_8x_vec_backward_check_nop) checks for src == dst. */
+ jb L(more_8x_vec_backward_check_nop)
/* Check if non-temporal move candidate. */
#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
/* Check non-temporal store threshold. */
- cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP
+ cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP
ja L(large_memcpy_2x)
#endif
- /* Entry if rdx is greater than non-temporal threshold but there
- is overlap. */
+ /* To reach this point there cannot be overlap and dst > src. So
+ check for overlap and src > dst in which case correctness
+ requires forward copy. Otherwise decide between backward/forward
+ copy depending on address aliasing. */
+
+ /* Entry if rdx is greater than __x86_rep_movsb_stop_threshold
+ but less than __x86_shared_non_temporal_threshold. */
L(more_8x_vec_check):
- cmpq %rsi, %rdi
- ja L(more_8x_vec_backward)
- /* Source == destination is less common. */
- je L(nop)
- /* Load the first VEC and last 4 * VEC to support overlapping
- addresses. */
- VMOVU (%rsi), %VEC(4)
+ /* rcx contains dst - src. Add back length (rdx). */
+ leaq (%rcx, %rdx), %r8
+ /* If r8 has different sign than rcx then there is overlap so we
+ must do forward copy. */
+ xorq %rcx, %r8
+ /* Isolate just sign bit of r8. */
+ shrq $63, %r8
+ /* Get 4k difference dst - src. */
+ andl $(PAGE_SIZE - 256), %ecx
+ /* If r8 is non-zero must do forward for correctness. Otherwise
+ if ecx is zero there is 4k False Aliasing so do backward
+ copy. */
+ addl %r8d, %ecx
+ jz L(more_8x_vec_backward)
+
+ /* if rdx is greater than __x86_shared_non_temporal_threshold
+ but there is overlap, or from short distance movsb. */
+L(more_8x_vec_forward):
+ /* Load first and last 4 * VEC to support overlapping addresses.
+ */
+
+ /* First vec was already loaded into VEC(0). */
VMOVU -VEC_SIZE(%rsi, %rdx), %VEC(5)
VMOVU -(VEC_SIZE * 2)(%rsi, %rdx), %VEC(6)
+ /* Save beginning of dst. */
+ movq %rdi, %rcx
+ /* Align dst to VEC_SIZE - 1. */
+ orq $(VEC_SIZE - 1), %rdi
VMOVU -(VEC_SIZE * 3)(%rsi, %rdx), %VEC(7)
VMOVU -(VEC_SIZE * 4)(%rsi, %rdx), %VEC(8)
- /* Save start and stop of the destination buffer. */
- movq %rdi, %r11
- leaq -VEC_SIZE(%rdi, %rdx), %rcx
- /* Align destination for aligned stores in the loop. Compute
- how much destination is misaligned. */
- movq %rdi, %r8
- andq $(VEC_SIZE - 1), %r8
- /* Get the negative of offset for alignment. */
- subq $VEC_SIZE, %r8
- /* Adjust source. */
- subq %r8, %rsi
- /* Adjust destination which should be aligned now. */
- subq %r8, %rdi
- /* Adjust length. */
- addq %r8, %rdx
- .p2align 4
+ /* Subtract dst from src. Add back after dst aligned. */
+ subq %rcx, %rsi
+ /* Finish aligning dst. */
+ incq %rdi
+ /* Restore src adjusted with new value for aligned dst. */
+ addq %rdi, %rsi
+ /* Store end of buffer minus tail in rdx. */
+ leaq (VEC_SIZE * -4)(%rcx, %rdx), %rdx
+
+ /* Don't use multi-byte nop to align. */
+ .p2align 4,, 11
L(loop_4x_vec_forward):
/* Copy 4 * VEC a time forward. */
- VMOVU (%rsi), %VEC(0)
- VMOVU VEC_SIZE(%rsi), %VEC(1)
- VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2)
- VMOVU (VEC_SIZE * 3)(%rsi), %VEC(3)
+ VMOVU (%rsi), %VEC(1)
+ VMOVU VEC_SIZE(%rsi), %VEC(2)
+ VMOVU (VEC_SIZE * 2)(%rsi), %VEC(3)
+ VMOVU (VEC_SIZE * 3)(%rsi), %VEC(4)
subq $-(VEC_SIZE * 4), %rsi
- addq $-(VEC_SIZE * 4), %rdx
- VMOVA %VEC(0), (%rdi)
- VMOVA %VEC(1), VEC_SIZE(%rdi)
- VMOVA %VEC(2), (VEC_SIZE * 2)(%rdi)
- VMOVA %VEC(3), (VEC_SIZE * 3)(%rdi)
+ VMOVA %VEC(1), (%rdi)
+ VMOVA %VEC(2), VEC_SIZE(%rdi)
+ VMOVA %VEC(3), (VEC_SIZE * 2)(%rdi)
+ VMOVA %VEC(4), (VEC_SIZE * 3)(%rdi)
subq $-(VEC_SIZE * 4), %rdi
- cmpq $(VEC_SIZE * 4), %rdx
+ cmpq %rdi, %rdx
ja L(loop_4x_vec_forward)
/* Store the last 4 * VEC. */
- VMOVU %VEC(5), (%rcx)
- VMOVU %VEC(6), -VEC_SIZE(%rcx)
- VMOVU %VEC(7), -(VEC_SIZE * 2)(%rcx)
- VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx)
+ VMOVU %VEC(5), (VEC_SIZE * 3)(%rdx)
+ VMOVU %VEC(6), (VEC_SIZE * 2)(%rdx)
+ VMOVU %VEC(7), VEC_SIZE(%rdx)
+ VMOVU %VEC(8), (%rdx)
/* Store the first VEC. */
- VMOVU %VEC(4), (%r11)
+ VMOVU %VEC(0), (%rcx)
+ /* Keep L(nop_backward) target close to jmp for 2-byte encoding.
+ */
+L(nop_backward):
VZEROUPPER_RETURN
+ .p2align 4,, 8
+L(more_8x_vec_backward_check_nop):
+ /* rcx contains dst - src. Test for dst == src to skip all of
+ memmove. */
+ testq %rcx, %rcx
+ jz L(nop_backward)
L(more_8x_vec_backward):
/* Load the first 4 * VEC and last VEC to support overlapping
addresses. */
- VMOVU (%rsi), %VEC(4)
+
+ /* First vec was also loaded into VEC(0). */
VMOVU VEC_SIZE(%rsi), %VEC(5)
VMOVU (VEC_SIZE * 2)(%rsi), %VEC(6)
+ /* Beginning of region for 4x backward copy stored in rcx. */
+ leaq (VEC_SIZE * -4 + -1)(%rdi, %rdx), %rcx
VMOVU (VEC_SIZE * 3)(%rsi), %VEC(7)
- VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(8)
- /* Save stop of the destination buffer. */
- leaq -VEC_SIZE(%rdi, %rdx), %r11
- /* Align destination end for aligned stores in the loop. Compute
- how much destination end is misaligned. */
- leaq -VEC_SIZE(%rsi, %rdx), %rcx
- movq %r11, %r9
- movq %r11, %r8
- andq $(VEC_SIZE - 1), %r8
- /* Adjust source. */
- subq %r8, %rcx
- /* Adjust the end of destination which should be aligned now. */
- subq %r8, %r9
- /* Adjust length. */
- subq %r8, %rdx
-
- .p2align 4
+ VMOVU -VEC_SIZE(%rsi, %rdx), %VEC(8)
+ /* Subtract dst from src. Add back after dst aligned. */
+ subq %rdi, %rsi
+ /* Align dst. */
+ andq $-(VEC_SIZE), %rcx
+ /* Restore src. */
+ addq %rcx, %rsi
+
+ /* Don't use multi-byte nop to align. */
+ .p2align 4,, 11
L(loop_4x_vec_backward):
/* Copy 4 * VEC a time backward. */
- VMOVU (%rcx), %VEC(0)
- VMOVU -VEC_SIZE(%rcx), %VEC(1)
- VMOVU -(VEC_SIZE * 2)(%rcx), %VEC(2)
- VMOVU -(VEC_SIZE * 3)(%rcx), %VEC(3)
- addq $-(VEC_SIZE * 4), %rcx
- addq $-(VEC_SIZE * 4), %rdx
- VMOVA %VEC(0), (%r9)
- VMOVA %VEC(1), -VEC_SIZE(%r9)
- VMOVA %VEC(2), -(VEC_SIZE * 2)(%r9)
- VMOVA %VEC(3), -(VEC_SIZE * 3)(%r9)
- addq $-(VEC_SIZE * 4), %r9
- cmpq $(VEC_SIZE * 4), %rdx
- ja L(loop_4x_vec_backward)
+ VMOVU (VEC_SIZE * 3)(%rsi), %VEC(1)
+ VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2)
+ VMOVU (VEC_SIZE * 1)(%rsi), %VEC(3)
+ VMOVU (VEC_SIZE * 0)(%rsi), %VEC(4)
+ addq $(VEC_SIZE * -4), %rsi
+ VMOVA %VEC(1), (VEC_SIZE * 3)(%rcx)
+ VMOVA %VEC(2), (VEC_SIZE * 2)(%rcx)
+ VMOVA %VEC(3), (VEC_SIZE * 1)(%rcx)
+ VMOVA %VEC(4), (VEC_SIZE * 0)(%rcx)
+ addq $(VEC_SIZE * -4), %rcx
+ cmpq %rcx, %rdi
+ jb L(loop_4x_vec_backward)
/* Store the first 4 * VEC. */
- VMOVU %VEC(4), (%rdi)
+ VMOVU %VEC(0), (%rdi)
VMOVU %VEC(5), VEC_SIZE(%rdi)
VMOVU %VEC(6), (VEC_SIZE * 2)(%rdi)
VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi)
/* Store the last VEC. */
- VMOVU %VEC(8), (%r11)
+ VMOVU %VEC(8), -VEC_SIZE(%rdx, %rdi)
VZEROUPPER_RETURN
+#if defined USE_MULTIARCH && IS_IN (libc)
+ /* L(skip_short_movsb_check) is only used with ERMS. Not for
+ FSRM. */
+ .p2align 5,, 16
+# if ALIGN_MOVSB
+L(skip_short_movsb_check):
+# if MOVSB_ALIGN_TO > VEC_SIZE
+ VMOVU VEC_SIZE(%rsi), %VEC(1)
+# endif
+# if MOVSB_ALIGN_TO > (VEC_SIZE * 2)
+# error Unsupported MOVSB_ALIGN_TO
+# endif
+ /* If CPU does not have FSRM two options for aligning. Align src
+ if dst and src 4k alias. Otherwise align dst. */
+ testl $(PAGE_SIZE - 512), %ecx
+ jnz L(movsb_align_dst)
+ /* Fall through. dst and src 4k alias. It's better to align src
+ here because the bottleneck will be loads due to the false
+ dependency on dst. */
+
+ /* rcx already has dst - src. */
+ movq %rcx, %r9
+ /* Add src to len. Subtract back after src aligned. -1 because
+ src is initially aligned to MOVSB_ALIGN_TO - 1. */
+ leaq -1(%rsi, %rdx), %rcx
+ /* Inclusively align src to MOVSB_ALIGN_TO - 1. */
+ orq $(MOVSB_ALIGN_TO - 1), %rsi
+ /* Restore dst and len adjusted with new values for aligned src.
+ */
+ leaq 1(%rsi, %r9), %rdi
+ subq %rsi, %rcx
+ /* Finish aligning src. */
+ incq %rsi
+
+ rep movsb
+
+ VMOVU %VEC(0), (%r8)
+# if MOVSB_ALIGN_TO > VEC_SIZE
+ VMOVU %VEC(1), VEC_SIZE(%r8)
+# endif
+ VZEROUPPER_RETURN
+# endif
+
+ .p2align 4,, 12
+L(movsb):
+ movq %rdi, %rcx
+ subq %rsi, %rcx
+ /* Go to backwards temporal copy if overlap no matter what as
+ backward REP MOVSB is slow and we don't want to use NT stores if
+ there is overlap. */
+ cmpq %rdx, %rcx
+ /* L(more_8x_vec_backward_check_nop) checks for src == dst. */
+ jb L(more_8x_vec_backward_check_nop)
+# if ALIGN_MOVSB
+ /* Save dest for storing aligning VECs later. */
+ movq %rdi, %r8
+# endif
+ /* If above __x86_rep_movsb_stop_threshold most likely is
+ candidate for NT moves as well. */
+ cmp __x86_rep_movsb_stop_threshold(%rip), %RDX_LP
+ jae L(large_memcpy_2x_check)
+# if AVOID_SHORT_DISTANCE_REP_MOVSB || ALIGN_MOVSB
+ /* Only avoid short movsb if CPU has FSRM. */
+ testl $X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB, __x86_string_control(%rip)
+ jz L(skip_short_movsb_check)
+# if AVOID_SHORT_DISTANCE_REP_MOVSB
+ /* Avoid "rep movsb" if RCX, the distance between source and
+ destination, is N*4GB + [1..63] with N >= 0. */
+
+ /* ecx contains dst - src. Early check for backward copy
+ conditions means only case of slow movsb with src = dst + [0,
+ 63] is ecx in [-63, 0]. Use unsigned comparison with -64 check
+ for that case. */
+ cmpl $-64, %ecx
+ ja L(more_8x_vec_forward)
+# endif
+# endif
+# if ALIGN_MOVSB
+# if MOVSB_ALIGN_TO > VEC_SIZE
+ VMOVU VEC_SIZE(%rsi), %VEC(1)
+# endif
+# if MOVSB_ALIGN_TO > (VEC_SIZE * 2)
+# error Unsupported MOVSB_ALIGN_TO
+# endif
+ /* Fall through means cpu has FSRM. In that case exclusively
+ align destination. */
+L(movsb_align_dst):
+ /* Subtract dst from src. Add back after dst aligned. */
+ subq %rdi, %rsi
+ /* Exclusively align dst to MOVSB_ALIGN_TO (64). */
+ addq $(MOVSB_ALIGN_TO - 1), %rdi
+ /* Add dst to len. Subtract back after dst aligned. */
+ leaq (%r8, %rdx), %rcx
+ /* Finish aligning dst. */
+ andq $-(MOVSB_ALIGN_TO), %rdi
+ /* Restore src and len adjusted with new values for aligned dst.
+ */
+ addq %rdi, %rsi
+ subq %rdi, %rcx
+
+ rep movsb
+
+ /* Store VECs loaded for aligning. */
+ VMOVU %VEC(0), (%r8)
+# if MOVSB_ALIGN_TO > VEC_SIZE
+ VMOVU %VEC(1), VEC_SIZE(%r8)
+# endif
+ VZEROUPPER_RETURN
+# else /* !ALIGN_MOVSB. */
+L(skip_short_movsb_check):
+ mov %RDX_LP, %RCX_LP
+ rep movsb
+ ret
+# endif
+#endif
+
+ .p2align 4,, 10
#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
- .p2align 4
+L(large_memcpy_2x_check):
+ /* Entry from L(large_memcpy_2x) has a redundant load of
+ __x86_shared_non_temporal_threshold(%rip). L(large_memcpy_2x)
+ is only used for the non-erms memmove which is generally less
+ common. */
L(large_memcpy_2x):
- /* Compute absolute value of difference between source and
- destination. */
- movq %rdi, %r9
- subq %rsi, %r9
- movq %r9, %r8
- leaq -1(%r9), %rcx
- sarq $63, %r8
- xorq %r8, %r9
- subq %r8, %r9
- /* Don't use non-temporal store if there is overlap between
- destination and source since destination may be in cache when
- source is loaded. */
- cmpq %r9, %rdx
- ja L(more_8x_vec_check)
+ mov __x86_shared_non_temporal_threshold(%rip), %R11_LP
+ cmp %R11_LP, %RDX_LP
+ jb L(more_8x_vec_check)
+ /* To reach this point it is impossible for dst > src and
+ overlap. Remaining to check is src > dst and overlap. rcx
+ already contains dst - src. Negate rcx to get src - dst. If
+ length > rcx then there is overlap and forward copy is best. */
+ negq %rcx
+ cmpq %rcx, %rdx
+ ja L(more_8x_vec_forward)
/* Cache align destination. First store the first 64 bytes then
adjust alignments. */
- VMOVU (%rsi), %VEC(8)
-#if VEC_SIZE < 64
- VMOVU VEC_SIZE(%rsi), %VEC(9)
-#if VEC_SIZE < 32
- VMOVU (VEC_SIZE * 2)(%rsi), %VEC(10)
- VMOVU (VEC_SIZE * 3)(%rsi), %VEC(11)
-#endif
-#endif
- VMOVU %VEC(8), (%rdi)
-#if VEC_SIZE < 64
- VMOVU %VEC(9), VEC_SIZE(%rdi)
-#if VEC_SIZE < 32
- VMOVU %VEC(10), (VEC_SIZE * 2)(%rdi)
- VMOVU %VEC(11), (VEC_SIZE * 3)(%rdi)
-#endif
-#endif
+
+ /* First vec was also loaded into VEC(0). */
+# if VEC_SIZE < 64
+ VMOVU VEC_SIZE(%rsi), %VEC(1)
+# if VEC_SIZE < 32
+ VMOVU (VEC_SIZE * 2)(%rsi), %VEC(2)
+ VMOVU (VEC_SIZE * 3)(%rsi), %VEC(3)
+# endif
+# endif
+ VMOVU %VEC(0), (%rdi)
+# if VEC_SIZE < 64
+ VMOVU %VEC(1), VEC_SIZE(%rdi)
+# if VEC_SIZE < 32
+ VMOVU %VEC(2), (VEC_SIZE * 2)(%rdi)
+ VMOVU %VEC(3), (VEC_SIZE * 3)(%rdi)
+# endif
+# endif
+
/* Adjust source, destination, and size. */
movq %rdi, %r8
andq $63, %r8
/* Adjust length. */
addq %r8, %rdx
- /* Test if source and destination addresses will alias. If they do
- the larger pipeline in large_memcpy_4x alleviated the
+ /* Test if source and destination addresses will alias. If they
+ do the larger pipeline in large_memcpy_4x alleviated the
performance drop. */
+
+ /* ecx contains -(dst - src). not ecx will return dst - src - 1
+ which works for testing aliasing. */
+ notl %ecx
+ movq %rdx, %r10
testl $(PAGE_SIZE - VEC_SIZE * 8), %ecx
jz L(large_memcpy_4x)
- movq %rdx, %r10
- shrq $LOG_4X_MEMCPY_THRESH, %r10
- cmp __x86_shared_non_temporal_threshold(%rip), %r10
+ /* r11 has __x86_shared_non_temporal_threshold. Shift it left
+ by LOG_4X_MEMCPY_THRESH to get L(large_memcpy_4x) threshold.
+ */
+ shlq $LOG_4X_MEMCPY_THRESH, %r11
+ cmp %r11, %rdx
jae L(large_memcpy_4x)
/* edx will store remainder size for copying tail. */
andl $(PAGE_SIZE * 2 - 1), %edx
/* r10 stores outer loop counter. */
- shrq $((LOG_PAGE_SIZE + 1) - LOG_4X_MEMCPY_THRESH), %r10
+ shrq $(LOG_PAGE_SIZE + 1), %r10
/* Copy 4x VEC at a time from 2 pages. */
.p2align 4
L(loop_large_memcpy_2x_outer):
.p2align 4
L(large_memcpy_4x):
- movq %rdx, %r10
/* edx will store remainder size for copying tail. */
andl $(PAGE_SIZE * 4 - 1), %edx
/* r10 stores outer loop counter. */
/* ecx stores inner loop counter. */
movl $(PAGE_SIZE / LARGE_LOAD_SIZE), %ecx
L(loop_large_memcpy_4x_inner):
- /* Only one prefetch set per page as doing 4 pages give more time
- for prefetcher to keep up. */
+ /* Only one prefetch set per page as doing 4 pages give more
+ time for prefetcher to keep up. */
PREFETCH_ONE_SET(1, (%rsi), PREFETCHED_LOAD_SIZE)
PREFETCH_ONE_SET(1, (%rsi), PAGE_SIZE + PREFETCHED_LOAD_SIZE)
PREFETCH_ONE_SET(1, (%rsi), PAGE_SIZE * 2 + PREFETCHED_LOAD_SIZE)
# define MEMRCHR __memrchr_avx2_rtm
#endif
+#define COND_VZEROUPPER COND_VZEROUPPER_XTEST
#define ZERO_UPPER_VEC_REGISTERS_RETURN \
ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
# include <sysdep.h>
# ifndef MEMRCHR
-# define MEMRCHR __memrchr_avx2
+# define MEMRCHR __memrchr_avx2
# endif
# ifndef VZEROUPPER
-# define VZEROUPPER vzeroupper
+# define VZEROUPPER vzeroupper
# endif
# ifndef SECTION
# define SECTION(p) p##.avx
# endif
-# define VEC_SIZE 32
+# define VEC_SIZE 32
+# define PAGE_SIZE 4096
+ .section SECTION(.text), "ax", @progbits
+ENTRY_P2ALIGN(MEMRCHR, 6)
+# ifdef __ILP32__
+ /* Clear upper bits. */
+ and %RDX_LP, %RDX_LP
+# else
+ test %RDX_LP, %RDX_LP
+# endif
+ jz L(zero_0)
- .section SECTION(.text),"ax",@progbits
-ENTRY (MEMRCHR)
- /* Broadcast CHAR to YMM0. */
vmovd %esi, %xmm0
- vpbroadcastb %xmm0, %ymm0
-
- sub $VEC_SIZE, %RDX_LP
- jbe L(last_vec_or_less)
-
- add %RDX_LP, %RDI_LP
-
- /* Check the last VEC_SIZE bytes. */
- vpcmpeqb (%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
- testl %eax, %eax
- jnz L(last_vec_x0)
+ /* Get end pointer. Minus one for two reasons. 1) It is necessary for a
+ correct page cross check and 2) it correctly sets up end ptr to be
+ subtract by lzcnt aligned. */
+ leaq -1(%rdx, %rdi), %rax
- subq $(VEC_SIZE * 4), %rdi
- movl %edi, %ecx
- andl $(VEC_SIZE - 1), %ecx
- jz L(aligned_more)
+ vpbroadcastb %xmm0, %ymm0
- /* Align data for aligned loads in the loop. */
- addq $VEC_SIZE, %rdi
- addq $VEC_SIZE, %rdx
- andq $-VEC_SIZE, %rdi
- subq %rcx, %rdx
+ /* Check if we can load 1x VEC without crossing a page. */
+ testl $(PAGE_SIZE - VEC_SIZE), %eax
+ jz L(page_cross)
+
+ vpcmpeqb -(VEC_SIZE - 1)(%rax), %ymm0, %ymm1
+ vpmovmskb %ymm1, %ecx
+ cmpq $VEC_SIZE, %rdx
+ ja L(more_1x_vec)
+
+L(ret_vec_x0_test):
+ /* If ecx is zero (no matches) lzcnt will set it to 32 (VEC_SIZE) which
+ will guarantee edx (len) is less than it. */
+ lzcntl %ecx, %ecx
+
+ /* Hoist vzeroupper (not great for RTM) to save code size. This allows
+ all logic for edx (len) <= VEC_SIZE to fit in first cache line. */
+ COND_VZEROUPPER
+ cmpl %ecx, %edx
+ jle L(zero_0)
+ subq %rcx, %rax
+ ret
- .p2align 4
-L(aligned_more):
- subq $(VEC_SIZE * 4), %rdx
- jbe L(last_4x_vec_or_less)
-
- /* Check the last 4 * VEC_SIZE. Only one VEC_SIZE at a time
- since data is only aligned to VEC_SIZE. */
- vpcmpeqb (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
- testl %eax, %eax
- jnz L(last_vec_x3)
-
- vpcmpeqb (VEC_SIZE * 2)(%rdi), %ymm0, %ymm2
- vpmovmskb %ymm2, %eax
- testl %eax, %eax
- jnz L(last_vec_x2)
-
- vpcmpeqb VEC_SIZE(%rdi), %ymm0, %ymm3
- vpmovmskb %ymm3, %eax
- testl %eax, %eax
- jnz L(last_vec_x1)
-
- vpcmpeqb (%rdi), %ymm0, %ymm4
- vpmovmskb %ymm4, %eax
- testl %eax, %eax
- jnz L(last_vec_x0)
-
- /* Align data to 4 * VEC_SIZE for loop with fewer branches.
- There are some overlaps with above if data isn't aligned
- to 4 * VEC_SIZE. */
- movl %edi, %ecx
- andl $(VEC_SIZE * 4 - 1), %ecx
- jz L(loop_4x_vec)
-
- addq $(VEC_SIZE * 4), %rdi
- addq $(VEC_SIZE * 4), %rdx
- andq $-(VEC_SIZE * 4), %rdi
- subq %rcx, %rdx
+ /* Fits in aligning bytes of first cache line. */
+L(zero_0):
+ xorl %eax, %eax
+ ret
- .p2align 4
-L(loop_4x_vec):
- /* Compare 4 * VEC at a time forward. */
- subq $(VEC_SIZE * 4), %rdi
- subq $(VEC_SIZE * 4), %rdx
- jbe L(last_4x_vec_or_less)
-
- vmovdqa (%rdi), %ymm1
- vmovdqa VEC_SIZE(%rdi), %ymm2
- vmovdqa (VEC_SIZE * 2)(%rdi), %ymm3
- vmovdqa (VEC_SIZE * 3)(%rdi), %ymm4
-
- vpcmpeqb %ymm1, %ymm0, %ymm1
- vpcmpeqb %ymm2, %ymm0, %ymm2
- vpcmpeqb %ymm3, %ymm0, %ymm3
- vpcmpeqb %ymm4, %ymm0, %ymm4
-
- vpor %ymm1, %ymm2, %ymm5
- vpor %ymm3, %ymm4, %ymm6
- vpor %ymm5, %ymm6, %ymm5
-
- vpmovmskb %ymm5, %eax
- testl %eax, %eax
- jz L(loop_4x_vec)
-
- /* There is a match. */
- vpmovmskb %ymm4, %eax
- testl %eax, %eax
- jnz L(last_vec_x3)
-
- vpmovmskb %ymm3, %eax
- testl %eax, %eax
- jnz L(last_vec_x2)
-
- vpmovmskb %ymm2, %eax
- testl %eax, %eax
- jnz L(last_vec_x1)
-
- vpmovmskb %ymm1, %eax
- bsrl %eax, %eax
- addq %rdi, %rax
+ .p2align 4,, 9
+L(ret_vec_x0):
+ lzcntl %ecx, %ecx
+ subq %rcx, %rax
L(return_vzeroupper):
ZERO_UPPER_VEC_REGISTERS_RETURN
- .p2align 4
-L(last_4x_vec_or_less):
- addl $(VEC_SIZE * 4), %edx
- cmpl $(VEC_SIZE * 2), %edx
- jbe L(last_2x_vec)
-
- vpcmpeqb (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
- testl %eax, %eax
- jnz L(last_vec_x3)
-
- vpcmpeqb (VEC_SIZE * 2)(%rdi), %ymm0, %ymm2
- vpmovmskb %ymm2, %eax
- testl %eax, %eax
- jnz L(last_vec_x2)
-
- vpcmpeqb VEC_SIZE(%rdi), %ymm0, %ymm3
- vpmovmskb %ymm3, %eax
- testl %eax, %eax
- jnz L(last_vec_x1_check)
- cmpl $(VEC_SIZE * 3), %edx
- jbe L(zero)
-
- vpcmpeqb (%rdi), %ymm0, %ymm4
- vpmovmskb %ymm4, %eax
- testl %eax, %eax
- jz L(zero)
- bsrl %eax, %eax
- subq $(VEC_SIZE * 4), %rdx
- addq %rax, %rdx
- jl L(zero)
- addq %rdi, %rax
- VZEROUPPER_RETURN
-
- .p2align 4
+ .p2align 4,, 10
+L(more_1x_vec):
+ testl %ecx, %ecx
+ jnz L(ret_vec_x0)
+
+ /* Align rax (string pointer). */
+ andq $-VEC_SIZE, %rax
+
+ /* Recompute remaining length after aligning. */
+ movq %rax, %rdx
+ /* Need this comparison next no matter what. */
+ vpcmpeqb -(VEC_SIZE)(%rax), %ymm0, %ymm1
+ subq %rdi, %rdx
+ decq %rax
+ vpmovmskb %ymm1, %ecx
+ /* Fall through for short (hotter than length). */
+ cmpq $(VEC_SIZE * 2), %rdx
+ ja L(more_2x_vec)
L(last_2x_vec):
- vpcmpeqb (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
- testl %eax, %eax
- jnz L(last_vec_x3_check)
cmpl $VEC_SIZE, %edx
- jbe L(zero)
-
- vpcmpeqb (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
- testl %eax, %eax
- jz L(zero)
- bsrl %eax, %eax
- subq $(VEC_SIZE * 2), %rdx
- addq %rax, %rdx
- jl L(zero)
- addl $(VEC_SIZE * 2), %eax
- addq %rdi, %rax
- VZEROUPPER_RETURN
-
- .p2align 4
-L(last_vec_x0):
- bsrl %eax, %eax
- addq %rdi, %rax
- VZEROUPPER_RETURN
+ jbe L(ret_vec_x0_test)
+
+ testl %ecx, %ecx
+ jnz L(ret_vec_x0)
+
+ vpcmpeqb -(VEC_SIZE * 2 - 1)(%rax), %ymm0, %ymm1
+ vpmovmskb %ymm1, %ecx
+ /* 64-bit lzcnt. This will naturally add 32 to position. */
+ lzcntq %rcx, %rcx
+ COND_VZEROUPPER
+ cmpl %ecx, %edx
+ jle L(zero_0)
+ subq %rcx, %rax
+ ret
- .p2align 4
-L(last_vec_x1):
- bsrl %eax, %eax
- addl $VEC_SIZE, %eax
- addq %rdi, %rax
- VZEROUPPER_RETURN
- .p2align 4
-L(last_vec_x2):
- bsrl %eax, %eax
- addl $(VEC_SIZE * 2), %eax
- addq %rdi, %rax
+ /* Inexpensive place to put this regarding code size / target alignments
+ / ICache NLP. Necessary for 2-byte encoding of jump to page cross
+ case which in turn is necessary for hot path (len <= VEC_SIZE) to fit
+ in first cache line. */
+L(page_cross):
+ movq %rax, %rsi
+ andq $-VEC_SIZE, %rsi
+ vpcmpeqb (%rsi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %ecx
+ /* Shift out negative alignment (because we are starting from endptr and
+ working backwards). */
+ movl %eax, %r8d
+ /* notl because eax already has endptr - 1. (-x = ~(x - 1)). */
+ notl %r8d
+ shlxl %r8d, %ecx, %ecx
+ cmpq %rdi, %rsi
+ ja L(more_1x_vec)
+ lzcntl %ecx, %ecx
+ COND_VZEROUPPER
+ cmpl %ecx, %edx
+ jle L(zero_0)
+ subq %rcx, %rax
+ ret
+ .p2align 4,, 11
+L(ret_vec_x1):
+ /* This will naturally add 32 to position. */
+ lzcntq %rcx, %rcx
+ subq %rcx, %rax
VZEROUPPER_RETURN
+ .p2align 4,, 10
+L(more_2x_vec):
+ testl %ecx, %ecx
+ jnz L(ret_vec_x0)
- .p2align 4
-L(last_vec_x3):
- bsrl %eax, %eax
- addl $(VEC_SIZE * 3), %eax
- addq %rdi, %rax
- ret
+ vpcmpeqb -(VEC_SIZE * 2 - 1)(%rax), %ymm0, %ymm1
+ vpmovmskb %ymm1, %ecx
+ testl %ecx, %ecx
+ jnz L(ret_vec_x1)
- .p2align 4
-L(last_vec_x1_check):
- bsrl %eax, %eax
- subq $(VEC_SIZE * 3), %rdx
- addq %rax, %rdx
- jl L(zero)
- addl $VEC_SIZE, %eax
- addq %rdi, %rax
- VZEROUPPER_RETURN
- .p2align 4
-L(last_vec_x3_check):
- bsrl %eax, %eax
- subq $VEC_SIZE, %rdx
- addq %rax, %rdx
- jl L(zero)
- addl $(VEC_SIZE * 3), %eax
- addq %rdi, %rax
- VZEROUPPER_RETURN
+ /* Needed no matter what. */
+ vpcmpeqb -(VEC_SIZE * 3 - 1)(%rax), %ymm0, %ymm1
+ vpmovmskb %ymm1, %ecx
- .p2align 4
-L(zero):
- xorl %eax, %eax
- VZEROUPPER_RETURN
+ subq $(VEC_SIZE * 4), %rdx
+ ja L(more_4x_vec)
+
+ cmpl $(VEC_SIZE * -1), %edx
+ jle L(ret_vec_x2_test)
+
+L(last_vec):
+ testl %ecx, %ecx
+ jnz L(ret_vec_x2)
+
+ /* Needed no matter what. */
+ vpcmpeqb -(VEC_SIZE * 4 - 1)(%rax), %ymm0, %ymm1
+ vpmovmskb %ymm1, %ecx
+ lzcntl %ecx, %ecx
+ subq $(VEC_SIZE * 3), %rax
+ COND_VZEROUPPER
+ subq %rcx, %rax
+ cmpq %rax, %rdi
+ ja L(zero_2)
+ ret
- .p2align 4
-L(null):
+ /* Fits in aligning bytes. */
+L(zero_2):
xorl %eax, %eax
ret
- .p2align 4
-L(last_vec_or_less_aligned):
- movl %edx, %ecx
+ .p2align 4,, 4
+L(ret_vec_x2_test):
+ lzcntl %ecx, %ecx
+ subq $(VEC_SIZE * 2), %rax
+ COND_VZEROUPPER
+ subq %rcx, %rax
+ cmpq %rax, %rdi
+ ja L(zero_2)
+ ret
- vpcmpeqb (%rdi), %ymm0, %ymm1
- movl $1, %edx
- /* Support rdx << 32. */
- salq %cl, %rdx
- subq $1, %rdx
+ .p2align 4,, 11
+L(ret_vec_x2):
+ /* ecx must be non-zero. */
+ bsrl %ecx, %ecx
+ leaq (VEC_SIZE * -3 + 1)(%rcx, %rax), %rax
+ VZEROUPPER_RETURN
- vpmovmskb %ymm1, %eax
+ .p2align 4,, 14
+L(ret_vec_x3):
+ /* ecx must be non-zero. */
+ bsrl %ecx, %ecx
+ leaq (VEC_SIZE * -4 + 1)(%rcx, %rax), %rax
+ VZEROUPPER_RETURN
- /* Remove the trailing bytes. */
- andl %edx, %eax
- testl %eax, %eax
- jz L(zero)
- bsrl %eax, %eax
- addq %rdi, %rax
- VZEROUPPER_RETURN
.p2align 4
-L(last_vec_or_less):
- addl $VEC_SIZE, %edx
+L(more_4x_vec):
+ testl %ecx, %ecx
+ jnz L(ret_vec_x2)
- /* Check for zero length. */
- testl %edx, %edx
- jz L(null)
+ vpcmpeqb -(VEC_SIZE * 4 - 1)(%rax), %ymm0, %ymm1
+ vpmovmskb %ymm1, %ecx
- movl %edi, %ecx
- andl $(VEC_SIZE - 1), %ecx
- jz L(last_vec_or_less_aligned)
+ testl %ecx, %ecx
+ jnz L(ret_vec_x3)
- movl %ecx, %esi
- movl %ecx, %r8d
- addl %edx, %esi
- andq $-VEC_SIZE, %rdi
+ /* Check if near end before re-aligning (otherwise might do an
+ unnecessary loop iteration). */
+ addq $-(VEC_SIZE * 4), %rax
+ cmpq $(VEC_SIZE * 4), %rdx
+ jbe L(last_4x_vec)
- subl $VEC_SIZE, %esi
- ja L(last_vec_2x_aligned)
+ /* Align rax to (VEC_SIZE * 4 - 1). */
+ orq $(VEC_SIZE * 4 - 1), %rax
+ movq %rdi, %rdx
+ /* Get endptr for loop in rdx. NB: Can't just do while rax > rdi because
+ lengths that overflow can be valid and break the comparison. */
+ orq $(VEC_SIZE * 4 - 1), %rdx
- /* Check the last VEC. */
- vpcmpeqb (%rdi), %ymm0, %ymm1
- vpmovmskb %ymm1, %eax
-
- /* Remove the leading and trailing bytes. */
- sarl %cl, %eax
- movl %edx, %ecx
+ .p2align 4
+L(loop_4x_vec):
+ /* Need this comparison next no matter what. */
+ vpcmpeqb -(VEC_SIZE * 1 - 1)(%rax), %ymm0, %ymm1
+ vpcmpeqb -(VEC_SIZE * 2 - 1)(%rax), %ymm0, %ymm2
+ vpcmpeqb -(VEC_SIZE * 3 - 1)(%rax), %ymm0, %ymm3
+ vpcmpeqb -(VEC_SIZE * 4 - 1)(%rax), %ymm0, %ymm4
- movl $1, %edx
- sall %cl, %edx
- subl $1, %edx
+ vpor %ymm1, %ymm2, %ymm2
+ vpor %ymm3, %ymm4, %ymm4
+ vpor %ymm2, %ymm4, %ymm4
+ vpmovmskb %ymm4, %esi
- andl %edx, %eax
- testl %eax, %eax
- jz L(zero)
+ testl %esi, %esi
+ jnz L(loop_end)
- bsrl %eax, %eax
- addq %rdi, %rax
- addq %r8, %rax
- VZEROUPPER_RETURN
+ addq $(VEC_SIZE * -4), %rax
+ cmpq %rdx, %rax
+ jne L(loop_4x_vec)
- .p2align 4
-L(last_vec_2x_aligned):
- movl %esi, %ecx
+ subl %edi, %edx
+ incl %edx
- /* Check the last VEC. */
- vpcmpeqb VEC_SIZE(%rdi), %ymm0, %ymm1
+L(last_4x_vec):
+ /* Used no matter what. */
+ vpcmpeqb -(VEC_SIZE * 1 - 1)(%rax), %ymm0, %ymm1
+ vpmovmskb %ymm1, %ecx
- movl $1, %edx
- sall %cl, %edx
- subl $1, %edx
+ cmpl $(VEC_SIZE * 2), %edx
+ jbe L(last_2x_vec)
- vpmovmskb %ymm1, %eax
+ testl %ecx, %ecx
+ jnz L(ret_vec_x0_end)
- /* Remove the trailing bytes. */
- andl %edx, %eax
+ vpcmpeqb -(VEC_SIZE * 2 - 1)(%rax), %ymm0, %ymm1
+ vpmovmskb %ymm1, %ecx
+ testl %ecx, %ecx
+ jnz L(ret_vec_x1_end)
- testl %eax, %eax
- jnz L(last_vec_x1)
+ /* Used no matter what. */
+ vpcmpeqb -(VEC_SIZE * 3 - 1)(%rax), %ymm0, %ymm1
+ vpmovmskb %ymm1, %ecx
- /* Check the second last VEC. */
- vpcmpeqb (%rdi), %ymm0, %ymm1
+ cmpl $(VEC_SIZE * 3), %edx
+ ja L(last_vec)
+
+ lzcntl %ecx, %ecx
+ subq $(VEC_SIZE * 2), %rax
+ COND_VZEROUPPER
+ subq %rcx, %rax
+ cmpq %rax, %rdi
+ jbe L(ret0)
+ xorl %eax, %eax
+L(ret0):
+ ret
- movl %r8d, %ecx
- vpmovmskb %ymm1, %eax
+ .p2align 4
+L(loop_end):
+ vpmovmskb %ymm1, %ecx
+ testl %ecx, %ecx
+ jnz L(ret_vec_x0_end)
+
+ vpmovmskb %ymm2, %ecx
+ testl %ecx, %ecx
+ jnz L(ret_vec_x1_end)
+
+ vpmovmskb %ymm3, %ecx
+ /* Combine last 2 VEC matches. If ecx (VEC3) is zero (no CHAR in VEC3)
+ then it won't affect the result in esi (VEC4). If ecx is non-zero
+ then CHAR in VEC3 and bsrq will use that position. */
+ salq $32, %rcx
+ orq %rsi, %rcx
+ bsrq %rcx, %rcx
+ leaq (VEC_SIZE * -4 + 1)(%rcx, %rax), %rax
+ VZEROUPPER_RETURN
- /* Remove the leading bytes. Must use unsigned right shift for
- bsrl below. */
- shrl %cl, %eax
- testl %eax, %eax
- jz L(zero)
+ .p2align 4,, 4
+L(ret_vec_x1_end):
+ /* 64-bit version will automatically add 32 (VEC_SIZE). */
+ lzcntq %rcx, %rcx
+ subq %rcx, %rax
+ VZEROUPPER_RETURN
- bsrl %eax, %eax
- addq %rdi, %rax
- addq %r8, %rax
+ .p2align 4,, 4
+L(ret_vec_x0_end):
+ lzcntl %ecx, %ecx
+ subq %rcx, %rax
VZEROUPPER_RETURN
-END (MEMRCHR)
+
+ /* 2 bytes until next cache line. */
+END(MEMRCHR)
#endif
#if IS_IN (libc)
# include <sysdep.h>
+# include "evex256-vecs.h"
+# if VEC_SIZE != 32
+# error "VEC_SIZE != 32 unimplemented"
+# endif
+
+# ifndef MEMRCHR
+# define MEMRCHR __memrchr_evex
+# endif
+
+# define PAGE_SIZE 4096
+# define VECMATCH VEC(0)
+
+ .section SECTION(.text), "ax", @progbits
+ENTRY_P2ALIGN(MEMRCHR, 6)
+# ifdef __ILP32__
+ /* Clear upper bits. */
+ and %RDX_LP, %RDX_LP
+# else
+ test %RDX_LP, %RDX_LP
+# endif
+ jz L(zero_0)
+
+ /* Get end pointer. Minus one for two reasons: 1) it is necessary for a
+ correct page cross check and 2) it sets up the end pointer so that
+ subtracting the lzcnt result yields the match address. */
+ leaq -1(%rdi, %rdx), %rax
+ vpbroadcastb %esi, %VECMATCH
+
+ /* Check if we can load 1x VEC without crossing a page. */
+ testl $(PAGE_SIZE - VEC_SIZE), %eax
+ jz L(page_cross)
+
+ /* Don't use rax for pointer here because EVEX has better encoding with
+ offset % VEC_SIZE == 0. */
+ vpcmpb $0, -(VEC_SIZE)(%rdi, %rdx), %VECMATCH, %k0
+ kmovd %k0, %ecx
+
+ /* Fall through for rdx (len) <= VEC_SIZE (expect small sizes). */
+ cmpq $VEC_SIZE, %rdx
+ ja L(more_1x_vec)
+L(ret_vec_x0_test):
+
+ /* If ecx is zero (no matches) lzcnt will set it to 32 (VEC_SIZE), which
+ guarantees edx (len) is not greater than it. */
+ lzcntl %ecx, %ecx
+ cmpl %ecx, %edx
+ jle L(zero_0)
+ subq %rcx, %rax
+ ret
-# define VMOVA vmovdqa64
-
-# define YMMMATCH ymm16
-
-# define VEC_SIZE 32
-
- .section .text.evex,"ax",@progbits
-ENTRY (__memrchr_evex)
- /* Broadcast CHAR to YMMMATCH. */
- vpbroadcastb %esi, %YMMMATCH
-
- sub $VEC_SIZE, %RDX_LP
- jbe L(last_vec_or_less)
-
- add %RDX_LP, %RDI_LP
-
- /* Check the last VEC_SIZE bytes. */
- vpcmpb $0, (%rdi), %YMMMATCH, %k1
- kmovd %k1, %eax
- testl %eax, %eax
- jnz L(last_vec_x0)
-
- subq $(VEC_SIZE * 4), %rdi
- movl %edi, %ecx
- andl $(VEC_SIZE - 1), %ecx
- jz L(aligned_more)
-
- /* Align data for aligned loads in the loop. */
- addq $VEC_SIZE, %rdi
- addq $VEC_SIZE, %rdx
- andq $-VEC_SIZE, %rdi
- subq %rcx, %rdx
-
- .p2align 4
-L(aligned_more):
- subq $(VEC_SIZE * 4), %rdx
- jbe L(last_4x_vec_or_less)
-
- /* Check the last 4 * VEC_SIZE. Only one VEC_SIZE at a time
- since data is only aligned to VEC_SIZE. */
- vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1
- kmovd %k1, %eax
- testl %eax, %eax
- jnz L(last_vec_x3)
-
- vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k2
- kmovd %k2, %eax
- testl %eax, %eax
- jnz L(last_vec_x2)
-
- vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k3
- kmovd %k3, %eax
- testl %eax, %eax
- jnz L(last_vec_x1)
-
- vpcmpb $0, (%rdi), %YMMMATCH, %k4
- kmovd %k4, %eax
- testl %eax, %eax
- jnz L(last_vec_x0)
-
- /* Align data to 4 * VEC_SIZE for loop with fewer branches.
- There are some overlaps with above if data isn't aligned
- to 4 * VEC_SIZE. */
- movl %edi, %ecx
- andl $(VEC_SIZE * 4 - 1), %ecx
- jz L(loop_4x_vec)
-
- addq $(VEC_SIZE * 4), %rdi
- addq $(VEC_SIZE * 4), %rdx
- andq $-(VEC_SIZE * 4), %rdi
- subq %rcx, %rdx
+ /* Fits in aligning bytes of first cache line. */
+L(zero_0):
+ xorl %eax, %eax
+ ret
- .p2align 4
-L(loop_4x_vec):
- /* Compare 4 * VEC at a time forward. */
- subq $(VEC_SIZE * 4), %rdi
- subq $(VEC_SIZE * 4), %rdx
- jbe L(last_4x_vec_or_less)
-
- vpcmpb $0, (%rdi), %YMMMATCH, %k1
- vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k2
- kord %k1, %k2, %k5
- vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k3
- vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k4
-
- kord %k3, %k4, %k6
- kortestd %k5, %k6
- jz L(loop_4x_vec)
-
- /* There is a match. */
- kmovd %k4, %eax
- testl %eax, %eax
- jnz L(last_vec_x3)
-
- kmovd %k3, %eax
- testl %eax, %eax
- jnz L(last_vec_x2)
-
- kmovd %k2, %eax
- testl %eax, %eax
- jnz L(last_vec_x1)
-
- kmovd %k1, %eax
- bsrl %eax, %eax
- addq %rdi, %rax
+ .p2align 4,, 9
+L(ret_vec_x0_dec):
+ decq %rax
+L(ret_vec_x0):
+ lzcntl %ecx, %ecx
+ subq %rcx, %rax
ret
- .p2align 4
-L(last_4x_vec_or_less):
- addl $(VEC_SIZE * 4), %edx
- cmpl $(VEC_SIZE * 2), %edx
- jbe L(last_2x_vec)
+ .p2align 4,, 10
+L(more_1x_vec):
+ testl %ecx, %ecx
+ jnz L(ret_vec_x0)
- vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1
- kmovd %k1, %eax
- testl %eax, %eax
- jnz L(last_vec_x3)
+ /* Align rax (pointer to string). */
+ andq $-VEC_SIZE, %rax
- vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k2
- kmovd %k2, %eax
- testl %eax, %eax
- jnz L(last_vec_x2)
+ /* Recompute length after aligning. */
+ movq %rax, %rdx
- vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k3
- kmovd %k3, %eax
- testl %eax, %eax
- jnz L(last_vec_x1_check)
- cmpl $(VEC_SIZE * 3), %edx
- jbe L(zero)
+ /* Need no matter what. */
+ vpcmpb $0, -(VEC_SIZE)(%rax), %VECMATCH, %k0
+ kmovd %k0, %ecx
- vpcmpb $0, (%rdi), %YMMMATCH, %k4
- kmovd %k4, %eax
- testl %eax, %eax
- jz L(zero)
- bsrl %eax, %eax
- subq $(VEC_SIZE * 4), %rdx
- addq %rax, %rdx
- jl L(zero)
- addq %rdi, %rax
- ret
+ subq %rdi, %rdx
- .p2align 4
+ cmpq $(VEC_SIZE * 2), %rdx
+ ja L(more_2x_vec)
L(last_2x_vec):
- vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1
- kmovd %k1, %eax
- testl %eax, %eax
- jnz L(last_vec_x3_check)
+
+ /* Must dec rax because L(ret_vec_x0_test) expects it. */
+ decq %rax
cmpl $VEC_SIZE, %edx
- jbe L(zero)
-
- vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k1
- kmovd %k1, %eax
- testl %eax, %eax
- jz L(zero)
- bsrl %eax, %eax
- subq $(VEC_SIZE * 2), %rdx
- addq %rax, %rdx
- jl L(zero)
- addl $(VEC_SIZE * 2), %eax
- addq %rdi, %rax
+ jbe L(ret_vec_x0_test)
+
+ testl %ecx, %ecx
+ jnz L(ret_vec_x0)
+
+ /* Don't use rax for pointer here because EVEX has better encoding with
+ offset % VEC_SIZE == 0. */
+ vpcmpb $0, -(VEC_SIZE * 2)(%rdi, %rdx), %VECMATCH, %k0
+ kmovd %k0, %ecx
+ /* NB: 64-bit lzcnt. This will naturally add 32 to position. */
+ lzcntq %rcx, %rcx
+ cmpl %ecx, %edx
+ jle L(zero_0)
+ subq %rcx, %rax
ret
- .p2align 4
-L(last_vec_x0):
- bsrl %eax, %eax
- addq %rdi, %rax
+ /* Inexpensive place to put this regarding code size / target alignments
+ / ICache NLP. Necessary for 2-byte encoding of jump to page cross
+ case which in turn is necessary for hot path (len <= VEC_SIZE) to fit
+ in first cache line. */
+L(page_cross):
+ movq %rax, %rsi
+ andq $-VEC_SIZE, %rsi
+ vpcmpb $0, (%rsi), %VECMATCH, %k0
+ kmovd %k0, %r8d
+ /* Shift out negative alignment (because we are starting from endptr and
+ working backwards). */
+ movl %eax, %ecx
+ /* notl because eax already has endptr - 1. (-x = ~(x - 1)). */
+ notl %ecx
+ shlxl %ecx, %r8d, %ecx
+ cmpq %rdi, %rsi
+ ja L(more_1x_vec)
+ lzcntl %ecx, %ecx
+ cmpl %ecx, %edx
+ jle L(zero_1)
+ subq %rcx, %rax
ret
- .p2align 4
-L(last_vec_x1):
- bsrl %eax, %eax
- addl $VEC_SIZE, %eax
- addq %rdi, %rax
+ /* Continue creating zero labels that fit in aligning bytes and get
+ 2-byte encoding / are in the same cache line as condition. */
+L(zero_1):
+ xorl %eax, %eax
ret
- .p2align 4
-L(last_vec_x2):
- bsrl %eax, %eax
- addl $(VEC_SIZE * 2), %eax
- addq %rdi, %rax
+ .p2align 4,, 8
+L(ret_vec_x1):
+ /* ecx = index of the last match within this VEC (0 to VEC_SIZE - 1). */
+ bsrl %ecx, %ecx
+ leaq -(VEC_SIZE * 2)(%rcx, %rax), %rax
ret
- .p2align 4
-L(last_vec_x3):
- bsrl %eax, %eax
- addl $(VEC_SIZE * 3), %eax
- addq %rdi, %rax
- ret
+ .p2align 4,, 8
+L(more_2x_vec):
+ testl %ecx, %ecx
+ jnz L(ret_vec_x0_dec)
- .p2align 4
-L(last_vec_x1_check):
- bsrl %eax, %eax
- subq $(VEC_SIZE * 3), %rdx
- addq %rax, %rdx
- jl L(zero)
- addl $VEC_SIZE, %eax
- addq %rdi, %rax
- ret
+ vpcmpb $0, -(VEC_SIZE * 2)(%rax), %VECMATCH, %k0
+ kmovd %k0, %ecx
+ testl %ecx, %ecx
+ jnz L(ret_vec_x1)
- .p2align 4
-L(last_vec_x3_check):
- bsrl %eax, %eax
- subq $VEC_SIZE, %rdx
- addq %rax, %rdx
- jl L(zero)
- addl $(VEC_SIZE * 3), %eax
- addq %rdi, %rax
- ret
+ /* Need no matter what. */
+ vpcmpb $0, -(VEC_SIZE * 3)(%rax), %VECMATCH, %k0
+ kmovd %k0, %ecx
- .p2align 4
-L(zero):
- xorl %eax, %eax
+ subq $(VEC_SIZE * 4), %rdx
+ ja L(more_4x_vec)
+
+ cmpl $(VEC_SIZE * -1), %edx
+ jle L(ret_vec_x2_test)
+L(last_vec):
+ testl %ecx, %ecx
+ jnz L(ret_vec_x2)
+
+
+ /* Need no matter what. */
+ vpcmpb $0, -(VEC_SIZE * 4)(%rax), %VECMATCH, %k0
+ kmovd %k0, %ecx
+ lzcntl %ecx, %ecx
+ subq $(VEC_SIZE * 3 + 1), %rax
+ subq %rcx, %rax
+ cmpq %rax, %rdi
+ ja L(zero_1)
ret
- .p2align 4
-L(last_vec_or_less_aligned):
- movl %edx, %ecx
-
- vpcmpb $0, (%rdi), %YMMMATCH, %k1
-
- movl $1, %edx
- /* Support rdx << 32. */
- salq %cl, %rdx
- subq $1, %rdx
-
- kmovd %k1, %eax
-
- /* Remove the trailing bytes. */
- andl %edx, %eax
- testl %eax, %eax
- jz L(zero)
-
- bsrl %eax, %eax
- addq %rdi, %rax
+ .p2align 4,, 8
+L(ret_vec_x2_test):
+ lzcntl %ecx, %ecx
+ subq $(VEC_SIZE * 2 + 1), %rax
+ subq %rcx, %rax
+ cmpq %rax, %rdi
+ ja L(zero_1)
ret
- .p2align 4
-L(last_vec_or_less):
- addl $VEC_SIZE, %edx
-
- /* Check for zero length. */
- testl %edx, %edx
- jz L(zero)
-
- movl %edi, %ecx
- andl $(VEC_SIZE - 1), %ecx
- jz L(last_vec_or_less_aligned)
-
- movl %ecx, %esi
- movl %ecx, %r8d
- addl %edx, %esi
- andq $-VEC_SIZE, %rdi
+ .p2align 4,, 8
+L(ret_vec_x2):
+ bsrl %ecx, %ecx
+ leaq -(VEC_SIZE * 3)(%rcx, %rax), %rax
+ ret
- subl $VEC_SIZE, %esi
- ja L(last_vec_2x_aligned)
+ .p2align 4,, 8
+L(ret_vec_x3):
+ bsrl %ecx, %ecx
+ leaq -(VEC_SIZE * 4)(%rcx, %rax), %rax
+ ret
- /* Check the last VEC. */
- vpcmpb $0, (%rdi), %YMMMATCH, %k1
- kmovd %k1, %eax
+ .p2align 4,, 8
+L(more_4x_vec):
+ testl %ecx, %ecx
+ jnz L(ret_vec_x2)
- /* Remove the leading and trailing bytes. */
- sarl %cl, %eax
- movl %edx, %ecx
+ vpcmpb $0, -(VEC_SIZE * 4)(%rax), %VECMATCH, %k0
+ kmovd %k0, %ecx
- movl $1, %edx
- sall %cl, %edx
- subl $1, %edx
+ testl %ecx, %ecx
+ jnz L(ret_vec_x3)
- andl %edx, %eax
- testl %eax, %eax
- jz L(zero)
+ /* Check if near end before re-aligning (otherwise might do an
+ unnecessary loop iteration). */
+ addq $-(VEC_SIZE * 4), %rax
+ cmpq $(VEC_SIZE * 4), %rdx
+ jbe L(last_4x_vec)
- bsrl %eax, %eax
- addq %rdi, %rax
- addq %r8, %rax
- ret
+ decq %rax
+ andq $-(VEC_SIZE * 4), %rax
+ movq %rdi, %rdx
+ /* Get endptr for loop in rdx. NB: Can't just do while rax > rdi because
+ lengths that overflow can be valid and break the comparison. */
+ andq $-(VEC_SIZE * 4), %rdx
.p2align 4
-L(last_vec_2x_aligned):
- movl %esi, %ecx
-
- /* Check the last VEC. */
- vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k1
+L(loop_4x_vec):
+ /* Store 1 where not-equal and 0 where equal in k1 (used to mask later
+ on). */
+ vpcmpb $4, (VEC_SIZE * 3)(%rax), %VECMATCH, %k1
+
+ /* VEC(2/3) will have zero-byte where we found a CHAR. */
+ vpxorq (VEC_SIZE * 2)(%rax), %VECMATCH, %VEC(2)
+ vpxorq (VEC_SIZE * 1)(%rax), %VECMATCH, %VEC(3)
+ vpcmpb $0, (VEC_SIZE * 0)(%rax), %VECMATCH, %k4
+
+ /* Combine VEC(2/3) with min and maskz with k1 (k1 has zero bit where
+ CHAR is found and VEC(2/3) have zero-byte where CHAR is found). */
+ vpminub %VEC(2), %VEC(3), %VEC(3){%k1}{z}
+ vptestnmb %VEC(3), %VEC(3), %k2
+
+ /* Any 1s and we found CHAR. */
+ kortestd %k2, %k4
+ jnz L(loop_end)
+
+ addq $-(VEC_SIZE * 4), %rax
+ cmpq %rdx, %rax
+ jne L(loop_4x_vec)
+
+ /* Need to re-adjust rdx / rax for L(last_4x_vec). */
+ subq $-(VEC_SIZE * 4), %rdx
+ movq %rdx, %rax
+ subl %edi, %edx
+L(last_4x_vec):
+
+ /* Used no matter what. */
+ vpcmpb $0, (VEC_SIZE * -1)(%rax), %VECMATCH, %k0
+ kmovd %k0, %ecx
- movl $1, %edx
- sall %cl, %edx
- subl $1, %edx
+ cmpl $(VEC_SIZE * 2), %edx
+ jbe L(last_2x_vec)
- kmovd %k1, %eax
+ testl %ecx, %ecx
+ jnz L(ret_vec_x0_dec)
- /* Remove the trailing bytes. */
- andl %edx, %eax
- testl %eax, %eax
- jnz L(last_vec_x1)
+ vpcmpb $0, (VEC_SIZE * -2)(%rax), %VECMATCH, %k0
+ kmovd %k0, %ecx
- /* Check the second last VEC. */
- vpcmpb $0, (%rdi), %YMMMATCH, %k1
+ testl %ecx, %ecx
+ jnz L(ret_vec_x1)
- movl %r8d, %ecx
+ /* Used no matter what. */
+ vpcmpb $0, (VEC_SIZE * -3)(%rax), %VECMATCH, %k0
+ kmovd %k0, %ecx
- kmovd %k1, %eax
+ cmpl $(VEC_SIZE * 3), %edx
+ ja L(last_vec)
- /* Remove the leading bytes. Must use unsigned right shift for
- bsrl below. */
- shrl %cl, %eax
- testl %eax, %eax
- jz L(zero)
+ lzcntl %ecx, %ecx
+ subq $(VEC_SIZE * 2 + 1), %rax
+ subq %rcx, %rax
+ cmpq %rax, %rdi
+ jbe L(ret_1)
+ xorl %eax, %eax
+L(ret_1):
+ ret
- bsrl %eax, %eax
- addq %rdi, %rax
- addq %r8, %rax
+ .p2align 4,, 6
+L(loop_end):
+ kmovd %k1, %ecx
+ notl %ecx
+ testl %ecx, %ecx
+ jnz L(ret_vec_x0_end)
+
+ vptestnmb %VEC(2), %VEC(2), %k0
+ kmovd %k0, %ecx
+ testl %ecx, %ecx
+ jnz L(ret_vec_x1_end)
+
+ kmovd %k2, %ecx
+ kmovd %k4, %esi
+ /* Combine last 2 VEC matches. If ecx (VEC3) is zero (no CHAR in VEC3)
+ then it won't affect the result in esi (VEC4). If ecx is non-zero
+ then CHAR in VEC3 and bsrq will use that position. */
+ salq $32, %rcx
+ orq %rsi, %rcx
+ bsrq %rcx, %rcx
+ addq %rcx, %rax
+ ret
+ .p2align 4,, 4
+L(ret_vec_x0_end):
+ addq $(VEC_SIZE), %rax
+L(ret_vec_x1_end):
+ bsrl %ecx, %ecx
+ leaq (VEC_SIZE * 2)(%rax, %rcx), %rax
ret
-END (__memrchr_evex)
+
+END(MEMRCHR)
#endif
#if IS_IN (libc)
+# define USE_WITH_AVX2 1
+
# define VEC_SIZE 32
+# define MOV_SIZE 4
+# define RET_SIZE 4
+
# define VEC(i) ymm##i
-# define VMOVU vmovdqu
-# define VMOVA vmovdqa
-# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
- vmovd d, %xmm0; \
- movq r, %rax; \
- vpbroadcastb %xmm0, %ymm0
+# define VMOVU vmovdqu
+# define VMOVA vmovdqa
-# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
vmovd d, %xmm0; \
- movq r, %rax; \
- vpbroadcastd %xmm0, %ymm0
+ movq r, %rax;
+
+# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
+ MEMSET_SET_VEC0_AND_SET_RETURN(d, r)
+
+# define MEMSET_VDUP_TO_VEC0_HIGH() vpbroadcastb %xmm0, %ymm0
+# define MEMSET_VDUP_TO_VEC0_LOW() vpbroadcastb %xmm0, %xmm0
+
+# define WMEMSET_VDUP_TO_VEC0_HIGH() vpbroadcastd %xmm0, %ymm0
+# define WMEMSET_VDUP_TO_VEC0_LOW() vpbroadcastd %xmm0, %xmm0
# ifndef SECTION
# define SECTION(p) p##.avx
# define WMEMSET_SYMBOL(p,s) p##_avx2_##s
# endif
+# define USE_XMM_LESS_VEC
# include "memset-vec-unaligned-erms.S"
#endif
#if IS_IN (libc)
+# define USE_WITH_AVX512 1
+
# define VEC_SIZE 64
+# define MOV_SIZE 6
+# define RET_SIZE 1
+
# define XMM0 xmm16
# define YMM0 ymm16
# define VEC0 zmm16
# define VEC(i) VEC##i
-# define VMOVU vmovdqu64
-# define VMOVA vmovdqa64
+
+# define VMOVU vmovdqu64
+# define VMOVA vmovdqa64
+
# define VZEROUPPER
-# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
- movq r, %rax; \
- vpbroadcastb d, %VEC0
+# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
+ vpbroadcastb d, %VEC0; \
+ movq r, %rax
+
+# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
+ vpbroadcastd d, %VEC0; \
+ movq r, %rax
+
+# define MEMSET_VDUP_TO_VEC0_HIGH()
+# define MEMSET_VDUP_TO_VEC0_LOW()
-# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
- movq r, %rax; \
- vpbroadcastd d, %VEC0
+# define WMEMSET_VDUP_TO_VEC0_HIGH()
+# define WMEMSET_VDUP_TO_VEC0_LOW()
# define SECTION(p) p##.evex512
# define MEMSET_SYMBOL(p,s) p##_avx512_##s
--- /dev/null
+/* memset implemented with rep stosb
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+
+#include <sysdep.h>
+
+#if defined USE_MULTIARCH && IS_IN (libc)
+ .text
+ENTRY (__memset_chk_erms)
+ cmp %RDX_LP, %RCX_LP
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (__memset_chk_erms)
+
+/* Only used to measure performance of REP STOSB. */
+ENTRY (__memset_erms)
+ /* Skip rep stosb for zero length; rdi is still returned in rax. */
+ test %RDX_LP, %RDX_LP
+ jz L(stosb_return_zero)
+ mov %RDX_LP, %RCX_LP
+ movzbl %sil, %eax
+ mov %RDI_LP, %RDX_LP
+ rep stosb
+ mov %RDX_LP, %RAX_LP
+ ret
+L(stosb_return_zero):
+ movq %rdi, %rax
+ ret
+END (__memset_erms)
+#endif
#if IS_IN (libc)
+# define USE_WITH_EVEX 1
+
# define VEC_SIZE 32
+# define MOV_SIZE 6
+# define RET_SIZE 1
+
# define XMM0 xmm16
# define YMM0 ymm16
# define VEC0 ymm16
# define VEC(i) VEC##i
-# define VMOVU vmovdqu64
-# define VMOVA vmovdqa64
+
+# define VMOVU vmovdqu64
+# define VMOVA vmovdqa64
+
# define VZEROUPPER
-# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
- movq r, %rax; \
- vpbroadcastb d, %VEC0
+# define MEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
+ vpbroadcastb d, %VEC0; \
+ movq r, %rax
+
+# define WMEMSET_SET_VEC0_AND_SET_RETURN(d, r) \
+ vpbroadcastd d, %VEC0; \
+ movq r, %rax
+
+# define MEMSET_VDUP_TO_VEC0_HIGH()
+# define MEMSET_VDUP_TO_VEC0_LOW()
-# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
- movq r, %rax; \
- vpbroadcastd d, %VEC0
+# define WMEMSET_VDUP_TO_VEC0_HIGH()
+# define WMEMSET_VDUP_TO_VEC0_LOW()
# define SECTION(p) p##.evex
# define MEMSET_SYMBOL(p,s) p##_evex_##s
# endif
# undef weak_alias
-# define weak_alias(original, alias) \
- .weak bzero; bzero = __bzero
-
+# define weak_alias(original, alias)
# undef strong_alias
# define strong_alias(ignored1, ignored2)
#endif
-/* memset/bzero with unaligned store and rep stosb
- Copyright (C) 2016-2021 Free Software Foundation, Inc.
+/* memset with unaligned store and rep stosb
+ Copyright (C) 2016-2022 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
#ifndef MOVQ
# if VEC_SIZE > 16
# define MOVQ vmovq
+# define MOVD vmovd
# else
# define MOVQ movq
+# define MOVD movd
# endif
#endif
+#if VEC_SIZE == 64
+# define LOOP_4X_OFFSET (VEC_SIZE * 4)
+#else
+# define LOOP_4X_OFFSET (0)
+#endif
+
+#if defined USE_WITH_EVEX || defined USE_WITH_AVX512
+# define END_REG rcx
+# define LOOP_REG rdi
+# define LESS_VEC_REG rax
+#else
+# define END_REG rdi
+# define LOOP_REG rdx
+# define LESS_VEC_REG rdi
+#endif
+
+#ifdef USE_XMM_LESS_VEC
+# define XMM_SMALL 1
+#else
+# define XMM_SMALL 0
+#endif
+
+#ifdef USE_LESS_VEC_MASK_STORE
+# define SET_REG64 rcx
+# define SET_REG32 ecx
+# define SET_REG16 cx
+# define SET_REG8 cl
+#else
+# define SET_REG64 rsi
+# define SET_REG32 esi
+# define SET_REG16 si
+# define SET_REG8 sil
+#endif
+
#define PAGE_SIZE 4096
+/* Macro to calculate size of small memset block for aligning
+ purposes. */
+#define SMALL_MEMSET_ALIGN(mov_sz, ret_sz) (2 * (mov_sz) + (ret_sz) + 1)
+
+
#ifndef SECTION
# error SECTION is not defined!
#endif
- .section SECTION(.text),"ax",@progbits
-#if VEC_SIZE == 16 && IS_IN (libc)
-ENTRY (__bzero)
- mov %RDI_LP, %RAX_LP /* Set return value. */
- mov %RSI_LP, %RDX_LP /* Set n. */
- pxor %XMM0, %XMM0
- jmp L(entry_from_bzero)
-END (__bzero)
-weak_alias (__bzero, bzero)
-#endif
-
+ .section SECTION(.text), "ax", @progbits
#if IS_IN (libc)
# if defined SHARED
ENTRY_CHK (WMEMSET_CHK_SYMBOL (__wmemset_chk, unaligned))
ENTRY (WMEMSET_SYMBOL (__wmemset, unaligned))
shl $2, %RDX_LP
- WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
- jmp L(entry_from_bzero)
+ WMEMSET_SET_VEC0_AND_SET_RETURN (%esi, %rdi)
+ WMEMSET_VDUP_TO_VEC0_LOW()
+ cmpq $VEC_SIZE, %rdx
+ jb L(less_vec_from_wmemset)
+ WMEMSET_VDUP_TO_VEC0_HIGH()
+ jmp L(entry_from_wmemset)
END (WMEMSET_SYMBOL (__wmemset, unaligned))
#endif
#endif
ENTRY (MEMSET_SYMBOL (__memset, unaligned))
- MEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
+ MEMSET_SET_VEC0_AND_SET_RETURN (%esi, %rdi)
# ifdef __ILP32__
/* Clear the upper 32 bits. */
mov %edx, %edx
# endif
-L(entry_from_bzero):
cmpq $VEC_SIZE, %rdx
jb L(less_vec)
+ MEMSET_VDUP_TO_VEC0_HIGH()
+L(entry_from_wmemset):
cmpq $(VEC_SIZE * 2), %rdx
ja L(more_2x_vec)
/* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */
#if defined USE_MULTIARCH && IS_IN (libc)
END (MEMSET_SYMBOL (__memset, unaligned))
-# if VEC_SIZE == 16
-ENTRY (__memset_chk_erms)
- cmp %RDX_LP, %RCX_LP
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END (__memset_chk_erms)
-
-/* Only used to measure performance of REP STOSB. */
-ENTRY (__memset_erms)
- /* Skip zero length. */
- test %RDX_LP, %RDX_LP
- jnz L(stosb)
- movq %rdi, %rax
- ret
-# else
-/* Provide a hidden symbol to debugger. */
- .hidden MEMSET_SYMBOL (__memset, erms)
-ENTRY (MEMSET_SYMBOL (__memset, erms))
-# endif
-L(stosb):
- mov %RDX_LP, %RCX_LP
- movzbl %sil, %eax
- mov %RDI_LP, %RDX_LP
- rep stosb
- mov %RDX_LP, %RAX_LP
- VZEROUPPER_RETURN
-# if VEC_SIZE == 16
-END (__memset_erms)
-# else
-END (MEMSET_SYMBOL (__memset, erms))
-# endif
-
# if defined SHARED && IS_IN (libc)
ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms))
cmp %RDX_LP, %RCX_LP
END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms))
# endif
-ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms))
- MEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
+ENTRY_P2ALIGN (MEMSET_SYMBOL (__memset, unaligned_erms), 6)
+ MEMSET_SET_VEC0_AND_SET_RETURN (%esi, %rdi)
# ifdef __ILP32__
/* Clear the upper 32 bits. */
mov %edx, %edx
# endif
cmp $VEC_SIZE, %RDX_LP
jb L(less_vec)
+ MEMSET_VDUP_TO_VEC0_HIGH ()
cmp $(VEC_SIZE * 2), %RDX_LP
ja L(stosb_more_2x_vec)
/* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */
- VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx)
VMOVU %VEC(0), (%rdi)
+ VMOVU %VEC(0), (VEC_SIZE * -1)(%rdi, %rdx)
VZEROUPPER_RETURN
-
- .p2align 4
-L(stosb_more_2x_vec):
- cmp __x86_rep_stosb_threshold(%rip), %RDX_LP
- ja L(stosb)
-#else
- .p2align 4
#endif
-L(more_2x_vec):
- /* Stores to first 2x VEC before cmp as any path forward will
- require it. */
- VMOVU %VEC(0), (%rdi)
- VMOVU %VEC(0), VEC_SIZE(%rdi)
- cmpq $(VEC_SIZE * 4), %rdx
- ja L(loop_start)
- VMOVU %VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx)
- VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx)
-L(return):
-#if VEC_SIZE > 16
- ZERO_UPPER_VEC_REGISTERS_RETURN
+
+ .p2align 4,, 4
+L(last_2x_vec):
+#ifdef USE_LESS_VEC_MASK_STORE
+ VMOVU %VEC(0), (VEC_SIZE * -2)(%rdi, %rdx)
+ VMOVU %VEC(0), (VEC_SIZE * -1)(%rdi, %rdx)
#else
- ret
+ VMOVU %VEC(0), (VEC_SIZE * -2)(%rdi)
+ VMOVU %VEC(0), (VEC_SIZE * -1)(%rdi)
#endif
+ VZEROUPPER_RETURN
-L(loop_start):
- VMOVU %VEC(0), (VEC_SIZE * 2)(%rdi)
- VMOVU %VEC(0), (VEC_SIZE * 3)(%rdi)
- cmpq $(VEC_SIZE * 8), %rdx
- jbe L(loop_end)
- andq $-(VEC_SIZE * 2), %rdi
- subq $-(VEC_SIZE * 4), %rdi
- leaq -(VEC_SIZE * 4)(%rax, %rdx), %rcx
- .p2align 4
-L(loop):
- VMOVA %VEC(0), (%rdi)
- VMOVA %VEC(0), VEC_SIZE(%rdi)
- VMOVA %VEC(0), (VEC_SIZE * 2)(%rdi)
- VMOVA %VEC(0), (VEC_SIZE * 3)(%rdi)
- subq $-(VEC_SIZE * 4), %rdi
- cmpq %rcx, %rdi
- jb L(loop)
-L(loop_end):
- /* NB: rax is set as ptr in MEMSET_VDUP_TO_VEC0_AND_SET_RETURN.
- rdx as length is also unchanged. */
- VMOVU %VEC(0), -(VEC_SIZE * 4)(%rax, %rdx)
- VMOVU %VEC(0), -(VEC_SIZE * 3)(%rax, %rdx)
- VMOVU %VEC(0), -(VEC_SIZE * 2)(%rax, %rdx)
- VMOVU %VEC(0), -VEC_SIZE(%rax, %rdx)
- VZEROUPPER_SHORT_RETURN
-
- .p2align 4
+ /* If have AVX512 mask instructions put L(less_vec) close to
+ entry as it doesn't take much space and is likely a hot target.
+ */
+#ifdef USE_LESS_VEC_MASK_STORE
+ .p2align 4,, 10
L(less_vec):
+L(less_vec_from_wmemset):
/* Less than 1 VEC. */
# if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
# error Unsupported VEC_SIZE!
# endif
-# ifdef USE_LESS_VEC_MASK_STORE
/* Clear high bits from edi. Only keeping bits relevant to page
cross check. Note that we are using rax which is set in
- MEMSET_VDUP_TO_VEC0_AND_SET_RETURN as ptr from here on out.
- */
+ MEMSET_SET_VEC0_AND_SET_RETURN as ptr from here on out. */
andl $(PAGE_SIZE - 1), %edi
- /* Check if VEC_SIZE store cross page. Mask stores suffer serious
- performance degradation when it has to fault supress. */
+ /* Check if a VEC_SIZE store crosses a page. Mask stores suffer
+ serious performance degradation when they have to fault suppress.
+ */
cmpl $(PAGE_SIZE - VEC_SIZE), %edi
+ /* This is generally considered a cold target. */
ja L(cross_page)
# if VEC_SIZE > 32
movq $-1, %rcx
bzhil %edx, %ecx, %ecx
kmovd %ecx, %k1
# endif
- vmovdqu8 %VEC(0), (%rax) {%k1}
+ vmovdqu8 %VEC(0), (%rax){%k1}
VZEROUPPER_RETURN
- .p2align 4
-L(cross_page):
-# endif
-# if VEC_SIZE > 32
- cmpb $32, %dl
- jae L(between_32_63)
+# if defined USE_MULTIARCH && IS_IN (libc)
+ /* Include L(stosb_local) here if including L(less_vec) between
+ L(stosb_more_2x_vec) and ENTRY. This is to cache align the
+ L(stosb_more_2x_vec) target. */
+ .p2align 4,, 10
+L(stosb_local):
+ movzbl %sil, %eax
+ mov %RDX_LP, %RCX_LP
+ mov %RDI_LP, %RDX_LP
+ rep stosb
+ mov %RDX_LP, %RAX_LP
+ VZEROUPPER_RETURN
# endif
-# if VEC_SIZE > 16
- cmpb $16, %dl
- jae L(between_16_31)
+#endif
+
+#if defined USE_MULTIARCH && IS_IN (libc)
+ .p2align 4
+L(stosb_more_2x_vec):
+ cmp __x86_rep_stosb_threshold(%rip), %RDX_LP
+ ja L(stosb_local)
+#endif
+ /* Fallthrough goes to L(loop). Tests for memset (2x, 4x]
+ and (4x, 8x] jump to target. */
+L(more_2x_vec):
+ /* Store next 2x vec regardless. */
+ VMOVU %VEC(0), (%rdi)
+ VMOVU %VEC(0), (VEC_SIZE * 1)(%rdi)
+
+
+ /* Two different methods of setting up pointers / compare. The two
+ methods are based on the fact that EVEX/AVX512 mov instructions take
+ more bytes than AVX2/SSE2 mov instructions, and that EVEX/AVX512
+ machines also have fast LEA_BID. Both set up END_REG to avoid complex
+ address mode. For EVEX/AVX512 this saves code size and keeps a few
+ targets in one fetch block. For AVX2/SSE2 this helps prevent AGU
+ bottlenecks. */
+#if !(defined USE_WITH_EVEX || defined USE_WITH_AVX512)
+ /* If AVX2/SSE2 compute END_REG (rdi) with ALU. */
+ addq %rdx, %END_REG
+#endif
+
+ cmpq $(VEC_SIZE * 4), %rdx
+ jbe L(last_2x_vec)
+
+
+#if defined USE_WITH_EVEX || defined USE_WITH_AVX512
+ /* If EVEX/AVX512 compute END_REG - (VEC_SIZE * 4 + LOOP_4X_OFFSET) with
+ LEA_BID. */
+
+ /* END_REG is rcx for EVEX/AVX512. */
+ leaq -(VEC_SIZE * 4 + LOOP_4X_OFFSET)(%rdi, %rdx), %END_REG
+#endif
+
+ /* Store next 2x vec regardless. */
+ VMOVU %VEC(0), (VEC_SIZE * 2)(%rax)
+ VMOVU %VEC(0), (VEC_SIZE * 3)(%rax)
+
+
+#if defined USE_WITH_EVEX || defined USE_WITH_AVX512
+ /* If LOOP_4X_OFFSET don't readjust LOOP_REG (rdi), just add
+ extra offset to addresses in loop. Used for AVX512 to save space
+ as no way to get (VEC_SIZE * 4) in imm8. */
+# if LOOP_4X_OFFSET == 0
+ subq $-(VEC_SIZE * 4), %LOOP_REG
# endif
- MOVQ %XMM0, %rcx
- cmpb $8, %dl
- jae L(between_8_15)
- cmpb $4, %dl
- jae L(between_4_7)
- cmpb $1, %dl
- ja L(between_2_3)
- jb 1f
- movb %cl, (%rax)
-1:
+ /* Avoid imm32 compare here to save code size. */
+ cmpq %rdi, %rcx
+#else
+ addq $-(VEC_SIZE * 4), %END_REG
+ cmpq $(VEC_SIZE * 8), %rdx
+#endif
+ jbe L(last_4x_vec)
+#if !(defined USE_WITH_EVEX || defined USE_WITH_AVX512)
+ /* Set LOOP_REG (rdx). */
+ leaq (VEC_SIZE * 4)(%rax), %LOOP_REG
+#endif
+ /* Align dst for loop. */
+ andq $(VEC_SIZE * -2), %LOOP_REG
+ .p2align 4
+L(loop):
+ VMOVA %VEC(0), LOOP_4X_OFFSET(%LOOP_REG)
+ VMOVA %VEC(0), (VEC_SIZE + LOOP_4X_OFFSET)(%LOOP_REG)
+ VMOVA %VEC(0), (VEC_SIZE * 2 + LOOP_4X_OFFSET)(%LOOP_REG)
+ VMOVA %VEC(0), (VEC_SIZE * 3 + LOOP_4X_OFFSET)(%LOOP_REG)
+ subq $-(VEC_SIZE * 4), %LOOP_REG
+ cmpq %END_REG, %LOOP_REG
+ jb L(loop)
+ .p2align 4,, MOV_SIZE
+L(last_4x_vec):
+ VMOVU %VEC(0), LOOP_4X_OFFSET(%END_REG)
+ VMOVU %VEC(0), (VEC_SIZE + LOOP_4X_OFFSET)(%END_REG)
+ VMOVU %VEC(0), (VEC_SIZE * 2 + LOOP_4X_OFFSET)(%END_REG)
+ VMOVU %VEC(0), (VEC_SIZE * 3 + LOOP_4X_OFFSET)(%END_REG)
+L(return):
+#if VEC_SIZE > 16
+ ZERO_UPPER_VEC_REGISTERS_RETURN
+#else
+ ret
+#endif
+
+ .p2align 4,, 10
+#ifndef USE_LESS_VEC_MASK_STORE
+# if defined USE_MULTIARCH && IS_IN (libc)
+ /* If no USE_LESS_VEC_MASK put L(stosb_local) here. Will be in
+ range for 2-byte jump encoding. */
+L(stosb_local):
+ movzbl %sil, %eax
+ mov %RDX_LP, %RCX_LP
+ mov %RDI_LP, %RDX_LP
+ rep stosb
+ mov %RDX_LP, %RAX_LP
VZEROUPPER_RETURN
-# if VEC_SIZE > 32
+# endif
+ /* Define L(less_vec) only if not otherwise defined. */
+ .p2align 4
+L(less_vec):
+ /* Broadcast esi to partial register (i.e. VEC_SIZE == 32 broadcast to
+ xmm). This only does anything for AVX2. */
+ MEMSET_VDUP_TO_VEC0_LOW ()
+L(less_vec_from_wmemset):
+#endif
+L(cross_page):
+#if VEC_SIZE > 32
+ cmpl $32, %edx
+ jge L(between_32_63)
+#endif
+#if VEC_SIZE > 16
+ cmpl $16, %edx
+ jge L(between_16_31)
+#endif
+#ifndef USE_XMM_LESS_VEC
+ MOVQ %XMM0, %SET_REG64
+#endif
+ cmpl $8, %edx
+ jge L(between_8_15)
+ cmpl $4, %edx
+ jge L(between_4_7)
+ cmpl $1, %edx
+ jg L(between_2_3)
+ jl L(between_0_0)
+ movb %SET_REG8, (%LESS_VEC_REG)
+L(between_0_0):
+ ret
+
+ /* Align small targets only if not doing so would cross a fetch line.
+ */
+#if VEC_SIZE > 32
+ .p2align 4,, SMALL_MEMSET_ALIGN(MOV_SIZE, RET_SIZE)
/* From 32 to 63. No branch when size == 32. */
L(between_32_63):
- VMOVU %YMM0, -32(%rax,%rdx)
- VMOVU %YMM0, (%rax)
+ VMOVU %YMM0, (%LESS_VEC_REG)
+ VMOVU %YMM0, -32(%LESS_VEC_REG, %rdx)
VZEROUPPER_RETURN
-# endif
-# if VEC_SIZE > 16
- /* From 16 to 31. No branch when size == 16. */
+#endif
+
+#if VEC_SIZE >= 32
+ .p2align 4,, SMALL_MEMSET_ALIGN(MOV_SIZE, 1)
L(between_16_31):
- VMOVU %XMM0, -16(%rax,%rdx)
- VMOVU %XMM0, (%rax)
- VZEROUPPER_RETURN
-# endif
- /* From 8 to 15. No branch when size == 8. */
+ /* From 16 to 31. No branch when size == 16. */
+ VMOVU %XMM0, (%LESS_VEC_REG)
+ VMOVU %XMM0, -16(%LESS_VEC_REG, %rdx)
+ ret
+#endif
+
+ /* Move size is 3 for SSE2, EVEX, and AVX512. Move size is 4 for AVX2.
+ */
+ .p2align 4,, SMALL_MEMSET_ALIGN(3 + XMM_SMALL, 1)
L(between_8_15):
- movq %rcx, -8(%rax,%rdx)
- movq %rcx, (%rax)
- VZEROUPPER_RETURN
+ /* From 8 to 15. No branch when size == 8. */
+#ifdef USE_XMM_LESS_VEC
+ MOVQ %XMM0, (%rdi)
+ MOVQ %XMM0, -8(%rdi, %rdx)
+#else
+ movq %SET_REG64, (%LESS_VEC_REG)
+ movq %SET_REG64, -8(%LESS_VEC_REG, %rdx)
+#endif
+ ret
+
+ /* Move size is 2 for SSE2, EVEX, and AVX512. Move size is 4 for AVX2.
+ */
+ .p2align 4,, SMALL_MEMSET_ALIGN(2 << XMM_SMALL, 1)
L(between_4_7):
/* From 4 to 7. No branch when size == 4. */
- movl %ecx, -4(%rax,%rdx)
- movl %ecx, (%rax)
- VZEROUPPER_RETURN
+#ifdef USE_XMM_LESS_VEC
+ MOVD %XMM0, (%rdi)
+ MOVD %XMM0, -4(%rdi, %rdx)
+#else
+ movl %SET_REG32, (%LESS_VEC_REG)
+ movl %SET_REG32, -4(%LESS_VEC_REG, %rdx)
+#endif
+ ret
+
+ /* 4 * XMM_SMALL for the third mov for AVX2. */
+ .p2align 4,, 4 * XMM_SMALL + SMALL_MEMSET_ALIGN(3, 1)
L(between_2_3):
/* From 2 to 3. No branch when size == 2. */
- movw %cx, -2(%rax,%rdx)
- movw %cx, (%rax)
- VZEROUPPER_RETURN
+#ifdef USE_XMM_LESS_VEC
+ movb %SET_REG8, (%rdi)
+ movb %SET_REG8, 1(%rdi)
+ movb %SET_REG8, -1(%rdi, %rdx)
+#else
+ movw %SET_REG16, (%LESS_VEC_REG)
+ movb %SET_REG8, -1(%LESS_VEC_REG, %rdx)
+#endif
+ ret
END (MEMSET_SYMBOL (__memset, unaligned_erms))
--- /dev/null
+/* Common config for SSE2 VECs
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _SSE2_VECS_H
+#define _SSE2_VECS_H 1
+
+#ifdef VEC_SIZE
+# error "Multiple VEC configs included!"
+#endif
+
+#define VEC_SIZE 16
+#include "vec-macros.h"
+
+#define USE_WITH_SSE2 1
+#define SECTION(p) p
+
+/* 3-byte mov instructions with SSE2. */
+#define MOV_SIZE 3
+/* No vzeroupper needed. */
+#define RET_SIZE 1
+#define VZEROUPPER
+
+#define VMOVU movups
+#define VMOVA movaps
+#define VMOVNT movntdq
+
+#define VEC_xmm VEC_any_xmm
+#define VEC VEC_any_xmm
+
+
+#endif
+++ /dev/null
-/* strcasecmp_l optimized with AVX.
- Copyright (C) 2017-2021 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#define STRCMP_SSE42 __strcasecmp_l_avx
-#define USE_AVX 1
-#define USE_AS_STRCASECMP_L
-#include "strcmp-sse42.S"
--- /dev/null
+#ifndef STRCMP
+# define STRCMP __strcasecmp_l_avx2_rtm
+#endif
+
+#define _GLABEL(x) x ## _rtm
+#define GLABEL(x) _GLABEL(x)
+
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
+ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
+
+#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
+
+#define SECTION(p) p##.avx.rtm
+
+#include "strcasecmp_l-avx2.S"
--- /dev/null
+/* strcasecmp_l optimized with AVX2.
+ Copyright (C) 2017-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef STRCMP
+# define STRCMP __strcasecmp_l_avx2
+#endif
+#define USE_AS_STRCASECMP_L
+#include "strcmp-avx2.S"
--- /dev/null
+/* strcasecmp_l optimized with EVEX.
+ Copyright (C) 2017-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef STRCMP
+# define STRCMP __strcasecmp_l_evex
+#endif
+#define USE_AS_STRCASECMP_L
+#include "strcmp-evex.S"
# define PAGE_SIZE 4096
.section SECTION(.text),"ax",@progbits
-ENTRY (STRCHR)
+ENTRY_P2ALIGN (STRCHR, 5)
/* Broadcast CHAR to YMM0. */
vmovd %esi, %xmm0
movl %edi, %eax
andl $(PAGE_SIZE - 1), %eax
VPBROADCAST %xmm0, %ymm0
- vpxor %xmm9, %xmm9, %xmm9
+ vpxor %xmm1, %xmm1, %xmm1
/* Check if we cross page boundary with one vector load. */
cmpl $(PAGE_SIZE - VEC_SIZE), %eax
/* Check the first VEC_SIZE bytes. Search for both CHAR and the
null byte. */
- vmovdqu (%rdi), %ymm8
- VPCMPEQ %ymm8, %ymm0, %ymm1
- VPCMPEQ %ymm8, %ymm9, %ymm2
- vpor %ymm1, %ymm2, %ymm1
- vpmovmskb %ymm1, %eax
+ vmovdqu (%rdi), %ymm2
+ VPCMPEQ %ymm2, %ymm0, %ymm3
+ VPCMPEQ %ymm2, %ymm1, %ymm2
+ vpor %ymm3, %ymm2, %ymm3
+ vpmovmskb %ymm3, %eax
testl %eax, %eax
jz L(aligned_more)
tzcntl %eax, %eax
# ifndef USE_AS_STRCHRNUL
- /* Found CHAR or the null byte. */
- cmp (%rdi, %rax), %CHAR_REG
- jne L(zero)
-# endif
- addq %rdi, %rax
- VZEROUPPER_RETURN
-
- /* .p2align 5 helps keep performance more consistent if ENTRY()
- alignment % 32 was either 16 or 0. As well this makes the
- alignment % 32 of the loop_4x_vec fixed which makes tuning it
- easier. */
- .p2align 5
-L(first_vec_x4):
- tzcntl %eax, %eax
- addq $(VEC_SIZE * 3 + 1), %rdi
-# ifndef USE_AS_STRCHRNUL
- /* Found CHAR or the null byte. */
+ /* Found CHAR or the null byte. */
cmp (%rdi, %rax), %CHAR_REG
+ /* NB: Use a branch instead of cmovcc here. The expectation is
+ that with strchr the user will branch based on input being
+ null. Since this branch will be 100% predictive of the user
+ branch a branch miss here should save what otherwise would
+ be a branch miss in the user code. Otherwise using a branch 1)
+ saves code size and 2) is faster in highly predictable
+ environments. */
jne L(zero)
# endif
addq %rdi, %rax
- VZEROUPPER_RETURN
+L(return_vzeroupper):
+ ZERO_UPPER_VEC_REGISTERS_RETURN
# ifndef USE_AS_STRCHRNUL
L(zero):
.p2align 4
L(first_vec_x1):
- tzcntl %eax, %eax
+ /* Use bsf to save code size. */
+ bsfl %eax, %eax
incq %rdi
# ifndef USE_AS_STRCHRNUL
/* Found CHAR or the null byte. */
addq %rdi, %rax
VZEROUPPER_RETURN
- .p2align 4
+ .p2align 4,, 10
L(first_vec_x2):
- tzcntl %eax, %eax
+ /* Use bsf to save code size. */
+ bsfl %eax, %eax
addq $(VEC_SIZE + 1), %rdi
# ifndef USE_AS_STRCHRNUL
/* Found CHAR or the null byte. */
addq %rdi, %rax
VZEROUPPER_RETURN
- .p2align 4
+ .p2align 4,, 8
L(first_vec_x3):
- tzcntl %eax, %eax
+ /* Use bsf to save code size. */
+ bsfl %eax, %eax
addq $(VEC_SIZE * 2 + 1), %rdi
# ifndef USE_AS_STRCHRNUL
/* Found CHAR or the null byte. */
addq %rdi, %rax
VZEROUPPER_RETURN
+ .p2align 4,, 10
+L(first_vec_x4):
+ /* Use bsf to save code size. */
+ bsfl %eax, %eax
+ addq $(VEC_SIZE * 3 + 1), %rdi
+# ifndef USE_AS_STRCHRNUL
+ /* Found CHAR or the null byte. */
+ cmp (%rdi, %rax), %CHAR_REG
+ jne L(zero)
+# endif
+ addq %rdi, %rax
+ VZEROUPPER_RETURN
+
+
+
.p2align 4
L(aligned_more):
/* Align data to VEC_SIZE - 1. This is the same number of
L(cross_page_continue):
/* Check the next 4 * VEC_SIZE. Only one VEC_SIZE at a time
since data is only aligned to VEC_SIZE. */
- vmovdqa 1(%rdi), %ymm8
- VPCMPEQ %ymm8, %ymm0, %ymm1
- VPCMPEQ %ymm8, %ymm9, %ymm2
- vpor %ymm1, %ymm2, %ymm1
- vpmovmskb %ymm1, %eax
+ vmovdqa 1(%rdi), %ymm2
+ VPCMPEQ %ymm2, %ymm0, %ymm3
+ VPCMPEQ %ymm2, %ymm1, %ymm2
+ vpor %ymm3, %ymm2, %ymm3
+ vpmovmskb %ymm3, %eax
testl %eax, %eax
jnz L(first_vec_x1)
- vmovdqa (VEC_SIZE + 1)(%rdi), %ymm8
- VPCMPEQ %ymm8, %ymm0, %ymm1
- VPCMPEQ %ymm8, %ymm9, %ymm2
- vpor %ymm1, %ymm2, %ymm1
- vpmovmskb %ymm1, %eax
+ vmovdqa (VEC_SIZE + 1)(%rdi), %ymm2
+ VPCMPEQ %ymm2, %ymm0, %ymm3
+ VPCMPEQ %ymm2, %ymm1, %ymm2
+ vpor %ymm3, %ymm2, %ymm3
+ vpmovmskb %ymm3, %eax
testl %eax, %eax
jnz L(first_vec_x2)
- vmovdqa (VEC_SIZE * 2 + 1)(%rdi), %ymm8
- VPCMPEQ %ymm8, %ymm0, %ymm1
- VPCMPEQ %ymm8, %ymm9, %ymm2
- vpor %ymm1, %ymm2, %ymm1
- vpmovmskb %ymm1, %eax
+ vmovdqa (VEC_SIZE * 2 + 1)(%rdi), %ymm2
+ VPCMPEQ %ymm2, %ymm0, %ymm3
+ VPCMPEQ %ymm2, %ymm1, %ymm2
+ vpor %ymm3, %ymm2, %ymm3
+ vpmovmskb %ymm3, %eax
testl %eax, %eax
jnz L(first_vec_x3)
- vmovdqa (VEC_SIZE * 3 + 1)(%rdi), %ymm8
- VPCMPEQ %ymm8, %ymm0, %ymm1
- VPCMPEQ %ymm8, %ymm9, %ymm2
- vpor %ymm1, %ymm2, %ymm1
- vpmovmskb %ymm1, %eax
+ vmovdqa (VEC_SIZE * 3 + 1)(%rdi), %ymm2
+ VPCMPEQ %ymm2, %ymm0, %ymm3
+ VPCMPEQ %ymm2, %ymm1, %ymm2
+ vpor %ymm3, %ymm2, %ymm3
+ vpmovmskb %ymm3, %eax
testl %eax, %eax
jnz L(first_vec_x4)
- /* Align data to VEC_SIZE * 4 - 1. */
- addq $(VEC_SIZE * 4 + 1), %rdi
- andq $-(VEC_SIZE * 4), %rdi
+ /* Align data to VEC_SIZE * 4 - 1. */
+ incq %rdi
+ orq $(VEC_SIZE * 4 - 1), %rdi
.p2align 4
L(loop_4x_vec):
/* Compare 4 * VEC at a time forward. */
- vmovdqa (%rdi), %ymm5
- vmovdqa (VEC_SIZE)(%rdi), %ymm6
- vmovdqa (VEC_SIZE * 2)(%rdi), %ymm7
- vmovdqa (VEC_SIZE * 3)(%rdi), %ymm8
+ vmovdqa 1(%rdi), %ymm6
+ vmovdqa (VEC_SIZE + 1)(%rdi), %ymm7
/* Leaves only CHARS matching esi as 0. */
- vpxor %ymm5, %ymm0, %ymm1
vpxor %ymm6, %ymm0, %ymm2
vpxor %ymm7, %ymm0, %ymm3
- vpxor %ymm8, %ymm0, %ymm4
- VPMINU %ymm1, %ymm5, %ymm1
VPMINU %ymm2, %ymm6, %ymm2
VPMINU %ymm3, %ymm7, %ymm3
- VPMINU %ymm4, %ymm8, %ymm4
- VPMINU %ymm1, %ymm2, %ymm5
- VPMINU %ymm3, %ymm4, %ymm6
+ vmovdqa (VEC_SIZE * 2 + 1)(%rdi), %ymm6
+ vmovdqa (VEC_SIZE * 3 + 1)(%rdi), %ymm7
+
+ vpxor %ymm6, %ymm0, %ymm4
+ vpxor %ymm7, %ymm0, %ymm5
+
+ VPMINU %ymm4, %ymm6, %ymm4
+ VPMINU %ymm5, %ymm7, %ymm5
- VPMINU %ymm5, %ymm6, %ymm6
+ VPMINU %ymm2, %ymm3, %ymm6
+ VPMINU %ymm4, %ymm5, %ymm7
- VPCMPEQ %ymm6, %ymm9, %ymm6
- vpmovmskb %ymm6, %ecx
+ VPMINU %ymm6, %ymm7, %ymm7
+
+ VPCMPEQ %ymm7, %ymm1, %ymm7
+ vpmovmskb %ymm7, %ecx
subq $-(VEC_SIZE * 4), %rdi
testl %ecx, %ecx
jz L(loop_4x_vec)
-
- VPCMPEQ %ymm1, %ymm9, %ymm1
- vpmovmskb %ymm1, %eax
+ VPCMPEQ %ymm2, %ymm1, %ymm2
+ vpmovmskb %ymm2, %eax
testl %eax, %eax
jnz L(last_vec_x0)
- VPCMPEQ %ymm5, %ymm9, %ymm2
- vpmovmskb %ymm2, %eax
+ VPCMPEQ %ymm3, %ymm1, %ymm3
+ vpmovmskb %ymm3, %eax
testl %eax, %eax
jnz L(last_vec_x1)
- VPCMPEQ %ymm3, %ymm9, %ymm3
- vpmovmskb %ymm3, %eax
+ VPCMPEQ %ymm4, %ymm1, %ymm4
+ vpmovmskb %ymm4, %eax
/* rcx has combined result from all 4 VEC. It will only be used
if the first 3 other VEC all did not contain a match. */
salq $32, %rcx
orq %rcx, %rax
tzcntq %rax, %rax
- subq $(VEC_SIZE * 2), %rdi
+ subq $(VEC_SIZE * 2 - 1), %rdi
# ifndef USE_AS_STRCHRNUL
/* Found CHAR or the null byte. */
cmp (%rdi, %rax), %CHAR_REG
VZEROUPPER_RETURN
- .p2align 4
+ .p2align 4,, 10
L(last_vec_x0):
- tzcntl %eax, %eax
- addq $-(VEC_SIZE * 4), %rdi
+ /* Use bsf to save code size. */
+ bsfl %eax, %eax
+ addq $-(VEC_SIZE * 4 - 1), %rdi
# ifndef USE_AS_STRCHRNUL
/* Found CHAR or the null byte. */
cmp (%rdi, %rax), %CHAR_REG
addq %rdi, %rax
VZEROUPPER_RETURN
-# ifndef USE_AS_STRCHRNUL
-L(zero_end):
- xorl %eax, %eax
- VZEROUPPER_RETURN
-# endif
- .p2align 4
+ .p2align 4,, 10
L(last_vec_x1):
tzcntl %eax, %eax
- subq $(VEC_SIZE * 3), %rdi
+ subq $(VEC_SIZE * 3 - 1), %rdi
# ifndef USE_AS_STRCHRNUL
/* Found CHAR or the null byte. */
cmp (%rdi, %rax), %CHAR_REG
addq %rdi, %rax
VZEROUPPER_RETURN
+# ifndef USE_AS_STRCHRNUL
+L(zero_end):
+ xorl %eax, %eax
+ VZEROUPPER_RETURN
+# endif
/* Cold case for crossing page with first load. */
- .p2align 4
+ .p2align 4,, 8
L(cross_page_boundary):
movq %rdi, %rdx
/* Align rdi to VEC_SIZE - 1. */
orq $(VEC_SIZE - 1), %rdi
- vmovdqa -(VEC_SIZE - 1)(%rdi), %ymm8
- VPCMPEQ %ymm8, %ymm0, %ymm1
- VPCMPEQ %ymm8, %ymm9, %ymm2
- vpor %ymm1, %ymm2, %ymm1
- vpmovmskb %ymm1, %eax
+ vmovdqa -(VEC_SIZE - 1)(%rdi), %ymm2
+ VPCMPEQ %ymm2, %ymm0, %ymm3
+ VPCMPEQ %ymm2, %ymm1, %ymm2
+ vpor %ymm3, %ymm2, %ymm3
+ vpmovmskb %ymm3, %eax
/* Remove the leading bytes. sarxl only uses bits [5:0] of COUNT
so no need to manually mod edx. */
sarxl %edx, %eax, %eax
xorl %ecx, %ecx
/* Found CHAR or the null byte. */
cmp (%rdx, %rax), %CHAR_REG
- leaq (%rdx, %rax), %rax
- cmovne %rcx, %rax
-# else
- addq %rdx, %rax
+ jne L(zero_end)
# endif
-L(return_vzeroupper):
- ZERO_UPPER_VEC_REGISTERS_RETURN
+ addq %rdx, %rax
+ VZEROUPPER_RETURN
END (STRCHR)
-# endif
+#endif
# ifdef USE_AS_WCSCHR
# define VPBROADCAST vpbroadcastd
# define VPCMP vpcmpd
+# define VPTESTN vptestnmd
# define VPMINU vpminud
# define CHAR_REG esi
# define SHIFT_REG ecx
# else
# define VPBROADCAST vpbroadcastb
# define VPCMP vpcmpb
+# define VPTESTN vptestnmb
# define VPMINU vpminub
# define CHAR_REG sil
# define SHIFT_REG edx
# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
.section .text.evex,"ax",@progbits
-ENTRY (STRCHR)
+ENTRY_P2ALIGN (STRCHR, 5)
/* Broadcast CHAR to YMM0. */
VPBROADCAST %esi, %YMM0
movl %edi, %eax
andl $(PAGE_SIZE - 1), %eax
- vpxorq %XMMZERO, %XMMZERO, %XMMZERO
-
/* Check if we cross page boundary with one vector load.
Otherwise it is safe to use an unaligned load. */
cmpl $(PAGE_SIZE - VEC_SIZE), %eax
vpxorq %YMM1, %YMM0, %YMM2
VPMINU %YMM2, %YMM1, %YMM2
/* Each bit in K0 represents a CHAR or a null byte in YMM1. */
- VPCMP $0, %YMMZERO, %YMM2, %k0
+ VPTESTN %YMM2, %YMM2, %k0
kmovd %k0, %eax
testl %eax, %eax
jz L(aligned_more)
tzcntl %eax, %eax
+# ifndef USE_AS_STRCHRNUL
+ /* Found CHAR or the null byte. */
+ cmp (%rdi, %rax, CHAR_SIZE), %CHAR_REG
+ /* NB: Use a branch instead of cmovcc here. The expectation is
+ that with strchr the user will branch based on input being
+ null. Since this branch will be 100% predictive of the user
+ branch a branch miss here should save what otherwise would
+ be a branch miss in the user code. Otherwise using a branch 1)
+ saves code size and 2) is faster in highly predictable
+ environments. */
+ jne L(zero)
+# endif
# ifdef USE_AS_WCSCHR
/* NB: Multiply wchar_t count by 4 to get the number of bytes.
*/
leaq (%rdi, %rax, CHAR_SIZE), %rax
# else
addq %rdi, %rax
-# endif
-# ifndef USE_AS_STRCHRNUL
- /* Found CHAR or the null byte. */
- cmp (%rax), %CHAR_REG
- jne L(zero)
# endif
ret
- /* .p2align 5 helps keep performance more consistent if ENTRY()
- alignment % 32 was either 16 or 0. As well this makes the
- alignment % 32 of the loop_4x_vec fixed which makes tuning it
- easier. */
- .p2align 5
-L(first_vec_x3):
- tzcntl %eax, %eax
-# ifndef USE_AS_STRCHRNUL
- /* Found CHAR or the null byte. */
- cmp (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %CHAR_REG
- jne L(zero)
-# endif
- /* NB: Multiply sizeof char type (1 or 4) to get the number of
- bytes. */
- leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
- ret
-# ifndef USE_AS_STRCHRNUL
-L(zero):
- xorl %eax, %eax
- ret
-# endif
- .p2align 4
+ .p2align 4,, 10
L(first_vec_x4):
# ifndef USE_AS_STRCHRNUL
/* Check to see if first match was CHAR (k0) or null (k1). */
leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax
ret
+# ifndef USE_AS_STRCHRNUL
+L(zero):
+ xorl %eax, %eax
+ ret
+# endif
+
+
.p2align 4
L(first_vec_x1):
- tzcntl %eax, %eax
+ /* Use bsf here to save 1 byte, keeping the block in 1x
+ fetch block. eax is guaranteed non-zero. */
+ bsfl %eax, %eax
# ifndef USE_AS_STRCHRNUL
/* Found CHAR or the null byte. */
cmp (VEC_SIZE)(%rdi, %rax, CHAR_SIZE), %CHAR_REG
leaq (VEC_SIZE)(%rdi, %rax, CHAR_SIZE), %rax
ret
- .p2align 4
+ .p2align 4,, 10
L(first_vec_x2):
# ifndef USE_AS_STRCHRNUL
/* Check to see if first match was CHAR (k0) or null (k1). */
leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
ret
+ .p2align 4,, 10
+L(first_vec_x3):
+ /* Use bsf here to save 1 byte, keeping the block in 1x
+ fetch block. eax is guaranteed non-zero. */
+ bsfl %eax, %eax
+# ifndef USE_AS_STRCHRNUL
+ /* Found CHAR or the null byte. */
+ cmp (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %CHAR_REG
+ jne L(zero)
+# endif
+ /* NB: Multiply sizeof char type (1 or 4) to get the number of
+ bytes. */
+ leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
+ ret
+
.p2align 4
L(aligned_more):
/* Align data to VEC_SIZE. */
vpxorq %YMM1, %YMM0, %YMM2
VPMINU %YMM2, %YMM1, %YMM2
/* Each bit in K0 represents a CHAR or a null byte in YMM1. */
- VPCMP $0, %YMMZERO, %YMM2, %k0
+ VPTESTN %YMM2, %YMM2, %k0
kmovd %k0, %eax
testl %eax, %eax
jnz L(first_vec_x1)
/* Each bit in K0 represents a CHAR in YMM1. */
VPCMP $0, %YMM1, %YMM0, %k0
/* Each bit in K1 represents a CHAR in YMM1. */
- VPCMP $0, %YMM1, %YMMZERO, %k1
+ VPTESTN %YMM1, %YMM1, %k1
kortestd %k0, %k1
jnz L(first_vec_x2)
vpxorq %YMM1, %YMM0, %YMM2
VPMINU %YMM2, %YMM1, %YMM2
/* Each bit in K0 represents a CHAR or a null byte in YMM1. */
- VPCMP $0, %YMMZERO, %YMM2, %k0
+ VPTESTN %YMM2, %YMM2, %k0
kmovd %k0, %eax
testl %eax, %eax
jnz L(first_vec_x3)
/* Each bit in K0 represents a CHAR in YMM1. */
VPCMP $0, %YMM1, %YMM0, %k0
/* Each bit in K1 represents a CHAR in YMM1. */
- VPCMP $0, %YMM1, %YMMZERO, %k1
+ VPTESTN %YMM1, %YMM1, %k1
kortestd %k0, %k1
jnz L(first_vec_x4)
VPMINU %YMM3, %YMM4, %YMM4
VPMINU %YMM2, %YMM4, %YMM4{%k4}{z}
- VPCMP $0, %YMMZERO, %YMM4, %k1
+ VPTESTN %YMM4, %YMM4, %k1
kmovd %k1, %ecx
subq $-(VEC_SIZE * 4), %rdi
testl %ecx, %ecx
jz L(loop_4x_vec)
- VPCMP $0, %YMMZERO, %YMM1, %k0
+ VPTESTN %YMM1, %YMM1, %k0
kmovd %k0, %eax
testl %eax, %eax
jnz L(last_vec_x1)
- VPCMP $0, %YMMZERO, %YMM2, %k0
+ VPTESTN %YMM2, %YMM2, %k0
kmovd %k0, %eax
testl %eax, %eax
jnz L(last_vec_x2)
- VPCMP $0, %YMMZERO, %YMM3, %k0
+ VPTESTN %YMM3, %YMM3, %k0
kmovd %k0, %eax
/* Combine YMM3 matches (eax) with YMM4 matches (ecx). */
# ifdef USE_AS_WCSCHR
sall $8, %ecx
orl %ecx, %eax
- tzcntl %eax, %eax
+ bsfl %eax, %eax
# else
salq $32, %rcx
orq %rcx, %rax
- tzcntq %rax, %rax
+ bsfq %rax, %rax
# endif
# ifndef USE_AS_STRCHRNUL
/* Check if match was CHAR or null. */
leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
ret
-# ifndef USE_AS_STRCHRNUL
-L(zero_end):
- xorl %eax, %eax
- ret
+ .p2align 4,, 8
+L(last_vec_x1):
+ bsfl %eax, %eax
+# ifdef USE_AS_WCSCHR
+ /* NB: Multiply wchar_t count by 4 to get the number of bytes.
+ */
+ leaq (%rdi, %rax, CHAR_SIZE), %rax
+# else
+ addq %rdi, %rax
# endif
- .p2align 4
-L(last_vec_x1):
- tzcntl %eax, %eax
# ifndef USE_AS_STRCHRNUL
/* Check if match was null. */
- cmp (%rdi, %rax, CHAR_SIZE), %CHAR_REG
+ cmp (%rax), %CHAR_REG
jne L(zero_end)
# endif
- /* NB: Multiply sizeof char type (1 or 4) to get the number of
- bytes. */
- leaq (%rdi, %rax, CHAR_SIZE), %rax
+
ret
- .p2align 4
+ .p2align 4,, 8
L(last_vec_x2):
- tzcntl %eax, %eax
+ bsfl %eax, %eax
# ifndef USE_AS_STRCHRNUL
/* Check if match was null. */
cmp (VEC_SIZE)(%rdi, %rax, CHAR_SIZE), %CHAR_REG
ret
/* Cold case for crossing page with first load. */
- .p2align 4
+ .p2align 4,, 8
L(cross_page_boundary):
movq %rdi, %rdx
/* Align rdi. */
vpxorq %YMM1, %YMM0, %YMM2
VPMINU %YMM2, %YMM1, %YMM2
/* Each bit in K0 represents a CHAR or a null byte in YMM1. */
- VPCMP $0, %YMMZERO, %YMM2, %k0
+ VPTESTN %YMM2, %YMM2, %k0
kmovd %k0, %eax
- /* Remove the leading bits. */
+ /* Remove the leading bits. */
# ifdef USE_AS_WCSCHR
movl %edx, %SHIFT_REG
/* NB: Divide shift count by 4 since each bit in K1 represent 4
/* If eax is zero continue. */
testl %eax, %eax
jz L(cross_page_continue)
- tzcntl %eax, %eax
-# ifndef USE_AS_STRCHRNUL
- /* Check to see if match was CHAR or null. */
- cmp (%rdx, %rax, CHAR_SIZE), %CHAR_REG
- jne L(zero_end)
-# endif
+ bsfl %eax, %eax
+
# ifdef USE_AS_WCSCHR
/* NB: Multiply wchar_t count by 4 to get the number of
bytes. */
leaq (%rdx, %rax, CHAR_SIZE), %rax
# else
addq %rdx, %rax
+# endif
+# ifndef USE_AS_STRCHRNUL
+ /* Check to see if match was CHAR or null. */
+ cmp (%rax), %CHAR_REG
+ je L(cross_page_ret)
+L(zero_end):
+ xorl %eax, %eax
+L(cross_page_ret):
# endif
ret
END (STRCHR)
-# endif
+#endif
# include <sysdep.h>
+# if defined USE_AS_STRCASECMP_L
+# include "locale-defines.h"
+# endif
+
# ifndef STRCMP
# define STRCMP __strcmp_avx2
# endif
# define PAGE_SIZE 4096
-/* VEC_SIZE = Number of bytes in a ymm register */
+ /* VEC_SIZE = Number of bytes in a ymm register. */
# define VEC_SIZE 32
-/* Shift for dividing by (VEC_SIZE * 4). */
-# define DIVIDE_BY_VEC_4_SHIFT 7
-# if (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT)
-# error (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT)
-# endif
+# define VMOVU vmovdqu
+# define VMOVA vmovdqa
# ifdef USE_AS_WCSCMP
-/* Compare packed dwords. */
+ /* Compare packed dwords. */
# define VPCMPEQ vpcmpeqd
-/* Compare packed dwords and store minimum. */
+ /* Compare packed dwords and store minimum. */
# define VPMINU vpminud
-/* 1 dword char == 4 bytes. */
+ /* 1 dword char == 4 bytes. */
# define SIZE_OF_CHAR 4
# else
-/* Compare packed bytes. */
+ /* Compare packed bytes. */
# define VPCMPEQ vpcmpeqb
-/* Compare packed bytes and store minimum. */
+ /* Compare packed bytes and store minimum. */
# define VPMINU vpminub
-/* 1 byte char == 1 byte. */
+ /* 1 byte char == 1 byte. */
# define SIZE_OF_CHAR 1
# endif
+# ifdef USE_AS_STRNCMP
+# define LOOP_REG r9d
+# define LOOP_REG64 r9
+
+# define OFFSET_REG8 r9b
+# define OFFSET_REG r9d
+# define OFFSET_REG64 r9
+# else
+# define LOOP_REG edx
+# define LOOP_REG64 rdx
+
+# define OFFSET_REG8 dl
+# define OFFSET_REG edx
+# define OFFSET_REG64 rdx
+# endif
+
# ifndef VZEROUPPER
# define VZEROUPPER vzeroupper
# endif
+# if defined USE_AS_STRNCMP
+# define VEC_OFFSET 0
+# else
+# define VEC_OFFSET (-VEC_SIZE)
+# endif
+
+# ifdef USE_AS_STRCASECMP_L
+# define BYTE_LOOP_REG OFFSET_REG
+# else
+# define BYTE_LOOP_REG ecx
+# endif
+
+# ifdef USE_AS_STRCASECMP_L
+# ifdef USE_AS_STRNCMP
+# define STRCASECMP __strncasecmp_avx2
+# define LOCALE_REG rcx
+# define LOCALE_REG_LP RCX_LP
+# define STRCASECMP_NONASCII __strncasecmp_l_nonascii
+# else
+# define STRCASECMP __strcasecmp_avx2
+# define LOCALE_REG rdx
+# define LOCALE_REG_LP RDX_LP
+# define STRCASECMP_NONASCII __strcasecmp_l_nonascii
+# endif
+# endif
+
+# define xmmZERO xmm15
+# define ymmZERO ymm15
+
+# define LCASE_MIN_ymm %ymm10
+# define LCASE_MAX_ymm %ymm11
+# define CASE_ADD_ymm %ymm12
+
+# define LCASE_MIN_xmm %xmm10
+# define LCASE_MAX_xmm %xmm11
+# define CASE_ADD_xmm %xmm12
+
+ /* r11 is never used elsewhere so this is safe to maintain. */
+# define TOLOWER_BASE %r11
+
# ifndef SECTION
# define SECTION(p) p##.avx
# endif
+# ifdef USE_AS_STRCASECMP_L
+# define REG(x, y) x ## y
+# define TOLOWER(reg1_in, reg1_out, reg2_in, reg2_out, ext) \
+ vpaddb REG(LCASE_MIN_, ext), reg1_in, REG(%ext, 8); \
+ vpaddb REG(LCASE_MIN_, ext), reg2_in, REG(%ext, 9); \
+ vpcmpgtb REG(LCASE_MAX_, ext), REG(%ext, 8), REG(%ext, 8); \
+ vpcmpgtb REG(LCASE_MAX_, ext), REG(%ext, 9), REG(%ext, 9); \
+ vpandn REG(CASE_ADD_, ext), REG(%ext, 8), REG(%ext, 8); \
+ vpandn REG(CASE_ADD_, ext), REG(%ext, 9), REG(%ext, 9); \
+ vpaddb REG(%ext, 8), reg1_in, reg1_out; \
+ vpaddb REG(%ext, 9), reg2_in, reg2_out
+
+# define TOLOWER_gpr(src, dst) movl (TOLOWER_BASE, src, 4), dst
+# define TOLOWER_ymm(...) TOLOWER(__VA_ARGS__, ymm)
+# define TOLOWER_xmm(...) TOLOWER(__VA_ARGS__, xmm)
+
+# define CMP_R1_R2(s1_reg, s2_reg, scratch_reg, reg_out, ext) \
+ TOLOWER (s1_reg, scratch_reg, s2_reg, s2_reg, ext); \
+ VPCMPEQ scratch_reg, s2_reg, reg_out
+
+# define CMP_R1_S2(s1_reg, s2_mem, scratch_reg, reg_out, ext) \
+ VMOVU s2_mem, reg_out; \
+ CMP_R1_R2(s1_reg, reg_out, scratch_reg, reg_out, ext)
+
+# define CMP_R1_R2_ymm(...) CMP_R1_R2(__VA_ARGS__, ymm)
+# define CMP_R1_R2_xmm(...) CMP_R1_R2(__VA_ARGS__, xmm)
+
+# define CMP_R1_S2_ymm(...) CMP_R1_S2(__VA_ARGS__, ymm)
+# define CMP_R1_S2_xmm(...) CMP_R1_S2(__VA_ARGS__, xmm)
+
+# else
+# define TOLOWER_gpr(...)
+# define TOLOWER_ymm(...)
+# define TOLOWER_xmm(...)
+
+# define CMP_R1_R2_ymm(s1_reg, s2_reg, scratch_reg, reg_out) \
+ VPCMPEQ s2_reg, s1_reg, reg_out
+
+# define CMP_R1_R2_xmm(...) CMP_R1_R2_ymm(__VA_ARGS__)
+
+# define CMP_R1_S2_ymm(...) CMP_R1_R2_ymm(__VA_ARGS__)
+# define CMP_R1_S2_xmm(...) CMP_R1_R2_xmm(__VA_ARGS__)
+# endif
+
/* Warning!
wcscmp/wcsncmp have to use SIGNED comparison for elements.
strcmp/strncmp have to use UNSIGNED comparison for elements.
the maximum offset is reached before a difference is found, zero is
returned. */
- .section SECTION(.text),"ax",@progbits
-ENTRY (STRCMP)
+ .section SECTION(.text), "ax", @progbits
+ .align 16
+ .type STRCMP, @function
+ .globl STRCMP
+ .hidden STRCMP
+
+# ifndef GLABEL
+# define GLABEL(...) __VA_ARGS__
+# endif
+
+# ifdef USE_AS_STRCASECMP_L
+ENTRY (GLABEL(STRCASECMP))
+ movq __libc_tsd_LOCALE@gottpoff(%rip), %rax
+ mov %fs:(%rax), %LOCALE_REG_LP
+
+ /* Either 1 or 5 bytes (depending on whether CET is enabled). */
+ .p2align 4
+END (GLABEL(STRCASECMP))
+ /* FALLTHROUGH to strcasecmp/strncasecmp_l. */
+# endif
+
+ .p2align 4
+STRCMP:
+ cfi_startproc
+ _CET_ENDBR
+ CALL_MCOUNT
+
+# if defined USE_AS_STRCASECMP_L
+ /* We have to fall back on the C implementation for locales with
+ encodings not matching ASCII for single bytes. */
+# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
+ mov LOCALE_T___LOCALES + LC_CTYPE * LP_SIZE(%LOCALE_REG), %RAX_LP
+# else
+ mov (%LOCALE_REG), %RAX_LP
+# endif
+ testl $1, LOCALE_DATA_VALUES + _NL_CTYPE_NONASCII_CASE * SIZEOF_VALUES(%rax)
+ jne STRCASECMP_NONASCII
+ leaq _nl_C_LC_CTYPE_tolower + 128 * 4(%rip), TOLOWER_BASE
+# endif
+
# ifdef USE_AS_STRNCMP
- /* Check for simple cases (0 or 1) in offset. */
+ /* Don't overwrite LOCALE_REG (rcx) until we have passed
+ L(one_or_less). Otherwise we might use the wrong locale in
+ the OVERFLOW_STRCMP (strcasecmp_l). */
+# ifdef __ILP32__
+ /* Clear the upper 32 bits. */
+ movl %edx, %edx
+# endif
cmp $1, %RDX_LP
- je L(char0)
- jb L(zero)
+ /* Signed comparison intentional. We use this branch to also
+ test cases where length >= 2^63. These very large sizes can be
+ handled with strcmp as there is no way for that length to
+ actually bound the buffer. */
+ jle L(one_or_less)
# ifdef USE_AS_WCSCMP
- /* Convert units: from wide to byte char. */
- shl $2, %RDX_LP
+ movq %rdx, %rcx
+
+ /* Multiplying length by sizeof(wchar_t) can result in overflow.
+ Check if that is possible. All cases where overflow is possible
+ are cases where length is large enough that it can never be a
+ bound on valid memory so just use wcscmp. */
+ shrq $56, %rcx
+ jnz OVERFLOW_STRCMP
+
+ leaq (, %rdx, 4), %rdx
# endif
- /* Register %r11 tracks the maximum offset. */
- mov %RDX_LP, %R11_LP
+# endif
+ vpxor %xmmZERO, %xmmZERO, %xmmZERO
+# if defined USE_AS_STRCASECMP_L
+ .section .rodata.cst32, "aM", @progbits, 32
+ .align 32
+L(lcase_min):
+ .quad 0x3f3f3f3f3f3f3f3f
+ .quad 0x3f3f3f3f3f3f3f3f
+ .quad 0x3f3f3f3f3f3f3f3f
+ .quad 0x3f3f3f3f3f3f3f3f
+L(lcase_max):
+ .quad 0x9999999999999999
+ .quad 0x9999999999999999
+ .quad 0x9999999999999999
+ .quad 0x9999999999999999
+L(case_add):
+ .quad 0x2020202020202020
+ .quad 0x2020202020202020
+ .quad 0x2020202020202020
+ .quad 0x2020202020202020
+ .previous
+
+ vmovdqa L(lcase_min)(%rip), LCASE_MIN_ymm
+ vmovdqa L(lcase_max)(%rip), LCASE_MAX_ymm
+ vmovdqa L(case_add)(%rip), CASE_ADD_ymm
# endif
movl %edi, %eax
- xorl %edx, %edx
- /* Make %xmm7 (%ymm7) all zeros in this function. */
- vpxor %xmm7, %xmm7, %xmm7
orl %esi, %eax
- andl $(PAGE_SIZE - 1), %eax
- cmpl $(PAGE_SIZE - (VEC_SIZE * 4)), %eax
- jg L(cross_page)
- /* Start comparing 4 vectors. */
- vmovdqu (%rdi), %ymm1
- VPCMPEQ (%rsi), %ymm1, %ymm0
- VPMINU %ymm1, %ymm0, %ymm0
- VPCMPEQ %ymm7, %ymm0, %ymm0
- vpmovmskb %ymm0, %ecx
- testl %ecx, %ecx
- je L(next_3_vectors)
- tzcntl %ecx, %edx
+ sall $20, %eax
+ /* Check if s1 or s2 may cross a page in next 4x VEC loads. */
+ cmpl $((PAGE_SIZE -(VEC_SIZE * 4)) << 20), %eax
+ ja L(page_cross)
+
+L(no_page_cross):
+ /* Safe to compare 4x vectors. */
+ VMOVU (%rdi), %ymm0
+ /* 1s where s1 and s2 equal. Just VPCMPEQ if it's not strcasecmp.
+ Otherwise converts ymm0 and the load from rsi to lowercase. ymm2
+ is scratch and ymm1 is the return. */
+ CMP_R1_S2_ymm (%ymm0, (%rsi), %ymm2, %ymm1)
+ /* 1s at null CHAR. */
+ VPCMPEQ %ymm0, %ymmZERO, %ymm2
+ /* 1s where s1 and s2 equal AND not null CHAR. */
+ vpandn %ymm1, %ymm2, %ymm1
+
+ /* All 1s -> keep going, any 0s -> return. */
+ vpmovmskb %ymm1, %ecx
# ifdef USE_AS_STRNCMP
- /* Return 0 if the mismatched index (%rdx) is after the maximum
- offset (%r11). */
- cmpq %r11, %rdx
- jae L(zero)
+ cmpq $VEC_SIZE, %rdx
+ jbe L(vec_0_test_len)
# endif
+
+ /* All 1s represents all equals. incl will overflow to zero in
+ all equals case. Otherwise 1s will carry until position of first
+ mismatch. */
+ incl %ecx
+ jz L(more_3x_vec)
+
+ .p2align 4,, 4
+L(return_vec_0):
+ tzcntl %ecx, %ecx
# ifdef USE_AS_WCSCMP
+ movl (%rdi, %rcx), %edx
xorl %eax, %eax
- movl (%rdi, %rdx), %ecx
- cmpl (%rsi, %rdx), %ecx
- je L(return)
-L(wcscmp_return):
+ cmpl (%rsi, %rcx), %edx
+ je L(ret0)
setl %al
negl %eax
orl $1, %eax
-L(return):
# else
- movzbl (%rdi, %rdx), %eax
- movzbl (%rsi, %rdx), %edx
- subl %edx, %eax
+ movzbl (%rdi, %rcx), %eax
+ movzbl (%rsi, %rcx), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
# endif
+L(ret0):
L(return_vzeroupper):
ZERO_UPPER_VEC_REGISTERS_RETURN
- .p2align 4
-L(return_vec_size):
- tzcntl %ecx, %edx
# ifdef USE_AS_STRNCMP
- /* Return 0 if the mismatched index (%rdx + VEC_SIZE) is after
- the maximum offset (%r11). */
- addq $VEC_SIZE, %rdx
- cmpq %r11, %rdx
- jae L(zero)
-# ifdef USE_AS_WCSCMP
+ .p2align 4,, 8
+L(vec_0_test_len):
+ notl %ecx
+ bzhil %edx, %ecx, %eax
+ jnz L(return_vec_0)
+ /* Align if will cross fetch block. */
+ .p2align 4,, 2
+L(ret_zero):
xorl %eax, %eax
- movl (%rdi, %rdx), %ecx
- cmpl (%rsi, %rdx), %ecx
- jne L(wcscmp_return)
-# else
- movzbl (%rdi, %rdx), %eax
- movzbl (%rsi, %rdx), %edx
- subl %edx, %eax
+ VZEROUPPER_RETURN
+
+ .p2align 4,, 5
+L(one_or_less):
+# ifdef USE_AS_STRCASECMP_L
+ /* Set locale argument for strcasecmp. */
+ movq %LOCALE_REG, %rdx
# endif
-# else
+ jb L(ret_zero)
+ /* 'nbe' covers the case where length is negative (large
+ unsigned). */
+ jnbe OVERFLOW_STRCMP
# ifdef USE_AS_WCSCMP
+ movl (%rdi), %edx
xorl %eax, %eax
- movl VEC_SIZE(%rdi, %rdx), %ecx
- cmpl VEC_SIZE(%rsi, %rdx), %ecx
- jne L(wcscmp_return)
+ cmpl (%rsi), %edx
+ je L(ret1)
+ setl %al
+ negl %eax
+ orl $1, %eax
# else
- movzbl VEC_SIZE(%rdi, %rdx), %eax
- movzbl VEC_SIZE(%rsi, %rdx), %edx
- subl %edx, %eax
+ movzbl (%rdi), %eax
+ movzbl (%rsi), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
# endif
+L(ret1):
+ ret
# endif
- VZEROUPPER_RETURN
- .p2align 4
-L(return_2_vec_size):
- tzcntl %ecx, %edx
+ .p2align 4,, 10
+L(return_vec_1):
+ tzcntl %ecx, %ecx
# ifdef USE_AS_STRNCMP
- /* Return 0 if the mismatched index (%rdx + 2 * VEC_SIZE) is
- after the maximum offset (%r11). */
- addq $(VEC_SIZE * 2), %rdx
- cmpq %r11, %rdx
- jae L(zero)
-# ifdef USE_AS_WCSCMP
+ /* rdx must be > CHAR_PER_VEC so it is safe to subtract without
+ fear of overflow. */
+ addq $-VEC_SIZE, %rdx
+ cmpq %rcx, %rdx
+ jbe L(ret_zero)
+# endif
+# ifdef USE_AS_WCSCMP
+ movl VEC_SIZE(%rdi, %rcx), %edx
xorl %eax, %eax
- movl (%rdi, %rdx), %ecx
- cmpl (%rsi, %rdx), %ecx
- jne L(wcscmp_return)
-# else
- movzbl (%rdi, %rdx), %eax
- movzbl (%rsi, %rdx), %edx
- subl %edx, %eax
-# endif
+ cmpl VEC_SIZE(%rsi, %rcx), %edx
+ je L(ret2)
+ setl %al
+ negl %eax
+ orl $1, %eax
# else
-# ifdef USE_AS_WCSCMP
- xorl %eax, %eax
- movl (VEC_SIZE * 2)(%rdi, %rdx), %ecx
- cmpl (VEC_SIZE * 2)(%rsi, %rdx), %ecx
- jne L(wcscmp_return)
-# else
- movzbl (VEC_SIZE * 2)(%rdi, %rdx), %eax
- movzbl (VEC_SIZE * 2)(%rsi, %rdx), %edx
- subl %edx, %eax
-# endif
+ movzbl VEC_SIZE(%rdi, %rcx), %eax
+ movzbl VEC_SIZE(%rsi, %rcx), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
# endif
+L(ret2):
VZEROUPPER_RETURN
- .p2align 4
-L(return_3_vec_size):
- tzcntl %ecx, %edx
+ .p2align 4,, 10
# ifdef USE_AS_STRNCMP
- /* Return 0 if the mismatched index (%rdx + 3 * VEC_SIZE) is
- after the maximum offset (%r11). */
- addq $(VEC_SIZE * 3), %rdx
- cmpq %r11, %rdx
- jae L(zero)
-# ifdef USE_AS_WCSCMP
+L(return_vec_3):
+ salq $32, %rcx
+# endif
+
+L(return_vec_2):
+# ifndef USE_AS_STRNCMP
+ tzcntl %ecx, %ecx
+# else
+ tzcntq %rcx, %rcx
+ cmpq %rcx, %rdx
+ jbe L(ret_zero)
+# endif
+
+# ifdef USE_AS_WCSCMP
+ movl (VEC_SIZE * 2)(%rdi, %rcx), %edx
xorl %eax, %eax
- movl (%rdi, %rdx), %ecx
- cmpl (%rsi, %rdx), %ecx
- jne L(wcscmp_return)
-# else
- movzbl (%rdi, %rdx), %eax
- movzbl (%rsi, %rdx), %edx
- subl %edx, %eax
-# endif
+ cmpl (VEC_SIZE * 2)(%rsi, %rcx), %edx
+ je L(ret3)
+ setl %al
+ negl %eax
+ orl $1, %eax
# else
+ movzbl (VEC_SIZE * 2)(%rdi, %rcx), %eax
+ movzbl (VEC_SIZE * 2)(%rsi, %rcx), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
+# endif
+L(ret3):
+ VZEROUPPER_RETURN
+
+# ifndef USE_AS_STRNCMP
+ .p2align 4,, 10
+L(return_vec_3):
+ tzcntl %ecx, %ecx
# ifdef USE_AS_WCSCMP
+ movl (VEC_SIZE * 3)(%rdi, %rcx), %edx
xorl %eax, %eax
- movl (VEC_SIZE * 3)(%rdi, %rdx), %ecx
- cmpl (VEC_SIZE * 3)(%rsi, %rdx), %ecx
- jne L(wcscmp_return)
+ cmpl (VEC_SIZE * 3)(%rsi, %rcx), %edx
+ je L(ret4)
+ setl %al
+ negl %eax
+ orl $1, %eax
# else
- movzbl (VEC_SIZE * 3)(%rdi, %rdx), %eax
- movzbl (VEC_SIZE * 3)(%rsi, %rdx), %edx
- subl %edx, %eax
+ movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax
+ movzbl (VEC_SIZE * 3)(%rsi, %rcx), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
# endif
-# endif
+L(ret4):
VZEROUPPER_RETURN
+# endif
+
+ .p2align 4,, 10
+L(more_3x_vec):
+ /* Safe to compare 4x vectors. */
+ VMOVU VEC_SIZE(%rdi), %ymm0
+ CMP_R1_S2_ymm (%ymm0, VEC_SIZE(%rsi), %ymm2, %ymm1)
+ VPCMPEQ %ymm0, %ymmZERO, %ymm2
+ vpandn %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %ecx
+ incl %ecx
+ jnz L(return_vec_1)
- .p2align 4
-L(next_3_vectors):
- vmovdqu VEC_SIZE(%rdi), %ymm6
- VPCMPEQ VEC_SIZE(%rsi), %ymm6, %ymm3
- VPMINU %ymm6, %ymm3, %ymm3
- VPCMPEQ %ymm7, %ymm3, %ymm3
- vpmovmskb %ymm3, %ecx
- testl %ecx, %ecx
- jne L(return_vec_size)
- vmovdqu (VEC_SIZE * 2)(%rdi), %ymm5
- vmovdqu (VEC_SIZE * 3)(%rdi), %ymm4
- vmovdqu (VEC_SIZE * 3)(%rsi), %ymm0
- VPCMPEQ (VEC_SIZE * 2)(%rsi), %ymm5, %ymm2
- VPMINU %ymm5, %ymm2, %ymm2
- VPCMPEQ %ymm4, %ymm0, %ymm0
- VPCMPEQ %ymm7, %ymm2, %ymm2
- vpmovmskb %ymm2, %ecx
- testl %ecx, %ecx
- jne L(return_2_vec_size)
- VPMINU %ymm4, %ymm0, %ymm0
- VPCMPEQ %ymm7, %ymm0, %ymm0
- vpmovmskb %ymm0, %ecx
- testl %ecx, %ecx
- jne L(return_3_vec_size)
-L(main_loop_header):
- leaq (VEC_SIZE * 4)(%rdi), %rdx
- movl $PAGE_SIZE, %ecx
- /* Align load via RAX. */
- andq $-(VEC_SIZE * 4), %rdx
- subq %rdi, %rdx
- leaq (%rdi, %rdx), %rax
# ifdef USE_AS_STRNCMP
- /* Starting from this point, the maximum offset, or simply the
- 'offset', DECREASES by the same amount when base pointers are
- moved forward. Return 0 when:
- 1) On match: offset <= the matched vector index.
- 2) On mistmach, offset is before the mistmatched index.
+ subq $(VEC_SIZE * 2), %rdx
+ jbe L(ret_zero)
+# endif
+
+ VMOVU (VEC_SIZE * 2)(%rdi), %ymm0
+ CMP_R1_S2_ymm (%ymm0, (VEC_SIZE * 2)(%rsi), %ymm2, %ymm1)
+ VPCMPEQ %ymm0, %ymmZERO, %ymm2
+ vpandn %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %ecx
+ incl %ecx
+ jnz L(return_vec_2)
+
+ VMOVU (VEC_SIZE * 3)(%rdi), %ymm0
+ CMP_R1_S2_ymm (%ymm0, (VEC_SIZE * 3)(%rsi), %ymm2, %ymm1)
+ VPCMPEQ %ymm0, %ymmZERO, %ymm2
+ vpandn %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %ecx
+ incl %ecx
+ jnz L(return_vec_3)
+
+# ifdef USE_AS_STRNCMP
+ cmpq $(VEC_SIZE * 2), %rdx
+ jbe L(ret_zero)
+# endif
+
+# ifdef USE_AS_WCSCMP
+ /* Any non-zero positive value that doesn't interfere with 0x1.
*/
- subq %rdx, %r11
- jbe L(zero)
-# endif
- addq %rsi, %rdx
- movq %rdx, %rsi
- andl $(PAGE_SIZE - 1), %esi
- /* Number of bytes before page crossing. */
- subq %rsi, %rcx
- /* Number of VEC_SIZE * 4 blocks before page crossing. */
- shrq $DIVIDE_BY_VEC_4_SHIFT, %rcx
- /* ESI: Number of VEC_SIZE * 4 blocks before page crossing. */
- movl %ecx, %esi
- jmp L(loop_start)
+ movl $2, %r8d
+
+# else
+ xorl %r8d, %r8d
+# endif
+
+ /* The prepare labels are various entry points from the page
+ cross logic. */
+L(prepare_loop):
+
+# ifdef USE_AS_STRNCMP
+ /* Store N + (VEC_SIZE * 4) and place check at the beginning of
+ the loop. */
+ leaq (VEC_SIZE * 2)(%rdi, %rdx), %rdx
+# endif
+L(prepare_loop_no_len):
+
+ /* Align s1 and adjust s2 accordingly. */
+ subq %rdi, %rsi
+ andq $-(VEC_SIZE * 4), %rdi
+ addq %rdi, %rsi
+
+# ifdef USE_AS_STRNCMP
+ subq %rdi, %rdx
+# endif
+L(prepare_loop_aligned):
+ /* eax stores distance from rsi to next page cross. These cases
+ need to be handled specially as the 4x loop could potentially
+ read memory past the length of s1 or s2 and across a page
+ boundary. */
+ movl $-(VEC_SIZE * 4), %eax
+ subl %esi, %eax
+ andl $(PAGE_SIZE - 1), %eax
+
+ /* Loop 4x comparisons at a time. */
.p2align 4
L(loop):
+
+ /* End condition for strncmp. */
# ifdef USE_AS_STRNCMP
- /* Base pointers are moved forward by 4 * VEC_SIZE. Decrease
- the maximum offset (%r11) by the same amount. */
- subq $(VEC_SIZE * 4), %r11
- jbe L(zero)
-# endif
- addq $(VEC_SIZE * 4), %rax
- addq $(VEC_SIZE * 4), %rdx
-L(loop_start):
- testl %esi, %esi
- leal -1(%esi), %esi
- je L(loop_cross_page)
-L(back_to_loop):
- /* Main loop, comparing 4 vectors are a time. */
- vmovdqa (%rax), %ymm0
- vmovdqa VEC_SIZE(%rax), %ymm3
- VPCMPEQ (%rdx), %ymm0, %ymm4
- VPCMPEQ VEC_SIZE(%rdx), %ymm3, %ymm1
- VPMINU %ymm0, %ymm4, %ymm4
- VPMINU %ymm3, %ymm1, %ymm1
- vmovdqa (VEC_SIZE * 2)(%rax), %ymm2
- VPMINU %ymm1, %ymm4, %ymm0
- vmovdqa (VEC_SIZE * 3)(%rax), %ymm3
- VPCMPEQ (VEC_SIZE * 2)(%rdx), %ymm2, %ymm5
- VPCMPEQ (VEC_SIZE * 3)(%rdx), %ymm3, %ymm6
- VPMINU %ymm2, %ymm5, %ymm5
- VPMINU %ymm3, %ymm6, %ymm6
- VPMINU %ymm5, %ymm0, %ymm0
- VPMINU %ymm6, %ymm0, %ymm0
- VPCMPEQ %ymm7, %ymm0, %ymm0
-
- /* Test each mask (32 bits) individually because for VEC_SIZE
- == 32 is not possible to OR the four masks and keep all bits
- in a 64-bit integer register, differing from SSE2 strcmp
- where ORing is possible. */
- vpmovmskb %ymm0, %ecx
+ subq $(VEC_SIZE * 4), %rdx
+ jbe L(ret_zero)
+# endif
+
+ subq $-(VEC_SIZE * 4), %rdi
+ subq $-(VEC_SIZE * 4), %rsi
+
+ /* Check if rsi loads will cross a page boundary. */
+ addl $-(VEC_SIZE * 4), %eax
+ jnb L(page_cross_during_loop)
+
+ /* Loop entry after handling page cross during loop. */
+L(loop_skip_page_cross_check):
+ VMOVA (VEC_SIZE * 0)(%rdi), %ymm0
+ VMOVA (VEC_SIZE * 1)(%rdi), %ymm2
+ VMOVA (VEC_SIZE * 2)(%rdi), %ymm4
+ VMOVA (VEC_SIZE * 3)(%rdi), %ymm6
+
+ /* ymm1 all 1s where s1 and s2 equal. All 0s otherwise. */
+ CMP_R1_S2_ymm (%ymm0, (VEC_SIZE * 0)(%rsi), %ymm3, %ymm1)
+ CMP_R1_S2_ymm (%ymm2, (VEC_SIZE * 1)(%rsi), %ymm5, %ymm3)
+ CMP_R1_S2_ymm (%ymm4, (VEC_SIZE * 2)(%rsi), %ymm7, %ymm5)
+ CMP_R1_S2_ymm (%ymm6, (VEC_SIZE * 3)(%rsi), %ymm13, %ymm7)
+
+ /* If any mismatches or null CHAR then 0 CHAR, otherwise non-
+ zero. */
+ vpand %ymm0, %ymm1, %ymm1
+
+
+ vpand %ymm2, %ymm3, %ymm3
+ vpand %ymm4, %ymm5, %ymm5
+ vpand %ymm6, %ymm7, %ymm7
+
+ VPMINU %ymm1, %ymm3, %ymm3
+ VPMINU %ymm5, %ymm7, %ymm7
+
+ /* Reduce all 0 CHARs for the 4x VEC into ymm7. */
+ VPMINU %ymm3, %ymm7, %ymm7
+
+ /* If any 0 CHAR then done. */
+ VPCMPEQ %ymm7, %ymmZERO, %ymm7
+ vpmovmskb %ymm7, %LOOP_REG
+ testl %LOOP_REG, %LOOP_REG
+ jz L(loop)
+
+ /* Find which VEC has the mismatch or end of string. */
+ VPCMPEQ %ymm1, %ymmZERO, %ymm1
+ vpmovmskb %ymm1, %ecx
testl %ecx, %ecx
- je L(loop)
- VPCMPEQ %ymm7, %ymm4, %ymm0
- vpmovmskb %ymm0, %edi
- testl %edi, %edi
- je L(test_vec)
- tzcntl %edi, %ecx
+ jnz L(return_vec_0_end)
+
+
+ VPCMPEQ %ymm3, %ymmZERO, %ymm3
+ vpmovmskb %ymm3, %ecx
+ testl %ecx, %ecx
+ jnz L(return_vec_1_end)
+
+L(return_vec_2_3_end):
# ifdef USE_AS_STRNCMP
- cmpq %rcx, %r11
- jbe L(zero)
-# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
+ subq $(VEC_SIZE * 2), %rdx
+ jbe L(ret_zero_end)
+# endif
+
+ VPCMPEQ %ymm5, %ymmZERO, %ymm5
+ vpmovmskb %ymm5, %ecx
+ testl %ecx, %ecx
+ jnz L(return_vec_2_end)
+
+ /* LOOP_REG contains matches for null/mismatch from the loop. If
+ VEC 0,1,and 2 all have no null and no mismatches then mismatch
+ must entirely be from VEC 3 which is fully represented by
+ LOOP_REG. */
+ tzcntl %LOOP_REG, %LOOP_REG
+
+# ifdef USE_AS_STRNCMP
+ subl $-(VEC_SIZE), %LOOP_REG
+ cmpq %LOOP_REG64, %rdx
+ jbe L(ret_zero_end)
+# endif
+
+# ifdef USE_AS_WCSCMP
+ movl (VEC_SIZE * 2 - VEC_OFFSET)(%rdi, %LOOP_REG64), %ecx
xorl %eax, %eax
- movl (%rsi, %rcx), %edi
- cmpl (%rdx, %rcx), %edi
- jne L(wcscmp_return)
-# else
- movzbl (%rax, %rcx), %eax
- movzbl (%rdx, %rcx), %edx
- subl %edx, %eax
-# endif
+ cmpl (VEC_SIZE * 2 - VEC_OFFSET)(%rsi, %LOOP_REG64), %ecx
+ je L(ret5)
+ setl %al
+ negl %eax
+ xorl %r8d, %eax
# else
-# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
- xorl %eax, %eax
- movl (%rsi, %rcx), %edi
- cmpl (%rdx, %rcx), %edi
- jne L(wcscmp_return)
-# else
- movzbl (%rax, %rcx), %eax
- movzbl (%rdx, %rcx), %edx
- subl %edx, %eax
-# endif
+ movzbl (VEC_SIZE * 2 - VEC_OFFSET)(%rdi, %LOOP_REG64), %eax
+ movzbl (VEC_SIZE * 2 - VEC_OFFSET)(%rsi, %LOOP_REG64), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
+ xorl %r8d, %eax
+ subl %r8d, %eax
# endif
+L(ret5):
VZEROUPPER_RETURN
- .p2align 4
-L(test_vec):
# ifdef USE_AS_STRNCMP
- /* The first vector matched. Return 0 if the maximum offset
- (%r11) <= VEC_SIZE. */
- cmpq $VEC_SIZE, %r11
- jbe L(zero)
+ .p2align 4,, 2
+L(ret_zero_end):
+ xorl %eax, %eax
+ VZEROUPPER_RETURN
# endif
- VPCMPEQ %ymm7, %ymm1, %ymm1
- vpmovmskb %ymm1, %ecx
- testl %ecx, %ecx
- je L(test_2_vec)
- tzcntl %ecx, %edi
+
+
+ /* The L(return_vec_N_end) differ from L(return_vec_N) in that
+ they use the value of `r8` to negate the return value. This is
+ because the page cross logic can swap `rdi` and `rsi`. */
+ .p2align 4,, 10
# ifdef USE_AS_STRNCMP
- addq $VEC_SIZE, %rdi
- cmpq %rdi, %r11
- jbe L(zero)
-# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
+L(return_vec_1_end):
+ salq $32, %rcx
+# endif
+L(return_vec_0_end):
+# ifndef USE_AS_STRNCMP
+ tzcntl %ecx, %ecx
+# else
+ tzcntq %rcx, %rcx
+ cmpq %rcx, %rdx
+ jbe L(ret_zero_end)
+# endif
+
+# ifdef USE_AS_WCSCMP
+ movl (%rdi, %rcx), %edx
xorl %eax, %eax
- movl (%rsi, %rdi), %ecx
- cmpl (%rdx, %rdi), %ecx
- jne L(wcscmp_return)
-# else
- movzbl (%rax, %rdi), %eax
- movzbl (%rdx, %rdi), %edx
- subl %edx, %eax
-# endif
+ cmpl (%rsi, %rcx), %edx
+ je L(ret6)
+ setl %al
+ negl %eax
+ xorl %r8d, %eax
# else
+ movzbl (%rdi, %rcx), %eax
+ movzbl (%rsi, %rcx), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
+ xorl %r8d, %eax
+ subl %r8d, %eax
+# endif
+L(ret6):
+ VZEROUPPER_RETURN
+
+# ifndef USE_AS_STRNCMP
+ .p2align 4,, 10
+L(return_vec_1_end):
+ tzcntl %ecx, %ecx
# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
+ movl VEC_SIZE(%rdi, %rcx), %edx
xorl %eax, %eax
- movl VEC_SIZE(%rsi, %rdi), %ecx
- cmpl VEC_SIZE(%rdx, %rdi), %ecx
- jne L(wcscmp_return)
+ cmpl VEC_SIZE(%rsi, %rcx), %edx
+ je L(ret7)
+ setl %al
+ negl %eax
+ xorl %r8d, %eax
# else
- movzbl VEC_SIZE(%rax, %rdi), %eax
- movzbl VEC_SIZE(%rdx, %rdi), %edx
- subl %edx, %eax
+ movzbl VEC_SIZE(%rdi, %rcx), %eax
+ movzbl VEC_SIZE(%rsi, %rcx), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
+ xorl %r8d, %eax
+ subl %r8d, %eax
# endif
-# endif
+L(ret7):
VZEROUPPER_RETURN
+# endif
- .p2align 4
-L(test_2_vec):
+ .p2align 4,, 10
+L(return_vec_2_end):
+ tzcntl %ecx, %ecx
# ifdef USE_AS_STRNCMP
- /* The first 2 vectors matched. Return 0 if the maximum offset
- (%r11) <= 2 * VEC_SIZE. */
- cmpq $(VEC_SIZE * 2), %r11
- jbe L(zero)
+ cmpq %rcx, %rdx
+ jbe L(ret_zero_page_cross)
# endif
- VPCMPEQ %ymm7, %ymm5, %ymm5
- vpmovmskb %ymm5, %ecx
- testl %ecx, %ecx
- je L(test_3_vec)
- tzcntl %ecx, %edi
-# ifdef USE_AS_STRNCMP
- addq $(VEC_SIZE * 2), %rdi
- cmpq %rdi, %r11
- jbe L(zero)
-# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
+# ifdef USE_AS_WCSCMP
+ movl (VEC_SIZE * 2)(%rdi, %rcx), %edx
xorl %eax, %eax
- movl (%rsi, %rdi), %ecx
- cmpl (%rdx, %rdi), %ecx
- jne L(wcscmp_return)
-# else
- movzbl (%rax, %rdi), %eax
- movzbl (%rdx, %rdi), %edx
- subl %edx, %eax
-# endif
+ cmpl (VEC_SIZE * 2)(%rsi, %rcx), %edx
+ je L(ret11)
+ setl %al
+ negl %eax
+ xorl %r8d, %eax
# else
-# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
- xorl %eax, %eax
- movl (VEC_SIZE * 2)(%rsi, %rdi), %ecx
- cmpl (VEC_SIZE * 2)(%rdx, %rdi), %ecx
- jne L(wcscmp_return)
-# else
- movzbl (VEC_SIZE * 2)(%rax, %rdi), %eax
- movzbl (VEC_SIZE * 2)(%rdx, %rdi), %edx
- subl %edx, %eax
-# endif
+ movzbl (VEC_SIZE * 2)(%rdi, %rcx), %eax
+ movzbl (VEC_SIZE * 2)(%rsi, %rcx), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
+ xorl %r8d, %eax
+ subl %r8d, %eax
# endif
+L(ret11):
VZEROUPPER_RETURN
- .p2align 4
-L(test_3_vec):
+
+ /* Page cross in rsi in next 4x VEC. */
+
+ /* TODO: Improve logic here. */
+ .p2align 4,, 10
+L(page_cross_during_loop):
+ /* eax contains [distance_from_page - (VEC_SIZE * 4)]. */
+
+ /* Optimistically rsi and rdi are both aligned, in which case we
+ don't need any logic here. */
+ cmpl $-(VEC_SIZE * 4), %eax
+ /* Don't adjust eax before jumping back to loop and we will
+ never hit page cross case again. */
+ je L(loop_skip_page_cross_check)
+
+ /* Check if we can safely load a VEC. */
+ cmpl $-(VEC_SIZE * 3), %eax
+ jle L(less_1x_vec_till_page_cross)
+
+ VMOVA (%rdi), %ymm0
+ CMP_R1_S2_ymm (%ymm0, (%rsi), %ymm2, %ymm1)
+ VPCMPEQ %ymm0, %ymmZERO, %ymm2
+ vpandn %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %ecx
+ incl %ecx
+ jnz L(return_vec_0_end)
+
+ /* if distance >= 2x VEC then eax > -(VEC_SIZE * 2). */
+ cmpl $-(VEC_SIZE * 2), %eax
+ jg L(more_2x_vec_till_page_cross)
+
+ .p2align 4,, 4
+L(less_1x_vec_till_page_cross):
+ subl $-(VEC_SIZE * 4), %eax
+ /* Guaranteed safe to read from rdi - VEC_SIZE here. The only
+ concerning case is first iteration if incoming s1 was near start
+ of a page and s2 near end. If s1 was near the start of the page
+ we already aligned up to nearest VEC_SIZE * 4 so guaranteed safe
+ to read back -VEC_SIZE. If rdi is truly at the start of a page
+ here, it means the previous page (rdi - VEC_SIZE) has already
+ been loaded earlier so must be valid. */
+ VMOVU -VEC_SIZE(%rdi, %rax), %ymm0
+ CMP_R1_S2_ymm (%ymm0, -VEC_SIZE(%rsi, %rax), %ymm2, %ymm1)
+ VPCMPEQ %ymm0, %ymmZERO, %ymm2
+ vpandn %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %ecx
+
+ /* Mask of potentially valid bits. The lower bits can be out of
+ range comparisons (but safe regarding page crosses). */
+ movl $-1, %r10d
+ shlxl %esi, %r10d, %r10d
+ notl %ecx
+
# ifdef USE_AS_STRNCMP
- /* The first 3 vectors matched. Return 0 if the maximum offset
- (%r11) <= 3 * VEC_SIZE. */
- cmpq $(VEC_SIZE * 3), %r11
- jbe L(zero)
-# endif
- VPCMPEQ %ymm7, %ymm6, %ymm6
- vpmovmskb %ymm6, %esi
- tzcntl %esi, %ecx
+ cmpq %rax, %rdx
+ jbe L(return_page_cross_end_check)
+# endif
+ movl %eax, %OFFSET_REG
+ addl $(PAGE_SIZE - VEC_SIZE * 4), %eax
+
+ andl %r10d, %ecx
+ jz L(loop_skip_page_cross_check)
+
+ .p2align 4,, 3
+L(return_page_cross_end):
+ tzcntl %ecx, %ecx
+
# ifdef USE_AS_STRNCMP
- addq $(VEC_SIZE * 3), %rcx
- cmpq %rcx, %r11
- jbe L(zero)
-# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
- xorl %eax, %eax
- movl (%rsi, %rcx), %esi
- cmpl (%rdx, %rcx), %esi
- jne L(wcscmp_return)
-# else
- movzbl (%rax, %rcx), %eax
- movzbl (%rdx, %rcx), %edx
- subl %edx, %eax
-# endif
+ leal -VEC_SIZE(%OFFSET_REG64, %rcx), %ecx
+L(return_page_cross_cmp_mem):
# else
-# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
+ addl %OFFSET_REG, %ecx
+# endif
+# ifdef USE_AS_WCSCMP
+ movl VEC_OFFSET(%rdi, %rcx), %edx
xorl %eax, %eax
- movl (VEC_SIZE * 3)(%rsi, %rcx), %esi
- cmpl (VEC_SIZE * 3)(%rdx, %rcx), %esi
- jne L(wcscmp_return)
-# else
- movzbl (VEC_SIZE * 3)(%rax, %rcx), %eax
- movzbl (VEC_SIZE * 3)(%rdx, %rcx), %edx
- subl %edx, %eax
-# endif
+ cmpl VEC_OFFSET(%rsi, %rcx), %edx
+ je L(ret8)
+ setl %al
+ negl %eax
+ xorl %r8d, %eax
+# else
+ movzbl VEC_OFFSET(%rdi, %rcx), %eax
+ movzbl VEC_OFFSET(%rsi, %rcx), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
+ xorl %r8d, %eax
+ subl %r8d, %eax
# endif
+L(ret8):
VZEROUPPER_RETURN
- .p2align 4
-L(loop_cross_page):
- xorl %r10d, %r10d
- movq %rdx, %rcx
- /* Align load via RDX. We load the extra ECX bytes which should
- be ignored. */
- andl $((VEC_SIZE * 4) - 1), %ecx
- /* R10 is -RCX. */
- subq %rcx, %r10
-
- /* This works only if VEC_SIZE * 2 == 64. */
-# if (VEC_SIZE * 2) != 64
-# error (VEC_SIZE * 2) != 64
-# endif
-
- /* Check if the first VEC_SIZE * 2 bytes should be ignored. */
- cmpl $(VEC_SIZE * 2), %ecx
- jge L(loop_cross_page_2_vec)
-
- vmovdqu (%rax, %r10), %ymm2
- vmovdqu VEC_SIZE(%rax, %r10), %ymm3
- VPCMPEQ (%rdx, %r10), %ymm2, %ymm0
- VPCMPEQ VEC_SIZE(%rdx, %r10), %ymm3, %ymm1
- VPMINU %ymm2, %ymm0, %ymm0
- VPMINU %ymm3, %ymm1, %ymm1
- VPCMPEQ %ymm7, %ymm0, %ymm0
- VPCMPEQ %ymm7, %ymm1, %ymm1
-
- vpmovmskb %ymm0, %edi
- vpmovmskb %ymm1, %esi
-
- salq $32, %rsi
- xorq %rsi, %rdi
-
- /* Since ECX < VEC_SIZE * 2, simply skip the first ECX bytes. */
- shrq %cl, %rdi
-
- testq %rdi, %rdi
- je L(loop_cross_page_2_vec)
- tzcntq %rdi, %rcx
# ifdef USE_AS_STRNCMP
- cmpq %rcx, %r11
- jbe L(zero)
-# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
+ .p2align 4,, 10
+L(return_page_cross_end_check):
+ andl %r10d, %ecx
+ tzcntl %ecx, %ecx
+ leal -VEC_SIZE(%rax, %rcx), %ecx
+ cmpl %ecx, %edx
+ ja L(return_page_cross_cmp_mem)
xorl %eax, %eax
- movl (%rsi, %rcx), %edi
- cmpl (%rdx, %rcx), %edi
- jne L(wcscmp_return)
-# else
- movzbl (%rax, %rcx), %eax
- movzbl (%rdx, %rcx), %edx
- subl %edx, %eax
-# endif
-# else
-# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
- xorl %eax, %eax
- movl (%rsi, %rcx), %edi
- cmpl (%rdx, %rcx), %edi
- jne L(wcscmp_return)
-# else
- movzbl (%rax, %rcx), %eax
- movzbl (%rdx, %rcx), %edx
- subl %edx, %eax
-# endif
-# endif
VZEROUPPER_RETURN
+# endif
- .p2align 4
-L(loop_cross_page_2_vec):
- /* The first VEC_SIZE * 2 bytes match or are ignored. */
- vmovdqu (VEC_SIZE * 2)(%rax, %r10), %ymm2
- vmovdqu (VEC_SIZE * 3)(%rax, %r10), %ymm3
- VPCMPEQ (VEC_SIZE * 2)(%rdx, %r10), %ymm2, %ymm5
- VPMINU %ymm2, %ymm5, %ymm5
- VPCMPEQ (VEC_SIZE * 3)(%rdx, %r10), %ymm3, %ymm6
- VPCMPEQ %ymm7, %ymm5, %ymm5
- VPMINU %ymm3, %ymm6, %ymm6
- VPCMPEQ %ymm7, %ymm6, %ymm6
-
- vpmovmskb %ymm5, %edi
- vpmovmskb %ymm6, %esi
-
- salq $32, %rsi
- xorq %rsi, %rdi
- xorl %r8d, %r8d
- /* If ECX > VEC_SIZE * 2, skip ECX - (VEC_SIZE * 2) bytes. */
- subl $(VEC_SIZE * 2), %ecx
- jle 1f
- /* Skip ECX bytes. */
- shrq %cl, %rdi
- /* R8 has number of bytes skipped. */
- movl %ecx, %r8d
-1:
- /* Before jumping back to the loop, set ESI to the number of
- VEC_SIZE * 4 blocks before page crossing. */
- movl $(PAGE_SIZE / (VEC_SIZE * 4) - 1), %esi
-
- testq %rdi, %rdi
+ .p2align 4,, 10
+L(more_2x_vec_till_page_cross):
+ /* If more 2x vec till cross we will complete a full loop
+ iteration here. */
+
+ VMOVU VEC_SIZE(%rdi), %ymm0
+ CMP_R1_S2_ymm (%ymm0, VEC_SIZE(%rsi), %ymm2, %ymm1)
+ VPCMPEQ %ymm0, %ymmZERO, %ymm2
+ vpandn %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %ecx
+ incl %ecx
+ jnz L(return_vec_1_end)
+
# ifdef USE_AS_STRNCMP
- /* At this point, if %rdi value is 0, it already tested
- VEC_SIZE*4+%r10 byte starting from %rax. This label
- checks whether strncmp maximum offset reached or not. */
- je L(string_nbyte_offset_check)
-# else
- je L(back_to_loop)
+ cmpq $(VEC_SIZE * 2), %rdx
+ jbe L(ret_zero_in_loop_page_cross)
# endif
- tzcntq %rdi, %rcx
- addq %r10, %rcx
- /* Adjust for number of bytes skipped. */
- addq %r8, %rcx
+
+ subl $-(VEC_SIZE * 4), %eax
+
+ /* Safe to include comparisons from lower bytes. */
+ VMOVU -(VEC_SIZE * 2)(%rdi, %rax), %ymm0
+ CMP_R1_S2_ymm (%ymm0, -(VEC_SIZE * 2)(%rsi, %rax), %ymm2, %ymm1)
+ VPCMPEQ %ymm0, %ymmZERO, %ymm2
+ vpandn %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %ecx
+ incl %ecx
+ jnz L(return_vec_page_cross_0)
+
+ VMOVU -(VEC_SIZE * 1)(%rdi, %rax), %ymm0
+ CMP_R1_S2_ymm (%ymm0, -(VEC_SIZE * 1)(%rsi, %rax), %ymm2, %ymm1)
+ VPCMPEQ %ymm0, %ymmZERO, %ymm2
+ vpandn %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %ecx
+ incl %ecx
+ jnz L(return_vec_page_cross_1)
+
# ifdef USE_AS_STRNCMP
- addq $(VEC_SIZE * 2), %rcx
- subq %rcx, %r11
- jbe L(zero)
-# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
+ /* Must check length here as length might preclude reading next
+ page. */
+ cmpq %rax, %rdx
+ jbe L(ret_zero_in_loop_page_cross)
+# endif
+
+ /* Finish the loop. */
+ VMOVA (VEC_SIZE * 2)(%rdi), %ymm4
+ VMOVA (VEC_SIZE * 3)(%rdi), %ymm6
+
+ CMP_R1_S2_ymm (%ymm4, (VEC_SIZE * 2)(%rsi), %ymm7, %ymm5)
+ CMP_R1_S2_ymm (%ymm6, (VEC_SIZE * 3)(%rsi), %ymm13, %ymm7)
+ vpand %ymm4, %ymm5, %ymm5
+ vpand %ymm6, %ymm7, %ymm7
+ VPMINU %ymm5, %ymm7, %ymm7
+ VPCMPEQ %ymm7, %ymmZERO, %ymm7
+ vpmovmskb %ymm7, %LOOP_REG
+ testl %LOOP_REG, %LOOP_REG
+ jnz L(return_vec_2_3_end)
+
+ /* Best for code size to include ucond-jmp here. Would be faster
+ if this case is hot to duplicate the L(return_vec_2_3_end) code
+ as fall-through and have jump back to loop on mismatch
+ comparison. */
+ subq $-(VEC_SIZE * 4), %rdi
+ subq $-(VEC_SIZE * 4), %rsi
+ addl $(PAGE_SIZE - VEC_SIZE * 8), %eax
+# ifdef USE_AS_STRNCMP
+ subq $(VEC_SIZE * 4), %rdx
+ ja L(loop_skip_page_cross_check)
+L(ret_zero_in_loop_page_cross):
xorl %eax, %eax
- movl (%rsi, %rcx), %edi
- cmpl (%rdx, %rcx), %edi
- jne L(wcscmp_return)
-# else
- movzbl (%rax, %rcx), %eax
- movzbl (%rdx, %rcx), %edx
- subl %edx, %eax
-# endif
+ VZEROUPPER_RETURN
# else
-# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
- xorl %eax, %eax
- movl (VEC_SIZE * 2)(%rsi, %rcx), %edi
- cmpl (VEC_SIZE * 2)(%rdx, %rcx), %edi
- jne L(wcscmp_return)
-# else
- movzbl (VEC_SIZE * 2)(%rax, %rcx), %eax
- movzbl (VEC_SIZE * 2)(%rdx, %rcx), %edx
- subl %edx, %eax
-# endif
+ jmp L(loop_skip_page_cross_check)
# endif
- VZEROUPPER_RETURN
+
+ .p2align 4,, 10
+L(return_vec_page_cross_0):
+ addl $-VEC_SIZE, %eax
+L(return_vec_page_cross_1):
+ tzcntl %ecx, %ecx
# ifdef USE_AS_STRNCMP
-L(string_nbyte_offset_check):
- leaq (VEC_SIZE * 4)(%r10), %r10
- cmpq %r10, %r11
- jbe L(zero)
- jmp L(back_to_loop)
+ leal -VEC_SIZE(%rax, %rcx), %ecx
+ cmpq %rcx, %rdx
+ jbe L(ret_zero_in_loop_page_cross)
+# else
+ addl %eax, %ecx
# endif
- .p2align 4
-L(cross_page_loop):
- /* Check one byte/dword at a time. */
# ifdef USE_AS_WCSCMP
- cmpl %ecx, %eax
+ movl VEC_OFFSET(%rdi, %rcx), %edx
+ xorl %eax, %eax
+ cmpl VEC_OFFSET(%rsi, %rcx), %edx
+ je L(ret9)
+ setl %al
+ negl %eax
+ xorl %r8d, %eax
# else
+ movzbl VEC_OFFSET(%rdi, %rcx), %eax
+ movzbl VEC_OFFSET(%rsi, %rcx), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
subl %ecx, %eax
+ xorl %r8d, %eax
+ subl %r8d, %eax
# endif
- jne L(different)
- addl $SIZE_OF_CHAR, %edx
- cmpl $(VEC_SIZE * 4), %edx
- je L(main_loop_header)
-# ifdef USE_AS_STRNCMP
- cmpq %r11, %rdx
- jae L(zero)
+L(ret9):
+ VZEROUPPER_RETURN
+
+
+ .p2align 4,, 10
+L(page_cross):
+# ifndef USE_AS_STRNCMP
+ /* If both are VEC aligned we don't need any special logic here.
+ Only valid for strcmp where stop condition is guaranteed to be
+ reachable by just reading memory. */
+ testl $((VEC_SIZE - 1) << 20), %eax
+ jz L(no_page_cross)
# endif
+
+ movl %edi, %eax
+ movl %esi, %ecx
+ andl $(PAGE_SIZE - 1), %eax
+ andl $(PAGE_SIZE - 1), %ecx
+
+ xorl %OFFSET_REG, %OFFSET_REG
+
+ /* Check which is closer to page cross, s1 or s2. */
+ cmpl %eax, %ecx
+ jg L(page_cross_s2)
+
+ /* The previous page cross check has false positives. Check for
+ true positive as page cross logic is very expensive. */
+ subl $(PAGE_SIZE - VEC_SIZE * 4), %eax
+ jbe L(no_page_cross)
+
+ /* Set r8 to not interfere with normal return value (rdi and rsi
+ did not swap). */
# ifdef USE_AS_WCSCMP
- movl (%rdi, %rdx), %eax
- movl (%rsi, %rdx), %ecx
+ /* Any non-zero positive value that doesn't interfere with 0x1.
+ */
+ movl $2, %r8d
# else
- movzbl (%rdi, %rdx), %eax
- movzbl (%rsi, %rdx), %ecx
+ xorl %r8d, %r8d
# endif
- /* Check null char. */
- testl %eax, %eax
- jne L(cross_page_loop)
- /* Since %eax == 0, subtract is OK for both SIGNED and UNSIGNED
- comparisons. */
- subl %ecx, %eax
-# ifndef USE_AS_WCSCMP
-L(different):
+
+ /* Check if less than 1x VEC till page cross. */
+ subl $(VEC_SIZE * 3), %eax
+ jg L(less_1x_vec_till_page)
+
+ /* If more than 1x VEC till page cross, loop through safely
+ loadable memory until within 1x VEC of page cross. */
+
+ .p2align 4,, 10
+L(page_cross_loop):
+
+ VMOVU (%rdi, %OFFSET_REG64), %ymm0
+ CMP_R1_S2_ymm (%ymm0, (%rsi, %OFFSET_REG64), %ymm2, %ymm1)
+ VPCMPEQ %ymm0, %ymmZERO, %ymm2
+ vpandn %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %ecx
+ incl %ecx
+
+ jnz L(check_ret_vec_page_cross)
+ addl $VEC_SIZE, %OFFSET_REG
+# ifdef USE_AS_STRNCMP
+ cmpq %OFFSET_REG64, %rdx
+ jbe L(ret_zero_page_cross)
# endif
- VZEROUPPER_RETURN
+ addl $VEC_SIZE, %eax
+ jl L(page_cross_loop)
+
+ subl %eax, %OFFSET_REG
+ /* OFFSET_REG has distance to page cross - VEC_SIZE. Guaranteed
+ to not cross page so is safe to load. Since we have already
+ loaded at least 1 VEC from rsi it is also guaranteed to be
+ safe. */
+
+ VMOVU (%rdi, %OFFSET_REG64), %ymm0
+ CMP_R1_S2_ymm (%ymm0, (%rsi, %OFFSET_REG64), %ymm2, %ymm1)
+ VPCMPEQ %ymm0, %ymmZERO, %ymm2
+ vpandn %ymm1, %ymm2, %ymm1
+ vpmovmskb %ymm1, %ecx
+
+# ifdef USE_AS_STRNCMP
+ leal VEC_SIZE(%OFFSET_REG64), %eax
+ cmpq %rax, %rdx
+ jbe L(check_ret_vec_page_cross2)
+ addq %rdi, %rdx
+# endif
+ incl %ecx
+ jz L(prepare_loop_no_len)
+ .p2align 4,, 4
+L(ret_vec_page_cross):
+# ifndef USE_AS_STRNCMP
+L(check_ret_vec_page_cross):
+# endif
+ tzcntl %ecx, %ecx
+ addl %OFFSET_REG, %ecx
+L(ret_vec_page_cross_cont):
# ifdef USE_AS_WCSCMP
- .p2align 4
-L(different):
- /* Use movl to avoid modifying EFLAGS. */
- movl $0, %eax
+ movl (%rdi, %rcx), %edx
+ xorl %eax, %eax
+ cmpl (%rsi, %rcx), %edx
+ je L(ret12)
setl %al
negl %eax
- orl $1, %eax
- VZEROUPPER_RETURN
+ xorl %r8d, %eax
+# else
+ movzbl (%rdi, %rcx), %eax
+ movzbl (%rsi, %rcx), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
+ xorl %r8d, %eax
+ subl %r8d, %eax
# endif
+L(ret12):
+ VZEROUPPER_RETURN
# ifdef USE_AS_STRNCMP
- .p2align 4
-L(zero):
+ .p2align 4,, 10
+L(check_ret_vec_page_cross2):
+ incl %ecx
+L(check_ret_vec_page_cross):
+ tzcntl %ecx, %ecx
+ addl %OFFSET_REG, %ecx
+ cmpq %rcx, %rdx
+ ja L(ret_vec_page_cross_cont)
+ .p2align 4,, 2
+L(ret_zero_page_cross):
xorl %eax, %eax
VZEROUPPER_RETURN
+# endif
- .p2align 4
-L(char0):
-# ifdef USE_AS_WCSCMP
- xorl %eax, %eax
- movl (%rdi), %ecx
- cmpl (%rsi), %ecx
- jne L(wcscmp_return)
-# else
- movzbl (%rsi), %ecx
- movzbl (%rdi), %eax
- subl %ecx, %eax
-# endif
- VZEROUPPER_RETURN
+ .p2align 4,, 4
+L(page_cross_s2):
+ /* Ensure this is a true page cross. */
+ subl $(PAGE_SIZE - VEC_SIZE * 4), %ecx
+ jbe L(no_page_cross)
+
+
+ movl %ecx, %eax
+ movq %rdi, %rcx
+ movq %rsi, %rdi
+ movq %rcx, %rsi
+
+ /* set r8 to negate return value as rdi and rsi swapped. */
+# ifdef USE_AS_WCSCMP
+ movl $-4, %r8d
+# else
+ movl $-1, %r8d
# endif
+ xorl %OFFSET_REG, %OFFSET_REG
- .p2align 4
-L(last_vector):
- addq %rdx, %rdi
- addq %rdx, %rsi
+ /* Check if more than 1x VEC till page cross. */
+ subl $(VEC_SIZE * 3), %eax
+ jle L(page_cross_loop)
+
+ .p2align 4,, 6
+L(less_1x_vec_till_page):
+ /* Find largest load size we can use. */
+ cmpl $16, %eax
+ ja L(less_16_till_page)
+
+ VMOVU (%rdi), %xmm0
+ CMP_R1_S2_xmm (%xmm0, (%rsi), %xmm2, %xmm1)
+ VPCMPEQ %xmm0, %xmmZERO, %xmm2
+ vpandn %xmm1, %xmm2, %xmm1
+ vpmovmskb %ymm1, %ecx
+ incw %cx
+ jnz L(check_ret_vec_page_cross)
+ movl $16, %OFFSET_REG
# ifdef USE_AS_STRNCMP
- subq %rdx, %r11
+ cmpq %OFFSET_REG64, %rdx
+ jbe L(ret_zero_page_cross_slow_case0)
+ subl %eax, %OFFSET_REG
+# else
+ /* Explicit check for 16 byte alignment. */
+ subl %eax, %OFFSET_REG
+ jz L(prepare_loop)
# endif
- tzcntl %ecx, %edx
+
+ VMOVU (%rdi, %OFFSET_REG64), %xmm0
+ CMP_R1_S2_xmm (%xmm0, (%rsi, %OFFSET_REG64), %xmm2, %xmm1)
+ VPCMPEQ %xmm0, %xmmZERO, %xmm2
+ vpandn %xmm1, %xmm2, %xmm1
+ vpmovmskb %ymm1, %ecx
+ incw %cx
+ jnz L(check_ret_vec_page_cross)
+
# ifdef USE_AS_STRNCMP
- cmpq %r11, %rdx
- jae L(zero)
+ addl $16, %OFFSET_REG
+ subq %OFFSET_REG64, %rdx
+ jbe L(ret_zero_page_cross_slow_case0)
+ subq $-(VEC_SIZE * 4), %rdx
+
+ leaq -(VEC_SIZE * 4)(%rdi, %OFFSET_REG64), %rdi
+ leaq -(VEC_SIZE * 4)(%rsi, %OFFSET_REG64), %rsi
+# else
+ leaq (16 - VEC_SIZE * 4)(%rdi, %OFFSET_REG64), %rdi
+ leaq (16 - VEC_SIZE * 4)(%rsi, %OFFSET_REG64), %rsi
# endif
-# ifdef USE_AS_WCSCMP
+ jmp L(prepare_loop_aligned)
+
+# ifdef USE_AS_STRNCMP
+ .p2align 4,, 2
+L(ret_zero_page_cross_slow_case0):
xorl %eax, %eax
- movl (%rdi, %rdx), %ecx
- cmpl (%rsi, %rdx), %ecx
- jne L(wcscmp_return)
-# else
- movzbl (%rdi, %rdx), %eax
- movzbl (%rsi, %rdx), %edx
- subl %edx, %eax
+ ret
# endif
- VZEROUPPER_RETURN
- /* Comparing on page boundary region requires special treatment:
- It must done one vector at the time, starting with the wider
- ymm vector if possible, if not, with xmm. If fetching 16 bytes
- (xmm) still passes the boundary, byte comparison must be done.
- */
- .p2align 4
-L(cross_page):
- /* Try one ymm vector at a time. */
- cmpl $(PAGE_SIZE - VEC_SIZE), %eax
- jg L(cross_page_1_vector)
-L(loop_1_vector):
- vmovdqu (%rdi, %rdx), %ymm1
- VPCMPEQ (%rsi, %rdx), %ymm1, %ymm0
- VPMINU %ymm1, %ymm0, %ymm0
- VPCMPEQ %ymm7, %ymm0, %ymm0
- vpmovmskb %ymm0, %ecx
- testl %ecx, %ecx
- jne L(last_vector)
- addl $VEC_SIZE, %edx
+ .p2align 4,, 10
+L(less_16_till_page):
+ /* Find largest load size we can use. */
+ cmpl $24, %eax
+ ja L(less_8_till_page)
- addl $VEC_SIZE, %eax
-# ifdef USE_AS_STRNCMP
- /* Return 0 if the current offset (%rdx) >= the maximum offset
- (%r11). */
- cmpq %r11, %rdx
- jae L(zero)
-# endif
- cmpl $(PAGE_SIZE - VEC_SIZE), %eax
- jle L(loop_1_vector)
-L(cross_page_1_vector):
- /* Less than 32 bytes to check, try one xmm vector. */
- cmpl $(PAGE_SIZE - 16), %eax
- jg L(cross_page_1_xmm)
- vmovdqu (%rdi, %rdx), %xmm1
- VPCMPEQ (%rsi, %rdx), %xmm1, %xmm0
- VPMINU %xmm1, %xmm0, %xmm0
- VPCMPEQ %xmm7, %xmm0, %xmm0
- vpmovmskb %xmm0, %ecx
- testl %ecx, %ecx
- jne L(last_vector)
+ vmovq (%rdi), %xmm0
+ vmovq (%rsi), %xmm1
+ VPCMPEQ %xmm0, %xmmZERO, %xmm2
+ CMP_R1_R2_xmm (%xmm0, %xmm1, %xmm3, %xmm1)
+ vpandn %xmm1, %xmm2, %xmm1
+ vpmovmskb %ymm1, %ecx
+ incb %cl
+ jnz L(check_ret_vec_page_cross)
- addl $16, %edx
-# ifndef USE_AS_WCSCMP
- addl $16, %eax
+
+# ifdef USE_AS_STRNCMP
+ cmpq $8, %rdx
+ jbe L(ret_zero_page_cross_slow_case0)
# endif
+ movl $24, %OFFSET_REG
+ /* Explicit check for 16 byte alignment. */
+ subl %eax, %OFFSET_REG
+
+
+
+ vmovq (%rdi, %OFFSET_REG64), %xmm0
+ vmovq (%rsi, %OFFSET_REG64), %xmm1
+ VPCMPEQ %xmm0, %xmmZERO, %xmm2
+ CMP_R1_R2_xmm (%xmm0, %xmm1, %xmm3, %xmm1)
+ vpandn %xmm1, %xmm2, %xmm1
+ vpmovmskb %ymm1, %ecx
+ incb %cl
+ jnz L(check_ret_vec_page_cross)
+
# ifdef USE_AS_STRNCMP
- /* Return 0 if the current offset (%rdx) >= the maximum offset
- (%r11). */
- cmpq %r11, %rdx
- jae L(zero)
-# endif
-
-L(cross_page_1_xmm):
-# ifndef USE_AS_WCSCMP
- /* Less than 16 bytes to check, try 8 byte vector. NB: No need
- for wcscmp nor wcsncmp since wide char is 4 bytes. */
- cmpl $(PAGE_SIZE - 8), %eax
- jg L(cross_page_8bytes)
- vmovq (%rdi, %rdx), %xmm1
- vmovq (%rsi, %rdx), %xmm0
- VPCMPEQ %xmm0, %xmm1, %xmm0
- VPMINU %xmm1, %xmm0, %xmm0
- VPCMPEQ %xmm7, %xmm0, %xmm0
- vpmovmskb %xmm0, %ecx
- /* Only last 8 bits are valid. */
- andl $0xff, %ecx
- testl %ecx, %ecx
- jne L(last_vector)
+ addl $8, %OFFSET_REG
+ subq %OFFSET_REG64, %rdx
+ jbe L(ret_zero_page_cross_slow_case0)
+ subq $-(VEC_SIZE * 4), %rdx
+
+ leaq -(VEC_SIZE * 4)(%rdi, %OFFSET_REG64), %rdi
+ leaq -(VEC_SIZE * 4)(%rsi, %OFFSET_REG64), %rsi
+# else
+ leaq (8 - VEC_SIZE * 4)(%rdi, %OFFSET_REG64), %rdi
+ leaq (8 - VEC_SIZE * 4)(%rsi, %OFFSET_REG64), %rsi
+# endif
+ jmp L(prepare_loop_aligned)
+
- addl $8, %edx
- addl $8, %eax
+ .p2align 4,, 10
+L(less_8_till_page):
+# ifdef USE_AS_WCSCMP
+ /* If using wchar then this is the only check before we reach
+ the page boundary. */
+ movl (%rdi), %eax
+ movl (%rsi), %ecx
+ cmpl %ecx, %eax
+ jnz L(ret_less_8_wcs)
# ifdef USE_AS_STRNCMP
- /* Return 0 if the current offset (%rdx) >= the maximum offset
- (%r11). */
- cmpq %r11, %rdx
- jae L(zero)
+ addq %rdi, %rdx
+ /* We already checked for len <= 1 so cannot hit that case here.
+ */
# endif
+ testl %eax, %eax
+ jnz L(prepare_loop_no_len)
+ ret
-L(cross_page_8bytes):
- /* Less than 8 bytes to check, try 4 byte vector. */
- cmpl $(PAGE_SIZE - 4), %eax
- jg L(cross_page_4bytes)
- vmovd (%rdi, %rdx), %xmm1
- vmovd (%rsi, %rdx), %xmm0
- VPCMPEQ %xmm0, %xmm1, %xmm0
- VPMINU %xmm1, %xmm0, %xmm0
- VPCMPEQ %xmm7, %xmm0, %xmm0
- vpmovmskb %xmm0, %ecx
- /* Only last 4 bits are valid. */
- andl $0xf, %ecx
- testl %ecx, %ecx
- jne L(last_vector)
+ .p2align 4,, 8
+L(ret_less_8_wcs):
+ setl %OFFSET_REG8
+ negl %OFFSET_REG
+ movl %OFFSET_REG, %eax
+ xorl %r8d, %eax
+ ret
+
+# else
+
+ /* Find largest load size we can use. */
+ cmpl $28, %eax
+ ja L(less_4_till_page)
+
+ vmovd (%rdi), %xmm0
+ vmovd (%rsi), %xmm1
+ VPCMPEQ %xmm0, %xmmZERO, %xmm2
+ CMP_R1_R2_xmm (%xmm0, %xmm1, %xmm3, %xmm1)
+ vpandn %xmm1, %xmm2, %xmm1
+ vpmovmskb %ymm1, %ecx
+ subl $0xf, %ecx
+ jnz L(check_ret_vec_page_cross)
- addl $4, %edx
# ifdef USE_AS_STRNCMP
- /* Return 0 if the current offset (%rdx) >= the maximum offset
- (%r11). */
- cmpq %r11, %rdx
- jae L(zero)
+ cmpq $4, %rdx
+ jbe L(ret_zero_page_cross_slow_case1)
# endif
+ movl $28, %OFFSET_REG
+ /* Explicit check for 16 byte alignment. */
+ subl %eax, %OFFSET_REG
-L(cross_page_4bytes):
-# endif
- /* Less than 4 bytes to check, try one byte/dword at a time. */
-# ifdef USE_AS_STRNCMP
- cmpq %r11, %rdx
- jae L(zero)
-# endif
-# ifdef USE_AS_WCSCMP
- movl (%rdi, %rdx), %eax
- movl (%rsi, %rdx), %ecx
-# else
- movzbl (%rdi, %rdx), %eax
- movzbl (%rsi, %rdx), %ecx
+
+
+ vmovd (%rdi, %OFFSET_REG64), %xmm0
+ vmovd (%rsi, %OFFSET_REG64), %xmm1
+ VPCMPEQ %xmm0, %xmmZERO, %xmm2
+ CMP_R1_R2_xmm (%xmm0, %xmm1, %xmm3, %xmm1)
+ vpandn %xmm1, %xmm2, %xmm1
+ vpmovmskb %ymm1, %ecx
+ subl $0xf, %ecx
+ jnz L(check_ret_vec_page_cross)
+
+# ifdef USE_AS_STRNCMP
+ addl $4, %OFFSET_REG
+ subq %OFFSET_REG64, %rdx
+ jbe L(ret_zero_page_cross_slow_case1)
+ subq $-(VEC_SIZE * 4), %rdx
+
+ leaq -(VEC_SIZE * 4)(%rdi, %OFFSET_REG64), %rdi
+ leaq -(VEC_SIZE * 4)(%rsi, %OFFSET_REG64), %rsi
+# else
+ leaq (4 - VEC_SIZE * 4)(%rdi, %OFFSET_REG64), %rdi
+ leaq (4 - VEC_SIZE * 4)(%rsi, %OFFSET_REG64), %rsi
+# endif
+ jmp L(prepare_loop_aligned)
+
+# ifdef USE_AS_STRNCMP
+ .p2align 4,, 2
+L(ret_zero_page_cross_slow_case1):
+ xorl %eax, %eax
+ ret
+# endif
+
+ .p2align 4,, 10
+L(less_4_till_page):
+ subq %rdi, %rsi
+ /* Extremely slow byte comparison loop. */
+L(less_4_loop):
+ movzbl (%rdi), %eax
+ movzbl (%rsi, %rdi), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %BYTE_LOOP_REG)
+ subl %BYTE_LOOP_REG, %eax
+ jnz L(ret_less_4_loop)
+ testl %ecx, %ecx
+ jz L(ret_zero_4_loop)
+# ifdef USE_AS_STRNCMP
+ decq %rdx
+ jz L(ret_zero_4_loop)
+# endif
+ incq %rdi
+ /* end condition is reach page boundary (rdi is aligned). */
+ testl $31, %edi
+ jnz L(less_4_loop)
+ leaq -(VEC_SIZE * 4)(%rdi, %rsi), %rsi
+ addq $-(VEC_SIZE * 4), %rdi
+# ifdef USE_AS_STRNCMP
+ subq $-(VEC_SIZE * 4), %rdx
+# endif
+ jmp L(prepare_loop_aligned)
+
+L(ret_zero_4_loop):
+ xorl %eax, %eax
+ ret
+L(ret_less_4_loop):
+ xorl %r8d, %eax
+ subl %r8d, %eax
+ ret
# endif
- testl %eax, %eax
- jne L(cross_page_loop)
- subl %ecx, %eax
- VZEROUPPER_RETURN
-END (STRCMP)
+ cfi_endproc
+ .size STRCMP, .-STRCMP
#endif
#if IS_IN (libc)
# include <sysdep.h>
+# if defined USE_AS_STRCASECMP_L
+# include "locale-defines.h"
+# endif
# ifndef STRCMP
# define STRCMP __strcmp_evex
# define PAGE_SIZE 4096
-/* VEC_SIZE = Number of bytes in a ymm register */
+ /* VEC_SIZE = Number of bytes in a ymm register. */
# define VEC_SIZE 32
+# define CHAR_PER_VEC (VEC_SIZE / SIZE_OF_CHAR)
-/* Shift for dividing by (VEC_SIZE * 4). */
-# define DIVIDE_BY_VEC_4_SHIFT 7
-# if (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT)
-# error (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT)
-# endif
-
-# define VMOVU vmovdqu64
-# define VMOVA vmovdqa64
+# define VMOVU vmovdqu64
+# define VMOVA vmovdqa64
# ifdef USE_AS_WCSCMP
-/* Compare packed dwords. */
-# define VPCMP vpcmpd
-# define SHIFT_REG32 r8d
-# define SHIFT_REG64 r8
-/* 1 dword char == 4 bytes. */
+# ifndef OVERFLOW_STRCMP
+# define OVERFLOW_STRCMP __wcscmp_evex
+# endif
+
+# define TESTEQ subl $0xff,
+ /* Compare packed dwords. */
+# define VPCMP vpcmpd
+# define VPMINU vpminud
+# define VPTESTM vptestmd
+# define VPTESTNM vptestnmd
+ /* 1 dword char == 4 bytes. */
# define SIZE_OF_CHAR 4
# else
-/* Compare packed bytes. */
-# define VPCMP vpcmpb
-# define SHIFT_REG32 ecx
-# define SHIFT_REG64 rcx
-/* 1 byte char == 1 byte. */
+# ifndef OVERFLOW_STRCMP
+# define OVERFLOW_STRCMP __strcmp_evex
+# endif
+
+# define TESTEQ incl
+ /* Compare packed bytes. */
+# define VPCMP vpcmpb
+# define VPMINU vpminub
+# define VPTESTM vptestmb
+# define VPTESTNM vptestnmb
+ /* 1 byte char == 1 byte. */
# define SIZE_OF_CHAR 1
# endif
-# define XMMZERO xmm16
-# define XMM0 xmm17
-# define XMM1 xmm18
+# ifdef USE_AS_STRNCMP
+# define LOOP_REG r9d
+# define LOOP_REG64 r9
+
+# define OFFSET_REG8 r9b
+# define OFFSET_REG r9d
+# define OFFSET_REG64 r9
+# else
+# define LOOP_REG edx
+# define LOOP_REG64 rdx
+
+# define OFFSET_REG8 dl
+# define OFFSET_REG edx
+# define OFFSET_REG64 rdx
+# endif
+
+# if defined USE_AS_STRNCMP || defined USE_AS_WCSCMP
+# define VEC_OFFSET 0
+# else
+# define VEC_OFFSET (-VEC_SIZE)
+# endif
+
+# define XMM0 xmm17
+# define XMM1 xmm18
+
+# define XMM10 xmm27
+# define XMM11 xmm28
+# define XMM12 xmm29
+# define XMM13 xmm30
+# define XMM14 xmm31
+
+
+# define YMM0 ymm17
+# define YMM1 ymm18
+# define YMM2 ymm19
+# define YMM3 ymm20
+# define YMM4 ymm21
+# define YMM5 ymm22
+# define YMM6 ymm23
+# define YMM7 ymm24
+# define YMM8 ymm25
+# define YMM9 ymm26
+# define YMM10 ymm27
+# define YMM11 ymm28
+# define YMM12 ymm29
+# define YMM13 ymm30
+# define YMM14 ymm31
+
+# ifdef USE_AS_STRCASECMP_L
+# define BYTE_LOOP_REG OFFSET_REG
+# else
+# define BYTE_LOOP_REG ecx
+# endif
+
+# ifdef USE_AS_STRCASECMP_L
+# ifdef USE_AS_STRNCMP
+# define STRCASECMP __strncasecmp_evex
+# define LOCALE_REG rcx
+# define LOCALE_REG_LP RCX_LP
+# define STRCASECMP_NONASCII __strncasecmp_l_nonascii
+# else
+# define STRCASECMP __strcasecmp_evex
+# define LOCALE_REG rdx
+# define LOCALE_REG_LP RDX_LP
+# define STRCASECMP_NONASCII __strcasecmp_l_nonascii
+# endif
+# endif
+
+# define LCASE_MIN_YMM %YMM12
+# define LCASE_MAX_YMM %YMM13
+# define CASE_ADD_YMM %YMM14
+
+# define LCASE_MIN_XMM %XMM12
+# define LCASE_MAX_XMM %XMM13
+# define CASE_ADD_XMM %XMM14
+
+ /* NB: wcsncmp uses r11 but strcasecmp is never used in
+ conjunction with wcscmp. */
+# define TOLOWER_BASE %r11
+
+# ifdef USE_AS_STRCASECMP_L
+# define _REG(x, y) x ## y
+# define REG(x, y) _REG(x, y)
+# define TOLOWER(reg1, reg2, ext) \
+ vpsubb REG(LCASE_MIN_, ext), reg1, REG(%ext, 10); \
+ vpsubb REG(LCASE_MIN_, ext), reg2, REG(%ext, 11); \
+ vpcmpub $1, REG(LCASE_MAX_, ext), REG(%ext, 10), %k5; \
+ vpcmpub $1, REG(LCASE_MAX_, ext), REG(%ext, 11), %k6; \
+ vpaddb reg1, REG(CASE_ADD_, ext), reg1{%k5}; \
+ vpaddb reg2, REG(CASE_ADD_, ext), reg2{%k6}
+
+# define TOLOWER_gpr(src, dst) movl (TOLOWER_BASE, src, 4), dst
+# define TOLOWER_YMM(...) TOLOWER(__VA_ARGS__, YMM)
+# define TOLOWER_XMM(...) TOLOWER(__VA_ARGS__, XMM)
+
+# define CMP_R1_R2(s1_reg, s2_reg, reg_out, ext) \
+ TOLOWER (s1_reg, s2_reg, ext); \
+ VPCMP $0, s1_reg, s2_reg, reg_out
-# define YMMZERO ymm16
-# define YMM0 ymm17
-# define YMM1 ymm18
-# define YMM2 ymm19
-# define YMM3 ymm20
-# define YMM4 ymm21
-# define YMM5 ymm22
-# define YMM6 ymm23
-# define YMM7 ymm24
+# define CMP_R1_S2(s1_reg, s2_mem, s2_reg, reg_out, ext) \
+ VMOVU s2_mem, s2_reg; \
+ CMP_R1_R2(s1_reg, s2_reg, reg_out, ext)
+
+# define CMP_R1_R2_YMM(...) CMP_R1_R2(__VA_ARGS__, YMM)
+# define CMP_R1_R2_XMM(...) CMP_R1_R2(__VA_ARGS__, XMM)
+
+# define CMP_R1_S2_YMM(...) CMP_R1_S2(__VA_ARGS__, YMM)
+# define CMP_R1_S2_XMM(...) CMP_R1_S2(__VA_ARGS__, XMM)
+
+# else
+# define TOLOWER_gpr(...)
+# define TOLOWER_YMM(...)
+# define TOLOWER_XMM(...)
+
+# define CMP_R1_R2_YMM(s1_reg, s2_reg, reg_out) \
+ VPCMP $0, s2_reg, s1_reg, reg_out
+
+# define CMP_R1_R2_XMM(...) CMP_R1_R2_YMM(__VA_ARGS__)
+
+# define CMP_R1_S2_YMM(s1_reg, s2_mem, unused, reg_out) \
+ VPCMP $0, s2_mem, s1_reg, reg_out
+
+# define CMP_R1_S2_XMM(...) CMP_R1_S2_YMM(__VA_ARGS__)
+# endif
/* Warning!
wcscmp/wcsncmp have to use SIGNED comparison for elements.
/* The main idea of the string comparison (byte or dword) using 256-bit
EVEX instructions consists of comparing (VPCMP) two ymm vectors. The
latter can be on either packed bytes or dwords depending on
- USE_AS_WCSCMP. In order to check the null char, algorithm keeps the
+ USE_AS_WCSCMP. In order to check the null CHAR, algorithm keeps the
matched bytes/dwords, requiring 5 EVEX instructions (3 VPCMP and 2
KORD). In general, the costs of comparing VEC_SIZE bytes (32-bytes)
are 3 VPCMP and 2 KORD instructions, together with VMOVU and ktestd
the maximum offset is reached before a difference is found, zero is
returned. */
- .section .text.evex,"ax",@progbits
-ENTRY (STRCMP)
+ .section .text.evex, "ax", @progbits
+ .align 16
+ .type STRCMP, @function
+ .globl STRCMP
+ .hidden STRCMP
+
+# ifdef USE_AS_STRCASECMP_L
+ENTRY (STRCASECMP)
+ movq __libc_tsd_LOCALE@gottpoff(%rip), %rax
+ mov %fs:(%rax), %LOCALE_REG_LP
+
+ /* Either 1 or 5 bytes (depending on whether CET is enabled). */
+ .p2align 4
+END (STRCASECMP)
+ /* FALLTHROUGH to strcasecmp/strncasecmp_l. */
+# endif
+
+ .p2align 4
+STRCMP:
+ cfi_startproc
+ _CET_ENDBR
+ CALL_MCOUNT
+
+# if defined USE_AS_STRCASECMP_L
+ /* We have to fall back on the C implementation for locales with
+ encodings not matching ASCII for single bytes. */
+# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
+ mov LOCALE_T___LOCALES + LC_CTYPE * LP_SIZE(%LOCALE_REG), %RAX_LP
+# else
+ mov (%LOCALE_REG), %RAX_LP
+# endif
+ testl $1, LOCALE_DATA_VALUES + _NL_CTYPE_NONASCII_CASE * SIZEOF_VALUES(%rax)
+ jne STRCASECMP_NONASCII
+ leaq _nl_C_LC_CTYPE_tolower + 128 * 4(%rip), TOLOWER_BASE
+# endif
+
# ifdef USE_AS_STRNCMP
- /* Check for simple cases (0 or 1) in offset. */
- cmp $1, %RDX_LP
- je L(char0)
- jb L(zero)
-# ifdef USE_AS_WCSCMP
- /* Convert units: from wide to byte char. */
- shl $2, %RDX_LP
+ /* Don't overwrite LOCALE_REG (rcx) until we have passed
+ L(one_or_less). Otherwise we might use the wrong locale in
+ the OVERFLOW_STRCMP (strcasecmp_l). */
+# ifdef __ILP32__
+ /* Clear the upper 32 bits. */
+ movl %edx, %edx
# endif
- /* Register %r11 tracks the maximum offset. */
- mov %RDX_LP, %R11_LP
+ cmp $1, %RDX_LP
+ /* Signed comparison intentional. We use this branch to also
+ test cases where length >= 2^63. These very large sizes can be
+ handled with strcmp as there is no way for that length to
+ actually bound the buffer. */
+ jle L(one_or_less)
+# endif
+
+# if defined USE_AS_STRCASECMP_L
+ .section .rodata.cst32, "aM", @progbits, 32
+ .align 32
+L(lcase_min):
+ .quad 0x4141414141414141
+ .quad 0x4141414141414141
+ .quad 0x4141414141414141
+ .quad 0x4141414141414141
+L(lcase_max):
+ .quad 0x1a1a1a1a1a1a1a1a
+ .quad 0x1a1a1a1a1a1a1a1a
+ .quad 0x1a1a1a1a1a1a1a1a
+ .quad 0x1a1a1a1a1a1a1a1a
+L(case_add):
+ .quad 0x2020202020202020
+ .quad 0x2020202020202020
+ .quad 0x2020202020202020
+ .quad 0x2020202020202020
+ .previous
+
+ vmovdqa64 L(lcase_min)(%rip), LCASE_MIN_YMM
+ vmovdqa64 L(lcase_max)(%rip), LCASE_MAX_YMM
+ vmovdqa64 L(case_add)(%rip), CASE_ADD_YMM
# endif
+
movl %edi, %eax
- xorl %edx, %edx
- /* Make %XMMZERO (%YMMZERO) all zeros in this function. */
- vpxorq %XMMZERO, %XMMZERO, %XMMZERO
orl %esi, %eax
- andl $(PAGE_SIZE - 1), %eax
- cmpl $(PAGE_SIZE - (VEC_SIZE * 4)), %eax
- jg L(cross_page)
- /* Start comparing 4 vectors. */
+ /* Shift out the bits irrelevant to page boundary ([63:12]). */
+ sall $20, %eax
+ /* Check if s1 or s2 may cross a page in next 4x VEC loads. */
+ cmpl $((PAGE_SIZE -(VEC_SIZE * 4)) << 20), %eax
+ ja L(page_cross)
+
+L(no_page_cross):
+ /* Safe to compare 4x vectors. */
VMOVU (%rdi), %YMM0
- VMOVU (%rsi), %YMM1
-
- /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */
- VPCMP $4, %YMM0, %YMM1, %k0
-
- /* Check for NULL in YMM0. */
- VPCMP $0, %YMMZERO, %YMM0, %k1
- /* Check for NULL in YMM1. */
- VPCMP $0, %YMMZERO, %YMM1, %k2
- /* Each bit in K1 represents a NULL in YMM0 or YMM1. */
- kord %k1, %k2, %k1
-
- /* Each bit in K1 represents:
- 1. A mismatch in YMM0 and YMM1. Or
- 2. A NULL in YMM0 or YMM1.
- */
- kord %k0, %k1, %k1
-
- ktestd %k1, %k1
- je L(next_3_vectors)
+ VPTESTM %YMM0, %YMM0, %k2
+ /* Each bit cleared in K1 represents a mismatch or a null CHAR
+ in YMM0 and 32 bytes at (%rsi). */
+ CMP_R1_S2_YMM (%YMM0, (%rsi), %YMM1, %k1){%k2}
kmovd %k1, %ecx
- tzcntl %ecx, %edx
-# ifdef USE_AS_WCSCMP
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %edx
-# endif
# ifdef USE_AS_STRNCMP
- /* Return 0 if the mismatched index (%rdx) is after the maximum
- offset (%r11). */
- cmpq %r11, %rdx
- jae L(zero)
+ cmpq $CHAR_PER_VEC, %rdx
+ jbe L(vec_0_test_len)
# endif
+
+ /* TESTEQ is `incl` for strcmp/strncmp and `subl $0xff` for
+ wcscmp/wcsncmp. */
+
+ /* All 1s represents all equals. TESTEQ will overflow to zero in
+ all equals case. Otherwise 1s will carry until position of first
+ mismatch. */
+ TESTEQ %ecx
+ jz L(more_3x_vec)
+
+ .p2align 4,, 4
+L(return_vec_0):
+ tzcntl %ecx, %ecx
# ifdef USE_AS_WCSCMP
+ movl (%rdi, %rcx, SIZE_OF_CHAR), %edx
xorl %eax, %eax
- movl (%rdi, %rdx), %ecx
- cmpl (%rsi, %rdx), %ecx
- je L(return)
-L(wcscmp_return):
+ cmpl (%rsi, %rcx, SIZE_OF_CHAR), %edx
+ je L(ret0)
setl %al
negl %eax
orl $1, %eax
-L(return):
# else
- movzbl (%rdi, %rdx), %eax
- movzbl (%rsi, %rdx), %edx
- subl %edx, %eax
+ movzbl (%rdi, %rcx), %eax
+ movzbl (%rsi, %rcx), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
# endif
+L(ret0):
ret
- .p2align 4
-L(return_vec_size):
- kmovd %k1, %ecx
- tzcntl %ecx, %edx
-# ifdef USE_AS_WCSCMP
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %edx
-# endif
# ifdef USE_AS_STRNCMP
- /* Return 0 if the mismatched index (%rdx + VEC_SIZE) is after
- the maximum offset (%r11). */
- addq $VEC_SIZE, %rdx
- cmpq %r11, %rdx
- jae L(zero)
-# ifdef USE_AS_WCSCMP
+ .p2align 4,, 4
+L(vec_0_test_len):
+ notl %ecx
+ bzhil %edx, %ecx, %eax
+ jnz L(return_vec_0)
+ /* Align if will cross fetch block. */
+ .p2align 4,, 2
+L(ret_zero):
xorl %eax, %eax
- movl (%rdi, %rdx), %ecx
- cmpl (%rsi, %rdx), %ecx
- jne L(wcscmp_return)
-# else
- movzbl (%rdi, %rdx), %eax
- movzbl (%rsi, %rdx), %edx
- subl %edx, %eax
+ ret
+
+ .p2align 4,, 5
+L(one_or_less):
+# ifdef USE_AS_STRCASECMP_L
+ /* Set locale argument for strcasecmp. */
+ movq %LOCALE_REG, %rdx
# endif
-# else
+ jb L(ret_zero)
+ /* 'nbe' covers the case where length is negative (large
+ unsigned). */
+ jnbe OVERFLOW_STRCMP
# ifdef USE_AS_WCSCMP
+ movl (%rdi), %edx
xorl %eax, %eax
- movl VEC_SIZE(%rdi, %rdx), %ecx
- cmpl VEC_SIZE(%rsi, %rdx), %ecx
- jne L(wcscmp_return)
+ cmpl (%rsi), %edx
+ je L(ret1)
+ setl %al
+ negl %eax
+ orl $1, %eax
# else
- movzbl VEC_SIZE(%rdi, %rdx), %eax
- movzbl VEC_SIZE(%rsi, %rdx), %edx
- subl %edx, %eax
+ movzbl (%rdi), %eax
+ movzbl (%rsi), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
# endif
-# endif
+L(ret1):
ret
+# endif
- .p2align 4
-L(return_2_vec_size):
- kmovd %k1, %ecx
- tzcntl %ecx, %edx
+ .p2align 4,, 10
+L(return_vec_1):
+ tzcntl %ecx, %ecx
+# ifdef USE_AS_STRNCMP
+ /* rdx must be > CHAR_PER_VEC so it's safe to subtract without
+ worrying about underflow. */
+ addq $-CHAR_PER_VEC, %rdx
+ cmpq %rcx, %rdx
+ jbe L(ret_zero)
+# endif
# ifdef USE_AS_WCSCMP
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %edx
+ movl VEC_SIZE(%rdi, %rcx, SIZE_OF_CHAR), %edx
+ xorl %eax, %eax
+ cmpl VEC_SIZE(%rsi, %rcx, SIZE_OF_CHAR), %edx
+ je L(ret2)
+ setl %al
+ negl %eax
+ orl $1, %eax
+# else
+ movzbl VEC_SIZE(%rdi, %rcx), %eax
+ movzbl VEC_SIZE(%rsi, %rcx), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
# endif
+L(ret2):
+ ret
+
+ .p2align 4,, 10
# ifdef USE_AS_STRNCMP
- /* Return 0 if the mismatched index (%rdx + 2 * VEC_SIZE) is
- after the maximum offset (%r11). */
- addq $(VEC_SIZE * 2), %rdx
- cmpq %r11, %rdx
- jae L(zero)
-# ifdef USE_AS_WCSCMP
- xorl %eax, %eax
- movl (%rdi, %rdx), %ecx
- cmpl (%rsi, %rdx), %ecx
- jne L(wcscmp_return)
+L(return_vec_3):
+# if CHAR_PER_VEC <= 16
+ sall $CHAR_PER_VEC, %ecx
# else
- movzbl (%rdi, %rdx), %eax
- movzbl (%rsi, %rdx), %edx
- subl %edx, %eax
+ salq $CHAR_PER_VEC, %rcx
# endif
+# endif
+L(return_vec_2):
+# if (CHAR_PER_VEC <= 16) || !(defined USE_AS_STRNCMP)
+ tzcntl %ecx, %ecx
# else
+ tzcntq %rcx, %rcx
+# endif
+
+# ifdef USE_AS_STRNCMP
+ cmpq %rcx, %rdx
+ jbe L(ret_zero)
+# endif
+
+# ifdef USE_AS_WCSCMP
+ movl (VEC_SIZE * 2)(%rdi, %rcx, SIZE_OF_CHAR), %edx
+ xorl %eax, %eax
+ cmpl (VEC_SIZE * 2)(%rsi, %rcx, SIZE_OF_CHAR), %edx
+ je L(ret3)
+ setl %al
+ negl %eax
+ orl $1, %eax
+# else
+ movzbl (VEC_SIZE * 2)(%rdi, %rcx), %eax
+ movzbl (VEC_SIZE * 2)(%rsi, %rcx), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
+# endif
+L(ret3):
+ ret
+
+# ifndef USE_AS_STRNCMP
+ .p2align 4,, 10
+L(return_vec_3):
+ tzcntl %ecx, %ecx
# ifdef USE_AS_WCSCMP
+ movl (VEC_SIZE * 3)(%rdi, %rcx, SIZE_OF_CHAR), %edx
xorl %eax, %eax
- movl (VEC_SIZE * 2)(%rdi, %rdx), %ecx
- cmpl (VEC_SIZE * 2)(%rsi, %rdx), %ecx
- jne L(wcscmp_return)
+ cmpl (VEC_SIZE * 3)(%rsi, %rcx, SIZE_OF_CHAR), %edx
+ je L(ret4)
+ setl %al
+ negl %eax
+ orl $1, %eax
# else
- movzbl (VEC_SIZE * 2)(%rdi, %rdx), %eax
- movzbl (VEC_SIZE * 2)(%rsi, %rdx), %edx
- subl %edx, %eax
+ movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax
+ movzbl (VEC_SIZE * 3)(%rsi, %rcx), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
# endif
-# endif
+L(ret4):
ret
+# endif
- .p2align 4
-L(return_3_vec_size):
+ /* 32 byte align here ensures the main loop is ideally aligned
+ for DSB. */
+ .p2align 5
+L(more_3x_vec):
+ /* Safe to compare 4x vectors. */
+ VMOVU (VEC_SIZE)(%rdi), %YMM0
+ VPTESTM %YMM0, %YMM0, %k2
+ CMP_R1_S2_YMM (%YMM0, VEC_SIZE(%rsi), %YMM1, %k1){%k2}
+ kmovd %k1, %ecx
+ TESTEQ %ecx
+ jnz L(return_vec_1)
+
+# ifdef USE_AS_STRNCMP
+ subq $(CHAR_PER_VEC * 2), %rdx
+ jbe L(ret_zero)
+# endif
+
+ VMOVU (VEC_SIZE * 2)(%rdi), %YMM0
+ VPTESTM %YMM0, %YMM0, %k2
+ CMP_R1_S2_YMM (%YMM0, (VEC_SIZE * 2)(%rsi), %YMM1, %k1){%k2}
kmovd %k1, %ecx
- tzcntl %ecx, %edx
+ TESTEQ %ecx
+ jnz L(return_vec_2)
+
+ VMOVU (VEC_SIZE * 3)(%rdi), %YMM0
+ VPTESTM %YMM0, %YMM0, %k2
+ CMP_R1_S2_YMM (%YMM0, (VEC_SIZE * 3)(%rsi), %YMM1, %k1){%k2}
+ kmovd %k1, %ecx
+ TESTEQ %ecx
+ jnz L(return_vec_3)
+
+# ifdef USE_AS_STRNCMP
+ cmpq $(CHAR_PER_VEC * 2), %rdx
+ jbe L(ret_zero)
+# endif
+
+
# ifdef USE_AS_WCSCMP
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %edx
+ /* Any non-zero positive value that doesn't interfere with 0x1.
+ */
+ movl $2, %r8d
+
+# else
+ xorl %r8d, %r8d
# endif
+
+ /* The prepare labels are various entry points from the page
+ cross logic. */
+L(prepare_loop):
+
# ifdef USE_AS_STRNCMP
- /* Return 0 if the mismatched index (%rdx + 3 * VEC_SIZE) is
- after the maximum offset (%r11). */
- addq $(VEC_SIZE * 3), %rdx
- cmpq %r11, %rdx
- jae L(zero)
# ifdef USE_AS_WCSCMP
- xorl %eax, %eax
- movl (%rdi, %rdx), %ecx
- cmpl (%rsi, %rdx), %ecx
- jne L(wcscmp_return)
+L(prepare_loop_no_len):
+ movl %edi, %ecx
+ andl $(VEC_SIZE * 4 - 1), %ecx
+ shrl $2, %ecx
+ leaq (CHAR_PER_VEC * 2)(%rdx, %rcx), %rdx
# else
- movzbl (%rdi, %rdx), %eax
- movzbl (%rsi, %rdx), %edx
- subl %edx, %eax
+ /* Store N + (VEC_SIZE * 4) and place check at the beginning of
+ the loop. */
+ leaq (VEC_SIZE * 2)(%rdi, %rdx), %rdx
+L(prepare_loop_no_len):
# endif
# else
-# ifdef USE_AS_WCSCMP
- xorl %eax, %eax
- movl (VEC_SIZE * 3)(%rdi, %rdx), %ecx
- cmpl (VEC_SIZE * 3)(%rsi, %rdx), %ecx
- jne L(wcscmp_return)
-# else
- movzbl (VEC_SIZE * 3)(%rdi, %rdx), %eax
- movzbl (VEC_SIZE * 3)(%rsi, %rdx), %edx
- subl %edx, %eax
-# endif
+L(prepare_loop_no_len):
# endif
- ret
- .p2align 4
-L(next_3_vectors):
- VMOVU VEC_SIZE(%rdi), %YMM0
- VMOVU VEC_SIZE(%rsi), %YMM1
- /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */
- VPCMP $4, %YMM0, %YMM1, %k0
- VPCMP $0, %YMMZERO, %YMM0, %k1
- VPCMP $0, %YMMZERO, %YMM1, %k2
- /* Each bit in K1 represents a NULL in YMM0 or YMM1. */
- kord %k1, %k2, %k1
- /* Each bit in K1 represents a NULL or a mismatch. */
- kord %k0, %k1, %k1
- ktestd %k1, %k1
- jne L(return_vec_size)
-
- VMOVU (VEC_SIZE * 2)(%rdi), %YMM2
- VMOVU (VEC_SIZE * 3)(%rdi), %YMM3
- VMOVU (VEC_SIZE * 2)(%rsi), %YMM4
- VMOVU (VEC_SIZE * 3)(%rsi), %YMM5
-
- /* Each bit in K0 represents a mismatch in YMM2 and YMM4. */
- VPCMP $4, %YMM2, %YMM4, %k0
- VPCMP $0, %YMMZERO, %YMM2, %k1
- VPCMP $0, %YMMZERO, %YMM4, %k2
- /* Each bit in K1 represents a NULL in YMM2 or YMM4. */
- kord %k1, %k2, %k1
- /* Each bit in K1 represents a NULL or a mismatch. */
- kord %k0, %k1, %k1
- ktestd %k1, %k1
- jne L(return_2_vec_size)
-
- /* Each bit in K0 represents a mismatch in YMM3 and YMM5. */
- VPCMP $4, %YMM3, %YMM5, %k0
- VPCMP $0, %YMMZERO, %YMM3, %k1
- VPCMP $0, %YMMZERO, %YMM5, %k2
- /* Each bit in K1 represents a NULL in YMM3 or YMM5. */
- kord %k1, %k2, %k1
- /* Each bit in K1 represents a NULL or a mismatch. */
- kord %k0, %k1, %k1
- ktestd %k1, %k1
- jne L(return_3_vec_size)
-L(main_loop_header):
- leaq (VEC_SIZE * 4)(%rdi), %rdx
- movl $PAGE_SIZE, %ecx
- /* Align load via RAX. */
- andq $-(VEC_SIZE * 4), %rdx
+ /* Align s1 and adjust s2 accordingly. */
+ subq %rdi, %rsi
+ andq $-(VEC_SIZE * 4), %rdi
+L(prepare_loop_readj):
+ addq %rdi, %rsi
+# if (defined USE_AS_STRNCMP) && !(defined USE_AS_WCSCMP)
subq %rdi, %rdx
- leaq (%rdi, %rdx), %rax
-# ifdef USE_AS_STRNCMP
- /* Starting from this point, the maximum offset, or simply the
- 'offset', DECREASES by the same amount when base pointers are
- moved forward. Return 0 when:
- 1) On match: offset <= the matched vector index.
- 2) On mistmach, offset is before the mistmatched index.
- */
- subq %rdx, %r11
- jbe L(zero)
-# endif
- addq %rsi, %rdx
- movq %rdx, %rsi
- andl $(PAGE_SIZE - 1), %esi
- /* Number of bytes before page crossing. */
- subq %rsi, %rcx
- /* Number of VEC_SIZE * 4 blocks before page crossing. */
- shrq $DIVIDE_BY_VEC_4_SHIFT, %rcx
- /* ESI: Number of VEC_SIZE * 4 blocks before page crossing. */
- movl %ecx, %esi
- jmp L(loop_start)
+# endif
+
+L(prepare_loop_aligned):
+ /* eax stores distance from rsi to next page cross. These cases
+ need to be handled specially as the 4x loop could potentially
+ read memory past the length of s1 or s2 and across a page
+ boundary. */
+ movl $-(VEC_SIZE * 4), %eax
+ subl %esi, %eax
+ andl $(PAGE_SIZE - 1), %eax
+
+ /* Loop 4x comparisons at a time. */
.p2align 4
L(loop):
+
+ /* End condition for strncmp. */
# ifdef USE_AS_STRNCMP
- /* Base pointers are moved forward by 4 * VEC_SIZE. Decrease
- the maximum offset (%r11) by the same amount. */
- subq $(VEC_SIZE * 4), %r11
- jbe L(zero)
-# endif
- addq $(VEC_SIZE * 4), %rax
- addq $(VEC_SIZE * 4), %rdx
-L(loop_start):
- testl %esi, %esi
- leal -1(%esi), %esi
- je L(loop_cross_page)
-L(back_to_loop):
- /* Main loop, comparing 4 vectors are a time. */
- VMOVA (%rax), %YMM0
- VMOVA VEC_SIZE(%rax), %YMM2
- VMOVA (VEC_SIZE * 2)(%rax), %YMM4
- VMOVA (VEC_SIZE * 3)(%rax), %YMM6
- VMOVU (%rdx), %YMM1
- VMOVU VEC_SIZE(%rdx), %YMM3
- VMOVU (VEC_SIZE * 2)(%rdx), %YMM5
- VMOVU (VEC_SIZE * 3)(%rdx), %YMM7
-
- VPCMP $4, %YMM0, %YMM1, %k0
- VPCMP $0, %YMMZERO, %YMM0, %k1
- VPCMP $0, %YMMZERO, %YMM1, %k2
- kord %k1, %k2, %k1
- /* Each bit in K4 represents a NULL or a mismatch in YMM0 and
- YMM1. */
- kord %k0, %k1, %k4
-
- VPCMP $4, %YMM2, %YMM3, %k0
- VPCMP $0, %YMMZERO, %YMM2, %k1
- VPCMP $0, %YMMZERO, %YMM3, %k2
- kord %k1, %k2, %k1
- /* Each bit in K5 represents a NULL or a mismatch in YMM2 and
- YMM3. */
- kord %k0, %k1, %k5
-
- VPCMP $4, %YMM4, %YMM5, %k0
- VPCMP $0, %YMMZERO, %YMM4, %k1
- VPCMP $0, %YMMZERO, %YMM5, %k2
- kord %k1, %k2, %k1
- /* Each bit in K6 represents a NULL or a mismatch in YMM4 and
- YMM5. */
- kord %k0, %k1, %k6
-
- VPCMP $4, %YMM6, %YMM7, %k0
- VPCMP $0, %YMMZERO, %YMM6, %k1
- VPCMP $0, %YMMZERO, %YMM7, %k2
- kord %k1, %k2, %k1
- /* Each bit in K7 represents a NULL or a mismatch in YMM6 and
- YMM7. */
- kord %k0, %k1, %k7
-
- kord %k4, %k5, %k0
- kord %k6, %k7, %k1
-
- /* Test each mask (32 bits) individually because for VEC_SIZE
- == 32 is not possible to OR the four masks and keep all bits
- in a 64-bit integer register, differing from SSE2 strcmp
- where ORing is possible. */
- kortestd %k0, %k1
- je L(loop)
- ktestd %k4, %k4
- je L(test_vec)
- kmovd %k4, %edi
- tzcntl %edi, %ecx
-# ifdef USE_AS_WCSCMP
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %ecx
+ subq $(CHAR_PER_VEC * 4), %rdx
+ jbe L(ret_zero)
# endif
-# ifdef USE_AS_STRNCMP
- cmpq %rcx, %r11
- jbe L(zero)
-# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
- xorl %eax, %eax
- movl (%rsi, %rcx), %edi
- cmpl (%rdx, %rcx), %edi
- jne L(wcscmp_return)
-# else
- movzbl (%rax, %rcx), %eax
- movzbl (%rdx, %rcx), %edx
- subl %edx, %eax
-# endif
+
+ subq $-(VEC_SIZE * 4), %rdi
+ subq $-(VEC_SIZE * 4), %rsi
+
+ /* Check if rsi loads will cross a page boundary. */
+ addl $-(VEC_SIZE * 4), %eax
+ jnb L(page_cross_during_loop)
+
+ /* Loop entry after handling page cross during loop. */
+L(loop_skip_page_cross_check):
+ VMOVA (VEC_SIZE * 0)(%rdi), %YMM0
+ VMOVA (VEC_SIZE * 1)(%rdi), %YMM2
+ VMOVA (VEC_SIZE * 2)(%rdi), %YMM4
+ VMOVA (VEC_SIZE * 3)(%rdi), %YMM6
+
+ VPMINU %YMM0, %YMM2, %YMM8
+ VPMINU %YMM4, %YMM6, %YMM9
+
+ /* A zero CHAR in YMM9 means that there is a null CHAR. */
+ VPMINU %YMM8, %YMM9, %YMM9
+
+ /* Each bit set in K1 represents a non-null CHAR in YMM9. */
+ VPTESTM %YMM9, %YMM9, %k1
+# ifndef USE_AS_STRCASECMP_L
+ vpxorq (VEC_SIZE * 0)(%rsi), %YMM0, %YMM1
+ vpxorq (VEC_SIZE * 1)(%rsi), %YMM2, %YMM3
+ vpxorq (VEC_SIZE * 2)(%rsi), %YMM4, %YMM5
+ /* Ternary logic to xor (VEC_SIZE * 3)(%rsi) with YMM6 while
+ oring with YMM1. Result is stored in YMM6. */
+ vpternlogd $0xde, (VEC_SIZE * 3)(%rsi), %YMM1, %YMM6
# else
-# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
- xorl %eax, %eax
- movl (%rsi, %rcx), %edi
- cmpl (%rdx, %rcx), %edi
- jne L(wcscmp_return)
-# else
- movzbl (%rax, %rcx), %eax
- movzbl (%rdx, %rcx), %edx
- subl %edx, %eax
-# endif
+ VMOVU (VEC_SIZE * 0)(%rsi), %YMM1
+ TOLOWER_YMM (%YMM0, %YMM1)
+ VMOVU (VEC_SIZE * 1)(%rsi), %YMM3
+ TOLOWER_YMM (%YMM2, %YMM3)
+ VMOVU (VEC_SIZE * 2)(%rsi), %YMM5
+ TOLOWER_YMM (%YMM4, %YMM5)
+ VMOVU (VEC_SIZE * 3)(%rsi), %YMM7
+ TOLOWER_YMM (%YMM6, %YMM7)
+ vpxorq %YMM0, %YMM1, %YMM1
+ vpxorq %YMM2, %YMM3, %YMM3
+ vpxorq %YMM4, %YMM5, %YMM5
+ vpternlogd $0xde, %YMM7, %YMM1, %YMM6
# endif
- ret
+ /* Or together YMM3, YMM5, and YMM6. */
+ vpternlogd $0xfe, %YMM3, %YMM5, %YMM6
- .p2align 4
-L(test_vec):
+
+ /* A non-zero CHAR in YMM6 represents a mismatch. */
+ VPTESTNM %YMM6, %YMM6, %k0{%k1}
+ kmovd %k0, %LOOP_REG
+
+ TESTEQ %LOOP_REG
+ jz L(loop)
+
+
+ /* Find which VEC has the mismatch or end of string. */
+ VPTESTM %YMM0, %YMM0, %k1
+ VPTESTNM %YMM1, %YMM1, %k0{%k1}
+ kmovd %k0, %ecx
+ TESTEQ %ecx
+ jnz L(return_vec_0_end)
+
+ VPTESTM %YMM2, %YMM2, %k1
+ VPTESTNM %YMM3, %YMM3, %k0{%k1}
+ kmovd %k0, %ecx
+ TESTEQ %ecx
+ jnz L(return_vec_1_end)
+
+
+ /* Handle VEC 2 and 3 without branches. */
+L(return_vec_2_3_end):
# ifdef USE_AS_STRNCMP
- /* The first vector matched. Return 0 if the maximum offset
- (%r11) <= VEC_SIZE. */
- cmpq $VEC_SIZE, %r11
- jbe L(zero)
-# endif
- ktestd %k5, %k5
- je L(test_2_vec)
- kmovd %k5, %ecx
- tzcntl %ecx, %edi
-# ifdef USE_AS_WCSCMP
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %edi
+ subq $(CHAR_PER_VEC * 2), %rdx
+ jbe L(ret_zero_end)
+# endif
+
+ VPTESTM %YMM4, %YMM4, %k1
+ VPTESTNM %YMM5, %YMM5, %k0{%k1}
+ kmovd %k0, %ecx
+ TESTEQ %ecx
+# if CHAR_PER_VEC <= 16
+ sall $CHAR_PER_VEC, %LOOP_REG
+ orl %ecx, %LOOP_REG
+# else
+ salq $CHAR_PER_VEC, %LOOP_REG64
+ orq %rcx, %LOOP_REG64
+# endif
+L(return_vec_3_end):
+ /* LOOP_REG contains matches for null/mismatch from the loop. If
+ VEC 0,1,and 2 all have no null and no mismatches then mismatch
+ must entirely be from VEC 3 which is fully represented by
+ LOOP_REG. */
+# if CHAR_PER_VEC <= 16
+ tzcntl %LOOP_REG, %LOOP_REG
+# else
+ tzcntq %LOOP_REG64, %LOOP_REG64
# endif
# ifdef USE_AS_STRNCMP
- addq $VEC_SIZE, %rdi
- cmpq %rdi, %r11
- jbe L(zero)
-# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
+ cmpq %LOOP_REG64, %rdx
+ jbe L(ret_zero_end)
+# endif
+
+# ifdef USE_AS_WCSCMP
+ movl (VEC_SIZE * 2)(%rdi, %LOOP_REG64, SIZE_OF_CHAR), %ecx
xorl %eax, %eax
- movl (%rsi, %rdi), %ecx
- cmpl (%rdx, %rdi), %ecx
- jne L(wcscmp_return)
-# else
- movzbl (%rax, %rdi), %eax
- movzbl (%rdx, %rdi), %edx
- subl %edx, %eax
-# endif
+ cmpl (VEC_SIZE * 2)(%rsi, %LOOP_REG64, SIZE_OF_CHAR), %ecx
+ je L(ret5)
+ setl %al
+ negl %eax
+ xorl %r8d, %eax
# else
-# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
- xorl %eax, %eax
- movl VEC_SIZE(%rsi, %rdi), %ecx
- cmpl VEC_SIZE(%rdx, %rdi), %ecx
- jne L(wcscmp_return)
-# else
- movzbl VEC_SIZE(%rax, %rdi), %eax
- movzbl VEC_SIZE(%rdx, %rdi), %edx
- subl %edx, %eax
-# endif
+ movzbl (VEC_SIZE * 2)(%rdi, %LOOP_REG64), %eax
+ movzbl (VEC_SIZE * 2)(%rsi, %LOOP_REG64), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
+ xorl %r8d, %eax
+ subl %r8d, %eax
# endif
+L(ret5):
ret
- .p2align 4
-L(test_2_vec):
# ifdef USE_AS_STRNCMP
- /* The first 2 vectors matched. Return 0 if the maximum offset
- (%r11) <= 2 * VEC_SIZE. */
- cmpq $(VEC_SIZE * 2), %r11
- jbe L(zero)
-# endif
- ktestd %k6, %k6
- je L(test_3_vec)
- kmovd %k6, %ecx
- tzcntl %ecx, %edi
-# ifdef USE_AS_WCSCMP
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %edi
+ .p2align 4,, 2
+L(ret_zero_end):
+ xorl %eax, %eax
+ ret
# endif
+
+
+ /* The L(return_vec_N_end) differ from L(return_vec_N) in that
+ they use the value of `r8` to negate the return value. This is
+ because the page cross logic can swap `rdi` and `rsi`. */
+ .p2align 4,, 10
# ifdef USE_AS_STRNCMP
- addq $(VEC_SIZE * 2), %rdi
- cmpq %rdi, %r11
- jbe L(zero)
-# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
- xorl %eax, %eax
- movl (%rsi, %rdi), %ecx
- cmpl (%rdx, %rdi), %ecx
- jne L(wcscmp_return)
+L(return_vec_1_end):
+# if CHAR_PER_VEC <= 16
+ sall $CHAR_PER_VEC, %ecx
# else
- movzbl (%rax, %rdi), %eax
- movzbl (%rdx, %rdi), %edx
- subl %edx, %eax
+ salq $CHAR_PER_VEC, %rcx
# endif
+# endif
+L(return_vec_0_end):
+# if (CHAR_PER_VEC <= 16) || !(defined USE_AS_STRNCMP)
+ tzcntl %ecx, %ecx
# else
-# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
- xorl %eax, %eax
- movl (VEC_SIZE * 2)(%rsi, %rdi), %ecx
- cmpl (VEC_SIZE * 2)(%rdx, %rdi), %ecx
- jne L(wcscmp_return)
-# else
- movzbl (VEC_SIZE * 2)(%rax, %rdi), %eax
- movzbl (VEC_SIZE * 2)(%rdx, %rdi), %edx
- subl %edx, %eax
-# endif
+ tzcntq %rcx, %rcx
# endif
- ret
- .p2align 4
-L(test_3_vec):
# ifdef USE_AS_STRNCMP
- /* The first 3 vectors matched. Return 0 if the maximum offset
- (%r11) <= 3 * VEC_SIZE. */
- cmpq $(VEC_SIZE * 3), %r11
- jbe L(zero)
+ cmpq %rcx, %rdx
+ jbe L(ret_zero_end)
# endif
- kmovd %k7, %esi
- tzcntl %esi, %ecx
+
# ifdef USE_AS_WCSCMP
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %ecx
-# endif
-# ifdef USE_AS_STRNCMP
- addq $(VEC_SIZE * 3), %rcx
- cmpq %rcx, %r11
- jbe L(zero)
-# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
+ movl (%rdi, %rcx, SIZE_OF_CHAR), %edx
xorl %eax, %eax
- movl (%rsi, %rcx), %esi
- cmpl (%rdx, %rcx), %esi
- jne L(wcscmp_return)
-# else
- movzbl (%rax, %rcx), %eax
- movzbl (%rdx, %rcx), %edx
- subl %edx, %eax
-# endif
+ cmpl (%rsi, %rcx, SIZE_OF_CHAR), %edx
+ je L(ret6)
+ setl %al
+ negl %eax
+ /* This is the non-zero case for `eax` so just xorl with `r8d` to
+ flip if `rdi` and `rsi` were swapped. */
+ xorl %r8d, %eax
# else
+ movzbl (%rdi, %rcx), %eax
+ movzbl (%rsi, %rcx), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
+ /* Flip `eax` if `rdi` and `rsi` were swapped in page cross
+ logic. Subtract `r8d` after xor for zero case. */
+ xorl %r8d, %eax
+ subl %r8d, %eax
+# endif
+L(ret6):
+ ret
+
+# ifndef USE_AS_STRNCMP
+ .p2align 4,, 10
+L(return_vec_1_end):
+ tzcntl %ecx, %ecx
# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
+ movl VEC_SIZE(%rdi, %rcx, SIZE_OF_CHAR), %edx
xorl %eax, %eax
- movl (VEC_SIZE * 3)(%rsi, %rcx), %esi
- cmpl (VEC_SIZE * 3)(%rdx, %rcx), %esi
- jne L(wcscmp_return)
+ cmpl VEC_SIZE(%rsi, %rcx, SIZE_OF_CHAR), %edx
+ je L(ret7)
+ setl %al
+ negl %eax
+ xorl %r8d, %eax
# else
- movzbl (VEC_SIZE * 3)(%rax, %rcx), %eax
- movzbl (VEC_SIZE * 3)(%rdx, %rcx), %edx
- subl %edx, %eax
+ movzbl VEC_SIZE(%rdi, %rcx), %eax
+ movzbl VEC_SIZE(%rsi, %rcx), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
+ xorl %r8d, %eax
+ subl %r8d, %eax
# endif
-# endif
+L(ret7):
ret
+# endif
- .p2align 4
-L(loop_cross_page):
- xorl %r10d, %r10d
- movq %rdx, %rcx
- /* Align load via RDX. We load the extra ECX bytes which should
- be ignored. */
- andl $((VEC_SIZE * 4) - 1), %ecx
- /* R10 is -RCX. */
- subq %rcx, %r10
-
- /* This works only if VEC_SIZE * 2 == 64. */
-# if (VEC_SIZE * 2) != 64
-# error (VEC_SIZE * 2) != 64
-# endif
-
- /* Check if the first VEC_SIZE * 2 bytes should be ignored. */
- cmpl $(VEC_SIZE * 2), %ecx
- jge L(loop_cross_page_2_vec)
-
- VMOVU (%rax, %r10), %YMM2
- VMOVU VEC_SIZE(%rax, %r10), %YMM3
- VMOVU (%rdx, %r10), %YMM4
- VMOVU VEC_SIZE(%rdx, %r10), %YMM5
-
- VPCMP $4, %YMM4, %YMM2, %k0
- VPCMP $0, %YMMZERO, %YMM2, %k1
- VPCMP $0, %YMMZERO, %YMM4, %k2
- kord %k1, %k2, %k1
- /* Each bit in K1 represents a NULL or a mismatch in YMM2 and
- YMM4. */
- kord %k0, %k1, %k1
-
- VPCMP $4, %YMM5, %YMM3, %k3
- VPCMP $0, %YMMZERO, %YMM3, %k4
- VPCMP $0, %YMMZERO, %YMM5, %k5
- kord %k4, %k5, %k4
- /* Each bit in K3 represents a NULL or a mismatch in YMM3 and
- YMM5. */
- kord %k3, %k4, %k3
+
+ /* Page cross in rsi in next 4x VEC. */
+
+ /* TODO: Improve logic here. */
+ .p2align 4,, 10
+L(page_cross_during_loop):
+ /* eax contains [distance_from_page - (VEC_SIZE * 4)]. */
+
+ /* Optimistically rsi and rdi are both aligned in which case we
+ don't need any logic here. */
+ cmpl $-(VEC_SIZE * 4), %eax
+ /* Don't adjust eax before jumping back to loop and we will
+ never hit page cross case again. */
+ je L(loop_skip_page_cross_check)
+
+ /* Check if we can safely load a VEC. */
+ cmpl $-(VEC_SIZE * 3), %eax
+ jle L(less_1x_vec_till_page_cross)
+
+ VMOVA (%rdi), %YMM0
+ VPTESTM %YMM0, %YMM0, %k2
+ CMP_R1_S2_YMM (%YMM0, (%rsi), %YMM1, %k1){%k2}
+ kmovd %k1, %ecx
+ TESTEQ %ecx
+ jnz L(return_vec_0_end)
+
+ /* if distance >= 2x VEC then eax > -(VEC_SIZE * 2). */
+ cmpl $-(VEC_SIZE * 2), %eax
+ jg L(more_2x_vec_till_page_cross)
+
+ .p2align 4,, 4
+L(less_1x_vec_till_page_cross):
+ subl $-(VEC_SIZE * 4), %eax
+ /* Guaranteed safe to read from rdi - VEC_SIZE here. The only
+ concerning case is first iteration if incoming s1 was near start
+ of a page and s2 near end. If s1 was near the start of the page
+ we already aligned up to nearest VEC_SIZE * 4 so guaranteed safe
+ to read back -VEC_SIZE. If rdi is truly at the start of a page
+ here, it means the previous page (rdi - VEC_SIZE) has already
+ been loaded earlier so must be valid. */
+ VMOVU -VEC_SIZE(%rdi, %rax), %YMM0
+ VPTESTM %YMM0, %YMM0, %k2
+ CMP_R1_S2_YMM (%YMM0, -VEC_SIZE(%rsi, %rax), %YMM1, %k1){%k2}
+ /* Mask of potentially valid bits. The lower bits can be out of
+ range comparisons (but safe regarding page crosses). */
# ifdef USE_AS_WCSCMP
- /* NB: Each bit in K1/K3 represents 4-byte element. */
- kshiftlw $8, %k3, %k2
- /* NB: Divide shift count by 4 since each bit in K1 represent 4
- bytes. */
- movl %ecx, %SHIFT_REG32
- sarl $2, %SHIFT_REG32
+ movl $-1, %r10d
+ movl %esi, %ecx
+ andl $(VEC_SIZE - 1), %ecx
+ shrl $2, %ecx
+ shlxl %ecx, %r10d, %ecx
+ movzbl %cl, %r10d
# else
- kshiftlq $32, %k3, %k2
+ movl $-1, %ecx
+ shlxl %esi, %ecx, %r10d
# endif
- /* Each bit in K1 represents a NULL or a mismatch. */
- korq %k1, %k2, %k1
- kmovq %k1, %rdi
+ kmovd %k1, %ecx
+ notl %ecx
+
- /* Since ECX < VEC_SIZE * 2, simply skip the first ECX bytes. */
- shrxq %SHIFT_REG64, %rdi, %rdi
- testq %rdi, %rdi
- je L(loop_cross_page_2_vec)
- tzcntq %rdi, %rcx
-# ifdef USE_AS_WCSCMP
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %ecx
-# endif
# ifdef USE_AS_STRNCMP
- cmpq %rcx, %r11
- jbe L(zero)
-# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
- xorl %eax, %eax
- movl (%rsi, %rcx), %edi
- cmpl (%rdx, %rcx), %edi
- jne L(wcscmp_return)
-# else
- movzbl (%rax, %rcx), %eax
- movzbl (%rdx, %rcx), %edx
- subl %edx, %eax
-# endif
-# else
# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
- xorl %eax, %eax
- movl (%rsi, %rcx), %edi
- cmpl (%rdx, %rcx), %edi
- jne L(wcscmp_return)
+ /* NB: strcasecmp not used with WCSCMP so this access to r11 is
+ safe. */
+ movl %eax, %r11d
+ shrl $2, %r11d
+ cmpq %r11, %rdx
# else
- movzbl (%rax, %rcx), %eax
- movzbl (%rdx, %rcx), %edx
- subl %edx, %eax
+ cmpq %rax, %rdx
# endif
+ jbe L(return_page_cross_end_check)
# endif
- ret
+ movl %eax, %OFFSET_REG
- .p2align 4
-L(loop_cross_page_2_vec):
- /* The first VEC_SIZE * 2 bytes match or are ignored. */
- VMOVU (VEC_SIZE * 2)(%rax, %r10), %YMM0
- VMOVU (VEC_SIZE * 3)(%rax, %r10), %YMM1
- VMOVU (VEC_SIZE * 2)(%rdx, %r10), %YMM2
- VMOVU (VEC_SIZE * 3)(%rdx, %r10), %YMM3
-
- VPCMP $4, %YMM0, %YMM2, %k0
- VPCMP $0, %YMMZERO, %YMM0, %k1
- VPCMP $0, %YMMZERO, %YMM2, %k2
- kord %k1, %k2, %k1
- /* Each bit in K1 represents a NULL or a mismatch in YMM0 and
- YMM2. */
- kord %k0, %k1, %k1
-
- VPCMP $4, %YMM1, %YMM3, %k3
- VPCMP $0, %YMMZERO, %YMM1, %k4
- VPCMP $0, %YMMZERO, %YMM3, %k5
- kord %k4, %k5, %k4
- /* Each bit in K3 represents a NULL or a mismatch in YMM1 and
- YMM3. */
- kord %k3, %k4, %k3
+ /* Readjust eax before potentially returning to the loop. */
+ addl $(PAGE_SIZE - VEC_SIZE * 4), %eax
-# ifdef USE_AS_WCSCMP
- /* NB: Each bit in K1/K3 represents 4-byte element. */
- kshiftlw $8, %k3, %k2
-# else
- kshiftlq $32, %k3, %k2
-# endif
+ andl %r10d, %ecx
+ jz L(loop_skip_page_cross_check)
- /* Each bit in K1 represents a NULL or a mismatch. */
- korq %k1, %k2, %k1
- kmovq %k1, %rdi
+ .p2align 4,, 3
+L(return_page_cross_end):
+ tzcntl %ecx, %ecx
- xorl %r8d, %r8d
- /* If ECX > VEC_SIZE * 2, skip ECX - (VEC_SIZE * 2) bytes. */
- subl $(VEC_SIZE * 2), %ecx
- jle 1f
- /* R8 has number of bytes skipped. */
- movl %ecx, %r8d
-# ifdef USE_AS_WCSCMP
- /* NB: Divide shift count by 4 since each bit in K1 represent 4
- bytes. */
- sarl $2, %ecx
-# endif
- /* Skip ECX bytes. */
- shrq %cl, %rdi
-1:
- /* Before jumping back to the loop, set ESI to the number of
- VEC_SIZE * 4 blocks before page crossing. */
- movl $(PAGE_SIZE / (VEC_SIZE * 4) - 1), %esi
-
- testq %rdi, %rdi
-# ifdef USE_AS_STRNCMP
- /* At this point, if %rdi value is 0, it already tested
- VEC_SIZE*4+%r10 byte starting from %rax. This label
- checks whether strncmp maximum offset reached or not. */
- je L(string_nbyte_offset_check)
+# if (defined USE_AS_STRNCMP) || (defined USE_AS_WCSCMP)
+ leal -VEC_SIZE(%OFFSET_REG64, %rcx, SIZE_OF_CHAR), %ecx
+L(return_page_cross_cmp_mem):
# else
- je L(back_to_loop)
+ addl %OFFSET_REG, %ecx
# endif
- tzcntq %rdi, %rcx
# ifdef USE_AS_WCSCMP
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %ecx
+ movl VEC_OFFSET(%rdi, %rcx), %edx
+ xorl %eax, %eax
+ cmpl VEC_OFFSET(%rsi, %rcx), %edx
+ je L(ret8)
+ setl %al
+ negl %eax
+ xorl %r8d, %eax
+# else
+ movzbl VEC_OFFSET(%rdi, %rcx), %eax
+ movzbl VEC_OFFSET(%rsi, %rcx), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
+ xorl %r8d, %eax
+ subl %r8d, %eax
# endif
- addq %r10, %rcx
- /* Adjust for number of bytes skipped. */
- addq %r8, %rcx
+L(ret8):
+ ret
+
# ifdef USE_AS_STRNCMP
- addq $(VEC_SIZE * 2), %rcx
- subq %rcx, %r11
- jbe L(zero)
+ .p2align 4,, 10
+L(return_page_cross_end_check):
+ andl %r10d, %ecx
+ tzcntl %ecx, %ecx
+ leal -VEC_SIZE(%rax, %rcx, SIZE_OF_CHAR), %ecx
# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
+ sall $2, %edx
+# endif
+ cmpl %ecx, %edx
+ ja L(return_page_cross_cmp_mem)
xorl %eax, %eax
- movl (%rsi, %rcx), %edi
- cmpl (%rdx, %rcx), %edi
- jne L(wcscmp_return)
+ ret
+# endif
+
+
+ .p2align 4,, 10
+L(more_2x_vec_till_page_cross):
+ /* If more than 2x VEC till page cross we will complete a full
+ loop iteration here. */
+
+ VMOVA VEC_SIZE(%rdi), %YMM0
+ VPTESTM %YMM0, %YMM0, %k2
+ CMP_R1_S2_YMM (%YMM0, VEC_SIZE(%rsi), %YMM1, %k1){%k2}
+ kmovd %k1, %ecx
+ TESTEQ %ecx
+ jnz L(return_vec_1_end)
+
+# ifdef USE_AS_STRNCMP
+ cmpq $(CHAR_PER_VEC * 2), %rdx
+ jbe L(ret_zero_in_loop_page_cross)
+# endif
+
+ subl $-(VEC_SIZE * 4), %eax
+
+ /* Safe to include comparisons from lower bytes. */
+ VMOVU -(VEC_SIZE * 2)(%rdi, %rax), %YMM0
+ VPTESTM %YMM0, %YMM0, %k2
+ CMP_R1_S2_YMM (%YMM0, -(VEC_SIZE * 2)(%rsi, %rax), %YMM1, %k1){%k2}
+ kmovd %k1, %ecx
+ TESTEQ %ecx
+ jnz L(return_vec_page_cross_0)
+
+ VMOVU -(VEC_SIZE * 1)(%rdi, %rax), %YMM0
+ VPTESTM %YMM0, %YMM0, %k2
+ CMP_R1_S2_YMM (%YMM0, -(VEC_SIZE * 1)(%rsi, %rax), %YMM1, %k1){%k2}
+ kmovd %k1, %ecx
+ TESTEQ %ecx
+ jnz L(return_vec_page_cross_1)
+
+# ifdef USE_AS_STRNCMP
+ /* Must check length here as length might preclude reading next
+ page. */
+# ifdef USE_AS_WCSCMP
+ /* NB: strcasecmp not used with WCSCMP so this access to r11 is
+ safe. */
+ movl %eax, %r11d
+ shrl $2, %r11d
+ cmpq %r11, %rdx
# else
- movzbl (%rax, %rcx), %eax
- movzbl (%rdx, %rcx), %edx
- subl %edx, %eax
+ cmpq %rax, %rdx
# endif
+ jbe L(ret_zero_in_loop_page_cross)
+# endif
+
+ /* Finish the loop. */
+ VMOVA (VEC_SIZE * 2)(%rdi), %YMM4
+ VMOVA (VEC_SIZE * 3)(%rdi), %YMM6
+ VPMINU %YMM4, %YMM6, %YMM9
+ VPTESTM %YMM9, %YMM9, %k1
+# ifndef USE_AS_STRCASECMP_L
+ vpxorq (VEC_SIZE * 2)(%rsi), %YMM4, %YMM5
+ /* YMM6 = YMM5 | ((VEC_SIZE * 3)(%rsi) ^ YMM6). */
+ vpternlogd $0xde, (VEC_SIZE * 3)(%rsi), %YMM5, %YMM6
# else
-# ifdef USE_AS_WCSCMP
- movq %rax, %rsi
+ VMOVU (VEC_SIZE * 2)(%rsi), %YMM5
+ TOLOWER_YMM (%YMM4, %YMM5)
+ VMOVU (VEC_SIZE * 3)(%rsi), %YMM7
+ TOLOWER_YMM (%YMM6, %YMM7)
+ vpxorq %YMM4, %YMM5, %YMM5
+ vpternlogd $0xde, %YMM7, %YMM5, %YMM6
+# endif
+ VPTESTNM %YMM6, %YMM6, %k0{%k1}
+ kmovd %k0, %LOOP_REG
+ TESTEQ %LOOP_REG
+ jnz L(return_vec_2_3_end)
+
+ /* Best for code size to include ucond-jmp here. Would be faster
+ if this case is hot to duplicate the L(return_vec_2_3_end) code
+ as fall-through and have jump back to loop on mismatch
+ comparison. */
+ subq $-(VEC_SIZE * 4), %rdi
+ subq $-(VEC_SIZE * 4), %rsi
+ addl $(PAGE_SIZE - VEC_SIZE * 8), %eax
+# ifdef USE_AS_STRNCMP
+ subq $(CHAR_PER_VEC * 4), %rdx
+ ja L(loop_skip_page_cross_check)
+L(ret_zero_in_loop_page_cross):
xorl %eax, %eax
- movl (VEC_SIZE * 2)(%rsi, %rcx), %edi
- cmpl (VEC_SIZE * 2)(%rdx, %rcx), %edi
- jne L(wcscmp_return)
-# else
- movzbl (VEC_SIZE * 2)(%rax, %rcx), %eax
- movzbl (VEC_SIZE * 2)(%rdx, %rcx), %edx
- subl %edx, %eax
+ ret
+# else
+ jmp L(loop_skip_page_cross_check)
+# endif
+
+
+ .p2align 4,, 10
+L(return_vec_page_cross_0):
+ addl $-VEC_SIZE, %eax
+L(return_vec_page_cross_1):
+ tzcntl %ecx, %ecx
+# if defined USE_AS_STRNCMP || defined USE_AS_WCSCMP
+ leal -VEC_SIZE(%rax, %rcx, SIZE_OF_CHAR), %ecx
+# ifdef USE_AS_STRNCMP
+# ifdef USE_AS_WCSCMP
+ /* Must divide ecx instead of multiply rdx due to overflow. */
+ movl %ecx, %eax
+ shrl $2, %eax
+ cmpq %rax, %rdx
+# else
+ cmpq %rcx, %rdx
+# endif
+ jbe L(ret_zero_in_loop_page_cross)
# endif
+# else
+ addl %eax, %ecx
+# endif
+
+# ifdef USE_AS_WCSCMP
+ movl VEC_OFFSET(%rdi, %rcx), %edx
+ xorl %eax, %eax
+ cmpl VEC_OFFSET(%rsi, %rcx), %edx
+ je L(ret9)
+ setl %al
+ negl %eax
+ xorl %r8d, %eax
+# else
+ movzbl VEC_OFFSET(%rdi, %rcx), %eax
+ movzbl VEC_OFFSET(%rsi, %rcx), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
+ xorl %r8d, %eax
+ subl %r8d, %eax
# endif
+L(ret9):
ret
-# ifdef USE_AS_STRNCMP
-L(string_nbyte_offset_check):
- leaq (VEC_SIZE * 4)(%r10), %r10
- cmpq %r10, %r11
- jbe L(zero)
- jmp L(back_to_loop)
+
+ .p2align 4,, 10
+L(page_cross):
+# ifndef USE_AS_STRNCMP
+ /* If both are VEC aligned we don't need any special logic here.
+ Only valid for strcmp where stop condition is guaranteed to be
+ reachable by just reading memory. */
+ testl $((VEC_SIZE - 1) << 20), %eax
+ jz L(no_page_cross)
# endif
- .p2align 4
-L(cross_page_loop):
- /* Check one byte/dword at a time. */
+ movl %edi, %eax
+ movl %esi, %ecx
+ andl $(PAGE_SIZE - 1), %eax
+ andl $(PAGE_SIZE - 1), %ecx
+
+ xorl %OFFSET_REG, %OFFSET_REG
+
+ /* Check which is closer to page cross, s1 or s2. */
+ cmpl %eax, %ecx
+ jg L(page_cross_s2)
+
+ /* The previous page cross check has false positives. Check for
+ true positive as page cross logic is very expensive. */
+ subl $(PAGE_SIZE - VEC_SIZE * 4), %eax
+ jbe L(no_page_cross)
+
+
+ /* Set r8 to not interfere with normal return value (rdi and rsi
+ did not swap). */
# ifdef USE_AS_WCSCMP
- cmpl %ecx, %eax
+ /* Any non-zero positive value that doesn't interfere with 0x1.
+ */
+ movl $2, %r8d
# else
- subl %ecx, %eax
+ xorl %r8d, %r8d
# endif
- jne L(different)
- addl $SIZE_OF_CHAR, %edx
- cmpl $(VEC_SIZE * 4), %edx
- je L(main_loop_header)
+
+ /* Check if less than 1x VEC till page cross. */
+ subl $(VEC_SIZE * 3), %eax
+ jg L(less_1x_vec_till_page)
+
+
+ /* If more than 1x VEC till page cross, loop through safely
+ loadable memory until within 1x VEC of page cross. */
+ .p2align 4,, 8
+L(page_cross_loop):
+ VMOVU (%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %YMM0
+ VPTESTM %YMM0, %YMM0, %k2
+ CMP_R1_S2_YMM (%YMM0, (%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %YMM1, %k1){%k2}
+ kmovd %k1, %ecx
+ TESTEQ %ecx
+ jnz L(check_ret_vec_page_cross)
+ addl $CHAR_PER_VEC, %OFFSET_REG
# ifdef USE_AS_STRNCMP
- cmpq %r11, %rdx
- jae L(zero)
+ cmpq %OFFSET_REG64, %rdx
+ jbe L(ret_zero_page_cross)
# endif
+ addl $VEC_SIZE, %eax
+ jl L(page_cross_loop)
+
# ifdef USE_AS_WCSCMP
- movl (%rdi, %rdx), %eax
- movl (%rsi, %rdx), %ecx
-# else
- movzbl (%rdi, %rdx), %eax
- movzbl (%rsi, %rdx), %ecx
+ shrl $2, %eax
# endif
- /* Check null char. */
- testl %eax, %eax
- jne L(cross_page_loop)
- /* Since %eax == 0, subtract is OK for both SIGNED and UNSIGNED
- comparisons. */
- subl %ecx, %eax
-# ifndef USE_AS_WCSCMP
-L(different):
+
+
+ subl %eax, %OFFSET_REG
+ /* OFFSET_REG has distance to page cross - VEC_SIZE. Guaranteed
+ to not cross page so is safe to load. Since we have already
+ loaded at least 1 VEC from rsi it is also guaranteed to be safe.
+ */
+ VMOVU (%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %YMM0
+ VPTESTM %YMM0, %YMM0, %k2
+ CMP_R1_S2_YMM (%YMM0, (%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %YMM1, %k1){%k2}
+
+ kmovd %k1, %ecx
+# ifdef USE_AS_STRNCMP
+ leal CHAR_PER_VEC(%OFFSET_REG64), %eax
+ cmpq %rax, %rdx
+ jbe L(check_ret_vec_page_cross2)
+# ifdef USE_AS_WCSCMP
+ addq $-(CHAR_PER_VEC * 2), %rdx
+# else
+ addq %rdi, %rdx
+# endif
# endif
- ret
+ TESTEQ %ecx
+ jz L(prepare_loop_no_len)
+ .p2align 4,, 4
+L(ret_vec_page_cross):
+# ifndef USE_AS_STRNCMP
+L(check_ret_vec_page_cross):
+# endif
+ tzcntl %ecx, %ecx
+ addl %OFFSET_REG, %ecx
+L(ret_vec_page_cross_cont):
# ifdef USE_AS_WCSCMP
- .p2align 4
-L(different):
- /* Use movl to avoid modifying EFLAGS. */
- movl $0, %eax
+ movl (%rdi, %rcx, SIZE_OF_CHAR), %edx
+ xorl %eax, %eax
+ cmpl (%rsi, %rcx, SIZE_OF_CHAR), %edx
+ je L(ret12)
setl %al
negl %eax
- orl $1, %eax
- ret
+ xorl %r8d, %eax
+# else
+ movzbl (%rdi, %rcx, SIZE_OF_CHAR), %eax
+ movzbl (%rsi, %rcx, SIZE_OF_CHAR), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %ecx)
+ subl %ecx, %eax
+ xorl %r8d, %eax
+ subl %r8d, %eax
# endif
+L(ret12):
+ ret
+
# ifdef USE_AS_STRNCMP
- .p2align 4
-L(zero):
+ .p2align 4,, 10
+L(check_ret_vec_page_cross2):
+ TESTEQ %ecx
+L(check_ret_vec_page_cross):
+ tzcntl %ecx, %ecx
+ addl %OFFSET_REG, %ecx
+ cmpq %rcx, %rdx
+ ja L(ret_vec_page_cross_cont)
+ .p2align 4,, 2
+L(ret_zero_page_cross):
xorl %eax, %eax
ret
+# endif
- .p2align 4
-L(char0):
-# ifdef USE_AS_WCSCMP
- xorl %eax, %eax
- movl (%rdi), %ecx
- cmpl (%rsi), %ecx
- jne L(wcscmp_return)
-# else
- movzbl (%rsi), %ecx
- movzbl (%rdi), %eax
- subl %ecx, %eax
-# endif
- ret
+ .p2align 4,, 4
+L(page_cross_s2):
+ /* Ensure this is a true page cross. */
+ subl $(PAGE_SIZE - VEC_SIZE * 4), %ecx
+ jbe L(no_page_cross)
+
+
+ movl %ecx, %eax
+ movq %rdi, %rcx
+ movq %rsi, %rdi
+ movq %rcx, %rsi
+
+ /* set r8 to negate return value as rdi and rsi swapped. */
+# ifdef USE_AS_WCSCMP
+ movl $-4, %r8d
+# else
+ movl $-1, %r8d
# endif
+ xorl %OFFSET_REG, %OFFSET_REG
- .p2align 4
-L(last_vector):
- addq %rdx, %rdi
- addq %rdx, %rsi
-# ifdef USE_AS_STRNCMP
- subq %rdx, %r11
+ /* Check if more than 1x VEC till page cross. */
+ subl $(VEC_SIZE * 3), %eax
+ jle L(page_cross_loop)
+
+ .p2align 4,, 6
+L(less_1x_vec_till_page):
+# ifdef USE_AS_WCSCMP
+ shrl $2, %eax
# endif
- tzcntl %ecx, %edx
+ /* Find largest load size we can use. */
+ cmpl $(16 / SIZE_OF_CHAR), %eax
+ ja L(less_16_till_page)
+
+ /* Use 16 byte comparison. */
+ vmovdqu (%rdi), %xmm0
+ VPTESTM %xmm0, %xmm0, %k2
+ CMP_R1_S2_XMM (%xmm0, (%rsi), %xmm1, %k1){%k2}
+ kmovd %k1, %ecx
# ifdef USE_AS_WCSCMP
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- sall $2, %edx
+ subl $0xf, %ecx
+# else
+ incw %cx
# endif
+ jnz L(check_ret_vec_page_cross)
+ movl $(16 / SIZE_OF_CHAR), %OFFSET_REG
# ifdef USE_AS_STRNCMP
- cmpq %r11, %rdx
- jae L(zero)
+ cmpq %OFFSET_REG64, %rdx
+ jbe L(ret_zero_page_cross_slow_case0)
+ subl %eax, %OFFSET_REG
+# else
+ /* Explicit check for 16 byte alignment. */
+ subl %eax, %OFFSET_REG
+ jz L(prepare_loop)
# endif
+ vmovdqu (%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %xmm0
+ VPTESTM %xmm0, %xmm0, %k2
+ CMP_R1_S2_XMM (%xmm0, (%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %xmm1, %k1){%k2}
+ kmovd %k1, %ecx
# ifdef USE_AS_WCSCMP
- xorl %eax, %eax
- movl (%rdi, %rdx), %ecx
- cmpl (%rsi, %rdx), %ecx
- jne L(wcscmp_return)
+ subl $0xf, %ecx
# else
- movzbl (%rdi, %rdx), %eax
- movzbl (%rsi, %rdx), %edx
- subl %edx, %eax
+ incw %cx
# endif
+ jnz L(check_ret_vec_page_cross)
+# ifdef USE_AS_STRNCMP
+ addl $(16 / SIZE_OF_CHAR), %OFFSET_REG
+ subq %OFFSET_REG64, %rdx
+ jbe L(ret_zero_page_cross_slow_case0)
+ subq $-(CHAR_PER_VEC * 4), %rdx
+
+ leaq -(VEC_SIZE * 4)(%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %rdi
+ leaq -(VEC_SIZE * 4)(%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %rsi
+# else
+ leaq (16 - VEC_SIZE * 4)(%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %rdi
+ leaq (16 - VEC_SIZE * 4)(%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %rsi
+# endif
+ jmp L(prepare_loop_aligned)
+
+# ifdef USE_AS_STRNCMP
+ .p2align 4,, 2
+L(ret_zero_page_cross_slow_case0):
+ xorl %eax, %eax
ret
+# endif
- /* Comparing on page boundary region requires special treatment:
- It must done one vector at the time, starting with the wider
- ymm vector if possible, if not, with xmm. If fetching 16 bytes
- (xmm) still passes the boundary, byte comparison must be done.
- */
- .p2align 4
-L(cross_page):
- /* Try one ymm vector at a time. */
- cmpl $(PAGE_SIZE - VEC_SIZE), %eax
- jg L(cross_page_1_vector)
-L(loop_1_vector):
- VMOVU (%rdi, %rdx), %YMM0
- VMOVU (%rsi, %rdx), %YMM1
-
- /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */
- VPCMP $4, %YMM0, %YMM1, %k0
- VPCMP $0, %YMMZERO, %YMM0, %k1
- VPCMP $0, %YMMZERO, %YMM1, %k2
- /* Each bit in K1 represents a NULL in YMM0 or YMM1. */
- kord %k1, %k2, %k1
- /* Each bit in K1 represents a NULL or a mismatch. */
- kord %k0, %k1, %k1
+
+ .p2align 4,, 10
+L(less_16_till_page):
+ cmpl $(24 / SIZE_OF_CHAR), %eax
+ ja L(less_8_till_page)
+
+ /* Use 8 byte comparison. */
+ vmovq (%rdi), %xmm0
+ vmovq (%rsi), %xmm1
+ VPTESTM %xmm0, %xmm0, %k2
+ CMP_R1_R2_XMM (%xmm0, %xmm1, %k1){%k2}
kmovd %k1, %ecx
- testl %ecx, %ecx
- jne L(last_vector)
+# ifdef USE_AS_WCSCMP
+ subl $0x3, %ecx
+# else
+ incb %cl
+# endif
+ jnz L(check_ret_vec_page_cross)
- addl $VEC_SIZE, %edx
- addl $VEC_SIZE, %eax
# ifdef USE_AS_STRNCMP
- /* Return 0 if the current offset (%rdx) >= the maximum offset
- (%r11). */
- cmpq %r11, %rdx
- jae L(zero)
-# endif
- cmpl $(PAGE_SIZE - VEC_SIZE), %eax
- jle L(loop_1_vector)
-L(cross_page_1_vector):
- /* Less than 32 bytes to check, try one xmm vector. */
- cmpl $(PAGE_SIZE - 16), %eax
- jg L(cross_page_1_xmm)
- VMOVU (%rdi, %rdx), %XMM0
- VMOVU (%rsi, %rdx), %XMM1
-
- /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */
- VPCMP $4, %XMM0, %XMM1, %k0
- VPCMP $0, %XMMZERO, %XMM0, %k1
- VPCMP $0, %XMMZERO, %XMM1, %k2
- /* Each bit in K1 represents a NULL in XMM0 or XMM1. */
- korw %k1, %k2, %k1
- /* Each bit in K1 represents a NULL or a mismatch. */
- korw %k0, %k1, %k1
- kmovw %k1, %ecx
- testl %ecx, %ecx
- jne L(last_vector)
+ cmpq $(8 / SIZE_OF_CHAR), %rdx
+ jbe L(ret_zero_page_cross_slow_case0)
+# endif
+ movl $(24 / SIZE_OF_CHAR), %OFFSET_REG
+ subl %eax, %OFFSET_REG
- addl $16, %edx
-# ifndef USE_AS_WCSCMP
- addl $16, %eax
+ vmovq (%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %xmm0
+ vmovq (%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %xmm1
+ VPTESTM %xmm0, %xmm0, %k2
+ CMP_R1_R2_XMM (%xmm0, %xmm1, %k1){%k2}
+ kmovd %k1, %ecx
+# ifdef USE_AS_WCSCMP
+ subl $0x3, %ecx
+# else
+ incb %cl
# endif
+ jnz L(check_ret_vec_page_cross)
+
+
# ifdef USE_AS_STRNCMP
- /* Return 0 if the current offset (%rdx) >= the maximum offset
- (%r11). */
- cmpq %r11, %rdx
- jae L(zero)
-# endif
-
-L(cross_page_1_xmm):
-# ifndef USE_AS_WCSCMP
- /* Less than 16 bytes to check, try 8 byte vector. NB: No need
- for wcscmp nor wcsncmp since wide char is 4 bytes. */
- cmpl $(PAGE_SIZE - 8), %eax
- jg L(cross_page_8bytes)
- vmovq (%rdi, %rdx), %XMM0
- vmovq (%rsi, %rdx), %XMM1
-
- /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */
- VPCMP $4, %XMM0, %XMM1, %k0
- VPCMP $0, %XMMZERO, %XMM0, %k1
- VPCMP $0, %XMMZERO, %XMM1, %k2
- /* Each bit in K1 represents a NULL in XMM0 or XMM1. */
- kord %k1, %k2, %k1
- /* Each bit in K1 represents a NULL or a mismatch. */
- kord %k0, %k1, %k1
- kmovd %k1, %ecx
+ addl $(8 / SIZE_OF_CHAR), %OFFSET_REG
+ subq %OFFSET_REG64, %rdx
+ jbe L(ret_zero_page_cross_slow_case0)
+ subq $-(CHAR_PER_VEC * 4), %rdx
-# ifdef USE_AS_WCSCMP
- /* Only last 2 bits are valid. */
- andl $0x3, %ecx
+ leaq -(VEC_SIZE * 4)(%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %rdi
+ leaq -(VEC_SIZE * 4)(%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %rsi
# else
- /* Only last 8 bits are valid. */
- andl $0xff, %ecx
+ leaq (8 - VEC_SIZE * 4)(%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %rdi
+ leaq (8 - VEC_SIZE * 4)(%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %rsi
# endif
+ jmp L(prepare_loop_aligned)
+
- testl %ecx, %ecx
- jne L(last_vector)
- addl $8, %edx
- addl $8, %eax
+
+ .p2align 4,, 10
+L(less_8_till_page):
+# ifdef USE_AS_WCSCMP
+ /* If using wchar then this is the only check before we reach
+ the page boundary. */
+ movl (%rdi), %eax
+ movl (%rsi), %ecx
+ cmpl %ecx, %eax
+ jnz L(ret_less_8_wcs)
# ifdef USE_AS_STRNCMP
- /* Return 0 if the current offset (%rdx) >= the maximum offset
- (%r11). */
- cmpq %r11, %rdx
- jae L(zero)
+ addq $-(CHAR_PER_VEC * 2), %rdx
+ /* We already checked for len <= 1 so cannot hit that case here.
+ */
# endif
+ testl %eax, %eax
+ jnz L(prepare_loop)
+ ret
-L(cross_page_8bytes):
- /* Less than 8 bytes to check, try 4 byte vector. */
- cmpl $(PAGE_SIZE - 4), %eax
- jg L(cross_page_4bytes)
- vmovd (%rdi, %rdx), %XMM0
- vmovd (%rsi, %rdx), %XMM1
-
- /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */
- VPCMP $4, %XMM0, %XMM1, %k0
- VPCMP $0, %XMMZERO, %XMM0, %k1
- VPCMP $0, %XMMZERO, %XMM1, %k2
- /* Each bit in K1 represents a NULL in XMM0 or XMM1. */
- kord %k1, %k2, %k1
- /* Each bit in K1 represents a NULL or a mismatch. */
- kord %k0, %k1, %k1
- kmovd %k1, %ecx
+ .p2align 4,, 8
+L(ret_less_8_wcs):
+ setl %OFFSET_REG8
+ negl %OFFSET_REG
+ movl %OFFSET_REG, %eax
+ xorl %r8d, %eax
+ ret
-# ifdef USE_AS_WCSCMP
- /* Only the last bit is valid. */
- andl $0x1, %ecx
# else
- /* Only last 4 bits are valid. */
- andl $0xf, %ecx
-# endif
+ cmpl $28, %eax
+ ja L(less_4_till_page)
- testl %ecx, %ecx
- jne L(last_vector)
+ vmovd (%rdi), %xmm0
+ vmovd (%rsi), %xmm1
+ VPTESTM %xmm0, %xmm0, %k2
+ CMP_R1_R2_XMM (%xmm0, %xmm1, %k1){%k2}
+ kmovd %k1, %ecx
+ subl $0xf, %ecx
+ jnz L(check_ret_vec_page_cross)
- addl $4, %edx
# ifdef USE_AS_STRNCMP
- /* Return 0 if the current offset (%rdx) >= the maximum offset
- (%r11). */
- cmpq %r11, %rdx
- jae L(zero)
+ cmpq $4, %rdx
+ jbe L(ret_zero_page_cross_slow_case1)
# endif
+ movl $(28 / SIZE_OF_CHAR), %OFFSET_REG
+ subl %eax, %OFFSET_REG
-L(cross_page_4bytes):
-# endif
- /* Less than 4 bytes to check, try one byte/dword at a time. */
-# ifdef USE_AS_STRNCMP
- cmpq %r11, %rdx
- jae L(zero)
-# endif
-# ifdef USE_AS_WCSCMP
- movl (%rdi, %rdx), %eax
- movl (%rsi, %rdx), %ecx
-# else
- movzbl (%rdi, %rdx), %eax
- movzbl (%rsi, %rdx), %ecx
-# endif
- testl %eax, %eax
- jne L(cross_page_loop)
- subl %ecx, %eax
+ vmovd (%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %xmm0
+ vmovd (%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %xmm1
+ VPTESTM %xmm0, %xmm0, %k2
+ CMP_R1_R2_XMM (%xmm0, %xmm1, %k1){%k2}
+ kmovd %k1, %ecx
+ subl $0xf, %ecx
+ jnz L(check_ret_vec_page_cross)
+# ifdef USE_AS_STRNCMP
+ addl $(4 / SIZE_OF_CHAR), %OFFSET_REG
+ subq %OFFSET_REG64, %rdx
+ jbe L(ret_zero_page_cross_slow_case1)
+ subq $-(CHAR_PER_VEC * 4), %rdx
+
+ leaq -(VEC_SIZE * 4)(%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %rdi
+ leaq -(VEC_SIZE * 4)(%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %rsi
+# else
+ leaq (4 - VEC_SIZE * 4)(%rdi, %OFFSET_REG64, SIZE_OF_CHAR), %rdi
+ leaq (4 - VEC_SIZE * 4)(%rsi, %OFFSET_REG64, SIZE_OF_CHAR), %rsi
+# endif
+ jmp L(prepare_loop_aligned)
+
+
+# ifdef USE_AS_STRNCMP
+ .p2align 4,, 2
+L(ret_zero_page_cross_slow_case1):
+ xorl %eax, %eax
+ ret
+# endif
+
+ .p2align 4,, 10
+L(less_4_till_page):
+ subq %rdi, %rsi
+ /* Extremely slow byte comparison loop. */
+L(less_4_loop):
+ movzbl (%rdi), %eax
+ movzbl (%rsi, %rdi), %ecx
+ TOLOWER_gpr (%rax, %eax)
+ TOLOWER_gpr (%rcx, %BYTE_LOOP_REG)
+ subl %BYTE_LOOP_REG, %eax
+ jnz L(ret_less_4_loop)
+ testl %ecx, %ecx
+ jz L(ret_zero_4_loop)
+# ifdef USE_AS_STRNCMP
+ decq %rdx
+ jz L(ret_zero_4_loop)
+# endif
+ incq %rdi
+ /* End condition is reaching the page boundary (rdi is aligned). */
+ testl $31, %edi
+ jnz L(less_4_loop)
+ leaq -(VEC_SIZE * 4)(%rdi, %rsi), %rsi
+ addq $-(VEC_SIZE * 4), %rdi
+# ifdef USE_AS_STRNCMP
+ subq $-(CHAR_PER_VEC * 4), %rdx
+# endif
+ jmp L(prepare_loop_aligned)
+
+L(ret_zero_4_loop):
+ xorl %eax, %eax
+ ret
+L(ret_less_4_loop):
+ xorl %r8d, %eax
+ subl %r8d, %eax
ret
-END (STRCMP)
+# endif
+ cfi_endproc
+ .size STRCMP, .-STRCMP
#endif
# define UPDATE_STRNCMP_COUNTER
#endif
-#ifdef USE_AVX
-# define SECTION avx
-# define GLABEL(l) l##_avx
-#else
-# define SECTION sse4.2
-# define GLABEL(l) l##_sse42
-#endif
+#define SECTION sse4.2
+#define GLABEL(l) l##_sse42
#define LABEL(l) .L##l
movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
mov %fs:(%rax),%RDX_LP
- // XXX 5 byte should be before the function
- /* 5-byte NOP. */
- .byte 0x0f,0x1f,0x44,0x00,0x00
+ /* Either 1 or 5 bytes (depending on whether CET is enabled). */
+ .p2align 4
END (GLABEL(__strcasecmp))
/* FALLTHROUGH to strcasecmp_l. */
#endif
movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
mov %fs:(%rax),%RCX_LP
- // XXX 5 byte should be before the function
- /* 5-byte NOP. */
- .byte 0x0f,0x1f,0x44,0x00,0x00
+ /* Either 1 or 5 bytes (depending on whether CET is enabled). */
+ .p2align 4
END (GLABEL(__strncasecmp))
/* FALLTHROUGH to strncasecmp_l. */
#endif
-#ifdef USE_AVX
-# define movdqa vmovdqa
-# define movdqu vmovdqu
-# define pmovmskb vpmovmskb
-# define pcmpistri vpcmpistri
-# define psubb vpsubb
-# define pcmpeqb vpcmpeqb
-# define psrldq vpsrldq
-# define pslldq vpslldq
-# define palignr vpalignr
-# define pxor vpxor
-# define D(arg) arg, arg
-#else
-# define D(arg) arg
-#endif
+#define arg arg
STRCMP_SSE42:
cfi_startproc
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
.section .rodata.cst16,"aM",@progbits,16
.align 16
-LABEL(belowupper):
- .quad 0x4040404040404040
- .quad 0x4040404040404040
-LABEL(topupper):
-# ifdef USE_AVX
- .quad 0x5a5a5a5a5a5a5a5a
- .quad 0x5a5a5a5a5a5a5a5a
-# else
- .quad 0x5b5b5b5b5b5b5b5b
- .quad 0x5b5b5b5b5b5b5b5b
-# endif
-LABEL(touppermask):
+LABEL(lcase_min):
+ .quad 0x3f3f3f3f3f3f3f3f
+ .quad 0x3f3f3f3f3f3f3f3f
+LABEL(lcase_max):
+ .quad 0x9999999999999999
+ .quad 0x9999999999999999
+LABEL(case_add):
.quad 0x2020202020202020
.quad 0x2020202020202020
.previous
- movdqa LABEL(belowupper)(%rip), %xmm4
-# define UCLOW_reg %xmm4
- movdqa LABEL(topupper)(%rip), %xmm5
-# define UCHIGH_reg %xmm5
- movdqa LABEL(touppermask)(%rip), %xmm6
-# define LCQWORD_reg %xmm6
+ movdqa LABEL(lcase_min)(%rip), %xmm4
+# define LCASE_MIN_reg %xmm4
+ movdqa LABEL(lcase_max)(%rip), %xmm5
+# define LCASE_MAX_reg %xmm5
+ movdqa LABEL(case_add)(%rip), %xmm6
+# define CASE_ADD_reg %xmm6
#endif
cmp $0x30, %ecx
ja LABEL(crosscache)/* rsi: 16-byte load will cross cache line */
movdqu (%rdi), %xmm1
movdqu (%rsi), %xmm2
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# ifdef USE_AVX
-# define TOLOWER(reg1, reg2) \
- vpcmpgtb UCLOW_reg, reg1, %xmm7; \
- vpcmpgtb UCHIGH_reg, reg1, %xmm8; \
- vpcmpgtb UCLOW_reg, reg2, %xmm9; \
- vpcmpgtb UCHIGH_reg, reg2, %xmm10; \
- vpandn %xmm7, %xmm8, %xmm8; \
- vpandn %xmm9, %xmm10, %xmm10; \
- vpand LCQWORD_reg, %xmm8, %xmm8; \
- vpand LCQWORD_reg, %xmm10, %xmm10; \
- vpor reg1, %xmm8, reg1; \
- vpor reg2, %xmm10, reg2
-# else
-# define TOLOWER(reg1, reg2) \
- movdqa reg1, %xmm7; \
- movdqa UCHIGH_reg, %xmm8; \
- movdqa reg2, %xmm9; \
- movdqa UCHIGH_reg, %xmm10; \
- pcmpgtb UCLOW_reg, %xmm7; \
- pcmpgtb reg1, %xmm8; \
- pcmpgtb UCLOW_reg, %xmm9; \
- pcmpgtb reg2, %xmm10; \
- pand %xmm8, %xmm7; \
- pand %xmm10, %xmm9; \
- pand LCQWORD_reg, %xmm7; \
- pand LCQWORD_reg, %xmm9; \
- por %xmm7, reg1; \
- por %xmm9, reg2
-# endif
+# define TOLOWER(reg1, reg2) \
+ movdqa LCASE_MIN_reg, %xmm7; \
+ movdqa LCASE_MIN_reg, %xmm8; \
+ paddb reg1, %xmm7; \
+ paddb reg2, %xmm8; \
+ pcmpgtb LCASE_MAX_reg, %xmm7; \
+ pcmpgtb LCASE_MAX_reg, %xmm8; \
+ pandn CASE_ADD_reg, %xmm7; \
+ pandn CASE_ADD_reg, %xmm8; \
+ paddb %xmm7, reg1; \
+ paddb %xmm8, reg2
+
TOLOWER (%xmm1, %xmm2)
#else
# define TOLOWER(reg1, reg2)
#endif
- pxor %xmm0, D(%xmm0) /* clear %xmm0 for null char checks */
- pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */
- pcmpeqb %xmm2, D(%xmm1) /* compare first 16 bytes for equality */
- psubb %xmm0, D(%xmm1) /* packed sub of comparison results*/
+ pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
+ pcmpeqb %xmm1, %xmm0 /* Any null chars? */
+ pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %edx
sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */
jnz LABEL(less16bytes)/* If not, find different value or null char */
xor %r8d, %r8d
and $0xf, %ecx /* offset of rsi */
and $0xf, %eax /* offset of rdi */
- pxor %xmm0, D(%xmm0) /* clear %xmm0 for null char check */
+ pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
cmp %eax, %ecx
je LABEL(ashr_0) /* rsi and rdi relative offset same */
ja LABEL(bigger)
sub %rcx, %r9
lea LABEL(unaligned_table)(%rip), %r10
movslq (%r10, %r9,4), %r9
- pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */
+ pcmpeqb %xmm1, %xmm0 /* Any null chars? */
lea (%r10, %r9), %r10
_CET_NOTRACK jmp *%r10 /* jump to corresponding case */
LABEL(ashr_0):
movdqa (%rsi), %xmm1
- pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */
+ pcmpeqb %xmm1, %xmm0 /* Any null chars? */
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
- pcmpeqb (%rdi), D(%xmm1) /* compare 16 bytes for equality */
+ pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
#else
movdqa (%rdi), %xmm2
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm2, D(%xmm1) /* compare 16 bytes for equality */
+ pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */
#endif
- psubb %xmm0, D(%xmm1) /* packed sub of comparison results*/
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
pmovmskb %xmm1, %r9d
shr %cl, %edx /* adjust 0xffff for offset */
shr %cl, %r9d /* adjust for 16-byte offset */
*/
.p2align 4
LABEL(ashr_1):
- pslldq $15, D(%xmm2) /* shift first string to align with second */
+ pslldq $15, %xmm2 /* shift first string to align with second */
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, D(%xmm2) /* compare 16 bytes for equality */
- psubb %xmm0, D(%xmm2) /* packed sub of comparison results*/
+ pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
+ psubb %xmm0, %xmm2 /* packed sub of comparison results*/
pmovmskb %xmm2, %r9d
shr %cl, %edx /* adjust 0xffff for offset */
shr %cl, %r9d /* adjust for 16-byte offset */
LABEL(nibble_ashr_1_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $1, -16(%rdi, %rdx), D(%xmm0)
+ palignr $1, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
jg LABEL(nibble_ashr_1_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $1, -16(%rdi, %rdx), D(%xmm0)
+ palignr $1, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
LABEL(nibble_ashr_1_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $1, D(%xmm0)
+ psrldq $1, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
*/
.p2align 4
LABEL(ashr_2):
- pslldq $14, D(%xmm2)
+ pslldq $14, %xmm2
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, D(%xmm2)
- psubb %xmm0, D(%xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
LABEL(nibble_ashr_2_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $2, -16(%rdi, %rdx), D(%xmm0)
+ palignr $2, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
jg LABEL(nibble_ashr_2_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $2, -16(%rdi, %rdx), D(%xmm0)
+ palignr $2, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
LABEL(nibble_ashr_2_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $2, D(%xmm0)
+ psrldq $2, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
*/
.p2align 4
LABEL(ashr_3):
- pslldq $13, D(%xmm2)
+ pslldq $13, %xmm2
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, D(%xmm2)
- psubb %xmm0, D(%xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
LABEL(nibble_ashr_3_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $3, -16(%rdi, %rdx), D(%xmm0)
+ palignr $3, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
jg LABEL(nibble_ashr_3_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $3, -16(%rdi, %rdx), D(%xmm0)
+ palignr $3, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
LABEL(nibble_ashr_3_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $3, D(%xmm0)
+ psrldq $3, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
*/
.p2align 4
LABEL(ashr_4):
- pslldq $12, D(%xmm2)
+ pslldq $12, %xmm2
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, D(%xmm2)
- psubb %xmm0, D(%xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
LABEL(nibble_ashr_4_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $4, -16(%rdi, %rdx), D(%xmm0)
+ palignr $4, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
jg LABEL(nibble_ashr_4_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $4, -16(%rdi, %rdx), D(%xmm0)
+ palignr $4, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
LABEL(nibble_ashr_4_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $4, D(%xmm0)
+ psrldq $4, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
*/
.p2align 4
LABEL(ashr_5):
- pslldq $11, D(%xmm2)
+ pslldq $11, %xmm2
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, D(%xmm2)
- psubb %xmm0, D(%xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
LABEL(nibble_ashr_5_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $5, -16(%rdi, %rdx), D(%xmm0)
+ palignr $5, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
movdqa (%rdi, %rdx), %xmm0
- palignr $5, -16(%rdi, %rdx), D(%xmm0)
+ palignr $5, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
LABEL(nibble_ashr_5_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $5, D(%xmm0)
+ psrldq $5, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
*/
.p2align 4
LABEL(ashr_6):
- pslldq $10, D(%xmm2)
+ pslldq $10, %xmm2
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, D(%xmm2)
- psubb %xmm0, D(%xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
LABEL(nibble_ashr_6_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $6, -16(%rdi, %rdx), D(%xmm0)
+ palignr $6, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
jg LABEL(nibble_ashr_6_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $6, -16(%rdi, %rdx), D(%xmm0)
+ palignr $6, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
LABEL(nibble_ashr_6_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $6, D(%xmm0)
+ psrldq $6, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
*/
.p2align 4
LABEL(ashr_7):
- pslldq $9, D(%xmm2)
+ pslldq $9, %xmm2
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, D(%xmm2)
- psubb %xmm0, D(%xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
LABEL(nibble_ashr_7_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $7, -16(%rdi, %rdx), D(%xmm0)
+ palignr $7, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
jg LABEL(nibble_ashr_7_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $7, -16(%rdi, %rdx), D(%xmm0)
+ palignr $7, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
#else
LABEL(nibble_ashr_7_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $7, D(%xmm0)
+ psrldq $7, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
*/
.p2align 4
LABEL(ashr_8):
- pslldq $8, D(%xmm2)
+ pslldq $8, %xmm2
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, D(%xmm2)
- psubb %xmm0, D(%xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
LABEL(nibble_ashr_8_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $8, -16(%rdi, %rdx), D(%xmm0)
+ palignr $8, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
jg LABEL(nibble_ashr_8_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $8, -16(%rdi, %rdx), D(%xmm0)
+ palignr $8, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
LABEL(nibble_ashr_8_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $8, D(%xmm0)
+ psrldq $8, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
*/
.p2align 4
LABEL(ashr_9):
- pslldq $7, D(%xmm2)
+ pslldq $7, %xmm2
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, D(%xmm2)
- psubb %xmm0, D(%xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
LABEL(nibble_ashr_9_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $9, -16(%rdi, %rdx), D(%xmm0)
+ palignr $9, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
jg LABEL(nibble_ashr_9_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $9, -16(%rdi, %rdx), D(%xmm0)
+ palignr $9, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
LABEL(nibble_ashr_9_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $9, D(%xmm0)
+ psrldq $9, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
*/
.p2align 4
LABEL(ashr_10):
- pslldq $6, D(%xmm2)
+ pslldq $6, %xmm2
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, D(%xmm2)
- psubb %xmm0, D(%xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
LABEL(nibble_ashr_10_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $10, -16(%rdi, %rdx), D(%xmm0)
+ palignr $10, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
jg LABEL(nibble_ashr_10_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $10, -16(%rdi, %rdx), D(%xmm0)
+ palignr $10, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
LABEL(nibble_ashr_10_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $10, D(%xmm0)
+ psrldq $10, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
*/
.p2align 4
LABEL(ashr_11):
- pslldq $5, D(%xmm2)
+ pslldq $5, %xmm2
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, D(%xmm2)
- psubb %xmm0, D(%xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
LABEL(nibble_ashr_11_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $11, -16(%rdi, %rdx), D(%xmm0)
+ palignr $11, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
jg LABEL(nibble_ashr_11_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $11, -16(%rdi, %rdx), D(%xmm0)
+ palignr $11, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
LABEL(nibble_ashr_11_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $11, D(%xmm0)
+ psrldq $11, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
*/
.p2align 4
LABEL(ashr_12):
- pslldq $4, D(%xmm2)
+ pslldq $4, %xmm2
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, D(%xmm2)
- psubb %xmm0, D(%xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
LABEL(nibble_ashr_12_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $12, -16(%rdi, %rdx), D(%xmm0)
+ palignr $12, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
jg LABEL(nibble_ashr_12_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $12, -16(%rdi, %rdx), D(%xmm0)
+ palignr $12, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
LABEL(nibble_ashr_12_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $12, D(%xmm0)
+ psrldq $12, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
*/
.p2align 4
LABEL(ashr_13):
- pslldq $3, D(%xmm2)
+ pslldq $3, %xmm2
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, D(%xmm2)
- psubb %xmm0, D(%xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
LABEL(nibble_ashr_13_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $13, -16(%rdi, %rdx), D(%xmm0)
+ palignr $13, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
jg LABEL(nibble_ashr_13_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $13, -16(%rdi, %rdx), D(%xmm0)
+ palignr $13, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
LABEL(nibble_ashr_13_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $13, D(%xmm0)
+ psrldq $13, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
*/
.p2align 4
LABEL(ashr_14):
- pslldq $2, D(%xmm2)
+ pslldq $2, %xmm2
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, D(%xmm2)
- psubb %xmm0, D(%xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
LABEL(nibble_ashr_14_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $14, -16(%rdi, %rdx), D(%xmm0)
+ palignr $14, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
jg LABEL(nibble_ashr_14_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $14, -16(%rdi, %rdx), D(%xmm0)
+ palignr $14, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
LABEL(nibble_ashr_14_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $14, D(%xmm0)
+ psrldq $14, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
*/
.p2align 4
LABEL(ashr_15):
- pslldq $1, D(%xmm2)
+ pslldq $1, %xmm2
TOLOWER (%xmm1, %xmm2)
- pcmpeqb %xmm1, D(%xmm2)
- psubb %xmm0, D(%xmm2)
+ pcmpeqb %xmm1, %xmm2
+ psubb %xmm0, %xmm2
pmovmskb %xmm2, %r9d
shr %cl, %edx
shr %cl, %r9d
LABEL(nibble_ashr_15_restart_use):
movdqa (%rdi, %rdx), %xmm0
- palignr $15, -16(%rdi, %rdx), D(%xmm0)
+ palignr $15, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
jg LABEL(nibble_ashr_15_use)
movdqa (%rdi, %rdx), %xmm0
- palignr $15, -16(%rdi, %rdx), D(%xmm0)
+ palignr $15, -16(%rdi, %rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
#else
LABEL(nibble_ashr_15_use):
sub $0x1000, %r10
movdqa -16(%rdi, %rdx), %xmm0
- psrldq $15, D(%xmm0)
+ psrldq $15, %xmm0
pcmpistri $0x3a,%xmm0, %xmm0
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp %r11, %rcx
.p2align 4
// XXX Same as code above
LABEL(Byte0):
- movzx (%rsi), %ecx
- movzx (%rdi), %eax
+ movzbl (%rsi), %ecx
+ movzbl (%rdi), %eax
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
{
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
&& CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
- && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
- && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP))
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
return OPTIMIZE (evex);
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
return OPTIMIZE (avx2);
}
+ if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
+ && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
+ return OPTIMIZE (sse42);
+
if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
return OPTIMIZE (sse2_unaligned);
RETURN (NULL, strlen (s));
const char *aligned;
- __m128i mask;
- int offset = (int) ((size_t) a & 15);
+ __m128i mask, maskz, zero;
+ unsigned int maskz_bits;
+ unsigned int offset = (unsigned int) ((size_t) a & 15);
+ zero = _mm_set1_epi8 (0);
if (offset != 0)
{
/* Load masks. */
aligned = (const char *) ((size_t) a & -16L);
__m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
-
- mask = __m128i_shift_right (mask0, offset);
+ maskz = _mm_cmpeq_epi8 (mask0, zero);
/* Find where the NULL terminator is. */
- int length = _mm_cmpistri (mask, mask, 0x3a);
- if (length == 16 - offset)
- {
- /* There is no NULL terminator. */
- __m128i mask1 = _mm_load_si128 ((__m128i *) (aligned + 16));
- int index = _mm_cmpistri (mask1, mask1, 0x3a);
- length += index;
-
- /* Don't use SSE4.2 if the length of A > 16. */
- if (length > 16)
- return STRCSPN_SSE2 (s, a);
-
- if (index != 0)
- {
- /* Combine mask0 and mask1. We could play games with
- palignr, but frankly this data should be in L1 now
- so do the merge via an unaligned load. */
- mask = _mm_loadu_si128 ((__m128i *) a);
- }
- }
+ maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
+ if (maskz_bits != 0)
+ {
+ mask = __m128i_shift_right (mask0, offset);
+ offset = (unsigned int) ((size_t) s & 15);
+ if (offset)
+ goto start_unaligned;
+
+ aligned = s;
+ goto start_loop;
+ }
}
- else
- {
- /* A is aligned. */
- mask = _mm_load_si128 ((__m128i *) a);
- /* Find where the NULL terminator is. */
- int length = _mm_cmpistri (mask, mask, 0x3a);
- if (length == 16)
- {
- /* There is no NULL terminator. Don't use SSE4.2 if the length
- of A > 16. */
- if (a[16] != 0)
- return STRCSPN_SSE2 (s, a);
- }
+ /* A is aligned, or has no NUL before the next 16-byte boundary. */
+ mask = _mm_loadu_si128 ((__m128i *) a);
+ /* Find where the NULL terminator is. */
+ maskz = _mm_cmpeq_epi8 (mask, zero);
+ maskz_bits = _mm_movemask_epi8 (maskz);
+ if (maskz_bits == 0)
+ {
+ /* There is no NULL terminator. Don't use SSE4.2 if the length
+ of A > 16. */
+ if (a[16] != 0)
+ return STRCSPN_SSE2 (s, a);
}
- offset = (int) ((size_t) s & 15);
+ aligned = s;
+ offset = (unsigned int) ((size_t) s & 15);
if (offset != 0)
{
+ start_unaligned:
/* Check partial string. */
aligned = (const char *) ((size_t) s & -16L);
__m128i value = _mm_load_si128 ((__m128i *) aligned);
value = __m128i_shift_right (value, offset);
- int length = _mm_cmpistri (mask, value, 0x2);
+ unsigned int length = _mm_cmpistri (mask, value, 0x2);
/* No need to check ZFlag since ZFlag is always 1. */
- int cflag = _mm_cmpistrc (mask, value, 0x2);
+ unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
if (cflag)
RETURN ((char *) (s + length), length);
/* Find where the NULL terminator is. */
- int index = _mm_cmpistri (value, value, 0x3a);
+ unsigned int index = _mm_cmpistri (value, value, 0x3a);
if (index < 16 - offset)
RETURN (NULL, index);
aligned += 16;
}
- else
- aligned = s;
+start_loop:
while (1)
{
__m128i value = _mm_load_si128 ((__m128i *) aligned);
- int index = _mm_cmpistri (mask, value, 0x2);
- int cflag = _mm_cmpistrc (mask, value, 0x2);
- int zflag = _mm_cmpistrz (mask, value, 0x2);
+ unsigned int index = _mm_cmpistri (mask, value, 0x2);
+ unsigned int cflag = _mm_cmpistrc (mask, value, 0x2);
+ unsigned int zflag = _mm_cmpistrz (mask, value, 0x2);
if (cflag)
RETURN ((char *) (aligned + index), (size_t) (aligned + index - s));
if (zflag)
+++ /dev/null
-/* strcspn optimized with SSE2.
- Copyright (C) 2017-2021 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-# define strcspn __strcspn_sse2
-
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(strcspn)
-#endif
-
-#include <sysdeps/x86_64/strcspn.S>
--- /dev/null
+/* strcspn built from the generic C implementation as __strcspn_sse2.
+ Copyright (C) 2017-2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+# define STRCSPN __strcspn_sse2
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(STRCSPN)
+#endif
+
+#include <string/strcspn.c>
--- /dev/null
+/* Placeholder function, not used by any processor at the moment.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# ifdef USE_AS_WCSLEN
+# define VPCMP vpcmpd
+# define VPTESTN vptestnmd
+# define VPMINU vpminud
+# define CHAR_SIZE 4
+# else
+# define VPCMP vpcmpb
+# define VPTESTN vptestnmb
+# define VPMINU vpminub
+# define CHAR_SIZE 1
+# endif
+
+# define XMM0 xmm16
+# define PAGE_SIZE 4096
+# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
+
+# if VEC_SIZE == 64
+# define KMOV kmovq
+# define KORTEST kortestq
+# define RAX rax
+# define RCX rcx
+# define RDX rdx
+# define SHR shrq
+# define TEXTSUFFIX evex512
+# define VMM0 zmm16
+# define VMM1 zmm17
+# define VMM2 zmm18
+# define VMM3 zmm19
+# define VMM4 zmm20
+# define VMOVA vmovdqa64
+# elif VEC_SIZE == 32
+/* Currently Unused. */
+# define KMOV kmovd
+# define KORTEST kortestd
+# define RAX eax
+# define RCX ecx
+# define RDX edx
+# define SHR shrl
+# define TEXTSUFFIX evex256
+# define VMM0 ymm16
+# define VMM1 ymm17
+# define VMM2 ymm18
+# define VMM3 ymm19
+# define VMM4 ymm20
+# define VMOVA vmovdqa32
+# endif
+
+ .section .text.TEXTSUFFIX, "ax", @progbits
+/* Aligning the entry point to 64 bytes provides better performance for
+ strings that fit in one vector. */
+ENTRY_P2ALIGN (STRLEN, 6)
+# ifdef USE_AS_STRNLEN
+ /* Check zero length. */
+ test %RSI_LP, %RSI_LP
+ jz L(ret_max)
+# ifdef __ILP32__
+ /* Clear the upper 32 bits. */
+ movl %esi, %esi
+# endif
+# endif
+
+ movl %edi, %eax
+ vpxorq %XMM0, %XMM0, %XMM0
+ andl $(PAGE_SIZE - 1), %eax
+ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
+ ja L(page_cross)
+
+ /* Compare [w]char for null, mask bit will be set for match. */
+ VPCMP $0, (%rdi), %VMM0, %k0
+ KMOV %k0, %RAX
+ test %RAX, %RAX
+ jz L(align_more)
+
+ bsf %RAX, %RAX
+# ifdef USE_AS_STRNLEN
+ cmpq %rsi, %rax
+ cmovnb %rsi, %rax
+# endif
+ ret
+
+ /* The maximum length was reached: return it. */
+# ifdef USE_AS_STRNLEN
+ .p2align 4,,3
+L(ret_max):
+ movq %rsi, %rax
+ ret
+# endif
+
+L(align_more):
+ leaq VEC_SIZE(%rdi), %rax
+ /* Align rax to VEC_SIZE. */
+ andq $-VEC_SIZE, %rax
+# ifdef USE_AS_STRNLEN
+ movq %rax, %rdx
+ subq %rdi, %rdx
+# ifdef USE_AS_WCSLEN
+ SHR $2, %RDX
+# endif
+ /* At this point rdx contains the number of [w]chars already compared. */
+ subq %rsi, %rdx
+ jae L(ret_max)
+ negq %rdx
+ /* At this point rdx contains the number of [w]chars still to be
+ checked. From now on rdx is decremented with each compare. */
+# endif
+
+ /* Loop unroll 4 times for 4 vector loop. */
+ VPCMP $0, (%rax), %VMM0, %k0
+ KMOV %k0, %RCX
+ test %RCX, %RCX
+ jnz L(ret_vec_x1)
+
+# ifdef USE_AS_STRNLEN
+ subq $CHAR_PER_VEC, %rdx
+ jbe L(ret_max)
+# endif
+
+ VPCMP $0, VEC_SIZE(%rax), %VMM0, %k0
+ KMOV %k0, %RCX
+ test %RCX, %RCX
+ jnz L(ret_vec_x2)
+
+# ifdef USE_AS_STRNLEN
+ subq $CHAR_PER_VEC, %rdx
+ jbe L(ret_max)
+# endif
+
+ VPCMP $0, (VEC_SIZE * 2)(%rax), %VMM0, %k0
+ KMOV %k0, %RCX
+ test %RCX, %RCX
+ jnz L(ret_vec_x3)
+
+# ifdef USE_AS_STRNLEN
+ subq $CHAR_PER_VEC, %rdx
+ jbe L(ret_max)
+# endif
+
+ VPCMP $0, (VEC_SIZE * 3)(%rax), %VMM0, %k0
+ KMOV %k0, %RCX
+ test %RCX, %RCX
+ jnz L(ret_vec_x4)
+
+# ifdef USE_AS_STRNLEN
+ subq $CHAR_PER_VEC, %rdx
+ jbe L(ret_max)
+ /* Save pointer before 4 x VEC_SIZE alignment. */
+ movq %rax, %rcx
+# endif
+
+ /* Align address to VEC_SIZE * 4 for loop. */
+ andq $-(VEC_SIZE * 4), %rax
+
+# ifdef USE_AS_STRNLEN
+ subq %rax, %rcx
+# ifdef USE_AS_WCSLEN
+ SHR $2, %RCX
+# endif
+ /* rcx contains the number of [w]chars that will be recompared
+ due to the alignment fix. rdx must be incremented by rcx to
+ offset the alignment adjustment. */
+ addq %rcx, %rdx
+ /* Jump past the subtraction at L(loop): rdx must not be adjusted
+ for the first iteration of the 4 x VEC_SIZE aligned loop. */
+ jmp L(loop_entry)
+# endif
+
+ .p2align 4,,11
+L(loop):
+# ifdef USE_AS_STRNLEN
+ subq $(CHAR_PER_VEC * 4), %rdx
+ jbe L(ret_max)
+L(loop_entry):
+# endif
+ /* VPMINU and VPCMP combination provide better performance as
+ compared to alternative combinations. */
+ VMOVA (VEC_SIZE * 4)(%rax), %VMM1
+ VPMINU (VEC_SIZE * 5)(%rax), %VMM1, %VMM2
+ VMOVA (VEC_SIZE * 6)(%rax), %VMM3
+ VPMINU (VEC_SIZE * 7)(%rax), %VMM3, %VMM4
+
+ VPTESTN %VMM2, %VMM2, %k0
+ VPTESTN %VMM4, %VMM4, %k1
+
+ subq $-(VEC_SIZE * 4), %rax
+ KORTEST %k0, %k1
+ jz L(loop)
+
+ VPTESTN %VMM1, %VMM1, %k2
+ KMOV %k2, %RCX
+ test %RCX, %RCX
+ jnz L(ret_vec_x1)
+
+ KMOV %k0, %RCX
+ /* At this point, if k0 is non zero, null char must be in the
+ second vector. */
+ test %RCX, %RCX
+ jnz L(ret_vec_x2)
+
+ VPTESTN %VMM3, %VMM3, %k3
+ KMOV %k3, %RCX
+ test %RCX, %RCX
+ jnz L(ret_vec_x3)
+ /* At this point null [w]char must be in the fourth vector so no
+ need to check. */
+ KMOV %k1, %RCX
+
+ /* The returns for the fourth, third and second vectors are much
+ the same; they are implemented this way to avoid branching and
+ to reuse the code from the pre-loop exit conditions. */
+L(ret_vec_x4):
+ bsf %RCX, %RCX
+ subq %rdi, %rax
+# ifdef USE_AS_WCSLEN
+ subq $-(VEC_SIZE * 3), %rax
+ shrq $2, %rax
+ addq %rcx, %rax
+# else
+ leaq (VEC_SIZE * 3)(%rcx, %rax), %rax
+# endif
+# ifdef USE_AS_STRNLEN
+ cmpq %rsi, %rax
+ cmovnb %rsi, %rax
+# endif
+ ret
+
+L(ret_vec_x3):
+ bsf %RCX, %RCX
+ subq %rdi, %rax
+# ifdef USE_AS_WCSLEN
+ subq $-(VEC_SIZE * 2), %rax
+ shrq $2, %rax
+ addq %rcx, %rax
+# else
+ leaq (VEC_SIZE * 2)(%rcx, %rax), %rax
+# endif
+# ifdef USE_AS_STRNLEN
+ cmpq %rsi, %rax
+ cmovnb %rsi, %rax
+# endif
+ ret
+
+L(ret_vec_x2):
+ subq $-VEC_SIZE, %rax
+L(ret_vec_x1):
+ bsf %RCX, %RCX
+ subq %rdi, %rax
+# ifdef USE_AS_WCSLEN
+ shrq $2, %rax
+# endif
+ addq %rcx, %rax
+# ifdef USE_AS_STRNLEN
+ cmpq %rsi, %rax
+ cmovnb %rsi, %rax
+# endif
+ ret
+
+L(page_cross):
+ movl %eax, %ecx
+# ifdef USE_AS_WCSLEN
+ andl $(VEC_SIZE - 1), %ecx
+ sarl $2, %ecx
+# endif
+ /* ecx contains the number of [w]chars to be skipped as a result
+ of address alignment. rax still holds the page offset of rdi,
+ so the xor below clears those bits and leaves the page base. */
+ xorq %rdi, %rax
+ VPCMP $0, (PAGE_SIZE - VEC_SIZE)(%rax), %VMM0, %k0
+ KMOV %k0, %RAX
+ /* Shift out the mask bits of the [w]chars that precede the start
+ of the string. */
+ SHR %cl, %RAX
+ jz L(align_more)
+
+ bsf %RAX, %RAX
+# ifdef USE_AS_STRNLEN
+ cmpq %rsi, %rax
+ cmovnb %rsi, %rax
+# endif
+ ret
+
+END (STRLEN)
+#endif
--- /dev/null
+#ifndef STRLEN
+# define STRLEN __strlen_evex512
+#endif
+
+#define VEC_SIZE 64
+
+#include "strlen-evex-base.S"
# define SHIFT_RETURN
#endif
+#ifndef SECTION
+# define SECTION(p) p
+#endif
+
/* Long lived register in strlen(s), strnlen(s, n) are:
%xmm3 - zero
*/
-.text
+ .section SECTION(.text),"ax",@progbits
ENTRY(strlen)
/* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx. */
L(n_nonzero):
# ifdef AS_WCSLEN
/* Check for overflow from maxlen * sizeof(wchar_t). If it would
- overflow the only way this program doesn't have undefined behavior
- is if there is a null terminator in valid memory so wcslen will
+ overflow the only way this program doesn't have undefined behavior
+ is if there is a null terminator in valid memory so wcslen will
suffice. */
mov %RSI_LP, %R10_LP
sar $62, %R10_LP
+++ /dev/null
-/* strncasecmp_l optimized with AVX.
- Copyright (C) 2017-2021 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#define STRCMP_SSE42 __strncasecmp_l_avx
-#define USE_AVX 1
-#define USE_AS_STRNCASECMP_L
-#include "strcmp-sse42.S"
--- /dev/null
+#ifndef STRCMP
+# define STRCMP __strncasecmp_l_avx2_rtm
+#endif
+
+#define _GLABEL(x) x ## _rtm
+#define GLABEL(x) _GLABEL(x)
+
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
+ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
+
+#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
+
+#define SECTION(p) p##.avx.rtm
+#define OVERFLOW_STRCMP __strcasecmp_l_avx2_rtm
+
+#include "strncase_l-avx2.S"
--- /dev/null
+/* strncasecmp_l optimized with AVX2.
+ Copyright (C) 2017-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef STRCMP
+# define STRCMP __strncasecmp_l_avx2
+#endif
+#define USE_AS_STRCASECMP_L
+#define USE_AS_STRNCMP
+#ifndef OVERFLOW_STRCMP
+# define OVERFLOW_STRCMP __strcasecmp_l_avx2
+#endif
+#include "strcmp-avx2.S"
--- /dev/null
+/* strncasecmp_l optimized with EVEX.
+ Copyright (C) 2017-2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef STRCMP
+# define STRCMP __strncasecmp_l_evex
+#endif
+#define OVERFLOW_STRCMP __strcasecmp_l_evex
+#define USE_AS_STRCASECMP_L
+#define USE_AS_STRNCMP
+#include "strcmp-evex.S"
#define STRCMP __strncmp_avx2_rtm
#define USE_AS_STRNCMP 1
+#define OVERFLOW_STRCMP __strcmp_avx2_rtm
#include "strcmp-avx2-rtm.S"
#define STRCMP __strncmp_avx2
#define USE_AS_STRNCMP 1
+#define OVERFLOW_STRCMP __strcmp_avx2
#include "strcmp-avx2.S"
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#define STRCMP_SSE42 __strncmp_sse42
-#define USE_AS_STRNCMP
-#include "strcmp-sse42.S"
+#if IS_IN (libc)
+# define STRCMP_SSE42 __strncmp_sse42
+# define USE_AS_STRNCMP
+# include "strcmp-sse42.S"
+#endif
{
if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
&& CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
- && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
- && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP))
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
return OPTIMIZE (evex);
if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
--- /dev/null
+#define STRLEN __strnlen_evex512
+#define USE_AS_STRNLEN 1
+
+#include "strlen-evex512.S"
+++ /dev/null
-/* strpbrk optimized with SSE2.
- Copyright (C) 2017-2021 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-# define strcspn __strpbrk_sse2
-
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(strpbrk)
-#endif
-
-#define USE_AS_STRPBRK
-#include <sysdeps/x86_64/strcspn.S>
--- /dev/null
+/* strpbrk optimized with SSE2.
+ Copyright (C) 2017-2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+# define STRPBRK __strpbrk_sse2
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(STRPBRK)
+#endif
+
+#include <string/strpbrk.c>
# ifdef USE_AS_WCSRCHR
# define VPBROADCAST vpbroadcastd
# define VPCMPEQ vpcmpeqd
+# define VPMIN vpminud
+# define CHAR_SIZE 4
# else
# define VPBROADCAST vpbroadcastb
# define VPCMPEQ vpcmpeqb
+# define VPMIN vpminub
+# define CHAR_SIZE 1
# endif
# ifndef VZEROUPPER
# endif
# define VEC_SIZE 32
+# define PAGE_SIZE 4096
- .section SECTION(.text),"ax",@progbits
-ENTRY (STRRCHR)
- movd %esi, %xmm4
- movl %edi, %ecx
+ .section SECTION(.text), "ax", @progbits
+ENTRY(STRRCHR)
+ movd %esi, %xmm7
+ movl %edi, %eax
- /* Broadcast CHAR to YMM4. */
+ /* Broadcast CHAR to YMM7. */
- VPBROADCAST %xmm4, %ymm4
+ VPBROADCAST %xmm7, %ymm7
vpxor %xmm0, %xmm0, %xmm0
- /* Check if we may cross page boundary with one vector load. */
- andl $(2 * VEC_SIZE - 1), %ecx
- cmpl $VEC_SIZE, %ecx
- ja L(cros_page_boundary)
+ /* Shift here instead of `andl` to save code size (saves a fetch
+ block). */
+ sall $20, %eax
+ cmpl $((PAGE_SIZE - VEC_SIZE) << 20), %eax
+ ja L(cross_page)
+L(page_cross_continue):
vmovdqu (%rdi), %ymm1
- VPCMPEQ %ymm1, %ymm0, %ymm2
- VPCMPEQ %ymm1, %ymm4, %ymm3
- vpmovmskb %ymm2, %ecx
- vpmovmskb %ymm3, %eax
- addq $VEC_SIZE, %rdi
+ /* Check end of string match. */
+ VPCMPEQ %ymm1, %ymm0, %ymm6
+ vpmovmskb %ymm6, %ecx
+ testl %ecx, %ecx
+ jz L(aligned_more)
+
+ /* Only check match with search CHAR if needed. */
+ VPCMPEQ %ymm1, %ymm7, %ymm1
+ vpmovmskb %ymm1, %eax
+ /* Check if match before first zero. */
+ blsmskl %ecx, %ecx
+ andl %ecx, %eax
+ jz L(ret0)
+ bsrl %eax, %eax
+ addq %rdi, %rax
+ /* We are off by 3 for wcsrchr if search CHAR is non-zero. If
+ search CHAR is zero we are correct. Either way `andq
+ -CHAR_SIZE, %rax` gets the correct result. */
+# ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+# endif
+L(ret0):
+L(return_vzeroupper):
+ ZERO_UPPER_VEC_REGISTERS_RETURN
+
+ /* Returns for first vec x1/x2 have hard coded backward search
+ path for earlier matches. */
+ .p2align 4,, 10
+L(first_vec_x1):
+ VPCMPEQ %ymm2, %ymm7, %ymm6
+ vpmovmskb %ymm6, %eax
+ blsmskl %ecx, %ecx
+ andl %ecx, %eax
+ jnz L(first_vec_x1_return)
+
+ .p2align 4,, 4
+L(first_vec_x0_test):
+ VPCMPEQ %ymm1, %ymm7, %ymm6
+ vpmovmskb %ymm6, %eax
+ testl %eax, %eax
+ jz L(ret1)
+ bsrl %eax, %eax
+ addq %r8, %rax
+# ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+# endif
+L(ret1):
+ VZEROUPPER_RETURN
+ .p2align 4,, 10
+L(first_vec_x0_x1_test):
+ VPCMPEQ %ymm2, %ymm7, %ymm6
+ vpmovmskb %ymm6, %eax
+ /* Check ymm2 for search CHAR match. If no match then check ymm1
+ before returning. */
testl %eax, %eax
- jnz L(first_vec)
+ jz L(first_vec_x0_test)
+ .p2align 4,, 4
+L(first_vec_x1_return):
+ bsrl %eax, %eax
+ leaq 1(%rdi, %rax), %rax
+# ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+# endif
+ VZEROUPPER_RETURN
- testl %ecx, %ecx
- jnz L(return_null)
- andq $-VEC_SIZE, %rdi
- xorl %edx, %edx
- jmp L(aligned_loop)
+ .p2align 4,, 10
+L(first_vec_x2):
+ VPCMPEQ %ymm3, %ymm7, %ymm6
+ vpmovmskb %ymm6, %eax
+ blsmskl %ecx, %ecx
+ /* If no in-range search CHAR match in ymm3 then need to check
+ ymm1/ymm2 for an earlier match (we delay checking search
+ CHAR matches until needed). */
+ andl %ecx, %eax
+ jz L(first_vec_x0_x1_test)
+ bsrl %eax, %eax
+ leaq (VEC_SIZE + 1)(%rdi, %rax), %rax
+# ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+# endif
+ VZEROUPPER_RETURN
+
.p2align 4
-L(first_vec):
- /* Check if there is a nul CHAR. */
+L(aligned_more):
+ /* Save original pointer if match was in VEC 0. */
+ movq %rdi, %r8
+
+ /* Align src. */
+ orq $(VEC_SIZE - 1), %rdi
+ vmovdqu 1(%rdi), %ymm2
+ VPCMPEQ %ymm2, %ymm0, %ymm6
+ vpmovmskb %ymm6, %ecx
testl %ecx, %ecx
- jnz L(char_and_nul_in_first_vec)
+ jnz L(first_vec_x1)
- /* Remember the match and keep searching. */
- movl %eax, %edx
- movq %rdi, %rsi
- andq $-VEC_SIZE, %rdi
- jmp L(aligned_loop)
+ vmovdqu (VEC_SIZE + 1)(%rdi), %ymm3
+ VPCMPEQ %ymm3, %ymm0, %ymm6
+ vpmovmskb %ymm6, %ecx
+ testl %ecx, %ecx
+ jnz L(first_vec_x2)
+ /* Save pointer again before realigning. */
+ movq %rdi, %rsi
+ addq $(VEC_SIZE + 1), %rdi
+ andq $-(VEC_SIZE * 2), %rdi
.p2align 4
-L(cros_page_boundary):
- andl $(VEC_SIZE - 1), %ecx
- andq $-VEC_SIZE, %rdi
- vmovdqa (%rdi), %ymm1
- VPCMPEQ %ymm1, %ymm0, %ymm2
- VPCMPEQ %ymm1, %ymm4, %ymm3
- vpmovmskb %ymm2, %edx
- vpmovmskb %ymm3, %eax
- shrl %cl, %edx
- shrl %cl, %eax
- addq $VEC_SIZE, %rdi
-
- /* Check if there is a CHAR. */
+L(first_aligned_loop):
+ /* Do 2x VEC at a time. Any more and the cost of finding the
+ match outweighs the loop benefit. */
+ vmovdqa (VEC_SIZE * 0)(%rdi), %ymm4
+ vmovdqa (VEC_SIZE * 1)(%rdi), %ymm5
+
+ VPCMPEQ %ymm4, %ymm7, %ymm6
+ VPMIN %ymm4, %ymm5, %ymm8
+ VPCMPEQ %ymm5, %ymm7, %ymm10
+ vpor %ymm6, %ymm10, %ymm5
+ VPCMPEQ %ymm8, %ymm0, %ymm8
+ vpor %ymm5, %ymm8, %ymm9
+
+ vpmovmskb %ymm9, %eax
+ addq $(VEC_SIZE * 2), %rdi
+ /* No zero or search CHAR. */
testl %eax, %eax
- jnz L(found_char)
-
- testl %edx, %edx
- jnz L(return_null)
+ jz L(first_aligned_loop)
- jmp L(aligned_loop)
-
- .p2align 4
-L(found_char):
- testl %edx, %edx
- jnz L(char_and_nul)
+ /* If no zero CHAR then go to second loop (this allows us to
+ throw away all prior work). */
+ vpmovmskb %ymm8, %ecx
+ testl %ecx, %ecx
+ jz L(second_aligned_loop_prep)
- /* Remember the match and keep searching. */
- movl %eax, %edx
- leaq (%rdi, %rcx), %rsi
+ /* Search char could be zero so we need to get the true match.
+ */
+ vpmovmskb %ymm5, %eax
+ testl %eax, %eax
+ jnz L(first_aligned_loop_return)
- .p2align 4
-L(aligned_loop):
- vmovdqa (%rdi), %ymm1
- VPCMPEQ %ymm1, %ymm0, %ymm2
- addq $VEC_SIZE, %rdi
- VPCMPEQ %ymm1, %ymm4, %ymm3
- vpmovmskb %ymm2, %ecx
- vpmovmskb %ymm3, %eax
- orl %eax, %ecx
- jnz L(char_nor_null)
-
- vmovdqa (%rdi), %ymm1
- VPCMPEQ %ymm1, %ymm0, %ymm2
- add $VEC_SIZE, %rdi
- VPCMPEQ %ymm1, %ymm4, %ymm3
- vpmovmskb %ymm2, %ecx
+ .p2align 4,, 4
+L(first_vec_x1_or_x2):
+ VPCMPEQ %ymm3, %ymm7, %ymm3
+ VPCMPEQ %ymm2, %ymm7, %ymm2
vpmovmskb %ymm3, %eax
- orl %eax, %ecx
- jnz L(char_nor_null)
-
- vmovdqa (%rdi), %ymm1
- VPCMPEQ %ymm1, %ymm0, %ymm2
- addq $VEC_SIZE, %rdi
- VPCMPEQ %ymm1, %ymm4, %ymm3
- vpmovmskb %ymm2, %ecx
- vpmovmskb %ymm3, %eax
- orl %eax, %ecx
- jnz L(char_nor_null)
-
- vmovdqa (%rdi), %ymm1
- VPCMPEQ %ymm1, %ymm0, %ymm2
- addq $VEC_SIZE, %rdi
- VPCMPEQ %ymm1, %ymm4, %ymm3
- vpmovmskb %ymm2, %ecx
- vpmovmskb %ymm3, %eax
- orl %eax, %ecx
- jz L(aligned_loop)
-
- .p2align 4
-L(char_nor_null):
- /* Find a CHAR or a nul CHAR in a loop. */
- testl %eax, %eax
- jnz L(match)
-L(return_value):
- testl %edx, %edx
- jz L(return_null)
- movl %edx, %eax
- movq %rsi, %rdi
+ vpmovmskb %ymm2, %edx
+ /* Use add for macro-fusion. */
+ addq %rax, %rdx
+ jz L(first_vec_x0_test)
+ /* NB: We could move this shift to before the branch and save a
+ bit of code size / performance on the fall through. The
+ branch leads to the null case which generally seems hotter
+ than char in first 3x VEC. */
+ salq $32, %rax
+ addq %rdx, %rax
+ bsrq %rax, %rax
+ leaq 1(%rsi, %rax), %rax
+# ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+# endif
+ VZEROUPPER_RETURN
+ .p2align 4,, 8
+L(first_aligned_loop_return):
+ VPCMPEQ %ymm4, %ymm0, %ymm4
+ vpmovmskb %ymm4, %edx
+ salq $32, %rcx
+ orq %rdx, %rcx
+
+ vpmovmskb %ymm10, %eax
+ vpmovmskb %ymm6, %edx
+ salq $32, %rax
+ orq %rdx, %rax
+ blsmskq %rcx, %rcx
+ andq %rcx, %rax
+ jz L(first_vec_x1_or_x2)
+
+ bsrq %rax, %rax
+ leaq -(VEC_SIZE * 2)(%rdi, %rax), %rax
# ifdef USE_AS_WCSRCHR
- /* Keep the first bit for each matching CHAR for bsr. */
- andl $0x11111111, %eax
+ andq $-CHAR_SIZE, %rax
# endif
- bsrl %eax, %eax
- leaq -VEC_SIZE(%rdi, %rax), %rax
-L(return_vzeroupper):
- ZERO_UPPER_VEC_REGISTERS_RETURN
+ VZEROUPPER_RETURN
+ /* Search char cannot be zero. */
.p2align 4
-L(match):
- /* Find a CHAR. Check if there is a nul CHAR. */
- vpmovmskb %ymm2, %ecx
- testl %ecx, %ecx
- jnz L(find_nul)
-
- /* Remember the match and keep searching. */
- movl %eax, %edx
+L(second_aligned_loop_set_furthest_match):
+ /* Save VEC and pointer from most recent match. */
+L(second_aligned_loop_prep):
movq %rdi, %rsi
- jmp L(aligned_loop)
+ vmovdqu %ymm6, %ymm2
+ vmovdqu %ymm10, %ymm3
.p2align 4
-L(find_nul):
-# ifdef USE_AS_WCSRCHR
- /* Keep the first bit for each matching CHAR for bsr. */
- andl $0x11111111, %ecx
- andl $0x11111111, %eax
-# endif
- /* Mask out any matching bits after the nul CHAR. */
- movl %ecx, %r8d
- subl $1, %r8d
- xorl %ecx, %r8d
- andl %r8d, %eax
+L(second_aligned_loop):
+ /* Search 2x VEC at a time. */
+ vmovdqa (VEC_SIZE * 0)(%rdi), %ymm4
+ vmovdqa (VEC_SIZE * 1)(%rdi), %ymm5
+
+ VPCMPEQ %ymm4, %ymm7, %ymm6
+ VPMIN %ymm4, %ymm5, %ymm1
+ VPCMPEQ %ymm5, %ymm7, %ymm10
+ vpor %ymm6, %ymm10, %ymm5
+ VPCMPEQ %ymm1, %ymm0, %ymm1
+ vpor %ymm5, %ymm1, %ymm9
+
+ vpmovmskb %ymm9, %eax
+ addq $(VEC_SIZE * 2), %rdi
testl %eax, %eax
- /* If there is no CHAR here, return the remembered one. */
- jz L(return_value)
- bsrl %eax, %eax
- leaq -VEC_SIZE(%rdi, %rax), %rax
- VZEROUPPER_RETURN
-
- .p2align 4
-L(char_and_nul):
- /* Find both a CHAR and a nul CHAR. */
- addq %rcx, %rdi
- movl %edx, %ecx
-L(char_and_nul_in_first_vec):
-# ifdef USE_AS_WCSRCHR
- /* Keep the first bit for each matching CHAR for bsr. */
- andl $0x11111111, %ecx
- andl $0x11111111, %eax
-# endif
- /* Mask out any matching bits after the nul CHAR. */
- movl %ecx, %r8d
- subl $1, %r8d
- xorl %ecx, %r8d
- andl %r8d, %eax
+ jz L(second_aligned_loop)
+ vpmovmskb %ymm1, %ecx
+ testl %ecx, %ecx
+ jz L(second_aligned_loop_set_furthest_match)
+ vpmovmskb %ymm5, %eax
testl %eax, %eax
- /* Return null pointer if the nul CHAR comes first. */
- jz L(return_null)
- bsrl %eax, %eax
- leaq -VEC_SIZE(%rdi, %rax), %rax
+ jnz L(return_new_match)
+
+ /* This is the hot path. We know CHAR is in bounds and that
+ ymm3/ymm2 have latest match. */
+ .p2align 4,, 4
+L(return_old_match):
+ vpmovmskb %ymm3, %eax
+ vpmovmskb %ymm2, %edx
+ salq $32, %rax
+ orq %rdx, %rax
+ bsrq %rax, %rax
+ /* Search char cannot be zero so safe to just use lea for
+ wcsrchr. */
+ leaq (VEC_SIZE * -2 -(CHAR_SIZE - 1))(%rsi, %rax), %rax
VZEROUPPER_RETURN
- .p2align 4
-L(return_null):
- xorl %eax, %eax
+ /* Last iteration also potentially has a match. */
+ .p2align 4,, 8
+L(return_new_match):
+ VPCMPEQ %ymm4, %ymm0, %ymm4
+ vpmovmskb %ymm4, %edx
+ salq $32, %rcx
+ orq %rdx, %rcx
+
+ vpmovmskb %ymm10, %eax
+ vpmovmskb %ymm6, %edx
+ salq $32, %rax
+ orq %rdx, %rax
+ blsmskq %rcx, %rcx
+ andq %rcx, %rax
+ jz L(return_old_match)
+ bsrq %rax, %rax
+ /* Search char cannot be zero so safe to just use lea for
+ wcsrchr. */
+ leaq (VEC_SIZE * -2 -(CHAR_SIZE - 1))(%rdi, %rax), %rax
VZEROUPPER_RETURN
-END (STRRCHR)
+ .p2align 4,, 4
+L(cross_page):
+ movq %rdi, %rsi
+ andq $-VEC_SIZE, %rsi
+ vmovdqu (%rsi), %ymm1
+ VPCMPEQ %ymm1, %ymm0, %ymm6
+ vpmovmskb %ymm6, %ecx
+ /* Shift out zero CHAR matches that are before the beginning of
+ src (rdi). */
+ shrxl %edi, %ecx, %ecx
+ testl %ecx, %ecx
+ jz L(page_cross_continue)
+ VPCMPEQ %ymm1, %ymm7, %ymm1
+ vpmovmskb %ymm1, %eax
+
+ /* Shift out search CHAR matches that are before the beginning of
+ src (rdi). */
+ shrxl %edi, %eax, %eax
+ blsmskl %ecx, %ecx
+ /* Check if any search CHAR match in range. */
+ andl %ecx, %eax
+ jz L(ret2)
+ bsrl %eax, %eax
+ addq %rdi, %rax
+# ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+# endif
+L(ret2):
+ VZEROUPPER_RETURN
+END(STRRCHR)
#endif
# define STRRCHR __strrchr_evex
# endif
-# define VMOVU vmovdqu64
-# define VMOVA vmovdqa64
+# define VMOVU vmovdqu64
+# define VMOVA vmovdqa64
# ifdef USE_AS_WCSRCHR
+# define SHIFT_REG esi
+
+# define kunpck kunpckbw
+# define kmov_2x kmovd
+# define maskz_2x ecx
+# define maskm_2x eax
+# define CHAR_SIZE 4
+# define VPMIN vpminud
+# define VPTESTN vptestnmd
# define VPBROADCAST vpbroadcastd
-# define VPCMP vpcmpd
-# define SHIFT_REG r8d
+# define VPCMP vpcmpd
# else
+# define SHIFT_REG edi
+
+# define kunpck kunpckdq
+# define kmov_2x kmovq
+# define maskz_2x rcx
+# define maskm_2x rax
+
+# define CHAR_SIZE 1
+# define VPMIN vpminub
+# define VPTESTN vptestnmb
# define VPBROADCAST vpbroadcastb
-# define VPCMP vpcmpb
-# define SHIFT_REG ecx
+# define VPCMP vpcmpb
# endif
# define XMMZERO xmm16
# define YMMZERO ymm16
# define YMMMATCH ymm17
-# define YMM1 ymm18
+# define YMMSAVE ymm18
+
+# define YMM1 ymm19
+# define YMM2 ymm20
+# define YMM3 ymm21
+# define YMM4 ymm22
+# define YMM5 ymm23
+# define YMM6 ymm24
+# define YMM7 ymm25
+# define YMM8 ymm26
-# define VEC_SIZE 32
- .section .text.evex,"ax",@progbits
-ENTRY (STRRCHR)
- movl %edi, %ecx
+# define VEC_SIZE 32
+# define PAGE_SIZE 4096
+ .section .text.evex, "ax", @progbits
+ENTRY(STRRCHR)
+ movl %edi, %eax
/* Broadcast CHAR to YMMMATCH. */
VPBROADCAST %esi, %YMMMATCH
- vpxorq %XMMZERO, %XMMZERO, %XMMZERO
-
- /* Check if we may cross page boundary with one vector load. */
- andl $(2 * VEC_SIZE - 1), %ecx
- cmpl $VEC_SIZE, %ecx
- ja L(cros_page_boundary)
+ andl $(PAGE_SIZE - 1), %eax
+ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
+ jg L(cross_page_boundary)
+L(page_cross_continue):
VMOVU (%rdi), %YMM1
-
- /* Each bit in K0 represents a null byte in YMM1. */
- VPCMP $0, %YMMZERO, %YMM1, %k0
- /* Each bit in K1 represents a CHAR in YMM1. */
- VPCMP $0, %YMMMATCH, %YMM1, %k1
+ /* k0 has a 1 for each zero CHAR in YMM1. */
+ VPTESTN %YMM1, %YMM1, %k0
kmovd %k0, %ecx
- kmovd %k1, %eax
-
- addq $VEC_SIZE, %rdi
-
- testl %eax, %eax
- jnz L(first_vec)
-
testl %ecx, %ecx
- jnz L(return_null)
-
- andq $-VEC_SIZE, %rdi
- xorl %edx, %edx
- jmp L(aligned_loop)
-
- .p2align 4
-L(first_vec):
- /* Check if there is a null byte. */
- testl %ecx, %ecx
- jnz L(char_and_nul_in_first_vec)
-
- /* Remember the match and keep searching. */
- movl %eax, %edx
- movq %rdi, %rsi
- andq $-VEC_SIZE, %rdi
- jmp L(aligned_loop)
-
- .p2align 4
-L(cros_page_boundary):
- andl $(VEC_SIZE - 1), %ecx
- andq $-VEC_SIZE, %rdi
+ jz L(aligned_more)
+ /* fallthrough: zero CHAR in first VEC. */
+ /* K1 has a 1 for each search CHAR match in YMM1. */
+ VPCMP $0, %YMMMATCH, %YMM1, %k1
+ kmovd %k1, %eax
+ /* Build mask up until first zero CHAR (used to mask off
+ potential search CHAR matches past the end of the string).
+ */
+ blsmskl %ecx, %ecx
+ andl %ecx, %eax
+ jz L(ret0)
+ /* Get last match (the `andl` removed any out of bounds
+ matches). */
+ bsrl %eax, %eax
# ifdef USE_AS_WCSRCHR
- /* NB: Divide shift count by 4 since each bit in K1 represent 4
- bytes. */
- movl %ecx, %SHIFT_REG
- sarl $2, %SHIFT_REG
+ leaq (%rdi, %rax, CHAR_SIZE), %rax
+# else
+ addq %rdi, %rax
# endif
+L(ret0):
+ ret
- VMOVA (%rdi), %YMM1
-
- /* Each bit in K0 represents a null byte in YMM1. */
- VPCMP $0, %YMMZERO, %YMM1, %k0
- /* Each bit in K1 represents a CHAR in YMM1. */
+ /* Returns for first vec x1/x2/x3 have hard coded backward
+ search path for earlier matches. */
+ .p2align 4,, 6
+L(first_vec_x1):
+ VPCMP $0, %YMMMATCH, %YMM2, %k1
+ kmovd %k1, %eax
+ blsmskl %ecx, %ecx
+ /* eax non-zero if search CHAR in range. */
+ andl %ecx, %eax
+ jnz L(first_vec_x1_return)
+
+ /* fallthrough: no match in YMM2 then need to check for earlier
+ matches (in YMM1). */
+ .p2align 4,, 4
+L(first_vec_x0_test):
VPCMP $0, %YMMMATCH, %YMM1, %k1
- kmovd %k0, %edx
kmovd %k1, %eax
-
- shrxl %SHIFT_REG, %edx, %edx
- shrxl %SHIFT_REG, %eax, %eax
- addq $VEC_SIZE, %rdi
-
- /* Check if there is a CHAR. */
testl %eax, %eax
- jnz L(found_char)
-
- testl %edx, %edx
- jnz L(return_null)
-
- jmp L(aligned_loop)
-
- .p2align 4
-L(found_char):
- testl %edx, %edx
- jnz L(char_and_nul)
-
- /* Remember the match and keep searching. */
- movl %eax, %edx
- leaq (%rdi, %rcx), %rsi
+ jz L(ret1)
+ bsrl %eax, %eax
+# ifdef USE_AS_WCSRCHR
+ leaq (%rsi, %rax, CHAR_SIZE), %rax
+# else
+ addq %rsi, %rax
+# endif
+L(ret1):
+ ret
- .p2align 4
-L(aligned_loop):
- VMOVA (%rdi), %YMM1
- addq $VEC_SIZE, %rdi
+ .p2align 4,, 10
+L(first_vec_x1_or_x2):
+ VPCMP $0, %YMM3, %YMMMATCH, %k3
+ VPCMP $0, %YMM2, %YMMMATCH, %k2
+ /* K2 and K3 have 1 for any search CHAR match. Test whether
+ either of them has a match. Otherwise check YMM1. */
+ kortestd %k2, %k3
+ jz L(first_vec_x0_test)
+
+ /* Guaranteed that YMM2 and YMM3 are within range so merge the
+ two bitmasks then get last result. */
+ kunpck %k2, %k3, %k3
+ kmovq %k3, %rax
+ bsrq %rax, %rax
+ leaq (VEC_SIZE)(%r8, %rax, CHAR_SIZE), %rax
+ ret
- /* Each bit in K0 represents a null byte in YMM1. */
- VPCMP $0, %YMMZERO, %YMM1, %k0
- /* Each bit in K1 represents a CHAR in YMM1. */
- VPCMP $0, %YMMMATCH, %YMM1, %k1
- kmovd %k0, %ecx
+ .p2align 4,, 6
+L(first_vec_x3):
+ VPCMP $0, %YMMMATCH, %YMM4, %k1
kmovd %k1, %eax
- orl %eax, %ecx
- jnz L(char_nor_null)
+ blsmskl %ecx, %ecx
+ /* If no search CHAR match in range check YMM1/YMM2/YMM3. */
+ andl %ecx, %eax
+ jz L(first_vec_x1_or_x2)
+ bsrl %eax, %eax
+ leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
+ ret
- VMOVA (%rdi), %YMM1
- add $VEC_SIZE, %rdi
+ .p2align 4,, 6
+L(first_vec_x0_x1_test):
+ VPCMP $0, %YMMMATCH, %YMM2, %k1
+ kmovd %k1, %eax
+ /* Check YMM2 for last match first. If no match try YMM1. */
+ testl %eax, %eax
+ jz L(first_vec_x0_test)
+ .p2align 4,, 4
+L(first_vec_x1_return):
+ bsrl %eax, %eax
+ leaq (VEC_SIZE)(%rdi, %rax, CHAR_SIZE), %rax
+ ret
- /* Each bit in K0 represents a null byte in YMM1. */
- VPCMP $0, %YMMZERO, %YMM1, %k0
- /* Each bit in K1 represents a CHAR in YMM1. */
- VPCMP $0, %YMMMATCH, %YMM1, %k1
- kmovd %k0, %ecx
+ .p2align 4,, 10
+L(first_vec_x2):
+ VPCMP $0, %YMMMATCH, %YMM3, %k1
kmovd %k1, %eax
- orl %eax, %ecx
- jnz L(char_nor_null)
+ blsmskl %ecx, %ecx
+ /* Check YMM3 for last match first. If no match try YMM2/YMM1.
+ */
+ andl %ecx, %eax
+ jz L(first_vec_x0_x1_test)
+ bsrl %eax, %eax
+ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
+ ret
- VMOVA (%rdi), %YMM1
- addq $VEC_SIZE, %rdi
- /* Each bit in K0 represents a null byte in YMM1. */
- VPCMP $0, %YMMZERO, %YMM1, %k0
- /* Each bit in K1 represents a CHAR in YMM1. */
- VPCMP $0, %YMMMATCH, %YMM1, %k1
+ .p2align 4
+L(aligned_more):
+ /* Need to keep original pointer in case YMM1 has last match. */
+ movq %rdi, %rsi
+ andq $-VEC_SIZE, %rdi
+ VMOVU VEC_SIZE(%rdi), %YMM2
+ VPTESTN %YMM2, %YMM2, %k0
kmovd %k0, %ecx
- kmovd %k1, %eax
- orl %eax, %ecx
- jnz L(char_nor_null)
+ testl %ecx, %ecx
+ jnz L(first_vec_x1)
- VMOVA (%rdi), %YMM1
- addq $VEC_SIZE, %rdi
+ VMOVU (VEC_SIZE * 2)(%rdi), %YMM3
+ VPTESTN %YMM3, %YMM3, %k0
+ kmovd %k0, %ecx
+ testl %ecx, %ecx
+ jnz L(first_vec_x2)
- /* Each bit in K0 represents a null byte in YMM1. */
- VPCMP $0, %YMMZERO, %YMM1, %k0
- /* Each bit in K1 represents a CHAR in YMM1. */
- VPCMP $0, %YMMMATCH, %YMM1, %k1
+ VMOVU (VEC_SIZE * 3)(%rdi), %YMM4
+ VPTESTN %YMM4, %YMM4, %k0
kmovd %k0, %ecx
- kmovd %k1, %eax
- orl %eax, %ecx
- jz L(aligned_loop)
+ movq %rdi, %r8
+ testl %ecx, %ecx
+ jnz L(first_vec_x3)
+ andq $-(VEC_SIZE * 2), %rdi
.p2align 4
-L(char_nor_null):
- /* Find a CHAR or a null byte in a loop. */
+L(first_aligned_loop):
+ /* Preserve YMM1, YMM2, YMM3, and YMM4 until we can guarantee
+ they don't store a match. */
+ VMOVA (VEC_SIZE * 4)(%rdi), %YMM5
+ VMOVA (VEC_SIZE * 5)(%rdi), %YMM6
+
+ VPCMP $0, %YMM5, %YMMMATCH, %k2
+ vpxord %YMM6, %YMMMATCH, %YMM7
+
+ VPMIN %YMM5, %YMM6, %YMM8
+ VPMIN %YMM8, %YMM7, %YMM7
+
+ VPTESTN %YMM7, %YMM7, %k1
+ subq $(VEC_SIZE * -2), %rdi
+ kortestd %k1, %k2
+ jz L(first_aligned_loop)
+
+ VPCMP $0, %YMM6, %YMMMATCH, %k3
+ VPTESTN %YMM8, %YMM8, %k1
+ ktestd %k1, %k1
+ jz L(second_aligned_loop_prep)
+
+ kortestd %k2, %k3
+ jnz L(return_first_aligned_loop)
+
+ .p2align 4,, 6
+L(first_vec_x1_or_x2_or_x3):
+ VPCMP $0, %YMM4, %YMMMATCH, %k4
+ kmovd %k4, %eax
testl %eax, %eax
- jnz L(match)
-L(return_value):
- testl %edx, %edx
- jz L(return_null)
- movl %edx, %eax
- movq %rsi, %rdi
+ jz L(first_vec_x1_or_x2)
bsrl %eax, %eax
-# ifdef USE_AS_WCSRCHR
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- leaq -VEC_SIZE(%rdi, %rax, 4), %rax
-# else
- leaq -VEC_SIZE(%rdi, %rax), %rax
-# endif
+ leaq (VEC_SIZE * 3)(%r8, %rax, CHAR_SIZE), %rax
ret
- .p2align 4
-L(match):
- /* Find a CHAR. Check if there is a null byte. */
- kmovd %k0, %ecx
- testl %ecx, %ecx
- jnz L(find_nul)
+ .p2align 4,, 8
+L(return_first_aligned_loop):
+ VPTESTN %YMM5, %YMM5, %k0
+ kunpck %k0, %k1, %k0
+ kmov_2x %k0, %maskz_2x
+
+ blsmsk %maskz_2x, %maskz_2x
+ kunpck %k2, %k3, %k3
+ kmov_2x %k3, %maskm_2x
+ and %maskz_2x, %maskm_2x
+ jz L(first_vec_x1_or_x2_or_x3)
- /* Remember the match and keep searching. */
- movl %eax, %edx
+ bsr %maskm_2x, %maskm_2x
+ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
+ ret
+
+ .p2align 4
+ /* We can throw away the work done for the first 4x checks here
+ as we have a later match. This is the 'fast' path per se.
+ */
+L(second_aligned_loop_prep):
+L(second_aligned_loop_set_furthest_match):
movq %rdi, %rsi
- jmp L(aligned_loop)
+ kunpck %k2, %k3, %k4
.p2align 4
-L(find_nul):
- /* Mask out any matching bits after the null byte. */
- movl %ecx, %r8d
- subl $1, %r8d
- xorl %ecx, %r8d
- andl %r8d, %eax
- testl %eax, %eax
- /* If there is no CHAR here, return the remembered one. */
- jz L(return_value)
- bsrl %eax, %eax
+L(second_aligned_loop):
+ VMOVU (VEC_SIZE * 4)(%rdi), %YMM1
+ VMOVU (VEC_SIZE * 5)(%rdi), %YMM2
+
+ VPCMP $0, %YMM1, %YMMMATCH, %k2
+ vpxord %YMM2, %YMMMATCH, %YMM3
+
+ VPMIN %YMM1, %YMM2, %YMM4
+ VPMIN %YMM3, %YMM4, %YMM3
+
+ VPTESTN %YMM3, %YMM3, %k1
+ subq $(VEC_SIZE * -2), %rdi
+ kortestd %k1, %k2
+ jz L(second_aligned_loop)
+
+ VPCMP $0, %YMM2, %YMMMATCH, %k3
+ VPTESTN %YMM4, %YMM4, %k1
+ ktestd %k1, %k1
+ jz L(second_aligned_loop_set_furthest_match)
+
+ kortestd %k2, %k3
+ /* branch here because there is a significant advantage in terms
+ of output dependency chain in using edx. */
+ jnz L(return_new_match)
+L(return_old_match):
+ kmovq %k4, %rax
+ bsrq %rax, %rax
+ leaq (VEC_SIZE * 2)(%rsi, %rax, CHAR_SIZE), %rax
+ ret
+
+L(return_new_match):
+ VPTESTN %YMM1, %YMM1, %k0
+ kunpck %k0, %k1, %k0
+ kmov_2x %k0, %maskz_2x
+
+ blsmsk %maskz_2x, %maskz_2x
+ kunpck %k2, %k3, %k3
+ kmov_2x %k3, %maskm_2x
+ and %maskz_2x, %maskm_2x
+ jz L(return_old_match)
+
+ bsr %maskm_2x, %maskm_2x
+ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
+ ret
+
+L(cross_page_boundary):
+ /* eax contains all the page offset bits of src (rdi). `xor rdi,
+ rax` sets pointer with all page offset bits cleared so
+ offset of (PAGE_SIZE - VEC_SIZE) will get last aligned VEC
+ before page cross (guaranteed to be safe to read). Doing this
+ as opposed to `movq %rdi, %rax; andq $-VEC_SIZE, %rax` saves
+ a bit of code size. */
+ xorq %rdi, %rax
+ VMOVU (PAGE_SIZE - VEC_SIZE)(%rax), %YMM1
+ VPTESTN %YMM1, %YMM1, %k0
+ kmovd %k0, %ecx
+
+ /* Shift out zero CHAR matches that are before the beginning of
+ src (rdi). */
# ifdef USE_AS_WCSRCHR
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- leaq -VEC_SIZE(%rdi, %rax, 4), %rax
-# else
- leaq -VEC_SIZE(%rdi, %rax), %rax
+ movl %edi, %esi
+ andl $(VEC_SIZE - 1), %esi
+ shrl $2, %esi
# endif
- ret
+ shrxl %SHIFT_REG, %ecx, %ecx
- .p2align 4
-L(char_and_nul):
- /* Find both a CHAR and a null byte. */
- addq %rcx, %rdi
- movl %edx, %ecx
-L(char_and_nul_in_first_vec):
- /* Mask out any matching bits after the null byte. */
- movl %ecx, %r8d
- subl $1, %r8d
- xorl %ecx, %r8d
- andl %r8d, %eax
- testl %eax, %eax
- /* Return null pointer if the null byte comes first. */
- jz L(return_null)
+ testl %ecx, %ecx
+ jz L(page_cross_continue)
+
+ /* Found zero CHAR so need to test for search CHAR. */
+ VPCMP $0, %YMMMATCH, %YMM1, %k1
+ kmovd %k1, %eax
+ /* Shift out search CHAR matches that are before the beginning of
+ src (rdi). */
+ shrxl %SHIFT_REG, %eax, %eax
+
+ /* Check if any search CHAR match in range. */
+ blsmskl %ecx, %ecx
+ andl %ecx, %eax
+ jz L(ret3)
bsrl %eax, %eax
# ifdef USE_AS_WCSRCHR
- /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
- leaq -VEC_SIZE(%rdi, %rax, 4), %rax
+ leaq (%rdi, %rax, CHAR_SIZE), %rax
# else
- leaq -VEC_SIZE(%rdi, %rax), %rax
+ addq %rdi, %rax
# endif
+L(ret3):
ret
- .p2align 4
-L(return_null):
- xorl %eax, %eax
- ret
-
-END (STRRCHR)
+END(STRRCHR)
#endif
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
-# define strrchr __strrchr_sse2
+# define STRRCHR __strrchr_sse2
# undef weak_alias
# define weak_alias(strrchr, rindex)
return 0;
const char *aligned;
- __m128i mask;
- int offset = (int) ((size_t) a & 15);
+ __m128i mask, maskz, zero;
+ unsigned int maskz_bits;
+ unsigned int offset = (int) ((size_t) a & 15);
+ zero = _mm_set1_epi8 (0);
if (offset != 0)
{
/* Load masks. */
aligned = (const char *) ((size_t) a & -16L);
__m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
-
- mask = __m128i_shift_right (mask0, offset);
+ maskz = _mm_cmpeq_epi8 (mask0, zero);
/* Find where the NULL terminator is. */
- int length = _mm_cmpistri (mask, mask, 0x3a);
- if (length == 16 - offset)
- {
- /* There is no NULL terminator. */
- __m128i mask1 = _mm_load_si128 ((__m128i *) (aligned + 16));
- int index = _mm_cmpistri (mask1, mask1, 0x3a);
- length += index;
-
- /* Don't use SSE4.2 if the length of A > 16. */
- if (length > 16)
- return __strspn_sse2 (s, a);
-
- if (index != 0)
- {
- /* Combine mask0 and mask1. We could play games with
- palignr, but frankly this data should be in L1 now
- so do the merge via an unaligned load. */
- mask = _mm_loadu_si128 ((__m128i *) a);
- }
- }
+ maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
+ if (maskz_bits != 0)
+ {
+ mask = __m128i_shift_right (mask0, offset);
+ offset = (unsigned int) ((size_t) s & 15);
+ if (offset)
+ goto start_unaligned;
+
+ aligned = s;
+ goto start_loop;
+ }
}
- else
- {
- /* A is aligned. */
- mask = _mm_load_si128 ((__m128i *) a);
- /* Find where the NULL terminator is. */
- int length = _mm_cmpistri (mask, mask, 0x3a);
- if (length == 16)
- {
- /* There is no NULL terminator. Don't use SSE4.2 if the length
- of A > 16. */
- if (a[16] != 0)
- return __strspn_sse2 (s, a);
- }
+ /* A is aligned, or has no NUL before the next 16-byte boundary. */
+ mask = _mm_loadu_si128 ((__m128i *) a);
+
+ /* Find where the NULL terminator is. */
+ maskz = _mm_cmpeq_epi8 (mask, zero);
+ maskz_bits = _mm_movemask_epi8 (maskz);
+ if (maskz_bits == 0)
+ {
+ /* There is no NULL terminator. Don't use SSE4.2 if the length
+ of A > 16. */
+ if (a[16] != 0)
+ return __strspn_sse2 (s, a);
}
+ aligned = s;
+ offset = (unsigned int) ((size_t) s & 15);
- offset = (int) ((size_t) s & 15);
if (offset != 0)
{
+ start_unaligned:
/* Check partial string. */
aligned = (const char *) ((size_t) s & -16L);
__m128i value = _mm_load_si128 ((__m128i *) aligned);
+ __m128i adj_value = __m128i_shift_right (value, offset);
- value = __m128i_shift_right (value, offset);
-
- int length = _mm_cmpistri (mask, value, 0x12);
+ unsigned int length = _mm_cmpistri (mask, adj_value, 0x12);
/* No need to check CFlag since it is always 1. */
if (length < 16 - offset)
return length;
/* Find where the NULL terminator is. */
- int index = _mm_cmpistri (value, value, 0x3a);
- if (index < 16 - offset)
+ maskz = _mm_cmpeq_epi8 (value, zero);
+ maskz_bits = _mm_movemask_epi8 (maskz) >> offset;
+ if (maskz_bits != 0)
return length;
aligned += 16;
}
- else
- aligned = s;
+start_loop:
while (1)
{
__m128i value = _mm_load_si128 ((__m128i *) aligned);
- int index = _mm_cmpistri (mask, value, 0x12);
- int cflag = _mm_cmpistrc (mask, value, 0x12);
+ unsigned int index = _mm_cmpistri (mask, value, 0x12);
+ unsigned int cflag = _mm_cmpistrc (mask, value, 0x12);
if (cflag)
return (size_t) (aligned + index - s);
aligned += 16;
+++ /dev/null
-/* strspn optimized with SSE2.
- Copyright (C) 2017-2021 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#if IS_IN (libc)
-
-# include <sysdep.h>
-# define strspn __strspn_sse2
-
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(strspn)
-#endif
-
-#include <sysdeps/x86_64/strspn.S>
--- /dev/null
+/* strspn optimized with SSE2.
+ Copyright (C) 2017-2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+# define STRSPN __strspn_sse2
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(STRSPN)
+#endif
+
+#include <string/strspn.c>
--- /dev/null
+/* strstr optimized with 512-bit AVX-512 instructions
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <immintrin.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <string.h>
+
+#define FULL_MMASK64 0xffffffffffffffff
+#define ONE_64BIT 0x1ull
+#define ZMM_SIZE_IN_BYTES 64
+#define PAGESIZE 4096
+
+#define cvtmask64_u64(...) (uint64_t) (__VA_ARGS__)
+#define kshiftri_mask64(x, y) ((x) >> (y))
+#define kand_mask64(x, y) ((x) & (y))
+
+/*
+ Returns the index of the first edge within the needle (the first IND with
+ ned[IND] != ned[IND + 1], NUL excluded); returns 0 if no edge is found.
+ Example: 'ab' (index 9) is the first edge in 'aaaaaaaaaabaarddg'. */
+static inline size_t
+find_edge_in_needle (const char *ned)
+{
+ size_t ind = 0;
+ while (ned[ind + 1] != '\0') /* stops at the last char; reads ned[1] first */
+ {
+ if (ned[ind] != ned[ind + 1])
+ return ind;
+ else
+ ind = ind + 1;
+ }
+ return 0; /* every char before the NUL is the same */
+}
+
+/*
+ Compare ned[ind...] with hay[hay_index + ind...] byte by byte until ned's
+ NUL; true if every byte matched. IND is the needle offset to start from. */
+static inline bool
+verify_string_match (const char *hay, const size_t hay_index, const char *ned,
+ size_t ind)
+{
+ while (ned[ind] != '\0')
+ {
+ if (ned[ind] != hay[hay_index + ind]) /* also fails on hay's NUL */
+ return false;
+ ind = ind + 1;
+ }
+ return true;
+}
+
+/*
+ Compare needle with haystack at specified location. The first 64 bytes are
+ compared using a ZMM register, only in the lanes set in NED_MASK; the scalar
+ compare of the remaining bytes runs only when NED_MASK is full. */
+static inline bool
+verify_string_match_avx512 (const char *hay, const size_t hay_index,
+ const char *ned, const __mmask64 ned_mask,
+ const __m512i ned_zmm)
+{
+ /* check first 64 bytes using zmm and then scalar */
+ __m512i hay_zmm = _mm512_loadu_si512 (hay + hay_index); // safe to do so
+ __mmask64 match = _mm512_mask_cmpneq_epi8_mask (ned_mask, hay_zmm, ned_zmm);
+ if (match != 0x0) // failed the first few chars
+ return false;
+ else if (ned_mask == FULL_MMASK64) /* needle may extend past 64 bytes */
+ return verify_string_match (hay, hay_index, ned, ZMM_SIZE_IN_BYTES);
+ return true;
+}
+
+char *
+__strstr_avx512 (const char *haystack, const char *ned)
+{ /* Return the first occurrence of NED in HAYSTACK, or NULL if there is none */
+ char first = ned[0];
+ if (first == '\0')
+ return (char *)haystack;
+ if (ned[1] == '\0')
+ return (char *)strchr (haystack, ned[0]);
+
+ size_t edge = find_edge_in_needle (ned);
+
+ /* ensure haystack is as long as the pos of edge in needle */
+ for (int ii = 0; ii < edge; ++ii)
+ {
+ if (haystack[ii] == '\0')
+ return NULL;
+ }
+
+ /*
+ Load 64 bytes of the needle and save it to a zmm register
+ Read one cache line at a time to avoid loading across a page boundary
+ */
+ __mmask64 ned_load_mask = _bzhi_u64 (
+ FULL_MMASK64, 64 - ((uintptr_t) (ned) & 63));
+ __m512i ned_zmm = _mm512_maskz_loadu_epi8 (ned_load_mask, ned);
+ __mmask64 ned_nullmask
+ = _mm512_mask_testn_epi8_mask (ned_load_mask, ned_zmm, ned_zmm);
+
+ if (__glibc_unlikely (ned_nullmask == 0x0)) /* no NUL in this cache line */
+ {
+ ned_zmm = _mm512_loadu_si512 (ned);
+ ned_nullmask = _mm512_testn_epi8_mask (ned_zmm, ned_zmm);
+ ned_load_mask = ned_nullmask ^ (ned_nullmask - ONE_64BIT);
+ if (ned_nullmask != 0x0)
+ ned_load_mask = ned_load_mask >> 1;
+ }
+ else
+ {
+ ned_load_mask = ned_nullmask ^ (ned_nullmask - ONE_64BIT); /* up to NUL */
+ ned_load_mask = ned_load_mask >> 1; /* drop the NUL's own bit */
+ }
+ const __m512i ned0 = _mm512_set1_epi8 (ned[edge]);
+ const __m512i ned1 = _mm512_set1_epi8 (ned[edge + 1]);
+
+ /*
+ Read the bytes of haystack in the current cache line
+ */
+ size_t hay_index = edge;
+ __mmask64 loadmask = _bzhi_u64 (
+ FULL_MMASK64, 64 - ((uintptr_t) (haystack + hay_index) & 63));
+ /* First load is a partial cache line */
+ __m512i hay0 = _mm512_maskz_loadu_epi8 (loadmask, haystack + hay_index);
+ /* Search for NULL and compare only till null char */
+ uint64_t nullmask
+ = cvtmask64_u64 (_mm512_mask_testn_epi8_mask (loadmask, hay0, hay0));
+ uint64_t cmpmask = nullmask ^ (nullmask - ONE_64BIT);
+ cmpmask = cmpmask & cvtmask64_u64 (loadmask);
+ /* Search for the 2 characters of needle */
+ __mmask64 k0 = _mm512_cmpeq_epi8_mask (hay0, ned0);
+ __mmask64 k1 = _mm512_cmpeq_epi8_mask (hay0, ned1);
+ k1 = kshiftri_mask64 (k1, 1); /* line ned1 hits up with the byte before */
+ /* k2 masks tell us if both chars from needle match */
+ uint64_t k2 = cvtmask64_u64 (kand_mask64 (k0, k1)) & cmpmask;
+ /* For every match, search for the entire needle for a full match */
+ while (k2)
+ {
+ uint64_t bitcount = _tzcnt_u64 (k2);
+ k2 = _blsr_u64 (k2);
+ size_t match_pos = hay_index + bitcount - edge;
+ if (((uintptr_t) (haystack + match_pos) & (PAGESIZE - 1))
+ < PAGESIZE - 1 - ZMM_SIZE_IN_BYTES)
+ {
+ /*
+ * Use vector compare as long as you are not crossing a page
+ */
+ if (verify_string_match_avx512 (haystack, match_pos, ned,
+ ned_load_mask, ned_zmm))
+ return (char *)haystack + match_pos;
+ }
+ else
+ {
+ if (verify_string_match (haystack, match_pos, ned, 0))
+ return (char *)haystack + match_pos;
+ }
+ }
+ /* We haven't checked for potential match at the last char yet */
+ haystack = (const char *)(((uintptr_t) (haystack + hay_index) | 63));
+ hay_index = 0;
+
+ /*
+ Loop over one cache line at a time to prevent reading over page
+ boundary
+ */
+ __m512i hay1;
+ while (nullmask == 0)
+ {
+ hay0 = _mm512_loadu_si512 (haystack + hay_index);
+ hay1 = _mm512_load_si512 (haystack + hay_index
+ + 1); // Always 64 byte aligned
+ nullmask = cvtmask64_u64 (_mm512_testn_epi8_mask (hay1, hay1));
+ /* Compare only till null char */
+ cmpmask = nullmask ^ (nullmask - ONE_64BIT);
+ k0 = _mm512_cmpeq_epi8_mask (hay0, ned0);
+ k1 = _mm512_cmpeq_epi8_mask (hay1, ned1);
+ /* k2 masks tell us if both chars from needle match */
+ k2 = cvtmask64_u64 (kand_mask64 (k0, k1)) & cmpmask;
+ /* For every match, compare full strings for potential match */
+ while (k2)
+ {
+ uint64_t bitcount = _tzcnt_u64 (k2);
+ k2 = _blsr_u64 (k2);
+ size_t match_pos = hay_index + bitcount - edge;
+ if (((uintptr_t) (haystack + match_pos) & (PAGESIZE - 1))
+ < PAGESIZE - 1 - ZMM_SIZE_IN_BYTES)
+ {
+ /*
+ * Use vector compare as long as you are not crossing a page
+ */
+ if (verify_string_match_avx512 (haystack, match_pos, ned,
+ ned_load_mask, ned_zmm))
+ return (char *)haystack + match_pos;
+ }
+ else
+ {
+ /* Compare byte by byte */
+ if (verify_string_match (haystack, match_pos, ned, 0))
+ return (char *)haystack + match_pos;
+ }
+ }
+ hay_index += ZMM_SIZE_IN_BYTES;
+ }
+ return NULL;
+}
extern __typeof (__redirect_strstr) __strstr_sse2_unaligned attribute_hidden;
extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden;
+extern __typeof (__redirect_strstr) __strstr_avx512 attribute_hidden;
#include "init-arch.h"
/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
ifunc symbol properly. */
extern __typeof (__redirect_strstr) __libc_strstr;
-libc_ifunc (__libc_strstr,
- HAS_ARCH_FEATURE (Fast_Unaligned_Load)
- ? __strstr_sse2_unaligned
- : __strstr_sse2)
+static inline void *
+IFUNC_SELECTOR (void)
+{ /* Choose a strstr variant from what __get_cpu_features () reports */
+ const struct cpu_features *cpu_features = __get_cpu_features ();
+
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512) /* not opted out */
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512DQ)
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
+ return __strstr_avx512;
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
+ return __strstr_sse2_unaligned;
+
+ return __strstr_sse2; /* fallback when neither check above passes */
+}
+
+libc_ifunc_redirected (__redirect_strstr, __libc_strstr, IFUNC_SELECTOR ());
#undef strstr
strong_alias (__libc_strstr, strstr)
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include "varshift.h"
+#include <stdint.h>
-const int8_t ___m128i_shift_right[31] attribute_hidden =
+const int8_t ___m128i_shift_right[31] attribute_hidden
+ __attribute__((aligned(32))) =
{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
#include <stdint.h>
#include <tmmintrin.h>
-extern const int8_t ___m128i_shift_right[31] attribute_hidden;
+extern const int8_t ___m128i_shift_right[31] attribute_hidden
+ __attribute__ ((aligned (32)));
static __inline__ __m128i
__m128i_shift_right (__m128i value, unsigned long int offset)
--- /dev/null
+/* Macro helpers for VEC_{type}({vec_num})
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2022 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _VEC_MACROS_H
+#define _VEC_MACROS_H 1
+
+#ifndef VEC_SIZE
+# error "Never include this file directly. Always include a vector config."
+#endif
+
+/* Defines so we can use SSE2 / AVX2 / EVEX / EVEX512 encoding with same
+ VEC(N) values. */
+#define VEC_hi_xmm0 xmm16
+#define VEC_hi_xmm1 xmm17
+#define VEC_hi_xmm2 xmm18
+#define VEC_hi_xmm3 xmm19
+#define VEC_hi_xmm4 xmm20
+#define VEC_hi_xmm5 xmm21
+#define VEC_hi_xmm6 xmm22
+#define VEC_hi_xmm7 xmm23
+#define VEC_hi_xmm8 xmm24
+#define VEC_hi_xmm9 xmm25
+#define VEC_hi_xmm10 xmm26
+#define VEC_hi_xmm11 xmm27
+#define VEC_hi_xmm12 xmm28
+#define VEC_hi_xmm13 xmm29
+#define VEC_hi_xmm14 xmm30
+#define VEC_hi_xmm15 xmm31
+
+#define VEC_hi_ymm0 ymm16
+#define VEC_hi_ymm1 ymm17
+#define VEC_hi_ymm2 ymm18
+#define VEC_hi_ymm3 ymm19
+#define VEC_hi_ymm4 ymm20
+#define VEC_hi_ymm5 ymm21
+#define VEC_hi_ymm6 ymm22
+#define VEC_hi_ymm7 ymm23
+#define VEC_hi_ymm8 ymm24
+#define VEC_hi_ymm9 ymm25
+#define VEC_hi_ymm10 ymm26
+#define VEC_hi_ymm11 ymm27
+#define VEC_hi_ymm12 ymm28
+#define VEC_hi_ymm13 ymm29
+#define VEC_hi_ymm14 ymm30
+#define VEC_hi_ymm15 ymm31
+
+#define VEC_hi_zmm0 zmm16
+#define VEC_hi_zmm1 zmm17
+#define VEC_hi_zmm2 zmm18
+#define VEC_hi_zmm3 zmm19
+#define VEC_hi_zmm4 zmm20
+#define VEC_hi_zmm5 zmm21
+#define VEC_hi_zmm6 zmm22
+#define VEC_hi_zmm7 zmm23
+#define VEC_hi_zmm8 zmm24
+#define VEC_hi_zmm9 zmm25
+#define VEC_hi_zmm10 zmm26
+#define VEC_hi_zmm11 zmm27
+#define VEC_hi_zmm12 zmm28
+#define VEC_hi_zmm13 zmm29
+#define VEC_hi_zmm14 zmm30
+#define VEC_hi_zmm15 zmm31
+
+#define PRIMITIVE_VEC(vec, num) vec##num
+
+#define VEC_any_xmm(i) PRIMITIVE_VEC(xmm, i)
+#define VEC_any_ymm(i) PRIMITIVE_VEC(ymm, i)
+#define VEC_any_zmm(i) PRIMITIVE_VEC(zmm, i)
+
+#define VEC_hi_xmm(i) PRIMITIVE_VEC(VEC_hi_xmm, i)
+#define VEC_hi_ymm(i) PRIMITIVE_VEC(VEC_hi_ymm, i)
+#define VEC_hi_zmm(i) PRIMITIVE_VEC(VEC_hi_zmm, i)
+
+#endif
--- /dev/null
+#define STRLEN __wcslen_evex512
+#define USE_AS_WCSLEN 1
+
+#include "strlen-evex512.S"
#define AS_WCSLEN
#define strlen __wcslen_sse4_1
+#define SECTION(p) p##.sse4.1
#include "strlen-vec.S"
#define STRCMP __wcsncmp_avx2_rtm
#define USE_AS_STRNCMP 1
#define USE_AS_WCSCMP 1
-
+#define OVERFLOW_STRCMP __wcscmp_avx2_rtm
#include "strcmp-avx2-rtm.S"
#define STRCMP __wcsncmp_avx2
#define USE_AS_STRNCMP 1
#define USE_AS_WCSCMP 1
-
+#define OVERFLOW_STRCMP __wcscmp_avx2
#include "strcmp-avx2.S"
--- /dev/null
+#define STRLEN __wcsnlen_evex512
+#define USE_AS_WCSLEN 1
+#define USE_AS_STRNLEN 1
+
+#include "strlen-evex512.S"
#define AS_WCSLEN
#define AS_STRNLEN
#define strlen __wcsnlen_sse4_1
+#define SECTION(p) p##.sse4.1
#include "strlen-vec.S"
<https://www.gnu.org/licenses/>. */
#if IS_IN (libc)
-# define wcsrchr __wcsrchr_sse2
+# define STRRCHR __wcsrchr_sse2
#endif
-
#include "../wcsrchr.S"
movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
mov %fs:(%rax),%RDX_LP
- // XXX 5 byte should be before the function
- /* 5-byte NOP. */
- .byte 0x0f,0x1f,0x44,0x00,0x00
+ /* Either 1 or 5 bytes (depending on whether CET is enabled). */
+ .p2align 4
END2 (__strcasecmp)
# ifndef NO_NOLOCALE_ALIAS
weak_alias (__strcasecmp, strcasecmp)
movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
mov %fs:(%rax),%RCX_LP
- // XXX 5 byte should be before the function
- /* 5-byte NOP. */
- .byte 0x0f,0x1f,0x44,0x00,0x00
+ /* Either 1 or 5 bytes (depending on whether CET is enabled). */
+ .p2align 4
END2 (__strncasecmp)
# ifndef NO_NOLOCALE_ALIAS
weak_alias (__strncasecmp, strncasecmp)
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
.section .rodata.cst16,"aM",@progbits,16
.align 16
-.Lbelowupper:
- .quad 0x4040404040404040
- .quad 0x4040404040404040
-.Ltopupper:
- .quad 0x5b5b5b5b5b5b5b5b
- .quad 0x5b5b5b5b5b5b5b5b
-.Ltouppermask:
+.Llcase_min:
+ .quad 0x3f3f3f3f3f3f3f3f
+ .quad 0x3f3f3f3f3f3f3f3f
+.Llcase_max:
+ .quad 0x9999999999999999
+ .quad 0x9999999999999999
+.Lcase_add:
.quad 0x2020202020202020
.quad 0x2020202020202020
.previous
- movdqa .Lbelowupper(%rip), %xmm5
-# define UCLOW_reg %xmm5
- movdqa .Ltopupper(%rip), %xmm6
-# define UCHIGH_reg %xmm6
- movdqa .Ltouppermask(%rip), %xmm7
-# define LCQWORD_reg %xmm7
+ movdqa .Llcase_min(%rip), %xmm5
+# define LCASE_MIN_reg %xmm5
+ movdqa .Llcase_max(%rip), %xmm6
+# define LCASE_MAX_reg %xmm6
+ movdqa .Lcase_add(%rip), %xmm7
+# define CASE_ADD_reg %xmm7
#endif
cmp $0x30, %ecx
ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */
movhpd 8(%rdi), %xmm1
movhpd 8(%rsi), %xmm2
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-# define TOLOWER(reg1, reg2) \
- movdqa reg1, %xmm8; \
- movdqa UCHIGH_reg, %xmm9; \
- movdqa reg2, %xmm10; \
- movdqa UCHIGH_reg, %xmm11; \
- pcmpgtb UCLOW_reg, %xmm8; \
- pcmpgtb reg1, %xmm9; \
- pcmpgtb UCLOW_reg, %xmm10; \
- pcmpgtb reg2, %xmm11; \
- pand %xmm9, %xmm8; \
- pand %xmm11, %xmm10; \
- pand LCQWORD_reg, %xmm8; \
- pand LCQWORD_reg, %xmm10; \
- por %xmm8, reg1; \
- por %xmm10, reg2
- TOLOWER (%xmm1, %xmm2)
+# define TOLOWER(reg1, reg2) \
+ movdqa LCASE_MIN_reg, %xmm8; \
+ movdqa LCASE_MIN_reg, %xmm9; \
+ paddb reg1, %xmm8; \
+ paddb reg2, %xmm9; \
+ pcmpgtb LCASE_MAX_reg, %xmm8; \
+ pcmpgtb LCASE_MAX_reg, %xmm9; \
+ pandn CASE_ADD_reg, %xmm8; \
+ pandn CASE_ADD_reg, %xmm9; \
+ paddb %xmm8, reg1; \
+ paddb %xmm9, reg2
+ TOLOWER (%xmm1, %xmm2)
#else
# define TOLOWER(reg1, reg2)
#endif
.p2align 4
LABEL(Byte0):
- movzx (%rsi), %ecx
- movzx (%rdi), %eax
+ movzbl (%rsi), %ecx
+ movzbl (%rdi), %eax
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
+++ /dev/null
-/* strcspn (str, ss) -- Return the length of the initial segment of STR
- which contains no characters from SS.
- For AMD x86-64.
- Copyright (C) 1994-2021 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>.
- Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>.
- Adopted for x86-64 by Andreas Jaeger <aj@suse.de>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
- .text
-ENTRY (strcspn)
-
- movq %rdi, %rdx /* Save SRC. */
-
- /* First we create a table with flags for all possible characters.
- For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
- supported by the C string functions we have 256 characters.
- Before inserting marks for the stop characters we clear the whole
- table. */
- movq %rdi, %r8 /* Save value. */
- subq $256, %rsp /* Make space for 256 bytes. */
- cfi_adjust_cfa_offset(256)
- movl $32, %ecx /* 32*8 bytes = 256 bytes. */
- movq %rsp, %rdi
- xorl %eax, %eax /* We store 0s. */
- cld
- rep
- stosq
-
- movq %rsi, %rax /* Setup skipset. */
-
-/* For understanding the following code remember that %rcx == 0 now.
- Although all the following instruction only modify %cl we always
- have a correct zero-extended 64-bit value in %rcx. */
-
- .p2align 4
-L(2): movb (%rax), %cl /* get byte from skipset */
- testb %cl, %cl /* is NUL char? */
- jz L(1) /* yes => start compare loop */
- movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */
-
- movb 1(%rax), %cl /* get byte from skipset */
- testb $0xff, %cl /* is NUL char? */
- jz L(1) /* yes => start compare loop */
- movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */
-
- movb 2(%rax), %cl /* get byte from skipset */
- testb $0xff, %cl /* is NUL char? */
- jz L(1) /* yes => start compare loop */
- movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */
-
- movb 3(%rax), %cl /* get byte from skipset */
- addq $4, %rax /* increment skipset pointer */
- movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */
- testb $0xff, %cl /* is NUL char? */
- jnz L(2) /* no => process next dword from skipset */
-
-L(1): leaq -4(%rdx), %rax /* prepare loop */
-
- /* We use a neat trick for the following loop. Normally we would
- have to test for two termination conditions
- 1. a character in the skipset was found
- and
- 2. the end of the string was found
- But as a sign that the character is in the skipset we store its
- value in the table. But the value of NUL is NUL so the loop
- terminates for NUL in every case. */
-
- .p2align 4
-L(3): addq $4, %rax /* adjust pointer for full loop round */
-
- movb (%rax), %cl /* get byte from string */
- cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
- je L(4) /* yes => return */
-
- movb 1(%rax), %cl /* get byte from string */
- cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
- je L(5) /* yes => return */
-
- movb 2(%rax), %cl /* get byte from string */
- cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
- jz L(6) /* yes => return */
-
- movb 3(%rax), %cl /* get byte from string */
- cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
- jne L(3) /* no => start loop again */
-
- incq %rax /* adjust pointer */
-L(6): incq %rax
-L(5): incq %rax
-
-L(4): addq $256, %rsp /* remove skipset */
- cfi_adjust_cfa_offset(-256)
-#ifdef USE_AS_STRPBRK
- xorl %edx,%edx
- orb %cl, %cl /* was last character NUL? */
- cmovzq %rdx, %rax /* Yes: return NULL */
-#else
- subq %rdx, %rax /* we have to return the number of valid
- characters, so compute distance to first
- non-valid character */
-#endif
- ret
-END (strcspn)
-libc_hidden_builtin_def (strcspn)
+++ /dev/null
-#define strcspn strpbrk
-#define USE_AS_STRPBRK
-#include <sysdeps/x86_64/strcspn.S>
#include <sysdep.h>
+#ifndef STRRCHR
+# define STRRCHR strrchr
+#endif
+
+#ifdef USE_AS_WCSRCHR
+# define PCMPEQ pcmpeqd
+# define CHAR_SIZE 4
+# define PMINU pminud
+#else
+# define PCMPEQ pcmpeqb
+# define CHAR_SIZE 1
+# define PMINU pminub
+#endif
+
+#define PAGE_SIZE 4096
+#define VEC_SIZE 16
+
.text
-ENTRY (strrchr)
- movd %esi, %xmm1
+ENTRY(STRRCHR)
+ movd %esi, %xmm0
movq %rdi, %rax
- andl $4095, %eax
- punpcklbw %xmm1, %xmm1
- cmpq $4032, %rax
- punpcklwd %xmm1, %xmm1
- pshufd $0, %xmm1, %xmm1
+ andl $(PAGE_SIZE - 1), %eax
+#ifndef USE_AS_WCSRCHR
+ punpcklbw %xmm0, %xmm0
+ punpcklwd %xmm0, %xmm0
+#endif
+ pshufd $0, %xmm0, %xmm0
+ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
ja L(cross_page)
- movdqu (%rdi), %xmm0
+
+L(cross_page_continue):
+ movups (%rdi), %xmm1
pxor %xmm2, %xmm2
- movdqa %xmm0, %xmm3
- pcmpeqb %xmm1, %xmm0
- pcmpeqb %xmm2, %xmm3
- pmovmskb %xmm0, %ecx
- pmovmskb %xmm3, %edx
- testq %rdx, %rdx
- je L(next_48_bytes)
- leaq -1(%rdx), %rax
- xorq %rdx, %rax
- andq %rcx, %rax
- je L(exit)
- bsrq %rax, %rax
+ PCMPEQ %xmm1, %xmm2
+ pmovmskb %xmm2, %ecx
+ testl %ecx, %ecx
+ jz L(aligned_more)
+
+ PCMPEQ %xmm0, %xmm1
+ pmovmskb %xmm1, %eax
+ leal -1(%rcx), %edx
+ xorl %edx, %ecx
+ andl %ecx, %eax
+ jz L(ret0)
+ bsrl %eax, %eax
addq %rdi, %rax
+ /* We are off by 3 for wcsrchr if search CHAR is non-zero. If
+ search CHAR is zero we are correct. Either way `andq
+ -CHAR_SIZE, %rax` gets the correct result. */
+#ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+#endif
+L(ret0):
ret
+ /* Returns for first vec x1/x2 have hard coded backward search
+ path for earlier matches. */
.p2align 4
-L(next_48_bytes):
- movdqu 16(%rdi), %xmm4
- movdqa %xmm4, %xmm5
- movdqu 32(%rdi), %xmm3
- pcmpeqb %xmm1, %xmm4
- pcmpeqb %xmm2, %xmm5
- movdqu 48(%rdi), %xmm0
- pmovmskb %xmm5, %edx
- movdqa %xmm3, %xmm5
- pcmpeqb %xmm1, %xmm3
- pcmpeqb %xmm2, %xmm5
- pcmpeqb %xmm0, %xmm2
- salq $16, %rdx
- pmovmskb %xmm3, %r8d
- pmovmskb %xmm5, %eax
- pmovmskb %xmm2, %esi
- salq $32, %r8
- salq $32, %rax
- pcmpeqb %xmm1, %xmm0
- orq %rdx, %rax
- movq %rsi, %rdx
- pmovmskb %xmm4, %esi
- salq $48, %rdx
- salq $16, %rsi
- orq %r8, %rsi
- orq %rcx, %rsi
- pmovmskb %xmm0, %ecx
- salq $48, %rcx
- orq %rcx, %rsi
- orq %rdx, %rax
- je L(loop_header2)
- leaq -1(%rax), %rcx
- xorq %rax, %rcx
- andq %rcx, %rsi
- je L(exit)
- bsrq %rsi, %rsi
- leaq (%rdi,%rsi), %rax
+L(first_vec_x0_test):
+ PCMPEQ %xmm0, %xmm1
+ pmovmskb %xmm1, %eax
+ testl %eax, %eax
+ jz L(ret0)
+ bsrl %eax, %eax
+ addq %r8, %rax
+#ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+#endif
ret
.p2align 4
-L(loop_header2):
- testq %rsi, %rsi
- movq %rdi, %rcx
- je L(no_c_found)
-L(loop_header):
- addq $64, %rdi
- pxor %xmm7, %xmm7
- andq $-64, %rdi
- jmp L(loop_entry)
+L(first_vec_x1):
+ PCMPEQ %xmm0, %xmm2
+ pmovmskb %xmm2, %eax
+ leal -1(%rcx), %edx
+ xorl %edx, %ecx
+ andl %ecx, %eax
+ jz L(first_vec_x0_test)
+ bsrl %eax, %eax
+ leaq (VEC_SIZE)(%rdi, %rax), %rax
+#ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+#endif
+ ret
.p2align 4
-L(loop64):
- testq %rdx, %rdx
- cmovne %rdx, %rsi
- cmovne %rdi, %rcx
- addq $64, %rdi
-L(loop_entry):
- movdqa 32(%rdi), %xmm3
- pxor %xmm6, %xmm6
- movdqa 48(%rdi), %xmm2
- movdqa %xmm3, %xmm0
- movdqa 16(%rdi), %xmm4
- pminub %xmm2, %xmm0
- movdqa (%rdi), %xmm5
- pminub %xmm4, %xmm0
- pminub %xmm5, %xmm0
- pcmpeqb %xmm7, %xmm0
- pmovmskb %xmm0, %eax
- movdqa %xmm5, %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %r9d
- movdqa %xmm4, %xmm0
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %edx
- movdqa %xmm3, %xmm0
- pcmpeqb %xmm1, %xmm0
- salq $16, %rdx
- pmovmskb %xmm0, %r10d
- movdqa %xmm2, %xmm0
- pcmpeqb %xmm1, %xmm0
- salq $32, %r10
- orq %r10, %rdx
- pmovmskb %xmm0, %r8d
- orq %r9, %rdx
- salq $48, %r8
- orq %r8, %rdx
+L(first_vec_x1_test):
+ PCMPEQ %xmm0, %xmm2
+ pmovmskb %xmm2, %eax
testl %eax, %eax
- je L(loop64)
- pcmpeqb %xmm6, %xmm4
- pcmpeqb %xmm6, %xmm3
- pcmpeqb %xmm6, %xmm5
- pmovmskb %xmm4, %eax
- pmovmskb %xmm3, %r10d
- pcmpeqb %xmm6, %xmm2
- pmovmskb %xmm5, %r9d
- salq $32, %r10
- salq $16, %rax
- pmovmskb %xmm2, %r8d
- orq %r10, %rax
- orq %r9, %rax
- salq $48, %r8
- orq %r8, %rax
- leaq -1(%rax), %r8
- xorq %rax, %r8
- andq %r8, %rdx
- cmovne %rdi, %rcx
- cmovne %rdx, %rsi
- bsrq %rsi, %rsi
- leaq (%rcx,%rsi), %rax
+ jz L(first_vec_x0_test)
+ bsrl %eax, %eax
+ leaq (VEC_SIZE)(%rdi, %rax), %rax
+#ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+#endif
+ ret
+
+ .p2align 4
+L(first_vec_x2):
+ PCMPEQ %xmm0, %xmm3
+ pmovmskb %xmm3, %eax
+ leal -1(%rcx), %edx
+ xorl %edx, %ecx
+ andl %ecx, %eax
+ jz L(first_vec_x1_test)
+ bsrl %eax, %eax
+ leaq (VEC_SIZE * 2)(%rdi, %rax), %rax
+#ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+#endif
+ ret
+
+ .p2align 4
+L(aligned_more):
+ /* Save original pointer if match was in VEC 0. */
+ movq %rdi, %r8
+ andq $-VEC_SIZE, %rdi
+
+ movaps VEC_SIZE(%rdi), %xmm2
+ pxor %xmm3, %xmm3
+ PCMPEQ %xmm2, %xmm3
+ pmovmskb %xmm3, %ecx
+ testl %ecx, %ecx
+ jnz L(first_vec_x1)
+
+ movaps (VEC_SIZE * 2)(%rdi), %xmm3
+ pxor %xmm4, %xmm4
+ PCMPEQ %xmm3, %xmm4
+ pmovmskb %xmm4, %ecx
+ testl %ecx, %ecx
+ jnz L(first_vec_x2)
+
+ addq $VEC_SIZE, %rdi
+ /* Save pointer again before realigning. */
+ movq %rdi, %rsi
+ andq $-(VEC_SIZE * 2), %rdi
+ .p2align 4
+L(first_loop):
+ /* Do 2x VEC at a time. */
+ movaps (VEC_SIZE * 2)(%rdi), %xmm4
+ movaps (VEC_SIZE * 3)(%rdi), %xmm5
+ /* Since SSE2 has no pminud, wcsrchr needs separate logic for
+ detecting zero. Note if this is found to be a bottleneck it
+ may be worth adding an SSE4.1 wcsrchr implementation. */
+#ifdef USE_AS_WCSRCHR
+ movaps %xmm5, %xmm6
+ pxor %xmm8, %xmm8
+
+ PCMPEQ %xmm8, %xmm5
+ PCMPEQ %xmm4, %xmm8
+ por %xmm5, %xmm8
+#else
+ movaps %xmm5, %xmm6
+ PMINU %xmm4, %xmm5
+#endif
+
+ movaps %xmm4, %xmm9
+ PCMPEQ %xmm0, %xmm4
+ PCMPEQ %xmm0, %xmm6
+ movaps %xmm6, %xmm7
+ por %xmm4, %xmm6
+#ifndef USE_AS_WCSRCHR
+ pxor %xmm8, %xmm8
+ PCMPEQ %xmm5, %xmm8
+#endif
+ pmovmskb %xmm8, %ecx
+ pmovmskb %xmm6, %eax
+
+ addq $(VEC_SIZE * 2), %rdi
+ /* Use `addl` 1) so we can undo it with `subl` and 2) it can
+ macro-fuse with `jz`. */
+ addl %ecx, %eax
+ jz L(first_loop)
+
+ /* Check if there is zero match. */
+ testl %ecx, %ecx
+ jz L(second_loop_match)
+
+ /* Check if there was a match in last iteration. */
+ subl %ecx, %eax
+ jnz L(new_match)
+
+L(first_loop_old_match):
+ PCMPEQ %xmm0, %xmm2
+ PCMPEQ %xmm0, %xmm3
+ pmovmskb %xmm2, %ecx
+ pmovmskb %xmm3, %eax
+ addl %eax, %ecx
+ jz L(first_vec_x0_test)
+ /* NB: We could move this shift to before the branch and save a
+ bit of code size / performance on the fall through. The
+ branch leads to the null case which generally seems hotter
+ than char in first 3x VEC. */
+ sall $16, %eax
+ orl %ecx, %eax
+
+ bsrl %eax, %eax
+ addq %rsi, %rax
+#ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+#endif
+ ret
+
+ .p2align 4
+L(new_match):
+ pxor %xmm6, %xmm6
+ PCMPEQ %xmm9, %xmm6
+ pmovmskb %xmm6, %eax
+ sall $16, %ecx
+ orl %eax, %ecx
+
+ /* We can't reuse either of the old comparisons since we mask
+ off zeros after first zero (instead of using the full
+ comparison) we can't guarantee no interference between match
+ after end of string and valid match. */
+ pmovmskb %xmm4, %eax
+ pmovmskb %xmm7, %edx
+ sall $16, %edx
+ orl %edx, %eax
+
+ leal -1(%ecx), %edx
+ xorl %edx, %ecx
+ andl %ecx, %eax
+ jz L(first_loop_old_match)
+ bsrl %eax, %eax
+ addq %rdi, %rax
+#ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+#endif
ret
+ /* Save minimum state for getting most recent match. We can
+ throw out all previous work. */
.p2align 4
-L(no_c_found):
- movl $1, %esi
- xorl %ecx, %ecx
- jmp L(loop_header)
+L(second_loop_match):
+ movq %rdi, %rsi
+ movaps %xmm4, %xmm2
+ movaps %xmm7, %xmm3
.p2align 4
-L(exit):
- xorl %eax, %eax
+L(second_loop):
+ movaps (VEC_SIZE * 2)(%rdi), %xmm4
+ movaps (VEC_SIZE * 3)(%rdi), %xmm5
+ /* Since SSE2 has no pminud, wcsrchr needs separate logic for
+ detecting zero. Note if this is found to be a bottleneck it
+ may be worth adding an SSE4.1 wcsrchr implementation. */
+#ifdef USE_AS_WCSRCHR
+ movaps %xmm5, %xmm6
+ pxor %xmm8, %xmm8
+
+ PCMPEQ %xmm8, %xmm5
+ PCMPEQ %xmm4, %xmm8
+ por %xmm5, %xmm8
+#else
+ movaps %xmm5, %xmm6
+ PMINU %xmm4, %xmm5
+#endif
+
+ movaps %xmm4, %xmm9
+ PCMPEQ %xmm0, %xmm4
+ PCMPEQ %xmm0, %xmm6
+ movaps %xmm6, %xmm7
+ por %xmm4, %xmm6
+#ifndef USE_AS_WCSRCHR
+ pxor %xmm8, %xmm8
+ PCMPEQ %xmm5, %xmm8
+#endif
+
+ pmovmskb %xmm8, %ecx
+ pmovmskb %xmm6, %eax
+
+ addq $(VEC_SIZE * 2), %rdi
+ /* Either null term or new occurrence of CHAR. */
+ addl %ecx, %eax
+ jz L(second_loop)
+
+ /* No null term so must be new occurrence of CHAR. */
+ testl %ecx, %ecx
+ jz L(second_loop_match)
+
+
+ subl %ecx, %eax
+ jnz L(second_loop_new_match)
+
+L(second_loop_old_match):
+ pmovmskb %xmm2, %ecx
+ pmovmskb %xmm3, %eax
+ sall $16, %eax
+ orl %ecx, %eax
+ bsrl %eax, %eax
+ addq %rsi, %rax
+#ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+#endif
ret
.p2align 4
+L(second_loop_new_match):
+ pxor %xmm6, %xmm6
+ PCMPEQ %xmm9, %xmm6
+ pmovmskb %xmm6, %eax
+ sall $16, %ecx
+ orl %eax, %ecx
+
+ /* We can't reuse either of the old comparisons: since we mask
+ off zeros after the first zero (instead of using the full
+ comparison) we can't guarantee no interference between a match
+ after the end of the string and a valid match. */
+ pmovmskb %xmm4, %eax
+ pmovmskb %xmm7, %edx
+ sall $16, %edx
+ orl %edx, %eax
+
+ leal -1(%ecx), %edx
+ xorl %edx, %ecx
+ andl %ecx, %eax
+ jz L(second_loop_old_match)
+ bsrl %eax, %eax
+ addq %rdi, %rax
+#ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+#endif
+ ret
+
+ .p2align 4,, 4
L(cross_page):
- movq %rdi, %rax
- pxor %xmm0, %xmm0
- andq $-64, %rax
- movdqu (%rax), %xmm5
- movdqa %xmm5, %xmm6
- movdqu 16(%rax), %xmm4
- pcmpeqb %xmm1, %xmm5
- pcmpeqb %xmm0, %xmm6
- movdqu 32(%rax), %xmm3
- pmovmskb %xmm6, %esi
- movdqa %xmm4, %xmm6
- movdqu 48(%rax), %xmm2
- pcmpeqb %xmm1, %xmm4
- pcmpeqb %xmm0, %xmm6
- pmovmskb %xmm6, %edx
- movdqa %xmm3, %xmm6
- pcmpeqb %xmm1, %xmm3
- pcmpeqb %xmm0, %xmm6
- pcmpeqb %xmm2, %xmm0
- salq $16, %rdx
- pmovmskb %xmm3, %r9d
- pmovmskb %xmm6, %r8d
- pmovmskb %xmm0, %ecx
- salq $32, %r9
- salq $32, %r8
- pcmpeqb %xmm1, %xmm2
- orq %r8, %rdx
- salq $48, %rcx
- pmovmskb %xmm5, %r8d
- orq %rsi, %rdx
- pmovmskb %xmm4, %esi
- orq %rcx, %rdx
- pmovmskb %xmm2, %ecx
- salq $16, %rsi
- salq $48, %rcx
- orq %r9, %rsi
- orq %r8, %rsi
- orq %rcx, %rsi
+ movq %rdi, %rsi
+ andq $-VEC_SIZE, %rsi
+ movaps (%rsi), %xmm1
+ pxor %xmm2, %xmm2
+ PCMPEQ %xmm1, %xmm2
+ pmovmskb %xmm2, %edx
movl %edi, %ecx
- subl %eax, %ecx
- shrq %cl, %rdx
- shrq %cl, %rsi
- testq %rdx, %rdx
- je L(loop_header2)
- leaq -1(%rdx), %rax
- xorq %rdx, %rax
- andq %rax, %rsi
- je L(exit)
- bsrq %rsi, %rax
+ andl $(VEC_SIZE - 1), %ecx
+ sarl %cl, %edx
+ jz L(cross_page_continue)
+ PCMPEQ %xmm0, %xmm1
+ pmovmskb %xmm1, %eax
+ sarl %cl, %eax
+ leal -1(%rdx), %ecx
+ xorl %edx, %ecx
+ andl %ecx, %eax
+ jz L(ret1)
+ bsrl %eax, %eax
addq %rdi, %rax
+#ifdef USE_AS_WCSRCHR
+ andq $-CHAR_SIZE, %rax
+#endif
+L(ret1):
ret
-END (strrchr)
+END(STRRCHR)
-weak_alias (strrchr, rindex)
-libc_hidden_builtin_def (strrchr)
+#ifndef USE_AS_WCSRCHR
+ weak_alias (STRRCHR, rindex)
+ libc_hidden_builtin_def (STRRCHR)
+#endif
+++ /dev/null
-/* strspn (str, ss) -- Return the length of the initial segment of STR
- which contains only characters from SS.
- For AMD x86-64.
- Copyright (C) 1994-2021 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>.
- Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>.
- Adopted for x86-64 by Andreas Jaeger <aj@suse.de>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <https://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
- .text
-ENTRY (strspn)
-
- movq %rdi, %rdx /* Save SRC. */
-
- /* First we create a table with flags for all possible characters.
- For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
- supported by the C string functions we have 256 characters.
- Before inserting marks for the stop characters we clear the whole
- table. */
- movq %rdi, %r8 /* Save value. */
- subq $256, %rsp /* Make space for 256 bytes. */
- cfi_adjust_cfa_offset(256)
- movl $32, %ecx /* 32*8 bytes = 256 bytes. */
- movq %rsp, %rdi
- xorl %eax, %eax /* We store 0s. */
- cld
- rep
- stosq
-
- movq %rsi, %rax /* Setup stopset. */
-
-/* For understanding the following code remember that %rcx == 0 now.
- Although all the following instruction only modify %cl we always
- have a correct zero-extended 64-bit value in %rcx. */
-
- .p2align 4
-L(2): movb (%rax), %cl /* get byte from stopset */
- testb %cl, %cl /* is NUL char? */
- jz L(1) /* yes => start compare loop */
- movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
-
- movb 1(%rax), %cl /* get byte from stopset */
- testb $0xff, %cl /* is NUL char? */
- jz L(1) /* yes => start compare loop */
- movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
-
- movb 2(%rax), %cl /* get byte from stopset */
- testb $0xff, %cl /* is NUL char? */
- jz L(1) /* yes => start compare loop */
- movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
-
- movb 3(%rax), %cl /* get byte from stopset */
- addq $4, %rax /* increment stopset pointer */
- movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
- testb $0xff, %cl /* is NUL char? */
- jnz L(2) /* no => process next dword from stopset */
-
-L(1): leaq -4(%rdx), %rax /* prepare loop */
-
- /* We use a neat trick for the following loop. Normally we would
- have to test for two termination conditions
- 1. a character in the stopset was found
- and
- 2. the end of the string was found
- But as a sign that the character is in the stopset we store its
- value in the table. But the value of NUL is NUL so the loop
- terminates for NUL in every case. */
-
- .p2align 4
-L(3): addq $4, %rax /* adjust pointer for full loop round */
-
- movb (%rax), %cl /* get byte from string */
- testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
- jz L(4) /* no => return */
-
- movb 1(%rax), %cl /* get byte from string */
- testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
- jz L(5) /* no => return */
-
- movb 2(%rax), %cl /* get byte from string */
- testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
- jz L(6) /* no => return */
-
- movb 3(%rax), %cl /* get byte from string */
- testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
- jnz L(3) /* yes => start loop again */
-
- incq %rax /* adjust pointer */
-L(6): incq %rax
-L(5): incq %rax
-
-L(4): addq $256, %rsp /* remove stopset */
- cfi_adjust_cfa_offset(-256)
- subq %rdx, %rax /* we have to return the number of valid
- characters, so compute distance to first
- non-valid character */
- ret
-END (strspn)
-libc_hidden_builtin_def (strspn)
to avoid RTM abort triggered by VZEROUPPER inside transactionally. */
#define ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST \
xtest; \
- jz 1f; \
- vzeroall; \
+ jnz 1f; \
+ vzeroupper; \
ret; \
1: \
- vzeroupper; \
+ vzeroall; \
ret
+/* Can be used to replace vzeroupper that is not directly before a
+ return, e.g. when hoisting it out of multiple return paths to cut
+ the number of vzerouppers and code size. Inside an RTM transaction
+ (xtest leaves ZF clear) vzeroall is used instead to avoid an abort. */
+#define COND_VZEROUPPER_XTEST \
+ xtest; \
+ jz 1f; \
+ vzeroall; \
+ jmp 2f; \
+1: \
+ vzeroupper; \
+2:
+
+/* In RTM define this as COND_VZEROUPPER_XTEST. */
+#ifndef COND_VZEROUPPER
+# define COND_VZEROUPPER vzeroupper
+#endif
+
/* Zero upper vector registers and return. */
#ifndef ZERO_UPPER_VEC_REGISTERS_RETURN
# define ZERO_UPPER_VEC_REGISTERS_RETURN \
pxor %xmm0, %xmm0
lea 32(%rdi), %rax
- lea 16(%rdi), %rcx
+ addq $16, %rdi
and $-16, %rax
pcmpeqd (%rax), %xmm0
pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm1
pmovmskb %xmm1, %edx
pxor %xmm2, %xmm2
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm2
pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm3
pmovmskb %xmm3, %edx
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm0
pmovmskb %xmm0, %edx
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm1
pmovmskb %xmm1, %edx
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm2
pmovmskb %xmm2, %edx
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm3
pmovmskb %xmm3, %edx
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm0
pmovmskb %xmm0, %edx
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm1
pmovmskb %xmm1, %edx
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm2
pmovmskb %xmm2, %edx
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
pcmpeqd (%rax), %xmm3
pmovmskb %xmm3, %edx
+ addq $16, %rax
test %edx, %edx
- lea 16(%rax), %rax
jnz L(exit)
and $-0x40, %rax
pminub %xmm0, %xmm2
pcmpeqd %xmm3, %xmm2
pmovmskb %xmm2, %edx
+ addq $64, %rax
test %edx, %edx
- lea 64(%rax), %rax
jz L(aligned_64_loop)
pcmpeqd -64(%rax), %xmm3
pmovmskb %xmm3, %edx
+ addq $48, %rdi
test %edx, %edx
- lea 48(%rcx), %rcx
jnz L(exit)
pcmpeqd %xmm1, %xmm3
pmovmskb %xmm3, %edx
+ addq $-16, %rdi
test %edx, %edx
- lea -16(%rcx), %rcx
jnz L(exit)
pcmpeqd -32(%rax), %xmm3
pmovmskb %xmm3, %edx
+ addq $-16, %rdi
test %edx, %edx
- lea -16(%rcx), %rcx
jnz L(exit)
pcmpeqd %xmm6, %xmm3
pmovmskb %xmm3, %edx
+ addq $-16, %rdi
test %edx, %edx
- lea -16(%rcx), %rcx
- jnz L(exit)
-
- jmp L(aligned_64_loop)
+ jz L(aligned_64_loop)
.p2align 4
L(exit):
- sub %rcx, %rax
+ sub %rdi, %rax
shr $2, %rax
test %dl, %dl
jz L(exit_high)
- mov %dl, %cl
- and $15, %cl
+ andl $15, %edx
jz L(exit_1)
ret
- .p2align 4
+ /* No align here. Naturally aligned % 16 == 1. */
L(exit_high):
- mov %dh, %ch
- and $15, %ch
+ andl $(15 << 8), %edx
jz L(exit_3)
add $2, %rax
ret
- .p2align 4
+ .p2align 3
L(exit_1):
add $1, %rax
ret
- .p2align 4
+ .p2align 3
L(exit_3):
add $3, %rax
ret
- .p2align 4
+ .p2align 3
L(exit_tail0):
- xor %rax, %rax
+ xorl %eax, %eax
ret
- .p2align 4
+ .p2align 3
L(exit_tail1):
- mov $1, %rax
+ movl $1, %eax
ret
- .p2align 4
+ .p2align 3
L(exit_tail2):
- mov $2, %rax
+ movl $2, %eax
ret
- .p2align 4
+ .p2align 3
L(exit_tail3):
- mov $3, %rax
+ movl $3, %eax
ret
- .p2align 4
+ .p2align 3
L(exit_tail4):
- mov $4, %rax
+ movl $4, %eax
ret
- .p2align 4
+ .p2align 3
L(exit_tail5):
- mov $5, %rax
+ movl $5, %eax
ret
- .p2align 4
+ .p2align 3
L(exit_tail6):
- mov $6, %rax
+ movl $6, %eax
ret
- .p2align 4
+ .p2align 3
L(exit_tail7):
- mov $7, %rax
+ movl $7, %eax
ret
END (__wcslen)
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
-#include <sysdep.h>
- .text
-ENTRY (wcsrchr)
+#define USE_AS_WCSRCHR 1
+#define NO_PMINU 1
- movd %rsi, %xmm1
- mov %rdi, %rcx
- punpckldq %xmm1, %xmm1
- pxor %xmm2, %xmm2
- punpckldq %xmm1, %xmm1
- and $63, %rcx
- cmp $48, %rcx
- ja L(crosscache)
+#ifndef STRRCHR
+# define STRRCHR wcsrchr
+#endif
- movdqu (%rdi), %xmm0
- pcmpeqd %xmm0, %xmm2
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm0, %rax
- add $16, %rdi
-
- test %rax, %rax
- jnz L(unaligned_match1)
-
- test %rcx, %rcx
- jnz L(return_null)
-
- and $-16, %rdi
- xor %r8, %r8
- jmp L(loop)
-
- .p2align 4
-L(unaligned_match1):
- test %rcx, %rcx
- jnz L(prolog_find_zero_1)
-
- mov %rax, %r8
- mov %rdi, %rsi
- and $-16, %rdi
- jmp L(loop)
-
- .p2align 4
-L(crosscache):
- and $15, %rcx
- and $-16, %rdi
- pxor %xmm3, %xmm3
- movdqa (%rdi), %xmm0
- pcmpeqd %xmm0, %xmm3
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm3, %rdx
- pmovmskb %xmm0, %rax
- shr %cl, %rdx
- shr %cl, %rax
- add $16, %rdi
-
- test %rax, %rax
- jnz L(unaligned_match)
-
- test %rdx, %rdx
- jnz L(return_null)
-
- xor %r8, %r8
- jmp L(loop)
-
- .p2align 4
-L(unaligned_match):
- test %rdx, %rdx
- jnz L(prolog_find_zero)
-
- mov %rax, %r8
- lea (%rdi, %rcx), %rsi
-
-/* Loop start on aligned string. */
- .p2align 4
-L(loop):
- movdqa (%rdi), %xmm0
- pcmpeqd %xmm0, %xmm2
- add $16, %rdi
- pcmpeqd %xmm1, %xmm0
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm0, %rax
- or %rax, %rcx
- jnz L(matches)
-
- movdqa (%rdi), %xmm3
- pcmpeqd %xmm3, %xmm2
- add $16, %rdi
- pcmpeqd %xmm1, %xmm3
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm3, %rax
- or %rax, %rcx
- jnz L(matches)
-
- movdqa (%rdi), %xmm4
- pcmpeqd %xmm4, %xmm2
- add $16, %rdi
- pcmpeqd %xmm1, %xmm4
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm4, %rax
- or %rax, %rcx
- jnz L(matches)
-
- movdqa (%rdi), %xmm5
- pcmpeqd %xmm5, %xmm2
- add $16, %rdi
- pcmpeqd %xmm1, %xmm5
- pmovmskb %xmm2, %rcx
- pmovmskb %xmm5, %rax
- or %rax, %rcx
- jz L(loop)
-
- .p2align 4
-L(matches):
- test %rax, %rax
- jnz L(match)
-L(return_value):
- test %r8, %r8
- jz L(return_null)
- mov %r8, %rax
- mov %rsi, %rdi
-
- test $15 << 4, %ah
- jnz L(match_fourth_wchar)
- test %ah, %ah
- jnz L(match_third_wchar)
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(match):
- pmovmskb %xmm2, %rcx
- test %rcx, %rcx
- jnz L(find_zero)
- mov %rax, %r8
- mov %rdi, %rsi
- jmp L(loop)
-
- .p2align 4
-L(find_zero):
- test $15, %cl
- jnz L(find_zero_in_first_wchar)
- test %cl, %cl
- jnz L(find_zero_in_second_wchar)
- test $15, %ch
- jnz L(find_zero_in_third_wchar)
-
- and $1 << 13 - 1, %rax
- jz L(return_value)
-
- test $15 << 4, %ah
- jnz L(match_fourth_wchar)
- test %ah, %ah
- jnz L(match_third_wchar)
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(find_zero_in_first_wchar):
- test $1, %rax
- jz L(return_value)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(find_zero_in_second_wchar):
- and $1 << 5 - 1, %rax
- jz L(return_value)
-
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(find_zero_in_third_wchar):
- and $1 << 9 - 1, %rax
- jz L(return_value)
-
- test %ah, %ah
- jnz L(match_third_wchar)
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(prolog_find_zero):
- add %rcx, %rdi
- mov %rdx, %rcx
-L(prolog_find_zero_1):
- test $15, %cl
- jnz L(prolog_find_zero_in_first_wchar)
- test %cl, %cl
- jnz L(prolog_find_zero_in_second_wchar)
- test $15, %ch
- jnz L(prolog_find_zero_in_third_wchar)
-
- and $1 << 13 - 1, %rax
- jz L(return_null)
-
- test $15 << 4, %ah
- jnz L(match_fourth_wchar)
- test %ah, %ah
- jnz L(match_third_wchar)
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(prolog_find_zero_in_first_wchar):
- test $1, %rax
- jz L(return_null)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(prolog_find_zero_in_second_wchar):
- and $1 << 5 - 1, %rax
- jz L(return_null)
-
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(prolog_find_zero_in_third_wchar):
- and $1 << 9 - 1, %rax
- jz L(return_null)
-
- test %ah, %ah
- jnz L(match_third_wchar)
- test $15 << 4, %al
- jnz L(match_second_wchar)
- lea -16(%rdi), %rax
- ret
-
- .p2align 4
-L(match_second_wchar):
- lea -12(%rdi), %rax
- ret
-
- .p2align 4
-L(match_third_wchar):
- lea -8(%rdi), %rax
- ret
-
- .p2align 4
-L(match_fourth_wchar):
- lea -4(%rdi), %rax
- ret
-
- .p2align 4
-L(return_null):
- xor %rax, %rax
- ret
-
-END (wcsrchr)
+#include "../strrchr.S"
if (__tzname[0] == NULL)
{
/* This should only happen if there are no transition rules.
- In this case there should be only one single type. */
- assert (num_types == 1);
+ In this case there's usually only one single type, unless
+ e.g. the data file has a truncated time-range. */
__tzname[0] = __tzstring (zone_names);
}
if (__tzname[1] == NULL)
*leap_correct = leaps[i].change;
if (timer == leaps[i].transition /* Exactly at the transition time. */
- && ((i == 0 && leaps[i].change > 0)
- || leaps[i].change > leaps[i - 1].change))
+ && (leaps[i].change > (i == 0 ? 0 : leaps[i - 1].change)))
{
*leap_hit = 1;
while (i > 0
include ../Makeconfig
others := zdump zic
-tests := test-tz tst-timezone tst-tzset
+tests := test-tz tst-timezone tst-tzset tst-bz28707
generated-dirs += testdata
America/Sao_Paulo Asia/Tokyo \
Europe/London)
$(objpfx)tst-tzset.out: $(addprefix $(testdata)/XT, 1 2 3 4)
+$(objpfx)tst-bz28707.out: $(testdata)/XT5
test-tz-ENV = TZDIR=$(testdata)
tst-timezone-ENV = TZDIR=$(testdata)
tst-tzset-ENV = TZDIR=$(testdata)
+tst-bz28707-ENV = TZDIR=$(testdata)
# Note this must come second in the deps list for $(built-program-cmd) to work.
zic-deps = $(objpfx)zic $(leapseconds) yearistype
$(make-target-directory)
cp $< $@
+$(testdata)/XT%: testdata/gen-XT%.sh
+ $(SHELL) $< > $@.tmp
+ mv $@.tmp $@
+
$(objpfx)tzselect: tzselect.ksh $(common-objpfx)config.make
sed -e 's|TZDIR=[^}]*|TZDIR=$(zonedir)|' \
-e '/TZVERSION=/s|see_Makefile|"$(version)"|' \
--- /dev/null
+#! /bin/sh
+
+# This test-case is the tzfile for America/Asuncion
+# generated by tzlib-2021e as follows, using the tzlib-2021e
+# zic: "zic -d DEST -r @1546300800 -L /dev/null -b slim
+# SOURCE/southamerica". Note that in its type 2 header, it
+# has two entries in its "time-types" array (types), but
+# only one entry in its "transition types" array
+# (type_idxs).
+
+printf \
+'TZif2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0'\
+'\0\0\0\0\0\0\0\1\0\0\0\1\0\0\0\0\0\0\0TZif2\0\0\0\0\0\0\0\0'\
+'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\0\0\0\2\0\0\0\b\0'\
+'\0\0\0\*\255\200\1\0\0\0\0\0\0\377\377\325\320\1\4-00\0-03\0\n'\
+'<-04>4<-03>,M10.1.0/0,M3.4.0/0\n'
--- /dev/null
+/* Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Test that we can use a truncated timezone file, where the time type
+ at index 0 is not referenced by the transition-types array (and the
+ transition-types array does not contain both at least one DST and at
+ least one normal-time member). */
+
+static int
+do_test (void)
+{
+ if (setenv ("TZ", "XT5", 1))
+ {
+ puts ("setenv failed.");
+ return 1;
+ }
+
+ tzset ();
+
+ return
+ /* Sanity-check that we got the right timezone-name for DST. For
+ normal time, we're likely to get "-00" (the "unspecified" marker),
+ even though the POSIX timezone string says "-04". Let's not test
+ that. */
+ !(strcmp (tzname[1], "-03") == 0);
+}
+#include <support/test-driver.c>
__NTH (wmemcpy (wchar_t *__restrict __s1, const wchar_t *__restrict __s2,
size_t __n))
{
- if (__glibc_objsize0 (__s1) != (size_t) -1)
- {
- if (!__builtin_constant_p (__n))
- return __wmemcpy_chk (__s1, __s2, __n,
- __glibc_objsize0 (__s1) / sizeof (wchar_t));
-
- if (__n > __glibc_objsize0 (__s1) / sizeof (wchar_t))
- return __wmemcpy_chk_warn (__s1, __s2, __n,
- __glibc_objsize0 (__s1) / sizeof (wchar_t));
- }
- return __wmemcpy_alias (__s1, __s2, __n);
+ return __glibc_fortify_n (wmemcpy, __n, sizeof (wchar_t),
+ __glibc_objsize0 (__s1),
+ __s1, __s2, __n);
}
__fortify_function wchar_t *
__NTH (wmemmove (wchar_t *__s1, const wchar_t *__s2, size_t __n))
{
- if (__glibc_objsize0 (__s1) != (size_t) -1)
- {
- if (!__builtin_constant_p (__n))
- return __wmemmove_chk (__s1, __s2, __n,
- __glibc_objsize0 (__s1) / sizeof (wchar_t));
-
- if (__n > __glibc_objsize0 (__s1) / sizeof (wchar_t))
- return __wmemmove_chk_warn (__s1, __s2, __n,
- (__glibc_objsize0 (__s1)
- / sizeof (wchar_t)));
- }
- return __wmemmove_alias (__s1, __s2, __n);
+ return __glibc_fortify_n (wmemmove, __n, sizeof (wchar_t),
+ __glibc_objsize0 (__s1),
+ __s1, __s2, __n);
}
__NTH (wmempcpy (wchar_t *__restrict __s1, const wchar_t *__restrict __s2,
size_t __n))
{
- if (__glibc_objsize0 (__s1) != (size_t) -1)
- {
- if (!__builtin_constant_p (__n))
- return __wmempcpy_chk (__s1, __s2, __n,
- __glibc_objsize0 (__s1) / sizeof (wchar_t));
-
- if (__n > __glibc_objsize0 (__s1) / sizeof (wchar_t))
- return __wmempcpy_chk_warn (__s1, __s2, __n,
- (__glibc_objsize0 (__s1)
- / sizeof (wchar_t)));
- }
- return __wmempcpy_alias (__s1, __s2, __n);
+ return __glibc_fortify_n (wmempcpy, __n, sizeof (wchar_t),
+ __glibc_objsize0 (__s1),
+ __s1, __s2, __n);
}
#endif
__fortify_function wchar_t *
__NTH (wmemset (wchar_t *__s, wchar_t __c, size_t __n))
{
- if (__glibc_objsize0 (__s) != (size_t) -1)
- {
- if (!__builtin_constant_p (__n))
- return __wmemset_chk (__s, __c, __n,
- __glibc_objsize0 (__s) / sizeof (wchar_t));
-
- if (__n > __glibc_objsize0 (__s) / sizeof (wchar_t))
- return __wmemset_chk_warn (__s, __c, __n,
- __glibc_objsize0 (__s) / sizeof (wchar_t));
- }
- return __wmemset_alias (__s, __c, __n);
+ return __glibc_fortify_n (wmemset, __n, sizeof (wchar_t),
+ __glibc_objsize0 (__s),
+ __s, __c, __n);
}
__fortify_function wchar_t *
__NTH (wcscpy (wchar_t *__restrict __dest, const wchar_t *__restrict __src))
{
- if (__glibc_objsize (__dest) != (size_t) -1)
- return __wcscpy_chk (__dest, __src,
- __glibc_objsize (__dest) / sizeof (wchar_t));
+ size_t sz = __glibc_objsize (__dest);
+ if (sz != (size_t) -1)
+ return __wcscpy_chk (__dest, __src, sz / sizeof (wchar_t));
return __wcscpy_alias (__dest, __src);
}
__fortify_function wchar_t *
__NTH (wcpcpy (wchar_t *__restrict __dest, const wchar_t *__restrict __src))
{
- if (__glibc_objsize (__dest) != (size_t) -1)
- return __wcpcpy_chk (__dest, __src,
- __glibc_objsize (__dest) / sizeof (wchar_t));
+ size_t sz = __glibc_objsize (__dest);
+ if (sz != (size_t) -1)
+ return __wcpcpy_chk (__dest, __src, sz / sizeof (wchar_t));
return __wcpcpy_alias (__dest, __src);
}
__NTH (wcsncpy (wchar_t *__restrict __dest, const wchar_t *__restrict __src,
size_t __n))
{
- if (__glibc_objsize (__dest) != (size_t) -1)
- {
- if (!__builtin_constant_p (__n))
- return __wcsncpy_chk (__dest, __src, __n,
- __glibc_objsize (__dest) / sizeof (wchar_t));
- if (__n > __glibc_objsize (__dest) / sizeof (wchar_t))
- return __wcsncpy_chk_warn (__dest, __src, __n,
- (__glibc_objsize (__dest)
- / sizeof (wchar_t)));
- }
- return __wcsncpy_alias (__dest, __src, __n);
+ return __glibc_fortify_n (wcsncpy, __n, sizeof (wchar_t),
+ __glibc_objsize (__dest),
+ __dest, __src, __n);
}
__NTH (wcpncpy (wchar_t *__restrict __dest, const wchar_t *__restrict __src,
size_t __n))
{
- if (__glibc_objsize (__dest) != (size_t) -1)
- {
- if (!__builtin_constant_p (__n))
- return __wcpncpy_chk (__dest, __src, __n,
- __glibc_objsize (__dest) / sizeof (wchar_t));
- if (__n > __glibc_objsize (__dest) / sizeof (wchar_t))
- return __wcpncpy_chk_warn (__dest, __src, __n,
- (__glibc_objsize (__dest)
- / sizeof (wchar_t)));
- }
- return __wcpncpy_alias (__dest, __src, __n);
+ return __glibc_fortify_n (wcpncpy, __n, sizeof (wchar_t),
+ __glibc_objsize (__dest),
+ __dest, __src, __n);
}
__fortify_function wchar_t *
__NTH (wcscat (wchar_t *__restrict __dest, const wchar_t *__restrict __src))
{
- if (__glibc_objsize (__dest) != (size_t) -1)
- return __wcscat_chk (__dest, __src,
- __glibc_objsize (__dest) / sizeof (wchar_t));
+ size_t sz = __glibc_objsize (__dest);
+ if (sz != (size_t) -1)
+ return __wcscat_chk (__dest, __src, sz / sizeof (wchar_t));
return __wcscat_alias (__dest, __src);
}
__NTH (wcsncat (wchar_t *__restrict __dest, const wchar_t *__restrict __src,
size_t __n))
{
- if (__glibc_objsize (__dest) != (size_t) -1)
- return __wcsncat_chk (__dest, __src, __n,
- __glibc_objsize (__dest) / sizeof (wchar_t));
+ size_t sz = __glibc_objsize (__dest);
+ if (sz != (size_t) -1)
+ return __wcsncat_chk (__dest, __src, __n, sz / sizeof (wchar_t));
return __wcsncat_alias (__dest, __src, __n);
}
__NTH (swprintf (wchar_t *__restrict __s, size_t __n,
const wchar_t *__restrict __fmt, ...))
{
- if (__glibc_objsize (__s) != (size_t) -1 || __USE_FORTIFY_LEVEL > 1)
+ size_t sz = __glibc_objsize (__s);
+ if (sz != (size_t) -1 || __USE_FORTIFY_LEVEL > 1)
return __swprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1,
- __glibc_objsize (__s) / sizeof (wchar_t),
- __fmt, __va_arg_pack ());
+ sz / sizeof (wchar_t), __fmt, __va_arg_pack ());
return __swprintf_alias (__s, __n, __fmt, __va_arg_pack ());
}
#elif !defined __cplusplus
__NTH (vswprintf (wchar_t *__restrict __s, size_t __n,
const wchar_t *__restrict __fmt, __gnuc_va_list __ap))
{
- if (__glibc_objsize (__s) != (size_t) -1 || __USE_FORTIFY_LEVEL > 1)
+ size_t sz = __glibc_objsize (__s);
+ if (sz != (size_t) -1 || __USE_FORTIFY_LEVEL > 1)
return __vswprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1,
- __glibc_objsize (__s) / sizeof (wchar_t), __fmt,
- __ap);
+ sz / sizeof (wchar_t), __fmt, __ap);
return __vswprintf_alias (__s, __n, __fmt, __ap);
}
__fortify_function __wur wchar_t *
fgetws (wchar_t *__restrict __s, int __n, __FILE *__restrict __stream)
{
- if (__glibc_objsize (__s) != (size_t) -1)
- {
- if (!__builtin_constant_p (__n) || __n <= 0)
- return __fgetws_chk (__s, __glibc_objsize (__s) / sizeof (wchar_t),
- __n, __stream);
-
- if ((size_t) __n > __glibc_objsize (__s) / sizeof (wchar_t))
- return __fgetws_chk_warn (__s,
- __glibc_objsize (__s) / sizeof (wchar_t),
- __n, __stream);
- }
- return __fgetws_alias (__s, __n, __stream);
+ size_t sz = __glibc_objsize (__s);
+ if (__glibc_safe_or_unknown_len (__n, sizeof (wchar_t), sz))
+ return __fgetws_alias (__s, __n, __stream);
+ if (__glibc_unsafe_len (__n, sizeof (wchar_t), sz))
+ return __fgetws_chk_warn (__s, sz / sizeof (wchar_t), __n, __stream);
+ return __fgetws_chk (__s, sz / sizeof (wchar_t), __n, __stream);
}
#ifdef __USE_GNU
__fortify_function __wur wchar_t *
fgetws_unlocked (wchar_t *__restrict __s, int __n, __FILE *__restrict __stream)
{
- if (__glibc_objsize (__s) != (size_t) -1)
- {
- if (!__builtin_constant_p (__n) || __n <= 0)
- return __fgetws_unlocked_chk (__s,
- __glibc_objsize (__s) / sizeof (wchar_t),
- __n, __stream);
-
- if ((size_t) __n > __glibc_objsize (__s) / sizeof (wchar_t))
- return __fgetws_unlocked_chk_warn (__s,
- (__glibc_objsize (__s)
- / sizeof (wchar_t)),
- __n, __stream);
- }
- return __fgetws_unlocked_alias (__s, __n, __stream);
+ size_t sz = __glibc_objsize (__s);
+ if (__glibc_safe_or_unknown_len (__n, sizeof (wchar_t), sz))
+ return __fgetws_unlocked_alias (__s, __n, __stream);
+ if (__glibc_unsafe_len (__n, sizeof (wchar_t), sz))
+ return __fgetws_unlocked_chk_warn (__s, sz / sizeof (wchar_t), __n,
+ __stream);
+ return __fgetws_unlocked_chk (__s, sz / sizeof (wchar_t), __n, __stream);
}
#endif
__NTH (mbsrtowcs (wchar_t *__restrict __dst, const char **__restrict __src,
size_t __len, mbstate_t *__restrict __ps))
{
- if (__glibc_objsize (__dst) != (size_t) -1)
- {
- if (!__builtin_constant_p (__len))
- return __mbsrtowcs_chk (__dst, __src, __len, __ps,
- __glibc_objsize (__dst) / sizeof (wchar_t));
-
- if (__len > __glibc_objsize (__dst) / sizeof (wchar_t))
- return __mbsrtowcs_chk_warn (__dst, __src, __len, __ps,
- (__glibc_objsize (__dst)
- / sizeof (wchar_t)));
- }
- return __mbsrtowcs_alias (__dst, __src, __len, __ps);
+ return __glibc_fortify_n (mbsrtowcs, __len, sizeof (wchar_t),
+ __glibc_objsize (__dst),
+ __dst, __src, __len, __ps);
}
__NTH (wcsrtombs (char *__restrict __dst, const wchar_t **__restrict __src,
size_t __len, mbstate_t *__restrict __ps))
{
- if (__glibc_objsize (__dst) != (size_t) -1)
- {
- if (!__builtin_constant_p (__len))
- return __wcsrtombs_chk (__dst, __src, __len, __ps,
- __glibc_objsize (__dst));
-
- if (__len > __glibc_objsize (__dst))
- return __wcsrtombs_chk_warn (__dst, __src, __len, __ps,
- __glibc_objsize (__dst));
- }
- return __wcsrtombs_alias (__dst, __src, __len, __ps);
+ return __glibc_fortify (wcsrtombs, __len, sizeof (char),
+ __glibc_objsize (__dst),
+ __dst, __src, __len, __ps);
}
-#ifdef __USE_GNU
+#ifdef __USE_XOPEN2K8
extern size_t __mbsnrtowcs_chk (wchar_t *__restrict __dst,
const char **__restrict __src, size_t __nmc,
size_t __len, mbstate_t *__restrict __ps,
__NTH (mbsnrtowcs (wchar_t *__restrict __dst, const char **__restrict __src,
size_t __nmc, size_t __len, mbstate_t *__restrict __ps))
{
- if (__glibc_objsize (__dst) != (size_t) -1)
- {
- if (!__builtin_constant_p (__len))
- return __mbsnrtowcs_chk (__dst, __src, __nmc, __len, __ps,
- __glibc_objsize (__dst) / sizeof (wchar_t));
-
- if (__len > __glibc_objsize (__dst) / sizeof (wchar_t))
- return __mbsnrtowcs_chk_warn (__dst, __src, __nmc, __len, __ps,
- (__glibc_objsize (__dst)
- / sizeof (wchar_t)));
- }
- return __mbsnrtowcs_alias (__dst, __src, __nmc, __len, __ps);
+ return __glibc_fortify_n (mbsnrtowcs, __len, sizeof (wchar_t),
+ __glibc_objsize (__dst),
+ __dst, __src, __nmc, __len, __ps);
}
__NTH (wcsnrtombs (char *__restrict __dst, const wchar_t **__restrict __src,
size_t __nwc, size_t __len, mbstate_t *__restrict __ps))
{
- if (__glibc_objsize (__dst) != (size_t) -1)
- {
- if (!__builtin_constant_p (__len))
- return __wcsnrtombs_chk (__dst, __src, __nwc, __len, __ps,
- __glibc_objsize (__dst));
-
- if (__len > __glibc_objsize (__dst))
- return __wcsnrtombs_chk_warn (__dst, __src, __nwc, __len, __ps,
- __glibc_objsize (__dst));
- }
- return __wcsnrtombs_alias (__dst, __src, __nwc, __len, __ps);
+ return __glibc_fortify (wcsnrtombs, __len, sizeof (char),
+ __glibc_objsize (__dst),
+ __dst, __src, __nwc, __len, __ps);
}
#endif