+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
- + License along with the GNU C Library; if not, see
+ + License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
- +#ifndef _UNWIND_ARCH_H
- +#define _UNWIND_ARCH_H
+ +#include <sysdep.h>
+
- +#include <unwind.h>
+ +/* Assumptions:
+ + *
+ + * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
+ + *
+ + */
+
- +static inline void *
- +unwind_arch_adjustment (void *prev, void *addr)
- +{
- + return addr;
- +}
+ +#define dstin x0
+ +#define src x1
+ +#define count x2
+ +#define dst x3
+ +#define srcend x4
+ +#define dstend x5
+ +#define A_l x6
+ +#define A_lw w6
+ +#define A_h x7
+ +#define B_l x8
+ +#define B_lw w8
+ +#define B_h x9
+ +#define C_lw w10
+ +#define tmp1 x14
+
- +#endif
- diff --git a/sysdeps/hppa/dl-fptr.c b/sysdeps/hppa/dl-fptr.c
- index 0a37397284..25ca8f8463 100644
- --- a/sysdeps/hppa/dl-fptr.c
- +++ b/sysdeps/hppa/dl-fptr.c
- @@ -172,8 +172,8 @@ make_fdesc (ElfW(Addr) ip, ElfW(Addr) gp)
- }
-
- install:
- - fdesc->ip = ip;
- fdesc->gp = gp;
- + fdesc->ip = ip;
-
- return (ElfW(Addr)) fdesc;
- }
- @@ -350,7 +350,9 @@ ElfW(Addr)
- _dl_lookup_address (const void *address)
- {
- ElfW(Addr) addr = (ElfW(Addr)) address;
- - unsigned int *desc, *gptr;
- + ElfW(Word) reloc_arg;
- + volatile unsigned int *desc;
- + unsigned int *gptr;
-
- /* Return ADDR if the least-significant two bits of ADDR are not consistent
- with ADDR being a linker defined function pointer. The normal value for
- @@ -367,7 +369,11 @@ _dl_lookup_address (const void *address)
- if (!_dl_read_access_allowed (desc))
- return addr;
-
- - /* Load first word of candidate descriptor. It should be a pointer
- + /* First load the relocation offset. */
- + reloc_arg = (ElfW(Word)) desc[1];
- + atomic_full_barrier();
+ +#define A_q q0
+ +#define B_q q1
+ +#define C_q q2
+ +#define D_q q3
+ +#define E_q q4
+ +#define F_q q5
+ +#define G_q q6
+ +#define H_q q7
+
- + /* Then load first word of candidate descriptor. It should be a pointer
- with word alignment and point to memory that can be read. */
- gptr = (unsigned int *) desc[0];
- if (((unsigned int) gptr & 3) != 0
- @@ -377,8 +383,8 @@ _dl_lookup_address (const void *address)
- /* See if descriptor requires resolution. The following trampoline is
- used in each global offset table for function resolution:
-
- - ldw 0(r20),r22
- - bv r0(r22)
- + ldw 0(r20),r21
- + bv r0(r21)
- ldw 4(r20),r21
- tramp: b,l .-12,r20
- depwi 0,31,2,r20
- @@ -389,7 +395,15 @@ _dl_lookup_address (const void *address)
- if (gptr[0] == 0xea9f1fdd /* b,l .-12,r20 */
- && gptr[1] == 0xd6801c1e /* depwi 0,31,2,r20 */
- && (ElfW(Addr)) gptr[2] == elf_machine_resolve ())
- - _dl_fixup ((struct link_map *) gptr[5], (ElfW(Word)) desc[1]);
- + {
- + struct link_map *l = (struct link_map *) gptr[5];
+
- + /* If gp has been resolved, we need to hunt for the relocation offset. */
- + if (!(reloc_arg & PA_GP_RELOC))
- + reloc_arg = _dl_fix_reloc_arg (addr, l);
+ +/* This implementation supports both memcpy and memmove and shares most code.
+ + It uses unaligned accesses and branchless sequences to keep the code small
+ + and simple and to improve performance.
+
- + _dl_fixup (l, reloc_arg);
- + }
-
- return (ElfW(Addr)) desc[0];
- }
- diff --git a/sysdeps/hppa/dl-machine.h b/sysdeps/hppa/dl-machine.h
- index 9e98366ea3..8ecff97706 100644
- --- a/sysdeps/hppa/dl-machine.h
- +++ b/sysdeps/hppa/dl-machine.h
- @@ -48,6 +48,14 @@
- #define GOT_FROM_PLT_STUB (4*4)
- #define PLT_ENTRY_SIZE (2*4)
-
- +/* The gp slot in the function descriptor contains the relocation offset
- + before resolution. To distinguish between a resolved gp value and an
- + unresolved relocation offset we set an unused bit in the relocation
- + offset. This would allow us to do a synchronized two-word update
- + using this bit (interlocked update), but instead of waiting for the
- + update we simply recompute the gp value given that we know the ip. */
- +#define PA_GP_RELOC 1
+ + Copies are split into 3 main cases: small copies of up to 32 bytes, medium
+ + copies of up to 128 bytes, and large copies. The overhead of the overlap
+ + check in memmove is negligible since it is only required for large copies.
+
- /* Initialize the function descriptor table before relocations */
- static inline void
- __hppa_init_bootstrap_fdesc_table (struct link_map *map)
- @@ -117,10 +125,28 @@ elf_machine_fixup_plt (struct link_map *map, lookup_t t,
- volatile Elf32_Addr *rfdesc = reloc_addr;
- /* map is the link_map for the caller, t is the link_map for the object
- being called */
- - rfdesc[1] = value.gp;
- - /* Need to ensure that the gp is visible before the code
- - entry point is updated */
- - rfdesc[0] = value.ip;
+ + Large copies use a software pipelined loop processing 64 bytes per
+ + iteration. The destination pointer is 16-byte aligned to minimize
+ + unaligned accesses. The loop tail is handled by always copying 64 bytes
+ + from the end. */
+
- + /* We would like the function descriptor to be double word aligned. This
- + helps performance (ip and gp then reside on the same cache line) and
- + we can update the pair atomically with a single store. The linker
- + now ensures this alignment but we still have to handle old code. */
- + if ((unsigned int)reloc_addr & 7)
- + {
- + /* Need to ensure that the gp is visible before the code
- + entry point is updated */
- + rfdesc[1] = value.gp;
- + atomic_full_barrier();
- + rfdesc[0] = value.ip;
- + }
- + else
- + {
- + /* Update pair atomically with floating point store. */
- + union { ElfW(Word) v[2]; double d; } u;
+ +ENTRY (__memcpy_simd)
+ + DELOUSE (0)
+ + DELOUSE (1)
+ + DELOUSE (2)
+
- + u.v[0] = value.ip;
- + u.v[1] = value.gp;
- + *(volatile double *)rfdesc = u.d;
- + }
- return value;
- }
-
- @@ -265,7 +291,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
- here. The trampoline code will load the proper
- LTP and pass the reloc offset to the fixup
- function. */
- - fptr->gp = iplt - jmprel;
- + fptr->gp = (iplt - jmprel) | PA_GP_RELOC;
- } /* r_sym != 0 */
- else
- {
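
The PA_GP_RELOC trick introduced above boils down to tagging the gp slot with an otherwise unused low bit. A minimal C sketch of that scheme, not part of the patch (the helper names are illustrative; only PA_GP_RELOC comes from the diff):

#include <stdint.h>

#define PA_GP_RELOC 1u

/* Written into fdesc->gp by elf_machine_runtime_setup above.  */
static inline uint32_t
tag_reloc_offset (uint32_t iplt, uint32_t jmprel)
{
  return (iplt - jmprel) | PA_GP_RELOC;
}

/* Bit set: the slot still holds a relocation offset.
   Bit clear: the slot holds a resolved gp value, which is always
   at least word aligned, so the bit is free for tagging.  */
static inline int
slot_is_unresolved (uint32_t gp_slot)
{
  return (gp_slot & PA_GP_RELOC) != 0;
}

/* Matches the reloc_offset definition in dl-runtime.c below.  */
static inline uint32_t
slot_reloc_offset (uint32_t gp_slot)
{
  return gp_slot & ~PA_GP_RELOC;
}
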
- diff --git a/sysdeps/hppa/dl-runtime.c b/sysdeps/hppa/dl-runtime.c
- new file mode 100644
- index 0000000000..885a3f1837
- --- /dev/null
- +++ b/sysdeps/hppa/dl-runtime.c
- @@ -0,0 +1,58 @@
- +/* On-demand PLT fixup for shared objects. HPPA version.
- + Copyright (C) 2019 Free Software Foundation, Inc.
- + This file is part of the GNU C Library.
+ + add srcend, src, count
+ + add dstend, dstin, count
+ + cmp count, 128
+ + b.hi L(copy_long)
+ + cmp count, 32
+ + b.hi L(copy32_128)
+
- + The GNU C Library is free software; you can redistribute it and/or
- + modify it under the terms of the GNU Lesser General Public
- + License as published by the Free Software Foundation; either
- + version 2.1 of the License, or (at your option) any later version.
+ + /* Small copies: 0..32 bytes. */
+ + cmp count, 16
+ + b.lo L(copy16)
+ + ldr A_q, [src]
+ + ldr B_q, [srcend, -16]
+ + str A_q, [dstin]
+ + str B_q, [dstend, -16]
+ + ret
+
- + The GNU C Library is distributed in the hope that it will be useful,
- + but WITHOUT ANY WARRANTY; without even the implied warranty of
- + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- + Lesser General Public License for more details.
+ + /* Copy 8-15 bytes. */
+ +L(copy16):
+ + tbz count, 3, L(copy8)
+ + ldr A_l, [src]
+ + ldr A_h, [srcend, -8]
+ + str A_l, [dstin]
+ + str A_h, [dstend, -8]
+ + ret
+
- + You should have received a copy of the GNU Lesser General Public
- + License along with the GNU C Library; if not, write to the Free
- + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- + 02111-1307 USA. */
+ + /* Copy 4-7 bytes. */
+ +L(copy8):
+ + tbz count, 2, L(copy4)
+ + ldr A_lw, [src]
+ + ldr B_lw, [srcend, -4]
+ + str A_lw, [dstin]
+ + str B_lw, [dstend, -4]
+ + ret
+
- +/* Clear PA_GP_RELOC bit in relocation offset. */
- +#define reloc_offset (reloc_arg & ~PA_GP_RELOC)
- +#define reloc_index (reloc_arg & ~PA_GP_RELOC) / sizeof (PLTREL)
+ + /* Copy 0..3 bytes using a branchless sequence. */
+ +L(copy4):
+ + cbz count, L(copy0)
+ + lsr tmp1, count, 1
+ + ldrb A_lw, [src]
+ + ldrb C_lw, [srcend, -1]
+ + ldrb B_lw, [src, tmp1]
+ + strb A_lw, [dstin]
+ + strb B_lw, [dstin, tmp1]
+ + strb C_lw, [dstend, -1]
+ +L(copy0):
+ + ret
+
- +#include <elf/dl-runtime.c>
+ + .p2align 4
+ + /* Medium copies: 33..128 bytes. */
+ +L(copy32_128):
+ + ldp A_q, B_q, [src]
+ + ldp C_q, D_q, [srcend, -32]
+ + cmp count, 64
+ + b.hi L(copy128)
+ + stp A_q, B_q, [dstin]
+ + stp C_q, D_q, [dstend, -32]
+ + ret
+
- +/* The caller has encountered a partially relocated function descriptor.
- + The gp of the descriptor has been updated, but not the ip. We find
- + the function descriptor again and compute the relocation offset and
- + return that to the caller. The caller will continue on to call
- + _dl_fixup with the relocation offset. */
+ + .p2align 4
+ + /* Copy 65..128 bytes. */
+ +L(copy128):
+ + ldp E_q, F_q, [src, 32]
+ + cmp count, 96
+ + b.ls L(copy96)
+ + ldp G_q, H_q, [srcend, -64]
+ + stp G_q, H_q, [dstend, -64]
+ +L(copy96):
+ + stp A_q, B_q, [dstin]
+ + stp E_q, F_q, [dstin, 32]
+ + stp C_q, D_q, [dstend, -32]
+ + ret
+
- +ElfW(Word)
- +attribute_hidden __attribute ((noinline)) ARCH_FIXUP_ATTRIBUTE
- +_dl_fix_reloc_arg (struct fdesc *fptr, struct link_map *l)
- +{
- + Elf32_Addr l_addr, iplt, jmprel, end_jmprel, r_type;
- + const Elf32_Rela *reloc;
+ + /* Align loop64 below to 16 bytes. */
+ + nop
+
- + l_addr = l->l_addr;
- + jmprel = D_PTR(l, l_info[DT_JMPREL]);
- + end_jmprel = jmprel + l->l_info[DT_PLTRELSZ]->d_un.d_val;
+ + /* Copy more than 128 bytes. */
+ +L(copy_long):
+ + /* Copy 16 bytes and then align src to 16-byte alignment. */
+ + ldr D_q, [src]
+ + and tmp1, src, 15
+ + bic src, src, 15
+ + sub dst, dstin, tmp1
+ + add count, count, tmp1 /* Count is now 16 too large. */
+ + ldp A_q, B_q, [src, 16]
+ + str D_q, [dstin]
+ + ldp C_q, D_q, [src, 48]
+ + subs count, count, 128 + 16 /* Test and readjust count. */
+ + b.ls L(copy64_from_end)
+ +L(loop64):
+ + stp A_q, B_q, [dst, 16]
+ + ldp A_q, B_q, [src, 80]
+ + stp C_q, D_q, [dst, 48]
+ + ldp C_q, D_q, [src, 112]
+ + add src, src, 64
+ + add dst, dst, 64
+ + subs count, count, 64
+ + b.hi L(loop64)
+
- + /* Look for the entry... */
- + for (iplt = jmprel; iplt < end_jmprel; iplt += sizeof (Elf32_Rela))
- + {
- + reloc = (const Elf32_Rela *) iplt;
- + r_type = ELF32_R_TYPE (reloc->r_info);
+ + /* Write the last iteration and copy 64 bytes from the end. */
+ +L(copy64_from_end):
+ + ldp E_q, F_q, [srcend, -64]
+ + stp A_q, B_q, [dst, 16]
+ + ldp A_q, B_q, [srcend, -32]
+ + stp C_q, D_q, [dst, 48]
+ + stp E_q, F_q, [dstend, -64]
+ + stp A_q, B_q, [dstend, -32]
+ + ret
+
- + if (__builtin_expect (r_type == R_PARISC_IPLT, 1)
- + && fptr == (struct fdesc *) (reloc->r_offset + l_addr))
- + /* Found entry. Return the reloc offset. */
- + return iplt - jmprel;
- + }
+ +END (__memcpy_simd)
+ +libc_hidden_builtin_def (__memcpy_simd)
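
As a side note, the 0..32 byte path above relies on the "copy from both ends" idiom: two fixed-size accesses, one anchored at each end, cover any length in the range without a loop. A rough C equivalent of the 16..32 byte case follows; it is a sketch, not part of the patch, and the function name is made up:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Assumes 16 <= n <= 32, mirroring the ldr/str A_q/B_q pair above.
   Both loads happen before both stores, so the same idiom is also
   safe for overlapping memmove.  */
static void
copy_16_to_32 (void *dst, const void *src, size_t n)
{
  uint8_t head[16], tail[16];

  memcpy (head, src, 16);
  memcpy (tail, (const uint8_t *) src + n - 16, 16);
  memcpy (dst, head, 16);
  memcpy ((uint8_t *) dst + n - 16, tail, 16);
}
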
+
- + /* Crash if we weren't passed a valid function pointer. */
- + ABORT_INSTRUCTION;
- + return 0;
- +}
- diff --git a/sysdeps/hppa/dl-trampoline.S b/sysdeps/hppa/dl-trampoline.S
- index 0114ca8b19..d0804b30c0 100644
- --- a/sysdeps/hppa/dl-trampoline.S
- +++ b/sysdeps/hppa/dl-trampoline.S
- @@ -31,7 +31,7 @@
- slow down __cffc when it attempts to call fixup to resolve function
- descriptor references. Please refer to gcc/gcc/config/pa/fptr.c
-
- - Enter with r19 = reloc offset, r20 = got-8, r21 = fixup ltp. */
- + Enter with r19 = reloc offset, r20 = got-8, r21 = fixup ltp, r22 = fp. */
-
- /* RELOCATION MARKER: bl to provide gcc's __cffc with fixup loc. */
- .text
- @@ -61,17 +61,20 @@ _dl_runtime_resolve:
- copy %sp, %r1 /* Copy previous sp */
- /* Save function result address (on entry) */
- stwm %r28,128(%sp)
- - /* Fillin some frame info to follow ABI */
- + /* Fill in some frame info to follow ABI */
- stw %r1,-4(%sp) /* Previous sp */
- stw %r21,-32(%sp) /* PIC register value */
-
- /* Save input floating point registers. This must be done
- in the new frame since the previous frame doesn't have
- enough space */
- - ldo -56(%sp),%r1
- + ldo -64(%sp),%r1
- fstd,ma %fr4,-8(%r1)
- fstd,ma %fr5,-8(%r1)
- fstd,ma %fr6,-8(%r1)
+
- + /* Test PA_GP_RELOC bit. */
- + bb,>= %r19,31,2f /* branch if not reloc offset */
- fstd,ma %fr7,-8(%r1)
-
- /* Set up args to fixup func, needs only two arguments */
- @@ -79,7 +82,7 @@ _dl_runtime_resolve:
- copy %r19,%r25 /* (2) reloc offset */
-
- /* Call the real address resolver. */
- - bl _dl_fixup,%rp
- +3: bl _dl_fixup,%rp
- copy %r21,%r19 /* set fixup func ltp */
-
- /* While the linker will set a function pointer to NULL when it
- @@ -102,7 +105,7 @@ _dl_runtime_resolve:
- copy %r29, %r19
-
- /* Reload arguments fp args */
- - ldo -56(%sp),%r1
- + ldo -64(%sp),%r1
- fldd,ma -8(%r1),%fr4
- fldd,ma -8(%r1),%fr5
- fldd,ma -8(%r1),%fr6
- @@ -129,6 +132,25 @@ _dl_runtime_resolve:
- bv %r0(%rp)
- ldo -128(%sp),%sp
-
- +2:
- + /* Set up args for _dl_fix_reloc_arg. */
- + copy %r22,%r26 /* (1) function pointer */
- + depi 0,31,2,%r26 /* clear least significant bits */
- + ldw 8+4(%r20),%r25 /* (2) got[1] == struct link_map */
+ +ENTRY (__memmove_simd)
+ + DELOUSE (0)
+ + DELOUSE (1)
+ + DELOUSE (2)
+
- + /* Save ltp and link map arg for _dl_fixup. */
- + stw %r21,-56(%sp) /* ltp */
- + stw %r25,-60(%sp) /* struct link map */
+ + add srcend, src, count
+ + add dstend, dstin, count
+ + cmp count, 128
+ + b.hi L(move_long)
+ + cmp count, 32
+ + b.hi L(copy32_128)
+
- + /* Find reloc offset. */
- + bl _dl_fix_reloc_arg,%rp
- + copy %r21,%r19 /* set func ltp */
+ + /* Small moves: 0..32 bytes. */
+ + cmp count, 16
+ + b.lo L(copy16)
+ + ldr A_q, [src]
+ + ldr B_q, [srcend, -16]
+ + str A_q, [dstin]
+ + str B_q, [dstend, -16]
+ + ret
+
- + /* Set up args for _dl_fixup. */
- + ldw -56(%sp),%r21 /* ltp */
- + ldw -60(%sp),%r26 /* (1) struct link map */
- + b 3b
- + copy %ret0,%r25 /* (2) reloc offset */
- .EXIT
- .PROCEND
- cfi_endproc
- @@ -153,7 +175,7 @@ _dl_runtime_profile:
- copy %sp, %r1 /* Copy previous sp */
- /* Save function result address (on entry) */
- stwm %r28,192(%sp)
- - /* Fillin some frame info to follow ABI */
- + /* Fill in some frame info to follow ABI */
- stw %r1,-4(%sp) /* Previous sp */
- stw %r21,-32(%sp) /* PIC register value */
-
- @@ -181,10 +203,11 @@ _dl_runtime_profile:
- fstd,ma %fr5,8(%r1)
- fstd,ma %fr6,8(%r1)
- fstd,ma %fr7,8(%r1)
- - /* 32-bit stack pointer and return register */
- - stw %sp,-56(%sp)
- - stw %r2,-52(%sp)
-
- + /* Test PA_GP_RELOC bit. */
- + bb,>= %r19,31,2f /* branch if not reloc offset */
- + /* 32-bit stack pointer */
- + stw %sp,-56(%sp)
-
- /* Set up args to fixup func, needs five arguments */
- ldw 8+4(%r20),%r26 /* (1) got[1] == struct link_map */
- @@ -197,7 +220,7 @@ _dl_runtime_profile:
- stw %r1, -52(%sp) /* (5) long int *framesizep */
-
- /* Call the real address resolver. */
- - bl _dl_profile_fixup,%rp
- +3: bl _dl_profile_fixup,%rp
- copy %r21,%r19 /* set fixup func ltp */
-
- /* Load up the returned function descriptor */
- @@ -215,7 +238,9 @@ _dl_runtime_profile:
- fldd,ma 8(%r1),%fr5
- fldd,ma 8(%r1),%fr6
- fldd,ma 8(%r1),%fr7
- - ldw -52(%sp),%rp
+ +L(move_long):
+ + /* Only use backward copy if there is an overlap. */
+ + sub tmp1, dstin, src
+ + cbz tmp1, L(move0)
+ + cmp tmp1, count
+ + b.hs L(copy_long)
+
- + /* Reload rp register -(192+20) without adjusting stack */
- + ldw -212(%sp),%rp
-
- /* Reload static link register -(192+16) without adjusting stack */
- ldw -208(%sp),%r29
- @@ -303,6 +328,33 @@ L(cont):
- ldw -20(%sp),%rp
- /* Return */
- bv,n 0(%r2)
- +
- +2:
- + /* Set up args for _dl_fix_reloc_arg. */
- + copy %r22,%r26 /* (1) function pointer */
- + depi 0,31,2,%r26 /* clear least significant bits */
- + ldw 8+4(%r20),%r25 /* (2) got[1] == struct link_map */
- +
- + /* Save ltp and link map arg for _dl_fixup. */
- + stw %r21,-92(%sp) /* ltp */
- + stw %r25,-116(%sp) /* struct link map */
+ + /* Large backwards copy for overlapping copies.
+ + Copy 16 bytes and then align srcend to 16-byte alignment. */
+ +L(copy_long_backwards):
+ + ldr D_q, [srcend, -16]
+ + and tmp1, srcend, 15
+ + bic srcend, srcend, 15
+ + sub count, count, tmp1
+ + ldp A_q, B_q, [srcend, -32]
+ + str D_q, [dstend, -16]
+ + ldp C_q, D_q, [srcend, -64]
+ + sub dstend, dstend, tmp1
+ + subs count, count, 128
+ + b.ls L(copy64_from_start)
+
- + /* Find reloc offset. */
- + bl _dl_fix_reloc_arg,%rp
- + copy %r21,%r19 /* set func ltp */
+ +L(loop64_backwards):
+ + str B_q, [dstend, -16]
+ + str A_q, [dstend, -32]
+ + ldp A_q, B_q, [srcend, -96]
+ + str D_q, [dstend, -48]
+ + str C_q, [dstend, -64]!
+ + ldp C_q, D_q, [srcend, -128]
+ + sub srcend, srcend, 64
+ + subs count, count, 64
+ + b.hi L(loop64_backwards)
+
- + /* Restore fixup ltp. */
- + ldw -92(%sp),%r21 /* ltp */
+ + /* Write the last iteration and copy 64 bytes from the start. */
+ +L(copy64_from_start):
+ + ldp E_q, F_q, [src, 32]
+ + stp A_q, B_q, [dstend, -32]
+ + ldp A_q, B_q, [src]
+ + stp C_q, D_q, [dstend, -64]
+ + stp E_q, F_q, [dstin, 32]
+ + stp A_q, B_q, [dstin]
+ +L(move0):
+ + ret
+
- + /* Set up args to fixup func, needs five arguments */
- + ldw -116(%sp),%r26 /* (1) struct link map */
- + copy %ret0,%r25 /* (2) reloc offset */
- + stw %r25,-120(%sp) /* Save reloc offset */
- + ldw -212(%sp),%r24 /* (3) profile_fixup needs rp */
- + ldo -56(%sp),%r23 /* (4) La_hppa_regs */
- + ldo -112(%sp), %r1
- + b 3b
- + stw %r1, -52(%sp) /* (5) long int *framesizep */
- .EXIT
- .PROCEND
- cfi_endproc
- diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
- index 8af0789a9c..4334ade2a0 100644
- --- a/sysdeps/i386/dl-machine.h
- +++ b/sysdeps/i386/dl-machine.h
- @@ -338,16 +338,22 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc,
- {
- # ifndef RTLD_BOOTSTRAP
- if (sym_map != map
- - && sym_map->l_type != lt_executable
- && !sym_map->l_relocated)
- {
- const char *strtab
- = (const char *) D_PTR (map, l_info[DT_STRTAB]);
- - _dl_error_printf ("\
- + if (sym_map->l_type == lt_executable)
- + _dl_fatal_printf ("\
- +%s: IFUNC symbol '%s' referenced in '%s' is defined in the executable \
- +and creates an unsatisfiable circular dependency.\n",
- + RTLD_PROGNAME, strtab + refsym->st_name,
- + map->l_name);
- + else
- + _dl_error_printf ("\
- %s: Relink `%s' with `%s' for IFUNC symbol `%s'\n",
- - RTLD_PROGNAME, map->l_name,
- - sym_map->l_name,
- - strtab + refsym->st_name);
- + RTLD_PROGNAME, map->l_name,
- + sym_map->l_name,
- + strtab + refsym->st_name);
- }
- # endif
- value = ((Elf32_Addr (*) (void)) value) ();
- diff --git a/sysdeps/i386/sysdep.h b/sysdeps/i386/sysdep.h
- index b4bcd8fb6c..6094af8fec 100644
- --- a/sysdeps/i386/sysdep.h
- +++ b/sysdeps/i386/sysdep.h
- @@ -61,7 +61,7 @@ lose: SYSCALL_PIC_SETUP \
+ +END (__memmove_simd)
+ +libc_hidden_builtin_def (__memmove_simd)
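
The overlap check in __memmove_simd above is compact because a single unsigned comparison decides the copy direction. A C sketch of the same logic, not part of the patch (names are illustrative):

#include <stddef.h>
#include <stdint.h>

static void *
move_bytes (void *dstv, const void *srcv, size_t count)
{
  unsigned char *dst = dstv;
  const unsigned char *src = srcv;
  uintptr_t dist = (uintptr_t) dst - (uintptr_t) src;

  if (dist == 0)                /* cbz tmp1, L(move0) */
    return dstv;
  if (dist >= count)            /* cmp tmp1, count; b.hs L(copy_long) */
    {
      /* dst is below src, or far enough above it: a forward copy
         never overwrites bytes it still has to read.  */
      for (size_t i = 0; i < count; i++)
        dst[i] = src[i];
    }
  else
    {
      /* dst sits inside [src, src + count): copy backwards.  */
      for (size_t i = count; i > 0; i--)
        dst[i - 1] = src[i - 1];
    }
  return dstv;
}
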
+ diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
+ index ed5a47f6f8..46a4cb3a54 100644
+ --- a/sysdeps/aarch64/multiarch/memmove.c
+ +++ b/sysdeps/aarch64/multiarch/memmove.c
+ @@ -29,6 +29,7 @@
+ extern __typeof (__redirect_memmove) __libc_memmove;
- # define SETUP_PIC_REG(reg) \
- .ifndef GET_PC_THUNK(reg); \
- - .section .gnu.linkonce.t.GET_PC_THUNK(reg),"ax",@progbits; \
- + .section .text.GET_PC_THUNK(reg),"axG",@progbits,GET_PC_THUNK(reg),comdat; \
- .globl GET_PC_THUNK(reg); \
- .hidden GET_PC_THUNK(reg); \
- .p2align 4; \
- @@ -97,7 +97,8 @@ GET_PC_THUNK(reg): \
+ extern __typeof (__redirect_memmove) __memmove_generic attribute_hidden;
+ +extern __typeof (__redirect_memmove) __memmove_simd attribute_hidden;
+ extern __typeof (__redirect_memmove) __memmove_thunderx attribute_hidden;
+ extern __typeof (__redirect_memmove) __memmove_thunderx2 attribute_hidden;
+ extern __typeof (__redirect_memmove) __memmove_falkor attribute_hidden;
+ @@ -40,7 +41,10 @@ libc_ifunc (__libc_memmove,
+ ? __memmove_falkor
+ : (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr)
+ ? __memmove_thunderx2
+ - : __memmove_generic))));
+ + : (IS_NEOVERSE_N1 (midr) || IS_NEOVERSE_N2 (midr)
+ + || IS_NEOVERSE_V1 (midr)
+ + ? __memmove_simd
+ + : __memmove_generic)))));
- # define SETUP_PIC_REG_STR(reg) \
- ".ifndef " GET_PC_THUNK_STR (reg) "\n" \
- - ".section .gnu.linkonce.t." GET_PC_THUNK_STR (reg) ",\"ax\",@progbits\n" \
- + ".section .text." GET_PC_THUNK_STR (reg) ",\"axG\",@progbits," \
- + GET_PC_THUNK_STR (reg) ",comdat\n" \
- ".globl " GET_PC_THUNK_STR (reg) "\n" \
- ".hidden " GET_PC_THUNK_STR (reg) "\n" \
- ".p2align 4\n" \
- diff --git a/sysdeps/ieee754/ldbl-96/Makefile b/sysdeps/ieee754/ldbl-96/Makefile
- index 995e90d6da..6030adf7e7 100644
- --- a/sysdeps/ieee754/ldbl-96/Makefile
- +++ b/sysdeps/ieee754/ldbl-96/Makefile
- @@ -17,5 +17,8 @@
- # <https://www.gnu.org/licenses/>.
+ # undef memmove
+ strong_alias (__libc_memmove, memmove);
+ diff --git a/sysdeps/aarch64/strcpy.S b/sysdeps/aarch64/strcpy.S
+ index 548130e413..a8ff52c072 100644
+ --- a/sysdeps/aarch64/strcpy.S
+ +++ b/sysdeps/aarch64/strcpy.S
+ @@ -234,8 +234,13 @@ L(entry_no_page_cross):
+ #endif
+ /* calculate the loc value */
+ cmeq datav.16b, datav.16b, #0
+ +#ifdef __AARCH64EB__
+ + mov data1, datav.d[1]
+ + mov data2, datav.d[0]
+ +#else
+ mov data1, datav.d[0]
+ mov data2, datav.d[1]
+ +#endif
+ cmp data1, 0
+ csel data1, data1, data2, ne
+ mov pos, 8
+ diff --git a/sysdeps/aarch64/strnlen.S b/sysdeps/aarch64/strnlen.S
+ index 5981247dd9..086a5c7e99 100644
+ --- a/sysdeps/aarch64/strnlen.S
+ +++ b/sysdeps/aarch64/strnlen.S
+ @@ -154,8 +154,13 @@ L(loop_end):
+ byte. */
- ifeq ($(subdir),math)
- -tests += test-canonical-ldbl-96 test-totalorderl-ldbl-96
- +tests += test-canonical-ldbl-96 test-totalorderl-ldbl-96 test-sinl-pseudo
- +ifeq ($(have-ssp),yes)
- +CFLAGS-test-sinl-pseudo.c += -fstack-protector-all
- endif
- +endif # $(subdir) == math
- diff --git a/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c b/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c
- index 5f742321ae..bcdf20179f 100644
- --- a/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c
- +++ b/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c
- @@ -210,6 +210,18 @@ __ieee754_rem_pio2l (long double x, long double *y)
- return 0;
- }
+ cmeq datav.16b, datav.16b, #0
+ +#ifdef __AARCH64EB__
+ + mov data1, datav.d[1]
+ + mov data2, datav.d[0]
+ +#else
+ mov data1, datav.d[0]
+ mov data2, datav.d[1]
+ +#endif
+ cmp data1, 0
+ csel data1, data1, data2, ne
+ sub len, src, srcin
+ diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h
+ index 604c489170..f1feb19dc7 100644
+ --- a/sysdeps/aarch64/sysdep.h
+ +++ b/sysdeps/aarch64/sysdep.h
+ @@ -45,7 +45,7 @@
+ #define ENTRY(name) \
+ .globl C_SYMBOL_NAME(name); \
+ .type C_SYMBOL_NAME(name),%function; \
+ - .align 4; \
+ + .p2align 6; \
+ C_LABEL(name) \
+ cfi_startproc; \
+ CALL_MCOUNT
+ diff --git a/sysdeps/arm/armv7/multiarch/memcpy_impl.S b/sysdeps/arm/armv7/multiarch/memcpy_impl.S
+ index bf4ac7077f..379bb56fc9 100644
+ --- a/sysdeps/arm/armv7/multiarch/memcpy_impl.S
+ +++ b/sysdeps/arm/armv7/multiarch/memcpy_impl.S
+ @@ -268,7 +268,7 @@ ENTRY(memcpy)
- + if ((i0 & 0x80000000) == 0)
- + {
- + /* Pseudo-zero and unnormal representations are not valid
- + representations of long double. We need to avoid stack
- + corruption in __kernel_rem_pio2, which expects input in a
- + particular normal form, but those representations do not need
- + to be consistently handled like any particular floating-point
- + value. */
- + y[1] = y[0] = __builtin_nanl ("");
- + return 0;
- + }
- +
- /* Split the 64 bits of the mantissa into three 24-bit integers
- stored in a double array. */
- exp = j0 - 23;
- diff --git a/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c b/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c
- new file mode 100644
- index 0000000000..f59b97769d
- --- /dev/null
- +++ b/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c
- @@ -0,0 +1,41 @@
- +/* Test sinl for pseudo-zeros and unnormals for ldbl-96 (bug 25487).
- + Copyright (C) 2020 Free Software Foundation, Inc.
- + This file is part of the GNU C Library.
- +
- + The GNU C Library is free software; you can redistribute it and/or
- + modify it under the terms of the GNU Lesser General Public
- + License as published by the Free Software Foundation; either
- + version 2.1 of the License, or (at your option) any later version.
- +
- + The GNU C Library is distributed in the hope that it will be useful,
- + but WITHOUT ANY WARRANTY; without even the implied warranty of
- + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- + Lesser General Public License for more details.
- +
- + You should have received a copy of the GNU Lesser General Public
- + License along with the GNU C Library; if not, see
- + <https://www.gnu.org/licenses/>. */
- +
- +#include <math.h>
- +#include <math_ldbl.h>
- +#include <stdint.h>
- +
- +static int
- +do_test (void)
- +{
- + for (int i = 0; i < 64; i++)
- + {
- + uint64_t sig = i == 63 ? 0 : 1ULL << i;
- + long double ld;
- + SET_LDOUBLE_WORDS (ld, 0x4141,
- + sig >> 32, sig & 0xffffffffULL);
- + /* The requirement is that no stack overflow occurs when the
- + pseudo-zero or unnormal goes through range reduction. */
- + volatile long double ldr;
- + ldr = sinl (ld);
- + (void) ldr;
- + }
- + return 0;
- +}
- +
- +#include <support/test-driver.c>
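
For reference, the encodings the new test feeds to sinl can also be built without the internal SET_LDOUBLE_WORDS macro. A standalone sketch, assuming the x86 ldbl-96 little-endian layout (not part of the patch):

#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  /* Nonzero exponent with the explicit integer bit (bit 63 of the
     mantissa) clear: an "unnormal", as in the test above.  */
  unsigned char bytes[sizeof (long double)] = { 0 };
  uint64_t mantissa = UINT64_C (1) << 32;
  uint16_t sign_exp = 0x4141;       /* same exponent the test uses */

  memcpy (bytes, &mantissa, 8);     /* mantissa is stored first */
  memcpy (bytes + 8, &sign_exp, 2);

  long double ld;
  memcpy (&ld, bytes, sizeof (long double));

  /* The only requirement is that this does not corrupt the stack.  */
  printf ("sinl (unnormal) = %Lg\n", sinl (ld));
  return 0;
}
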
- diff --git a/sysdeps/posix/system.c b/sysdeps/posix/system.c
- index e613e6a344..a03f478fc7 100644
- --- a/sysdeps/posix/system.c
- +++ b/sysdeps/posix/system.c
- @@ -101,7 +101,8 @@ cancel_handler (void *arg)
- static int
- do_system (const char *line)
- {
- - int status;
- + int status = -1;
- + int ret;
- pid_t pid;
- struct sigaction sa;
- #ifndef _LIBC_REENTRANT
- @@ -144,14 +145,14 @@ do_system (const char *line)
- __posix_spawnattr_setflags (&spawn_attr,
- POSIX_SPAWN_SETSIGDEF | POSIX_SPAWN_SETSIGMASK);
+ mov dst, dstin /* Preserve dstin, we need to return it. */
+ cmp count, #64
+ - bge .Lcpy_not_short
+ + bhs .Lcpy_not_short
+ /* Deal with small copies quickly by dropping straight into the
+ exit block. */
- - status = __posix_spawn (&pid, SHELL_PATH, 0, &spawn_attr,
- - (char *const[]){ (char*) SHELL_NAME,
- - (char*) "-c",
- - (char *) line, NULL },
- - __environ);
- + ret = __posix_spawn (&pid, SHELL_PATH, 0, &spawn_attr,
- + (char *const[]){ (char *) SHELL_NAME,
- + (char *) "-c",
- + (char *) line, NULL },
- + __environ);
- __posix_spawnattr_destroy (&spawn_attr);
+ @@ -351,10 +351,10 @@ ENTRY(memcpy)
- - if (status == 0)
- + if (ret == 0)
- {
- /* Cancellation results in cleanup handlers running as exceptions in
- the block where they were installed, so it is safe to reference
- @@ -186,6 +187,9 @@ do_system (const char *line)
- }
- DO_UNLOCK ();
+ 1:
+ subs tmp2, count, #64 /* Use tmp2 for count. */
+ - blt .Ltail63aligned
+ + blo .Ltail63aligned
- + if (ret != 0)
- + __set_errno (ret);
- +
- return status;
- }
+ cmp tmp2, #512
+ - bge .Lcpy_body_long
+ + bhs .Lcpy_body_long
- diff --git a/sysdeps/powerpc/powerpc32/sysdep.h b/sysdeps/powerpc/powerpc32/sysdep.h
- index 2ba009e919..829eec266a 100644
- --- a/sysdeps/powerpc/powerpc32/sysdep.h
- +++ b/sysdeps/powerpc/powerpc32/sysdep.h
- @@ -179,8 +179,8 @@ GOT_LABEL: ; \
- #else
- /* Position-dependent code does not require access to the GOT. */
- # define __GLRO(rOUT, rGOT, member, offset) \
- - lis rOUT,(member+LOWORD)@ha; \
- - lwz rOUT,(member+LOWORD)@l(rOUT)
- + lis rOUT,(member)@ha; \
- + lwz rOUT,(member)@l(rOUT)
- #endif /* PIC */
+ .Lcpy_body_medium: /* Count in tmp2. */
+ #ifdef USE_VFP
+ @@ -378,7 +378,7 @@ ENTRY(memcpy)
+ add src, src, #64
+ vstr d1, [dst, #56]
+ add dst, dst, #64
+ - bge 1b
+ + bhs 1b
+ tst tmp2, #0x3f
+ beq .Ldone
- #endif /* __ASSEMBLER__ */
- diff --git a/sysdeps/powerpc/powerpc64/backtrace.c b/sysdeps/powerpc/powerpc64/backtrace.c
- index 8a53a1088f..362a2b713c 100644
- --- a/sysdeps/powerpc/powerpc64/backtrace.c
- +++ b/sysdeps/powerpc/powerpc64/backtrace.c
- @@ -54,11 +54,22 @@ struct signal_frame_64 {
- /* We don't care about the rest, since the IP value is at 'uc' field. */
- };
+ @@ -412,7 +412,7 @@ ENTRY(memcpy)
+ ldrd A_l, A_h, [src, #64]!
+ strd A_l, A_h, [dst, #64]!
+ subs tmp2, tmp2, #64
+ - bge 1b
+ + bhs 1b
+ tst tmp2, #0x3f
+ bne 1f
+ ldr tmp2,[sp], #FRAME_SIZE
+ @@ -482,7 +482,7 @@ ENTRY(memcpy)
+ add src, src, #32
- +/* Test if the address is inside the signal trampoline code.
- + Up to and including kernel 5.8, returning from an interrupt or syscall to a
- + signal handler starts execution directly at the handler's entry point, with
- + LR set to the address of the sigreturn trampoline (the vDSO symbol).
- + Newer kernels branch to the signal handler from the trampoline instead, so
- + checking the stack trace against the vDSO entry point alone does not work in
- + that case.
- + The vDSO branches with a 'bctrl' instruction, so checking against both the
- + vDSO address itself and the next instruction should cover all kernel
- + versions. */
- static inline bool
- is_sigtramp_address (void *nip)
- {
- #ifdef HAVE_SIGTRAMP_RT64
- - if (nip == GLRO (dl_vdso_sigtramp_rt64))
- + if (nip == GLRO (dl_vdso_sigtramp_rt64) ||
- + nip == GLRO (dl_vdso_sigtramp_rt64) + 4)
- return true;
+ subs tmp2, tmp2, #prefetch_lines * 64 * 2
+ - blt 2f
+ + blo 2f
+ 1:
+ cpy_line_vfp d3, 0
+ cpy_line_vfp d4, 64
+ @@ -494,7 +494,7 @@ ENTRY(memcpy)
+ add dst, dst, #2 * 64
+ add src, src, #2 * 64
+ subs tmp2, tmp2, #prefetch_lines * 64
+ - bge 1b
+ + bhs 1b
+
+ 2:
+ cpy_tail_vfp d3, 0
+ @@ -615,8 +615,8 @@ ENTRY(memcpy)
+ 1:
+ pld [src, #(3 * 64)]
+ subs count, count, #64
+ - ldrmi tmp2, [sp], #FRAME_SIZE
+ - bmi .Ltail63unaligned
+ + ldrlo tmp2, [sp], #FRAME_SIZE
+ + blo .Ltail63unaligned
+ pld [src, #(4 * 64)]
+
+ #ifdef USE_NEON
+ @@ -633,7 +633,7 @@ ENTRY(memcpy)
+ neon_load_multi d0-d3, src
+ neon_load_multi d4-d7, src
+ subs count, count, #64
+ - bmi 2f
+ + blo 2f
+ 1:
+ pld [src, #(4 * 64)]
+ neon_store_multi d0-d3, dst
+ @@ -641,7 +641,7 @@ ENTRY(memcpy)
+ neon_store_multi d4-d7, dst
+ neon_load_multi d4-d7, src
+ subs count, count, #64
+ - bpl 1b
+ + bhs 1b
+ 2:
+ neon_store_multi d0-d3, dst
+ neon_store_multi d4-d7, dst
+ diff --git a/sysdeps/arm/be/nofpu/Implies b/sysdeps/arm/be/nofpu/Implies
+ new file mode 100644
+ index 0000000000..c90dd7fd5c
+ --- /dev/null
+ +++ b/sysdeps/arm/be/nofpu/Implies
+ @@ -0,0 +1 @@
+ +arm/nofpu
+ diff --git a/sysdeps/arm/le/nofpu/Implies b/sysdeps/arm/le/nofpu/Implies
+ new file mode 100644
+ index 0000000000..c90dd7fd5c
+ --- /dev/null
+ +++ b/sysdeps/arm/le/nofpu/Implies
+ @@ -0,0 +1 @@
+ +arm/nofpu
+ diff --git a/sysdeps/arm/memcpy.S b/sysdeps/arm/memcpy.S
+ index 510e8adaf2..bcfbc51d99 100644
+ --- a/sysdeps/arm/memcpy.S
+ +++ b/sysdeps/arm/memcpy.S
+ @@ -68,7 +68,7 @@ ENTRY(memcpy)
+ cfi_remember_state
+
+ subs r2, r2, #4
+ - blt 8f
+ + blo 8f
+ ands ip, r0, #3
+ PLD( pld [r1, #0] )
+ bne 9f
+ @@ -82,7 +82,7 @@ ENTRY(memcpy)
+ cfi_rel_offset (r6, 4)
+ cfi_rel_offset (r7, 8)
+ cfi_rel_offset (r8, 12)
+ - blt 5f
+ + blo 5f
+
+ CALGN( ands ip, r1, #31 )
+ CALGN( rsb r3, ip, #32 )
+ @@ -98,9 +98,9 @@ ENTRY(memcpy)
#endif
- return false;
- diff --git a/sysdeps/s390/configure b/sysdeps/s390/configure
- index fa46e9e351..e7f576338d 100644
- --- a/sysdeps/s390/configure
- +++ b/sysdeps/s390/configure
- @@ -123,7 +123,9 @@ void testinsn (char *buf)
- __asm__ (".machine \"arch13\" \n\t"
- ".machinemode \"zarch_nohighgprs\" \n\t"
- "lghi %%r0,16 \n\t"
- - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
- + "mvcrl 0(%0),32(%0) \n\t"
- + "vstrs %%v20,%%v20,%%v20,%%v20,0,2"
- + : : "a" (buf) : "memory", "r0");
- }
- EOF
- if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c
- @@ -271,7 +273,9 @@ else
- void testinsn (char *buf)
- {
- __asm__ ("lghi %%r0,16 \n\t"
- - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
- + "mvcrl 0(%0),32(%0) \n\t"
- + "vstrs %%v20,%%v20,%%v20,%%v20,0,2"
- + : : "a" (buf) : "memory", "r0");
- }
- EOF
- if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c
- diff --git a/sysdeps/s390/configure.ac b/sysdeps/s390/configure.ac
- index 3ed5a8ef87..5c3479e8cf 100644
- --- a/sysdeps/s390/configure.ac
- +++ b/sysdeps/s390/configure.ac
- @@ -88,7 +88,9 @@ void testinsn (char *buf)
- __asm__ (".machine \"arch13\" \n\t"
- ".machinemode \"zarch_nohighgprs\" \n\t"
- "lghi %%r0,16 \n\t"
- - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
- + "mvcrl 0(%0),32(%0) \n\t"
- + "vstrs %%v20,%%v20,%%v20,%%v20,0,2"
- + : : "a" (buf) : "memory", "r0");
- }
- EOF
- dnl test, if assembler supports S390 arch13 instructions
- @@ -195,7 +197,9 @@ cat > conftest.c <<\EOF
- void testinsn (char *buf)
- {
- __asm__ ("lghi %%r0,16 \n\t"
- - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
- + "mvcrl 0(%0),32(%0) \n\t"
- + "vstrs %%v20,%%v20,%%v20,%%v20,0,2"
- + : : "a" (buf) : "memory", "r0");
- }
- EOF
- dnl test, if assembler supports S390 arch13 zarch instructions as default
- diff --git a/sysdeps/s390/memmove.c b/sysdeps/s390/memmove.c
- index 5fc85e129f..ee59b5de14 100644
- --- a/sysdeps/s390/memmove.c
- +++ b/sysdeps/s390/memmove.c
- @@ -43,7 +43,7 @@ extern __typeof (__redirect_memmove) MEMMOVE_ARCH13 attribute_hidden;
- s390_libc_ifunc_expr (__redirect_memmove, memmove,
- ({
- s390_libc_ifunc_expr_stfle_init ();
- - (HAVE_MEMMOVE_ARCH13
- + (HAVE_MEMMOVE_ARCH13 && (hwcap & HWCAP_S390_VXRS_EXT2)
- && S390_IS_ARCH13_MIE3 (stfle_bits))
- ? MEMMOVE_ARCH13
- : (HAVE_MEMMOVE_Z13 && (hwcap & HWCAP_S390_VX))
- diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c
- index e6195c6e26..17c0cc3952 100644
- --- a/sysdeps/s390/multiarch/ifunc-impl-list.c
- +++ b/sysdeps/s390/multiarch/ifunc-impl-list.c
- @@ -171,7 +171,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
- IFUNC_IMPL (i, name, memmove,
- # if HAVE_MEMMOVE_ARCH13
- IFUNC_IMPL_ADD (array, i, memmove,
- - S390_IS_ARCH13_MIE3 (stfle_bits),
- + ((dl_hwcap & HWCAP_S390_VXRS_EXT2)
- + && S390_IS_ARCH13_MIE3 (stfle_bits)),
- MEMMOVE_ARCH13)
- # endif
- # if HAVE_MEMMOVE_Z13
- diff --git a/sysdeps/sh/be/sh4/fpu/Implies b/sysdeps/sh/be/sh4/fpu/Implies
- new file mode 100644
- index 0000000000..71b28ee1a4
- --- /dev/null
- +++ b/sysdeps/sh/be/sh4/fpu/Implies
- @@ -0,0 +1 @@
- +sh/sh4/fpu
- diff --git a/sysdeps/sh/le/sh4/fpu/Implies b/sysdeps/sh/le/sh4/fpu/Implies
- new file mode 100644
- index 0000000000..71b28ee1a4
- --- /dev/null
- +++ b/sysdeps/sh/le/sh4/fpu/Implies
- @@ -0,0 +1 @@
- +sh/sh4/fpu
- diff --git a/sysdeps/unix/make-syscalls.sh b/sysdeps/unix/make-syscalls.sh
- index c07626677f..4f6c3490a2 100644
- --- a/sysdeps/unix/make-syscalls.sh
- +++ b/sysdeps/unix/make-syscalls.sh
- @@ -30,6 +30,7 @@
- # P: optionally-NULL pointer to typed object (e.g., 3rd argument to sigaction)
- # s: non-NULL string (e.g., 1st arg to open)
- # S: optionally-NULL string (e.g., 1st arg to acct)
- +# U: unsigned long int (32-bit types are zero-extended to 64-bit types)
- # v: vararg scalar (e.g., optional 3rd arg to open)
- # V: byte-per-page vector (3rd arg to mincore)
- # W: wait status, optionally-NULL pointer to int (e.g., 2nd arg of wait4)
- @@ -184,6 +185,27 @@ while read file srcfile caller syscall args strong weak; do
- ?:?????????) nargs=9;;
- esac
- + # Derive the unsigned long int arguments from the argument signature
- + ulong_arg_1=0
- + ulong_arg_2=0
- + ulong_count=0
- + for U in $(echo $args | sed -e "s/.*:/:/" | grep -ob U)
- + do
- + ulong_count=$(expr $ulong_count + 1)
- + ulong_arg=$(echo $U | sed -e "s/:U//")
- + case $ulong_count in
- + 1)
- + ulong_arg_1=$ulong_arg
- + ;;
- + 2)
- + ulong_arg_2=$ulong_arg
- + ;;
- + *)
- + echo >&2 "$0: Too many unsigned long int arguments for syscall ($strong $weak)"
- + exit 2
- + esac
- + done
- +
- # Make sure only the first syscall rule is used, if multiple dirs
- # define the same syscall.
- echo ''
- @@ -245,6 +267,8 @@ while read file srcfile caller syscall args strong weak; do
- \$(make-target-directory)
- (echo '#define SYSCALL_NAME $syscall'; \\
- echo '#define SYSCALL_NARGS $nargs'; \\
- + echo '#define SYSCALL_ULONG_ARG_1 $ulong_arg_1'; \\
- + echo '#define SYSCALL_ULONG_ARG_2 $ulong_arg_2'; \\
- echo '#define SYSCALL_SYMBOL $strong'; \\
- echo '#define SYSCALL_NOERRNO $noerrno'; \\
- echo '#define SYSCALL_ERRVAL $errval'; \\
- diff --git a/sysdeps/unix/syscall-template.S b/sysdeps/unix/syscall-template.S
- index cf6c7a58fb..f807a8603f 100644
- --- a/sysdeps/unix/syscall-template.S
- +++ b/sysdeps/unix/syscall-template.S
- @@ -25,6 +25,12 @@
- defining a few macros:
- SYSCALL_NAME syscall name
- SYSCALL_NARGS number of arguments this call takes
- + SYSCALL_ULONG_ARG_1 the first unsigned long int argument this
- + call takes. 0 means that there are no
- + unsigned long int arguments.
- + SYSCALL_ULONG_ARG_2 the second unsigned long int argument this
- + call takes. 0 means that there is at most
- + one unsigned long int argument.
- SYSCALL_SYMBOL primary symbol name
- SYSCALL_NOERRNO 1 to define a no-errno version (see below)
- SYSCALL_ERRVAL 1 to define an error-value version (see below)
- @@ -44,9 +50,31 @@
- /* This indirection is needed so that SYMBOL gets macro-expanded. */
- #define syscall_hidden_def(SYMBOL) hidden_def (SYMBOL)
+ PLD( pld [r1, #0] )
+ -2: PLD( subs r2, r2, #96 )
+ +2: PLD( cmp r2, #96 )
+ PLD( pld [r1, #28] )
+ - PLD( blt 4f )
+ + PLD( blo 4f )
+ PLD( pld [r1, #60] )
+ PLD( pld [r1, #92] )
- -#define T_PSEUDO(SYMBOL, NAME, N) PSEUDO (SYMBOL, NAME, N)
- -#define T_PSEUDO_NOERRNO(SYMBOL, NAME, N) PSEUDO_NOERRNO (SYMBOL, NAME, N)
- -#define T_PSEUDO_ERRVAL(SYMBOL, NAME, N) PSEUDO_ERRVAL (SYMBOL, NAME, N)
- +/* If PSEUDOS_HAVE_ULONG_INDICES is defined, PSEUDO and T_PSEUDO macros
- + have 2 extra arguments for unsigned long int arguments:
- + Extra argument 1: Position of the first unsigned long int argument.
- + Extra argument 2: Position of the second unsigned long int argument.
- + */
- +#ifndef PSEUDOS_HAVE_ULONG_INDICES
- +# undef SYSCALL_ULONG_ARG_1
- +# define SYSCALL_ULONG_ARG_1 0
- +#endif
- +
- +#if SYSCALL_ULONG_ARG_1
- +# define T_PSEUDO(SYMBOL, NAME, N, U1, U2) \
- + PSEUDO (SYMBOL, NAME, N, U1, U2)
- +# define T_PSEUDO_NOERRNO(SYMBOL, NAME, N, U1, U2) \
- + PSEUDO_NOERRNO (SYMBOL, NAME, N, U1, U2)
- +# define T_PSEUDO_ERRVAL(SYMBOL, NAME, N, U1, U2) \
- + PSEUDO_ERRVAL (SYMBOL, NAME, N, U1, U2)
- +#else
- +# define T_PSEUDO(SYMBOL, NAME, N) \
- + PSEUDO (SYMBOL, NAME, N)
- +# define T_PSEUDO_NOERRNO(SYMBOL, NAME, N) \
- + PSEUDO_NOERRNO (SYMBOL, NAME, N)
- +# define T_PSEUDO_ERRVAL(SYMBOL, NAME, N) \
- + PSEUDO_ERRVAL (SYMBOL, NAME, N)
- +#endif
- #define T_PSEUDO_END(SYMBOL) PSEUDO_END (SYMBOL)
- #define T_PSEUDO_END_NOERRNO(SYMBOL) PSEUDO_END_NOERRNO (SYMBOL)
- #define T_PSEUDO_END_ERRVAL(SYMBOL) PSEUDO_END_ERRVAL (SYMBOL)
- @@ -56,7 +84,12 @@
- /* This kind of system call stub never returns an error.
- We return the return value register to the caller unexamined. */
+ @@ -108,9 +108,7 @@ ENTRY(memcpy)
+ 4: ldmia r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
+ subs r2, r2, #32
+ stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
+ - bge 3b
+ - PLD( cmn r2, #96 )
+ - PLD( bge 4b )
+ + bhs 3b
- +# if SYSCALL_ULONG_ARG_1
- +T_PSEUDO_NOERRNO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS,
- + SYSCALL_ULONG_ARG_1, SYSCALL_ULONG_ARG_2)
- +# else
- T_PSEUDO_NOERRNO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS)
- +# endif
- ret_NOERRNO
- T_PSEUDO_END_NOERRNO (SYSCALL_SYMBOL)
+ 5: ands ip, r2, #28
+ rsb ip, ip, #32
+ @@ -222,7 +220,7 @@ ENTRY(memcpy)
+ strbge r4, [r0], #1
+ subs r2, r2, ip
+ strb lr, [r0], #1
+ - blt 8b
+ + blo 8b
+ ands ip, r1, #3
+ beq 1b
- @@ -66,7 +99,12 @@ T_PSEUDO_END_NOERRNO (SYSCALL_SYMBOL)
- value, or zero for success. We may massage the kernel's return value
- to meet that ABI, but we never set errno here. */
+ @@ -236,7 +234,7 @@ ENTRY(memcpy)
+ .macro forward_copy_shift pull push
- +# if SYSCALL_ULONG_ARG_1
- +T_PSEUDO_ERRVAL (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS,
- + SYSCALL_ULONG_ARG_1, SYSCALL_ULONG_ARG_2)
- +# else
- T_PSEUDO_ERRVAL (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS)
- +# endif
- ret_ERRVAL
- T_PSEUDO_END_ERRVAL (SYSCALL_SYMBOL)
+ subs r2, r2, #28
+ - blt 14f
+ + blo 14f
- @@ -75,7 +113,12 @@ T_PSEUDO_END_ERRVAL (SYSCALL_SYMBOL)
- /* This is a "normal" system call stub: if there is an error,
- it returns -1 and sets errno. */
+ CALGN( ands ip, r1, #31 )
+ CALGN( rsb ip, ip, #32 )
+ @@ -253,9 +251,9 @@ ENTRY(memcpy)
+ cfi_rel_offset (r10, 16)
- +# if SYSCALL_ULONG_ARG_1
- +T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS,
- + SYSCALL_ULONG_ARG_1, SYSCALL_ULONG_ARG_2)
- +# else
- T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS)
- +# endif
- ret
- T_PSEUDO_END (SYSCALL_SYMBOL)
+ PLD( pld [r1, #0] )
+ - PLD( subs r2, r2, #96 )
+ + PLD( cmp r2, #96 )
+ PLD( pld [r1, #28] )
+ - PLD( blt 13f )
+ + PLD( blo 13f )
+ PLD( pld [r1, #60] )
+ PLD( pld [r1, #92] )
- diff --git a/sysdeps/unix/syscalls.list b/sysdeps/unix/syscalls.list
- index e28e801c7a..6b22b2cb45 100644
- --- a/sysdeps/unix/syscalls.list
- +++ b/sysdeps/unix/syscalls.list
- @@ -39,27 +39,27 @@ kill - kill i:ii __kill kill
- link - link i:ss __link link
- listen - listen i:ii __listen listen
- lseek - lseek i:iii __libc_lseek __lseek lseek
- -madvise - madvise i:pii __madvise madvise
- +madvise - madvise i:pUi __madvise madvise
- mkdir - mkdir i:si __mkdir mkdir
- -mmap - mmap b:aniiii __mmap mmap
- -mprotect - mprotect i:aii __mprotect mprotect
- -munmap - munmap i:ai __munmap munmap
- +mmap - mmap b:aUiiii __mmap mmap
- +mprotect - mprotect i:aUi __mprotect mprotect
- +munmap - munmap i:aU __munmap munmap
- open - open Ci:siv __libc_open __open open
- profil - profil i:piii __profil profil
- ptrace - ptrace i:iiii ptrace
- -read - read Ci:ibn __libc_read __read read
- -readlink - readlink i:spi __readlink readlink
- +read - read Ci:ibU __libc_read __read read
- +readlink - readlink i:spU __readlink readlink
- readv - readv Ci:ipi __readv readv
- reboot - reboot i:i reboot
- -recv - recv Ci:ibni __libc_recv recv
- -recvfrom - recvfrom Ci:ibniBN __libc_recvfrom __recvfrom recvfrom
- +recv - recv Ci:ibUi __libc_recv recv
- +recvfrom - recvfrom Ci:ibUiBN __libc_recvfrom __recvfrom recvfrom
- recvmsg - recvmsg Ci:ipi __libc_recvmsg __recvmsg recvmsg
- rename - rename i:ss rename
- rmdir - rmdir i:s __rmdir rmdir
- select - select Ci:iPPPP __select __libc_select select
- -send - send Ci:ibni __libc_send __send send
- +send - send Ci:ibUi __libc_send __send send
- sendmsg - sendmsg Ci:ipi __libc_sendmsg __sendmsg sendmsg
- -sendto - sendto Ci:ibnibn __libc_sendto __sendto sendto
- +sendto - sendto Ci:ibUibn __libc_sendto __sendto sendto
- setdomain - setdomainname i:si setdomainname
- setegid - setegid i:i __setegid setegid
- seteuid - seteuid i:i __seteuid seteuid
- @@ -94,5 +94,5 @@ uname - uname i:p __uname uname
- unlink - unlink i:s __unlink unlink
- utimes - utimes i:sp __utimes utimes
- vhangup - vhangup i:i vhangup
- -write - write Ci:ibn __libc_write __write write
- +write - write Ci:ibU __libc_write __write write
- writev - writev Ci:ipi __writev writev
- diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
- index f12b7b1a2d..5fbde369c3 100644
- --- a/sysdeps/unix/sysv/linux/Makefile
- +++ b/sysdeps/unix/sysv/linux/Makefile
- @@ -60,7 +60,9 @@ sysdep_routines += adjtimex clone umount umount2 readahead \
- setfsuid setfsgid epoll_pwait signalfd \
- eventfd eventfd_read eventfd_write prlimit \
- personality epoll_wait tee vmsplice splice \
- - open_by_handle_at mlock2 pkey_mprotect pkey_set pkey_get
- + open_by_handle_at mlock2 pkey_mprotect pkey_set pkey_get \
- + prctl \
- + process_vm_readv process_vm_writev
+ @@ -280,9 +278,7 @@ ENTRY(memcpy)
+ mov ip, ip, PULL #\pull
+ orr ip, ip, lr, PUSH #\push
+ stmia r0!, {r3, r4, r5, r6, r7, r8, r10, ip}
+ - bge 12b
+ - PLD( cmn r2, #96 )
+ - PLD( bge 13b )
+ + bhs 12b
- CFLAGS-gethostid.c = -fexceptions
- CFLAGS-tee.c = -fexceptions -fasynchronous-unwind-tables
- diff --git a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h
- index 9378387747..c8471947b9 100644
- --- a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h
- +++ b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h
- @@ -17,6 +17,7 @@
- #define __NR_clock_nanosleep 115
- #define __NR_clock_settime 112
- #define __NR_clone 220
- +#define __NR_clone3 435
- #define __NR_close 57
- #define __NR_connect 203
- #define __NR_copy_file_range 285
- diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
- index 1389cea1b3..346d045fb4 100644
- --- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
- +++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
- @@ -51,8 +51,12 @@
+ pop {r5 - r8, r10}
+ cfi_adjust_cfa_offset (-20)
+ diff --git a/sysdeps/arm/memmove.S b/sysdeps/arm/memmove.S
+ index 954037ef3a..0d07b76ee6 100644
+ --- a/sysdeps/arm/memmove.S
+ +++ b/sysdeps/arm/memmove.S
+ @@ -85,7 +85,7 @@ ENTRY(memmove)
+ add r1, r1, r2
+ add r0, r0, r2
+ subs r2, r2, #4
+ - blt 8f
+ + blo 8f
+ ands ip, r0, #3
+ PLD( pld [r1, #-4] )
+ bne 9f
+ @@ -99,7 +99,7 @@ ENTRY(memmove)
+ cfi_rel_offset (r6, 4)
+ cfi_rel_offset (r7, 8)
+ cfi_rel_offset (r8, 12)
+ - blt 5f
+ + blo 5f
- #define IS_PHECDA(midr) (MIDR_IMPLEMENTOR(midr) == 'h' \
- && MIDR_PARTNUM(midr) == 0x000)
- -#define IS_ARES(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \
- - && MIDR_PARTNUM(midr) == 0xd0c)
- +#define IS_NEOVERSE_N1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \
- + && MIDR_PARTNUM(midr) == 0xd0c)
- +#define IS_NEOVERSE_N2(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \
- + && MIDR_PARTNUM(midr) == 0xd49)
- +#define IS_NEOVERSE_V1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \
- + && MIDR_PARTNUM(midr) == 0xd40)
+ CALGN( ands ip, r1, #31 )
+ CALGN( sbcsne r4, ip, r2 ) @ C is always set here
+ @@ -114,9 +114,9 @@ ENTRY(memmove)
+ #endif
- #define IS_EMAG(midr) (MIDR_IMPLEMENTOR(midr) == 'P' \
- && MIDR_PARTNUM(midr) == 0x000)
- diff --git a/sysdeps/unix/sysv/linux/aarch64/localplt.data b/sysdeps/unix/sysv/linux/aarch64/localplt.data
- index a60053b914..08af68b5e8 100644
- --- a/sysdeps/unix/sysv/linux/aarch64/localplt.data
- +++ b/sysdeps/unix/sysv/linux/aarch64/localplt.data
- @@ -7,6 +7,9 @@ libc.so: malloc
- libc.so: memalign
- libc.so: realloc
- libm.so: matherr
- +# If outline atomics are used, libgcc (built outside of glibc) may
- +# call __getauxval using the PLT.
- +libc.so: __getauxval ?
- # The dynamic loader needs __tls_get_addr for TLS.
- ld.so: __tls_get_addr
- # The main malloc is interposed into the dynamic linker, for
- diff --git a/sysdeps/unix/sysv/linux/hppa/atomic-machine.h b/sysdeps/unix/sysv/linux/hppa/atomic-machine.h
- index 9d8ffbe860..bf61b66b70 100644
- --- a/sysdeps/unix/sysv/linux/hppa/atomic-machine.h
- +++ b/sysdeps/unix/sysv/linux/hppa/atomic-machine.h
- @@ -36,9 +36,37 @@ typedef uintptr_t uatomicptr_t;
- typedef intmax_t atomic_max_t;
- typedef uintmax_t uatomic_max_t;
+ PLD( pld [r1, #-4] )
+ -2: PLD( subs r2, r2, #96 )
+ +2: PLD( cmp r2, #96 )
+ PLD( pld [r1, #-32] )
+ - PLD( blt 4f )
+ + PLD( blo 4f )
+ PLD( pld [r1, #-64] )
+ PLD( pld [r1, #-96] )
- +#define atomic_full_barrier() __sync_synchronize ()
- +
- #define __HAVE_64B_ATOMICS 0
- #define USE_ATOMIC_COMPILER_BUILTINS 0
+ @@ -124,9 +124,7 @@ ENTRY(memmove)
+ 4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
+ subs r2, r2, #32
+ stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
+ - bge 3b
+ - PLD( cmn r2, #96 )
+ - PLD( bge 4b )
+ + bhs 3b
- +/* We use the compiler atomic load and store builtins as the generic
- + defines are not atomic. In particular, we need to use compare and
- + exchange for stores as the implementation is synthesized. */
- +void __atomic_link_error (void);
- +#define __atomic_check_size_ls(mem) \
- + if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && sizeof (*mem) != 4) \
- + __atomic_link_error ();
- +
- +#define atomic_load_relaxed(mem) \
- + ({ __atomic_check_size_ls((mem)); \
- + __atomic_load_n ((mem), __ATOMIC_RELAXED); })
- +#define atomic_load_acquire(mem) \
- + ({ __atomic_check_size_ls((mem)); \
- + __atomic_load_n ((mem), __ATOMIC_ACQUIRE); })
- +
- +#define atomic_store_relaxed(mem, val) \
- + do { \
- + __atomic_check_size_ls((mem)); \
- + __atomic_store_n ((mem), (val), __ATOMIC_RELAXED); \
- + } while (0)
- +#define atomic_store_release(mem, val) \
- + do { \
- + __atomic_check_size_ls((mem)); \
- + __atomic_store_n ((mem), (val), __ATOMIC_RELEASE); \
- + } while (0)
- +
- /* XXX Is this actually correct? */
- #define ATOMIC_EXCHANGE_USES_CAS 1
+ 5: ands ip, r2, #28
+ rsb ip, ip, #32
+ @@ -237,7 +235,7 @@ ENTRY(memmove)
+ strbge r4, [r0, #-1]!
+ subs r2, r2, ip
+ strb lr, [r0, #-1]!
+ - blt 8b
+ + blo 8b
+ ands ip, r1, #3
+ beq 1b
- diff --git a/sysdeps/unix/sysv/linux/microblaze/sysdep.h b/sysdeps/unix/sysv/linux/microblaze/sysdep.h
- index ed873d9dd4..796663a23a 100644
- --- a/sysdeps/unix/sysv/linux/microblaze/sysdep.h
- +++ b/sysdeps/unix/sysv/linux/microblaze/sysdep.h
- @@ -209,8 +209,8 @@ SYSCALL_ERROR_LABEL_DCL: \
+ @@ -251,7 +249,7 @@ ENTRY(memmove)
+ .macro backward_copy_shift push pull
- # define inline_syscall0(name,dummy) \
- ({ \
- - register long __ret __asm__("r3"); \
- - register long __r12 __asm__("r12") = name; \
- + register long int __ret __asm__("r3"); \
- + register long int __r12 __asm__("r12") = name; \
- __asm__ __volatile__( "brki r14,8; nop;" \
- : "=r"(__ret) \
- : "r"(__r12) \
- @@ -219,9 +219,10 @@ SYSCALL_ERROR_LABEL_DCL: \
+ subs r2, r2, #28
+ - blt 14f
+ + blo 14f
- # define inline_syscall1(name,arg1) \
- ({ \
- - register long __ret __asm__("r3"); \
- - register long __r12 __asm__("r12") = name; \
- - register long __r5 __asm__("r5") = (long)(arg1); \
- + long int __arg1 = (long int) (arg1); \
- + register long int __ret __asm__("r3"); \
- + register long int __r12 __asm__("r12") = name; \
- + register long int __r5 __asm__("r5") = __arg1; \
- __asm__ __volatile__( "brki r14,8; nop;" \
- : "=r"(__ret) \
- : "r"(__r5), "r"(__r12) \
- @@ -230,10 +231,12 @@ SYSCALL_ERROR_LABEL_DCL: \
+ CALGN( ands ip, r1, #31 )
+ CALGN( rsb ip, ip, #32 )
+ @@ -268,9 +266,9 @@ ENTRY(memmove)
+ cfi_rel_offset (r10, 16)
- # define inline_syscall2(name,arg1,arg2) \
- ({ \
- - register long __ret __asm__("r3"); \
- - register long __r12 __asm__("r12") = name; \
- - register long __r5 __asm__("r5") = (long)(arg1); \
- - register long __r6 __asm__("r6") = (long)(arg2); \
- + long int __arg1 = (long int) (arg1); \
- + long int __arg2 = (long int) (arg2); \
- + register long int __ret __asm__("r3"); \
- + register long int __r12 __asm__("r12") = name; \
- + register long int __r5 __asm__("r5") = __arg1; \
- + register long int __r6 __asm__("r6") = __arg2; \
- __asm__ __volatile__( "brki r14,8; nop;" \
- : "=r"(__ret) \
- : "r"(__r5), "r"(__r6), "r"(__r12) \
- @@ -243,11 +246,14 @@ SYSCALL_ERROR_LABEL_DCL: \
+ PLD( pld [r1, #-4] )
+ - PLD( subs r2, r2, #96 )
+ + PLD( cmp r2, #96 )
+ PLD( pld [r1, #-32] )
+ - PLD( blt 13f )
+ + PLD( blo 13f )
+ PLD( pld [r1, #-64] )
+ PLD( pld [r1, #-96] )
- # define inline_syscall3(name,arg1,arg2,arg3) \
- ({ \
- - register long __ret __asm__("r3"); \
- - register long __r12 __asm__("r12") = name; \
- - register long __r5 __asm__("r5") = (long)(arg1); \
- - register long __r6 __asm__("r6") = (long)(arg2); \
- - register long __r7 __asm__("r7") = (long)(arg3); \
- + long int __arg1 = (long int) (arg1); \
- + long int __arg2 = (long int) (arg2); \
- + long int __arg3 = (long int) (arg3); \
- + register long int __ret __asm__("r3"); \
- + register long int __r12 __asm__("r12") = name; \
- + register long int __r5 __asm__("r5") = __arg1; \
- + register long int __r6 __asm__("r6") = __arg2; \
- + register long int __r7 __asm__("r7") = __arg3; \
- __asm__ __volatile__( "brki r14,8; nop;" \
- : "=r"(__ret) \
- : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r12) \
- @@ -257,12 +263,16 @@ SYSCALL_ERROR_LABEL_DCL: \
-
- # define inline_syscall4(name,arg1,arg2,arg3,arg4) \
- ({ \
- - register long __ret __asm__("r3"); \
- - register long __r12 __asm__("r12") = name; \
- - register long __r5 __asm__("r5") = (long)(arg1); \
- - register long __r6 __asm__("r6") = (long)(arg2); \
- - register long __r7 __asm__("r7") = (long)(arg3); \
- - register long __r8 __asm__("r8") = (long)(arg4); \
- + long int __arg1 = (long int) (arg1); \
- + long int __arg2 = (long int) (arg2); \
- + long int __arg3 = (long int) (arg3); \
- + long int __arg4 = (long int) (arg4); \
- + register long int __ret __asm__("r3"); \
- + register long int __r12 __asm__("r12") = name; \
- + register long int __r5 __asm__("r5") = __arg1; \
- + register long int __r6 __asm__("r6") = __arg2; \
- + register long int __r7 __asm__("r7") = __arg3; \
- + register long int __r8 __asm__("r8") = __arg4; \
- __asm__ __volatile__( "brki r14,8; nop;" \
- : "=r"(__ret) \
- : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r8),"r"(__r12) \
- @@ -272,13 +282,18 @@ SYSCALL_ERROR_LABEL_DCL: \
-
- # define inline_syscall5(name,arg1,arg2,arg3,arg4,arg5) \
- ({ \
- - register long __ret __asm__("r3"); \
- - register long __r12 __asm__("r12") = name; \
- - register long __r5 __asm__("r5") = (long)(arg1); \
- - register long __r6 __asm__("r6") = (long)(arg2); \
- - register long __r7 __asm__("r7") = (long)(arg3); \
- - register long __r8 __asm__("r8") = (long)(arg4); \
- - register long __r9 __asm__("r9") = (long)(arg5); \
- + long int __arg1 = (long int) (arg1); \
- + long int __arg2 = (long int) (arg2); \
- + long int __arg3 = (long int) (arg3); \
- + long int __arg4 = (long int) (arg4); \
- + long int __arg5 = (long int) (arg5); \
- + register long int __ret __asm__("r3"); \
- + register long int __r12 __asm__("r12") = name; \
- + register long int __r5 __asm__("r5") = __arg1; \
- + register long int __r6 __asm__("r6") = __arg2; \
- + register long int __r7 __asm__("r7") = __arg3; \
- + register long int __r8 __asm__("r8") = __arg4; \
- + register long int __r9 __asm__("r9") = __arg5; \
- __asm__ __volatile__( "brki r14,8; nop;" \
- : "=r"(__ret) \
- : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r8),"r"(__r9), "r"(__r12) \
- @@ -288,14 +303,20 @@ SYSCALL_ERROR_LABEL_DCL: \
-
- # define inline_syscall6(name,arg1,arg2,arg3,arg4,arg5,arg6) \
- ({ \
- - register long __ret __asm__("r3"); \
- - register long __r12 __asm__("r12") = name; \
- - register long __r5 __asm__("r5") = (long)(arg1); \
- - register long __r6 __asm__("r6") = (long)(arg2); \
- - register long __r7 __asm__("r7") = (long)(arg3); \
- - register long __r8 __asm__("r8") = (long)(arg4); \
- - register long __r9 __asm__("r9") = (long)(arg5); \
- - register long __r10 __asm__("r10") = (long)(arg6); \
- + long int __arg1 = (long int) (arg1); \
- + long int __arg2 = (long int) (arg2); \
- + long int __arg3 = (long int) (arg3); \
- + long int __arg4 = (long int) (arg4); \
- + long int __arg5 = (long int) (arg5); \
- + long int __arg6 = (long int) (arg6); \
- + register long int __ret __asm__("r3"); \
- + register long int __r12 __asm__("r12") = name; \
- + register long int __r5 __asm__("r5") = __arg1; \
- + register long int __r6 __asm__("r6") = __arg2; \
- + register long int __r7 __asm__("r7") = __arg3; \
- + register long int __r8 __asm__("r8") = __arg4; \
- + register long int __r9 __asm__("r9") = __arg5; \
- + register long int __r10 __asm__("r10") = __arg6; \
- __asm__ __volatile__( "brki r14,8; nop;" \
- : "=r"(__ret) \
- : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r8),"r"(__r9), "r"(__r10), \
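
The microblaze changes above look mechanical, but the extra temporaries have a purpose. The sketch below illustrates the presumed motivation (this rationale is my assumption, and the macro names are invented):

/* If an argument expression itself contains a function call, evaluating
   it after r5 has already been bound can clobber r5 before the syscall
   asm runs.  Evaluating into plain temporaries first avoids that.  */
#define SKETCH_SYSCALL2_FRAGILE(a1, a2)                       \
  ({                                                          \
    register long int __r5 __asm__ ("r5") = (long int) (a1);  \
    register long int __r6 __asm__ ("r6") = (long int) (a2);  \
    __r5 + __r6;  /* real macro would emit the syscall asm */ \
  })

#define SKETCH_SYSCALL2_SAFE(a1, a2)                          \
  ({                                                          \
    long int __a1 = (long int) (a1);  /* calls happen here */ \
    long int __a2 = (long int) (a2);                          \
    register long int __r5 __asm__ ("r5") = __a1;             \
    register long int __r6 __asm__ ("r6") = __a2;             \
    __r5 + __r6;                                              \
  })
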
- diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S
- index b2bbf10181..ff445a5406 100644
- --- a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S
- +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S
- @@ -22,9 +22,9 @@
- .text
- .set nomips16
+ @@ -295,9 +293,7 @@ ENTRY(memmove)
+ mov r4, r4, PUSH #\push
+ orr r4, r4, r3, PULL #\pull
+ stmdb r0!, {r4 - r8, r10, ip, lr}
+ - bge 12b
+ - PLD( cmn r2, #96 )
+ - PLD( bge 13b )
+ + bhs 12b
- -/* long long __mips_syscall5 (long arg1, long arg2, long arg3, long arg4,
- - long arg5,
- - long number) */
- +/* long long int __mips_syscall5 (long int arg1, long int arg2, long int arg3,
- + long int arg4, long int arg5,
- + long int number) */
+ pop {r5 - r8, r10}
+ cfi_adjust_cfa_offset (-20)
+ diff --git a/sysdeps/generic/unwind-arch.h b/sysdeps/generic/unwind-arch.h
+ new file mode 100644
+ index 0000000000..d712e5e11d
+ --- /dev/null
+ +++ b/sysdeps/generic/unwind-arch.h
+ @@ -0,0 +1,30 @@
+ +/* Return backtrace of current program state. Arch-specific bits.
+ + Copyright (C) 2020 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#ifndef _UNWIND_ARCH_H
+ +#define _UNWIND_ARCH_H
+ +
+ +#include <unwind.h>
+ +
+ +static inline void *
+ +unwind_arch_adjustment (void *prev, void *addr)
+ +{
+ + return addr;
+ +}
+ +
+ +#endif
+ diff --git a/sysdeps/hppa/dl-fptr.c b/sysdeps/hppa/dl-fptr.c
+ index 0a37397284..25ca8f8463 100644
+ --- a/sysdeps/hppa/dl-fptr.c
+ +++ b/sysdeps/hppa/dl-fptr.c
+ @@ -172,8 +172,8 @@ make_fdesc (ElfW(Addr) ip, ElfW(Addr) gp)
+ }
- ENTRY(__mips_syscall5)
- lw v0, 20(sp)
- diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S
- index 572d7c1137..2b4a3117d1 100644
- --- a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S
- +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S
- @@ -22,9 +22,9 @@
- .text
- .set nomips16
+ install:
+ - fdesc->ip = ip;
+ fdesc->gp = gp;
+ + fdesc->ip = ip;
- -/* long long __mips_syscall6 (long arg1, long arg2, long arg3, long arg4,
- - long arg5, long arg6,
- - long number) */
- +/* long long int __mips_syscall6 (long int arg1, long int arg2, long int arg3,
- + long int arg4, long int arg5, long int arg6,
- + long int number) */
+ return (ElfW(Addr)) fdesc;
+ }
+ @@ -350,7 +350,9 @@ ElfW(Addr)
+ _dl_lookup_address (const void *address)
+ {
+ ElfW(Addr) addr = (ElfW(Addr)) address;
+ - unsigned int *desc, *gptr;
+ + ElfW(Word) reloc_arg;
+ + volatile unsigned int *desc;
+ + unsigned int *gptr;
- ENTRY(__mips_syscall6)
- lw v0, 24(sp)
- diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S
- index 05164cb253..2723bbb138 100644
- --- a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S
- +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S
- @@ -22,9 +22,10 @@
- .text
- .set nomips16
+ /* Return ADDR if the least-significant two bits of ADDR are not consistent
+ with ADDR being a linker defined function pointer. The normal value for
+ @@ -367,7 +369,11 @@ _dl_lookup_address (const void *address)
+ if (!_dl_read_access_allowed (desc))
+ return addr;
- -/* long long __mips_syscall7 (long arg1, long arg2, long arg3, long arg4,
- - long arg5, long arg6, long arg7,
- - long number) */
- +/* long long int __mips_syscall7 (long int arg1, long int arg2, long int arg3,
- + long int arg4, long int arg5, long int arg6,
- + long int arg7,
- + long int number) */
+ - /* Load first word of candidate descriptor. It should be a pointer
+ + /* First load the relocation offset. */
+ + reloc_arg = (ElfW(Word)) desc[1];
+ + atomic_full_barrier();
+ +
+ + /* Then load first word of candidate descriptor. It should be a pointer
+ with word alignment and point to memory that can be read. */
+ gptr = (unsigned int *) desc[0];
+ if (((unsigned int) gptr & 3) != 0
+ @@ -377,8 +383,8 @@ _dl_lookup_address (const void *address)
+ /* See if descriptor requires resolution. The following trampoline is
+ used in each global offset table for function resolution:
- ENTRY(__mips_syscall7)
- lw v0, 28(sp)
- diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h
- index 9bf551ace8..f23ede0259 100644
- --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h
- +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h
- @@ -19,51 +19,57 @@
- #ifndef MIPS16_SYSCALL_H
- #define MIPS16_SYSCALL_H 1
+ - ldw 0(r20),r22
+ - bv r0(r22)
+ + ldw 0(r20),r21
+ + bv r0(r21)
+ ldw 4(r20),r21
+ tramp: b,l .-12,r20
+ depwi 0,31,2,r20
+ @@ -389,7 +395,15 @@ _dl_lookup_address (const void *address)
+ if (gptr[0] == 0xea9f1fdd /* b,l .-12,r20 */
+ && gptr[1] == 0xd6801c1e /* depwi 0,31,2,r20 */
+ && (ElfW(Addr)) gptr[2] == elf_machine_resolve ())
+ - _dl_fixup ((struct link_map *) gptr[5], (ElfW(Word)) desc[1]);
+ + {
+ + struct link_map *l = (struct link_map *) gptr[5];
+ +
+ + /* If gp has been resolved, we need to hunt for relocation offset. */
+ + if (!(reloc_arg & PA_GP_RELOC))
+ + reloc_arg = _dl_fix_reloc_arg (addr, l);
+ +
+ + _dl_fixup (l, reloc_arg);
+ + }
- -long long __nomips16 __mips16_syscall0 (long number);
- +long long int __nomips16 __mips16_syscall0 (long int number);
- #define __mips16_syscall0(dummy, number) \
- - __mips16_syscall0 ((long) (number))
- + __mips16_syscall0 ((long int) (number))
+ return (ElfW(Addr)) desc[0];
+ }
+ diff --git a/sysdeps/hppa/dl-machine.h b/sysdeps/hppa/dl-machine.h
+ index 9e98366ea3..8ecff97706 100644
+ --- a/sysdeps/hppa/dl-machine.h
+ +++ b/sysdeps/hppa/dl-machine.h
+ @@ -48,6 +48,14 @@
+ #define GOT_FROM_PLT_STUB (4*4)
+ #define PLT_ENTRY_SIZE (2*4)
- -long long __nomips16 __mips16_syscall1 (long a0,
- - long number);
- +long long int __nomips16 __mips16_syscall1 (long int a0,
- + long int number);
- #define __mips16_syscall1(a0, number) \
- - __mips16_syscall1 ((long) (a0), \
- - (long) (number))
- + __mips16_syscall1 ((long int) (a0), \
- + (long int) (number))
-
- -long long __nomips16 __mips16_syscall2 (long a0, long a1,
- - long number);
- +long long int __nomips16 __mips16_syscall2 (long int a0, long int a1,
- + long int number);
- #define __mips16_syscall2(a0, a1, number) \
- - __mips16_syscall2 ((long) (a0), (long) (a1), \
- - (long) (number))
- + __mips16_syscall2 ((long int) (a0), (long int) (a1), \
- + (long int) (number))
-
- -long long __nomips16 __mips16_syscall3 (long a0, long a1, long a2,
- - long number);
- +long long int __nomips16 __mips16_syscall3 (long int a0, long int a1,
- + long int a2,
- + long int number);
- #define __mips16_syscall3(a0, a1, a2, number) \
- - __mips16_syscall3 ((long) (a0), (long) (a1), (long) (a2), \
- - (long) (number))
- + __mips16_syscall3 ((long int) (a0), (long int) (a1), \
- + (long int) (a2), \
- + (long int) (number))
-
- -long long __nomips16 __mips16_syscall4 (long a0, long a1, long a2, long a3,
- - long number);
- +long long int __nomips16 __mips16_syscall4 (long int a0, long int a1,
- + long int a2, long int a3,
- + long int number);
- #define __mips16_syscall4(a0, a1, a2, a3, number) \
- - __mips16_syscall4 ((long) (a0), (long) (a1), (long) (a2), \
- - (long) (a3), \
- - (long) (number))
- + __mips16_syscall4 ((long int) (a0), (long int) (a1), \
- + (long int) (a2), (long int) (a3), \
- + (long int) (number))
+ +/* The gp slot in the function descriptor contains the relocation offset
+ + before resolution. To distinguish between a resolved gp value and an
+ + unresolved relocation offset we set an unused bit in the relocation
+ + offset. This would allow us to do a synchronized two-word update
+ + using this bit (interlocked update), but instead of waiting for the
+ + update we simply recompute the gp value given that we know the ip. */
+ +#define PA_GP_RELOC 1
+ +
+ /* Initialize the function descriptor table before relocations */
+ static inline void
+ __hppa_init_bootstrap_fdesc_table (struct link_map *map)
+ @@ -117,10 +125,28 @@ elf_machine_fixup_plt (struct link_map *map, lookup_t t,
+ volatile Elf32_Addr *rfdesc = reloc_addr;
+ /* map is the link_map for the caller, t is the link_map for the object
+ being called */
+ - rfdesc[1] = value.gp;
+ - /* Need to ensure that the gp is visible before the code
+ - entry point is updated */
+ - rfdesc[0] = value.ip;
+ +
+ + /* We would like the function descriptor to be double word aligned. This
+ + helps performance (ip and gp then reside on the same cache line) and
+ + we can update the pair atomically with a single store. The linker
+ + now ensures this alignment but we still have to handle old code. */
+ + if ((unsigned int)reloc_addr & 7)
+ + {
+ + /* Need to ensure that the gp is visible before the code
+ + entry point is updated */
+ + rfdesc[1] = value.gp;
+ + atomic_full_barrier();
+ + rfdesc[0] = value.ip;
+ + }
+ + else
+ + {
+ + /* Update pair atomically with floating point store. */
+ + union { ElfW(Word) v[2]; double d; } u;
+ +
+ + u.v[0] = value.ip;
+ + u.v[1] = value.gp;
+ + *(volatile double *)rfdesc = u.d;
+ + }
+ return value;
+ }
- /* The remaining ones use regular MIPS wrappers. */
+ @@ -265,7 +291,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+ here. The trampoline code will load the proper
+ LTP and pass the reloc offset to the fixup
+ function. */
+ - fptr->gp = iplt - jmprel;
+ + fptr->gp = (iplt - jmprel) | PA_GP_RELOC;
+ } /* r_sym != 0 */
+ else
+ {
+ diff --git a/sysdeps/hppa/dl-runtime.c b/sysdeps/hppa/dl-runtime.c
+ new file mode 100644
+ index 0000000000..885a3f1837
+ --- /dev/null
+ +++ b/sysdeps/hppa/dl-runtime.c
+ @@ -0,0 +1,58 @@
+ +/* On-demand PLT fixup for shared objects. HPPA version.
+ + Copyright (C) 2019 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, write to the Free
+ + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ + 02111-1307 USA. */
+ +
+ +/* Clear PA_GP_RELOC bit in relocation offset. */
+ +#define reloc_offset (reloc_arg & ~PA_GP_RELOC)
+ +#define reloc_index (reloc_arg & ~PA_GP_RELOC) / sizeof (PLTREL)
+ +
+ +#include <elf/dl-runtime.c>
+ +
+ +/* The caller has encountered a partially relocated function descriptor.
+ + The gp of the descriptor has been updated, but not the ip. We find
+ + the function descriptor again and compute the relocation offset and
+ + return that to the caller. The caller will continue on to call
+ + _dl_fixup with the relocation offset. */
+ +
+ +ElfW(Word)
+ +attribute_hidden __attribute ((noinline)) ARCH_FIXUP_ATTRIBUTE
+ +_dl_fix_reloc_arg (struct fdesc *fptr, struct link_map *l)
+ +{
+ + Elf32_Addr l_addr, iplt, jmprel, end_jmprel, r_type;
+ + const Elf32_Rela *reloc;
+ +
+ + l_addr = l->l_addr;
+ + jmprel = D_PTR(l, l_info[DT_JMPREL]);
+ + end_jmprel = jmprel + l->l_info[DT_PLTRELSZ]->d_un.d_val;
+ +
+ + /* Look for the entry... */
+ + for (iplt = jmprel; iplt < end_jmprel; iplt += sizeof (Elf32_Rela))
+ + {
+ + reloc = (const Elf32_Rela *) iplt;
+ + r_type = ELF32_R_TYPE (reloc->r_info);
+ +
+ + if (__builtin_expect (r_type == R_PARISC_IPLT, 1)
+ + && fptr == (struct fdesc *) (reloc->r_offset + l_addr))
+ + /* Found entry. Return the reloc offset. */
+ + return iplt - jmprel;
+ + }
+ +
+ + /* Crash if we weren't passed a valid function pointer. */
+ + ABORT_INSTRUCTION;
+ + return 0;
+ +}
+ diff --git a/sysdeps/hppa/dl-trampoline.S b/sysdeps/hppa/dl-trampoline.S
+ index 0114ca8b19..d0804b30c0 100644
+ --- a/sysdeps/hppa/dl-trampoline.S
+ +++ b/sysdeps/hppa/dl-trampoline.S
+ @@ -31,7 +31,7 @@
+ slow down __cffc when it attempts to call fixup to resolve function
+ descriptor references. Please refer to gcc/gcc/config/pa/fptr.c
- #define __mips16_syscall5(a0, a1, a2, a3, a4, number) \
- - __mips_syscall5 ((long) (a0), (long) (a1), (long) (a2), \
- - (long) (a3), (long) (a4), \
- - (long) (number))
- + __mips_syscall5 ((long int) (a0), (long int) (a1), \
- + (long int) (a2), (long int) (a3), \
- + (long int) (a4), \
- + (long int) (number))
+ - Enter with r19 = reloc offset, r20 = got-8, r21 = fixup ltp. */
+ + Enter with r19 = reloc offset, r20 = got-8, r21 = fixup ltp, r22 = fp. */
- #define __mips16_syscall6(a0, a1, a2, a3, a4, a5, number) \
- - __mips_syscall6 ((long) (a0), (long) (a1), (long) (a2), \
- - (long) (a3), (long) (a4), (long) (a5), \
- - (long) (number))
- + __mips_syscall6 ((long int) (a0), (long int) (a1), \
- + (long int) (a2), (long int) (a3), \
- + (long int) (a4), (long int) (a5), \
- + (long int) (number))
+ /* RELOCATION MARKER: bl to provide gcc's __cffc with fixup loc. */
+ .text
+ @@ -61,17 +61,20 @@ _dl_runtime_resolve:
+ copy %sp, %r1 /* Copy previous sp */
+ /* Save function result address (on entry) */
+ stwm %r28,128(%sp)
+ - /* Fillin some frame info to follow ABI */
+ + /* Fill in some frame info to follow ABI */
+ stw %r1,-4(%sp) /* Previous sp */
+ stw %r21,-32(%sp) /* PIC register value */
- #define __mips16_syscall7(a0, a1, a2, a3, a4, a5, a6, number) \
- - __mips_syscall7 ((long) (a0), (long) (a1), (long) (a2), \
- - (long) (a3), (long) (a4), (long) (a5), \
- - (long) (a6), \
- - (long) (number))
- + __mips_syscall7 ((long int) (a0), (long int) (a1), \
- + (long int) (a2), (long int) (a3), \
- + (long int) (a4), (long int) (a5), \
- + (long int) (a6), \
- + (long int) (number))
+ /* Save input floating point registers. This must be done
+ in the new frame since the previous frame doesn't have
+ enough space */
+ - ldo -56(%sp),%r1
+ + ldo -64(%sp),%r1
+ fstd,ma %fr4,-8(%r1)
+ fstd,ma %fr5,-8(%r1)
+ fstd,ma %fr6,-8(%r1)
+ +
+ + /* Test PA_GP_RELOC bit. */
+ + bb,>= %r19,31,2f /* branch if not reloc offset */
+ fstd,ma %fr7,-8(%r1)
- #endif
- diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c
- index 92f16e2724..43c05f8050 100644
- --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c
- +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c
- @@ -20,8 +20,8 @@
+ /* Set up args to fixup func, needs only two arguments */
+ @@ -79,7 +82,7 @@ _dl_runtime_resolve:
+ copy %r19,%r25 /* (2) reloc offset */
- #undef __mips16_syscall0
+ /* Call the real address resolver. */
+ - bl _dl_fixup,%rp
+ +3: bl _dl_fixup,%rp
+ copy %r21,%r19 /* set fixup func ltp */
- -long long __nomips16
- -__mips16_syscall0 (long number)
- +long long int __nomips16
- +__mips16_syscall0 (long int number)
- {
- union __mips_syscall_return ret;
- ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 0);
- diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c
- index fa985a96e5..16a567e834 100644
- --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c
- +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c
- @@ -20,9 +20,9 @@
+ /* While the linker will set a function pointer to NULL when it
+ @@ -102,7 +105,7 @@ _dl_runtime_resolve:
+ copy %r29, %r19
- #undef __mips16_syscall1
+ /* Reload arguments fp args */
+ - ldo -56(%sp),%r1
+ + ldo -64(%sp),%r1
+ fldd,ma -8(%r1),%fr4
+ fldd,ma -8(%r1),%fr5
+ fldd,ma -8(%r1),%fr6
+ @@ -129,6 +132,25 @@ _dl_runtime_resolve:
+ bv %r0(%rp)
+ ldo -128(%sp),%sp
- -long long __nomips16
- -__mips16_syscall1 (long a0,
- - long number)
- +long long int __nomips16
- +__mips16_syscall1 (long int a0,
- + long int number)
- {
- union __mips_syscall_return ret;
- ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 1,
- diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c
- index f042ac815d..c0a856c344 100644
- --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c
- +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c
- @@ -20,9 +20,9 @@
-
- #undef __mips16_syscall2
+ +2:
+ + /* Set up args for _dl_fix_reloc_arg. */
+ + copy %r22,%r26 /* (1) function pointer */
+ + depi 0,31,2,%r26 /* clear least significant bits */
+ + ldw 8+4(%r20),%r25 /* (2) got[1] == struct link_map */
+ +
+ + /* Save ltp and link map arg for _dl_fixup. */
+ + stw %r21,-56(%sp) /* ltp */
+ + stw %r25,-60(%sp) /* struct link map */
+ +
+ + /* Find reloc offset. */
+ + bl _dl_fix_reloc_arg,%rp
+ + copy %r21,%r19 /* set func ltp */
+ +
+ + /* Set up args for _dl_fixup. */
+ + ldw -56(%sp),%r21 /* ltp */
+ + ldw -60(%sp),%r26 /* (1) struct link map */
+ + b 3b
+ + copy %ret0,%r25 /* (2) reloc offset */
+ .EXIT
+ .PROCEND
+ cfi_endproc
+ @@ -153,7 +175,7 @@ _dl_runtime_profile:
+ copy %sp, %r1 /* Copy previous sp */
+ /* Save function result address (on entry) */
+ stwm %r28,192(%sp)
+ - /* Fillin some frame info to follow ABI */
+ + /* Fill in some frame info to follow ABI */
+ stw %r1,-4(%sp) /* Previous sp */
+ stw %r21,-32(%sp) /* PIC register value */
- -long long __nomips16
- -__mips16_syscall2 (long a0, long a1,
- - long number)
- +long long int __nomips16
- +__mips16_syscall2 (long int a0, long int a1,
- + long int number)
- {
- union __mips_syscall_return ret;
- ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 2,
- diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c
- index dfe2f7feb5..042768ebf2 100644
- --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c
- +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c
- @@ -20,9 +20,9 @@
+ @@ -181,10 +203,11 @@ _dl_runtime_profile:
+ fstd,ma %fr5,8(%r1)
+ fstd,ma %fr6,8(%r1)
+ fstd,ma %fr7,8(%r1)
+ - /* 32-bit stack pointer and return register */
+ - stw %sp,-56(%sp)
+ - stw %r2,-52(%sp)
- #undef __mips16_syscall3
+ + /* Test PA_GP_RELOC bit. */
+ + bb,>= %r19,31,2f /* branch if not reloc offset */
+ + /* 32-bit stack pointer */
+ + stw %sp,-56(%sp)
- -long long __nomips16
- -__mips16_syscall3 (long a0, long a1, long a2,
- - long number)
- +long long int __nomips16
- +__mips16_syscall3 (long int a0, long int a1, long int a2,
- + long int number)
- {
- union __mips_syscall_return ret;
- ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 3,
- diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c
- index 39de510357..8658d822ab 100644
- --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c
- +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c
- @@ -20,9 +20,9 @@
+ /* Set up args to fixup func, needs five arguments */
+ ldw 8+4(%r20),%r26 /* (1) got[1] == struct link_map */
+ @@ -197,7 +220,7 @@ _dl_runtime_profile:
+ stw %r1, -52(%sp) /* (5) long int *framesizep */
- #undef __mips16_syscall4
+ /* Call the real address resolver. */
+ - bl _dl_profile_fixup,%rp
+ +3: bl _dl_profile_fixup,%rp
+ copy %r21,%r19 /* set fixup func ltp */
- -long long __nomips16
- -__mips16_syscall4 (long a0, long a1, long a2, long a3,
- - long number)
- +long long int __nomips16
- +__mips16_syscall4 (long int a0, long int a1, long int a2, long int a3,
- + long int number)
- {
- union __mips_syscall_return ret;
- ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 4,
- diff --git a/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h
- index beefcf284b..0c6a83e9b3 100644
- --- a/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h
- +++ b/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h
- @@ -52,7 +52,7 @@
- #undef INLINE_SYSCALL
- #define INLINE_SYSCALL(name, nr, args...) \
- ({ INTERNAL_SYSCALL_DECL (_sc_err); \
- - long result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \
- + long int result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \
- if ( INTERNAL_SYSCALL_ERROR_P (result_var, _sc_err) ) \
- { \
- __set_errno (INTERNAL_SYSCALL_ERRNO (result_var, _sc_err)); \
- @@ -61,10 +61,10 @@
- result_var; })
+ /* Load up the returned function descriptor */
+ @@ -215,7 +238,9 @@ _dl_runtime_profile:
+ fldd,ma 8(%r1),%fr5
+ fldd,ma 8(%r1),%fr6
+ fldd,ma 8(%r1),%fr7
+ - ldw -52(%sp),%rp
+ +
+ + /* Reload rp register -(192+20) without adjusting stack */
+ + ldw -212(%sp),%rp
- #undef INTERNAL_SYSCALL_DECL
- -#define INTERNAL_SYSCALL_DECL(err) long err __attribute__ ((unused))
- +#define INTERNAL_SYSCALL_DECL(err) long int err __attribute__ ((unused))
+ /* Reload static link register -(192+16) without adjusting stack */
+ ldw -208(%sp),%r29
+ @@ -303,6 +328,33 @@ L(cont):
+ ldw -20(%sp),%rp
+ /* Return */
+ bv,n 0(%r2)
+ +
+ +2:
+ + /* Set up args for _dl_fix_reloc_arg. */
+ + copy %r22,%r26 /* (1) function pointer */
+ + depi 0,31,2,%r26 /* clear least significant bits */
+ + ldw 8+4(%r20),%r25 /* (2) got[1] == struct link_map */
+ +
+ + /* Save ltp and link map arg for _dl_fixup. */
+ + stw %r21,-92(%sp) /* ltp */
+ + stw %r25,-116(%sp) /* struct link map */
+ +
+ + /* Find reloc offset. */
+ + bl _dl_fix_reloc_arg,%rp
+ + copy %r21,%r19 /* set func ltp */
+ +
+ + /* Restore fixup ltp. */
+ + ldw -92(%sp),%r21 /* ltp */
+ +
+ + /* Set up args to fixup func, needs five arguments */
+ + ldw -116(%sp),%r26 /* (1) struct link map */
+ + copy %ret0,%r25 /* (2) reloc offset */
+ + stw %r25,-120(%sp) /* Save reloc offset */
+ + ldw -212(%sp),%r24 /* (3) profile_fixup needs rp */
+ + ldo -56(%sp),%r23 /* (4) La_hppa_regs */
+ + ldo -112(%sp), %r1
+ + b 3b
+ + stw %r1, -52(%sp) /* (5) long int *framesizep */
+ .EXIT
+ .PROCEND
+ cfi_endproc
+ diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
+ index 8af0789a9c..4334ade2a0 100644
+ --- a/sysdeps/i386/dl-machine.h
+ +++ b/sysdeps/i386/dl-machine.h
+ @@ -338,16 +338,22 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc,
+ {
+ # ifndef RTLD_BOOTSTRAP
+ if (sym_map != map
+ - && sym_map->l_type != lt_executable
+ && !sym_map->l_relocated)
+ {
+ const char *strtab
+ = (const char *) D_PTR (map, l_info[DT_STRTAB]);
+ - _dl_error_printf ("\
+ + if (sym_map->l_type == lt_executable)
+ + _dl_fatal_printf ("\
+ +%s: IFUNC symbol '%s' referenced in '%s' is defined in the executable \
+ +and creates an unsatisfiable circular dependency.\n",
+ + RTLD_PROGNAME, strtab + refsym->st_name,
+ + map->l_name);
+ + else
+ + _dl_error_printf ("\
+ %s: Relink `%s' with `%s' for IFUNC symbol `%s'\n",
+ - RTLD_PROGNAME, map->l_name,
+ - sym_map->l_name,
+ - strtab + refsym->st_name);
+ + RTLD_PROGNAME, map->l_name,
+ + sym_map->l_name,
+ + strtab + refsym->st_name);
+ }
+ # endif
+ value = ((Elf32_Addr (*) (void)) value) ();
+ diff --git a/sysdeps/i386/sysdep.h b/sysdeps/i386/sysdep.h
+ index b4bcd8fb6c..6094af8fec 100644
+ --- a/sysdeps/i386/sysdep.h
+ +++ b/sysdeps/i386/sysdep.h
+ @@ -61,7 +61,7 @@ lose: SYSCALL_PIC_SETUP \
- #undef INTERNAL_SYSCALL_ERROR_P
- -#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long) (err))
- +#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long int) (err))
+ # define SETUP_PIC_REG(reg) \
+ .ifndef GET_PC_THUNK(reg); \
+ - .section .gnu.linkonce.t.GET_PC_THUNK(reg),"ax",@progbits; \
+ + .section .text.GET_PC_THUNK(reg),"axG",@progbits,GET_PC_THUNK(reg),comdat; \
+ .globl GET_PC_THUNK(reg); \
+ .hidden GET_PC_THUNK(reg); \
+ .p2align 4; \
+ @@ -97,7 +97,8 @@ GET_PC_THUNK(reg): \
- #undef INTERNAL_SYSCALL_ERRNO
- #define INTERNAL_SYSCALL_ERRNO(val, err) ((void) (err), val)
- @@ -103,11 +103,11 @@
+ # define SETUP_PIC_REG_STR(reg) \
+ ".ifndef " GET_PC_THUNK_STR (reg) "\n" \
+ - ".section .gnu.linkonce.t." GET_PC_THUNK_STR (reg) ",\"ax\",@progbits\n" \
+ + ".section .text." GET_PC_THUNK_STR (reg) ",\"axG\",@progbits," \
+ + GET_PC_THUNK_STR (reg) ",comdat\n" \
+ ".globl " GET_PC_THUNK_STR (reg) "\n" \
+ ".hidden " GET_PC_THUNK_STR (reg) "\n" \
+ ".p2align 4\n" \
+ diff --git a/sysdeps/ieee754/ldbl-96/Makefile b/sysdeps/ieee754/ldbl-96/Makefile
+ index 995e90d6da..6030adf7e7 100644
+ --- a/sysdeps/ieee754/ldbl-96/Makefile
+ +++ b/sysdeps/ieee754/ldbl-96/Makefile
+ @@ -17,5 +17,8 @@
+ # <https://www.gnu.org/licenses/>.
- union __mips_syscall_return
- {
- - long long val;
- + long long int val;
- struct
- {
- - long v0;
- - long v1;
- + long int v0;
- + long int v1;
- }
- reg;
- };
- @@ -152,13 +152,13 @@ union __mips_syscall_return
+ ifeq ($(subdir),math)
+ -tests += test-canonical-ldbl-96 test-totalorderl-ldbl-96
+ +tests += test-canonical-ldbl-96 test-totalorderl-ldbl-96 test-sinl-pseudo
+ +ifeq ($(have-ssp),yes)
+ +CFLAGS-test-sinl-pseudo.c += -fstack-protector-all
+ endif
+ +endif # $(subdir) == math
+ diff --git a/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c b/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c
+ index 5f742321ae..bcdf20179f 100644
+ --- a/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c
+ +++ b/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c
+ @@ -210,6 +210,18 @@ __ieee754_rem_pio2l (long double x, long double *y)
+ return 0;
+ }
- #define internal_syscall0(v0_init, input, number, err, dummy...) \
- ({ \
- - long _sys_result; \
- + long int _sys_result; \
- \
- { \
- - register long __s0 asm ("$16") __attribute__ ((unused)) \
- + register long int __s0 asm ("$16") __attribute__ ((unused)) \
- = (number); \
- - register long __v0 asm ("$2"); \
- - register long __a3 asm ("$7"); \
- + register long int __v0 asm ("$2"); \
- + register long int __a3 asm ("$7"); \
- __asm__ volatile ( \
- ".set\tnoreorder\n\t" \
- v0_init \
- @@ -175,14 +175,15 @@ union __mips_syscall_return
-
- #define internal_syscall1(v0_init, input, number, err, arg1) \
- ({ \
- - long _sys_result; \
- + long int _sys_result; \
- \
- { \
- - register long __s0 asm ("$16") __attribute__ ((unused)) \
- + long int _arg1 = (long int) (arg1); \
- + register long int __s0 asm ("$16") __attribute__ ((unused)) \
- = (number); \
- - register long __v0 asm ("$2"); \
- - register long __a0 asm ("$4") = (long) (arg1); \
- - register long __a3 asm ("$7"); \
- + register long int __v0 asm ("$2"); \
- + register long int __a0 asm ("$4") = _arg1; \
- + register long int __a3 asm ("$7"); \
- __asm__ volatile ( \
- ".set\tnoreorder\n\t" \
- v0_init \
- @@ -199,15 +200,17 @@ union __mips_syscall_return
+ + if ((i0 & 0x80000000) == 0)
+ + {
+ + /* Pseudo-zero and unnormal representations are not valid
+ + representations of long double. We need to avoid stack
+ + corruption in __kernel_rem_pio2, which expects input in a
+ + particular normal form, but those representations do not need
+ + to be consistently handled like any particular floating-point
+ + value. */
+ + y[1] = y[0] = __builtin_nanl ("");
+ + return 0;
+ + }
+ +
+ /* Split the 64 bits of the mantissa into three 24-bit integers
+ stored in a double array. */
+ exp = j0 - 23;
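
For context, the encodings rejected by the new check are invalid ldbl-96 bit
patterns. The sketch below is editorial, not part of the patch, and assumes
the little-endian x86 layout of the 96-bit format; it builds the simplest such
pattern, a pseudo-zero with exponent 0x4141, matching what the new test added
next constructs via SET_LDOUBLE_WORDS:

    #include <string.h>

    /* A pseudo-zero: nonzero biased exponent with the entire 64-bit
       significand (including the explicit integer bit) zero.  This is
       not a valid long double value, so range reduction may treat it
       arbitrarily (the patched code returns NaN) as long as it does
       not corrupt the stack.  */
    static long double
    make_pseudo_zero (void)
    {
      unsigned char bytes[sizeof (long double)] = { 0 };
      bytes[8] = 0x41;   /* low byte of the sign/exponent word */
      bytes[9] = 0x41;   /* high byte: sign 0, exponent 0x4141 */
      long double ld;
      memcpy (&ld, bytes, sizeof ld);
      return ld;
    }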
+ diff --git a/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c b/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c
+ new file mode 100644
+ index 0000000000..f59b97769d
+ --- /dev/null
+ +++ b/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c
+ @@ -0,0 +1,41 @@
+ +/* Test sinl for pseudo-zeros and unnormals for ldbl-96 (bug 25487).
+ + Copyright (C) 2020 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#include <math.h>
+ +#include <math_ldbl.h>
+ +#include <stdint.h>
+ +
+ +static int
+ +do_test (void)
+ +{
+ + for (int i = 0; i < 64; i++)
+ + {
+ + uint64_t sig = i == 63 ? 0 : 1ULL << i;
+ + long double ld;
+ + SET_LDOUBLE_WORDS (ld, 0x4141,
+ + sig >> 32, sig & 0xffffffffULL);
+ + /* The requirement is that no stack overflow occurs when the
+ + pseudo-zero or unnormal goes through range reduction. */
+ + volatile long double ldr;
+ + ldr = sinl (ld);
+ + (void) ldr;
+ + }
+ + return 0;
+ +}
+ +
+ +#include <support/test-driver.c>
+ diff --git a/sysdeps/posix/getcwd.c b/sysdeps/posix/getcwd.c
+ index f00b337a13..839d78d7b7 100644
+ --- a/sysdeps/posix/getcwd.c
+ +++ b/sysdeps/posix/getcwd.c
+ @@ -241,6 +241,14 @@ __getcwd (char *buf, size_t size)
+ char *path;
+ #ifndef NO_ALLOCATION
+ size_t allocated = size;
+ +
+ + /* A size of 1 byte is never useful. */
+ + if (allocated == 1)
+ + {
+ + __set_errno (ERANGE);
+ + return NULL;
+ + }
+ +
+ if (size == 0)
+ {
+ if (buf != NULL)
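
The effect of the early check added above, sketched from a caller's point of
view (illustrative only; the hunk patches the generic POSIX fallback, but the
outcome for a one-byte buffer is the same everywhere, since even "/" needs two
bytes including the terminating NUL):

    #include <errno.h>
    #include <stdio.h>
    #include <unistd.h>

    int
    main (void)
    {
      char buf[1];
      /* A 1-byte buffer can never hold a path plus its NUL, so the
         call now fails up front with ERANGE instead of doing any work.  */
      if (getcwd (buf, sizeof buf) == NULL && errno == ERANGE)
        puts ("getcwd with size 1: ERANGE");
      return 0;
    }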
+ diff --git a/sysdeps/posix/system.c b/sysdeps/posix/system.c
+ index e613e6a344..a03f478fc7 100644
+ --- a/sysdeps/posix/system.c
+ +++ b/sysdeps/posix/system.c
+ @@ -101,7 +101,8 @@ cancel_handler (void *arg)
+ static int
+ do_system (const char *line)
+ {
+ - int status;
+ + int status = -1;
+ + int ret;
+ pid_t pid;
+ struct sigaction sa;
+ #ifndef _LIBC_REENTRANT
+ @@ -144,14 +145,14 @@ do_system (const char *line)
+ __posix_spawnattr_setflags (&spawn_attr,
+ POSIX_SPAWN_SETSIGDEF | POSIX_SPAWN_SETSIGMASK);
- #define internal_syscall2(v0_init, input, number, err, arg1, arg2) \
- ({ \
- - long _sys_result; \
- + long int _sys_result; \
- \
- { \
- - register long __s0 asm ("$16") __attribute__ ((unused)) \
- + long int _arg1 = (long int) (arg1); \
- + long int _arg2 = (long int) (arg2); \
- + register long int __s0 asm ("$16") __attribute__ ((unused)) \
- = (number); \
- - register long __v0 asm ("$2"); \
- - register long __a0 asm ("$4") = (long) (arg1); \
- - register long __a1 asm ("$5") = (long) (arg2); \
- - register long __a3 asm ("$7"); \
- + register long int __v0 asm ("$2"); \
- + register long int __a0 asm ("$4") = _arg1; \
- + register long int __a1 asm ("$5") = _arg2; \
- + register long int __a3 asm ("$7"); \
- __asm__ volatile ( \
- ".set\tnoreorder\n\t" \
- v0_init \
- @@ -225,16 +228,19 @@ union __mips_syscall_return
- #define internal_syscall3(v0_init, input, number, err, \
- arg1, arg2, arg3) \
- ({ \
- - long _sys_result; \
- + long int _sys_result; \
- \
- { \
- - register long __s0 asm ("$16") __attribute__ ((unused)) \
- + long int _arg1 = (long int) (arg1); \
- + long int _arg2 = (long int) (arg2); \
- + long int _arg3 = (long int) (arg3); \
- + register long int __s0 asm ("$16") __attribute__ ((unused)) \
- = (number); \
- - register long __v0 asm ("$2"); \
- - register long __a0 asm ("$4") = (long) (arg1); \
- - register long __a1 asm ("$5") = (long) (arg2); \
- - register long __a2 asm ("$6") = (long) (arg3); \
- - register long __a3 asm ("$7"); \
- + register long int __v0 asm ("$2"); \
- + register long int __a0 asm ("$4") = _arg1; \
- + register long int __a1 asm ("$5") = _arg2; \
- + register long int __a2 asm ("$6") = _arg3; \
- + register long int __a3 asm ("$7"); \
- __asm__ volatile ( \
- ".set\tnoreorder\n\t" \
- v0_init \
- @@ -252,16 +258,20 @@ union __mips_syscall_return
- #define internal_syscall4(v0_init, input, number, err, \
- arg1, arg2, arg3, arg4) \
- ({ \
- - long _sys_result; \
- + long int _sys_result; \
- \
- { \
- - register long __s0 asm ("$16") __attribute__ ((unused)) \
- + long int _arg1 = (long int) (arg1); \
- + long int _arg2 = (long int) (arg2); \
- + long int _arg3 = (long int) (arg3); \
- + long int _arg4 = (long int) (arg4); \
- + register long int __s0 asm ("$16") __attribute__ ((unused)) \
- = (number); \
- - register long __v0 asm ("$2"); \
- - register long __a0 asm ("$4") = (long) (arg1); \
- - register long __a1 asm ("$5") = (long) (arg2); \
- - register long __a2 asm ("$6") = (long) (arg3); \
- - register long __a3 asm ("$7") = (long) (arg4); \
- + register long int __v0 asm ("$2"); \
- + register long int __a0 asm ("$4") = _arg1; \
- + register long int __a1 asm ("$5") = _arg2; \
- + register long int __a2 asm ("$6") = _arg3; \
- + register long int __a3 asm ("$7") = _arg4; \
- __asm__ volatile ( \
- ".set\tnoreorder\n\t" \
- v0_init \
- @@ -285,63 +295,66 @@ union __mips_syscall_return
- compiler specifics required for the stack arguments to be pushed,
- which would be the case if these syscalls were inlined. */
+ - status = __posix_spawn (&pid, SHELL_PATH, 0, &spawn_attr,
+ - (char *const[]){ (char*) SHELL_NAME,
+ - (char*) "-c",
+ - (char *) line, NULL },
+ - __environ);
+ + ret = __posix_spawn (&pid, SHELL_PATH, 0, &spawn_attr,
+ + (char *const[]){ (char *) SHELL_NAME,
+ + (char *) "-c",
+ + (char *) line, NULL },
+ + __environ);
+ __posix_spawnattr_destroy (&spawn_attr);
- -long long __nomips16 __mips_syscall5 (long arg1, long arg2, long arg3,
- - long arg4, long arg5,
- - long number);
- +long long int __nomips16 __mips_syscall5 (long int arg1, long int arg2,
- + long int arg3, long int arg4,
- + long int arg5,
- + long int number);
- libc_hidden_proto (__mips_syscall5, nomips16)
+ - if (status == 0)
+ + if (ret == 0)
+ {
+ /* Cancellation results in cleanup handlers running as exceptions in
+ the block where they were installed, so it is safe to reference
+ @@ -186,6 +187,9 @@ do_system (const char *line)
+ }
+ DO_UNLOCK ();
- #define internal_syscall5(v0_init, input, number, err, \
- arg1, arg2, arg3, arg4, arg5) \
- ({ \
- union __mips_syscall_return _sc_ret; \
- - _sc_ret.val = __mips_syscall5 ((long) (arg1), \
- - (long) (arg2), \
- - (long) (arg3), \
- - (long) (arg4), \
- - (long) (arg5), \
- - (long) (number)); \
- + _sc_ret.val = __mips_syscall5 ((long int) (arg1), \
- + (long int) (arg2), \
- + (long int) (arg3), \
- + (long int) (arg4), \
- + (long int) (arg5), \
- + (long int) (number)); \
- err = _sc_ret.reg.v1; \
- _sc_ret.reg.v0; \
- })
+ + if (ret != 0)
+ + __set_errno (ret);
+ +
+ return status;
+ }
- -long long __nomips16 __mips_syscall6 (long arg1, long arg2, long arg3,
- - long arg4, long arg5, long arg6,
- - long number);
- +long long int __nomips16 __mips_syscall6 (long int arg1, long int arg2,
- + long int arg3, long int arg4,
- + long int arg5, long int arg6,
- + long int number);
- libc_hidden_proto (__mips_syscall6, nomips16)
-
- #define internal_syscall6(v0_init, input, number, err, \
- arg1, arg2, arg3, arg4, arg5, arg6) \
- ({ \
- union __mips_syscall_return _sc_ret; \
- - _sc_ret.val = __mips_syscall6 ((long) (arg1), \
- - (long) (arg2), \
- - (long) (arg3), \
- - (long) (arg4), \
- - (long) (arg5), \
- - (long) (arg6), \
- - (long) (number)); \
- + _sc_ret.val = __mips_syscall6 ((long int) (arg1), \
- + (long int) (arg2), \
- + (long int) (arg3), \
- + (long int) (arg4), \
- + (long int) (arg5), \
- + (long int) (arg6), \
- + (long int) (number)); \
- err = _sc_ret.reg.v1; \
- _sc_ret.reg.v0; \
- })
-
- -long long __nomips16 __mips_syscall7 (long arg1, long arg2, long arg3,
- - long arg4, long arg5, long arg6,
- - long arg7,
- - long number);
- +long long int __nomips16 __mips_syscall7 (long int arg1, long int arg2,
- + long int arg3, long int arg4,
- + long int arg5, long int arg6,
- + long int arg7,
- + long int number);
- libc_hidden_proto (__mips_syscall7, nomips16)
-
- #define internal_syscall7(v0_init, input, number, err, \
- arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
- ({ \
- union __mips_syscall_return _sc_ret; \
- - _sc_ret.val = __mips_syscall7 ((long) (arg1), \
- - (long) (arg2), \
- - (long) (arg3), \
- - (long) (arg4), \
- - (long) (arg5), \
- - (long) (arg6), \
- - (long) (arg7), \
- - (long) (number)); \
- + _sc_ret.val = __mips_syscall7 ((long int) (arg1), \
- + (long int) (arg2), \
- + (long int) (arg3), \
- + (long int) (arg4), \
- + (long int) (arg5), \
- + (long int) (arg6), \
- + (long int) (arg7), \
- + (long int) (number)); \
- err = _sc_ret.reg.v1; \
- _sc_ret.reg.v0; \
- })
- diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h
- index f96636538a..4a9d7054f9 100644
- --- a/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h
- +++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h
- @@ -47,14 +47,14 @@
-
- /* Convert X to a long long, without losing any bits if it is one
- already or warning if it is a 32-bit pointer. */
- -#define ARGIFY(X) ((long long) (__typeof__ ((X) - (X))) (X))
- +#define ARGIFY(X) ((long long int) (__typeof__ ((X) - (X))) (X))
-
- /* Define a macro which expands into the inline wrapper code for a system
- call. */
- #undef INLINE_SYSCALL
- #define INLINE_SYSCALL(name, nr, args...) \
- ({ INTERNAL_SYSCALL_DECL (_sc_err); \
- - long result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \
- + long int result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \
- if ( INTERNAL_SYSCALL_ERROR_P (result_var, _sc_err) ) \
- { \
- __set_errno (INTERNAL_SYSCALL_ERRNO (result_var, _sc_err)); \
- @@ -63,10 +63,10 @@
- result_var; })
-
- #undef INTERNAL_SYSCALL_DECL
- -#define INTERNAL_SYSCALL_DECL(err) long err __attribute__ ((unused))
- +#define INTERNAL_SYSCALL_DECL(err) long int err __attribute__ ((unused))
+ diff --git a/sysdeps/powerpc/powerpc32/sysdep.h b/sysdeps/powerpc/powerpc32/sysdep.h
+ index 2ba009e919..829eec266a 100644
+ --- a/sysdeps/powerpc/powerpc32/sysdep.h
+ +++ b/sysdeps/powerpc/powerpc32/sysdep.h
+ @@ -179,8 +179,8 @@ GOT_LABEL: ; \
+ #else
+ /* Position-dependent code does not require access to the GOT. */
+ # define __GLRO(rOUT, rGOT, member, offset) \
+ - lis rOUT,(member+LOWORD)@ha; \
+ - lwz rOUT,(member+LOWORD)@l(rOUT)
+ + lis rOUT,(member)@ha; \
+ + lwz rOUT,(member)@l(rOUT)
+ #endif /* PIC */
- #undef INTERNAL_SYSCALL_ERROR_P
- -#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long) (err))
- +#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long int) (err))
+ #endif /* __ASSEMBLER__ */
+ diff --git a/sysdeps/powerpc/powerpc64/backtrace.c b/sysdeps/powerpc/powerpc64/backtrace.c
+ index 8a53a1088f..362a2b713c 100644
+ --- a/sysdeps/powerpc/powerpc64/backtrace.c
+ +++ b/sysdeps/powerpc/powerpc64/backtrace.c
+ @@ -54,11 +54,22 @@ struct signal_frame_64 {
+ /* We don't care about the rest, since the IP value is at 'uc' field. */
+ };
- #undef INTERNAL_SYSCALL_ERRNO
- #define INTERNAL_SYSCALL_ERRNO(val, err) ((void) (err), val)
- @@ -112,13 +112,13 @@
+ +/* Test whether the address lies within the sigreturn trampoline code.
+ + Up to and including kernel 5.8, returning from an interrupt or syscall to a
+ + signal handler starts execution directly at the handler's entry point, with
+ + LR set to the address of the sigreturn trampoline (the vDSO symbol).
+ + Newer kernels instead branch to the signal handler from the trampoline, so
+ + checking the stack trace against the vDSO entry point alone does not work in
+ + that case.
+ + The vDSO branches with a 'bctrl' instruction, so checking both the
+ + vDSO address itself and the next instruction should cover all kernel
+ + versions. */
+ static inline bool
+ is_sigtramp_address (void *nip)
+ {
+ #ifdef HAVE_SIGTRAMP_RT64
+ - if (nip == GLRO (dl_vdso_sigtramp_rt64))
+ + if (nip == GLRO (dl_vdso_sigtramp_rt64) ||
+ + nip == GLRO (dl_vdso_sigtramp_rt64) + 4)
+ return true;
+ #endif
+ return false;
+ diff --git a/sysdeps/s390/configure b/sysdeps/s390/configure
+ index fa46e9e351..e7f576338d 100644
+ --- a/sysdeps/s390/configure
+ +++ b/sysdeps/s390/configure
+ @@ -123,7 +123,9 @@ void testinsn (char *buf)
+ __asm__ (".machine \"arch13\" \n\t"
+ ".machinemode \"zarch_nohighgprs\" \n\t"
+ "lghi %%r0,16 \n\t"
+ - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
+ + "mvcrl 0(%0),32(%0) \n\t"
+ + "vstrs %%v20,%%v20,%%v20,%%v20,0,2"
+ + : : "a" (buf) : "memory", "r0");
+ }
+ EOF
+ if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c
+ @@ -271,7 +273,9 @@ else
+ void testinsn (char *buf)
+ {
+ __asm__ ("lghi %%r0,16 \n\t"
+ - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
+ + "mvcrl 0(%0),32(%0) \n\t"
+ + "vstrs %%v20,%%v20,%%v20,%%v20,0,2"
+ + : : "a" (buf) : "memory", "r0");
+ }
+ EOF
+ if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c
+ diff --git a/sysdeps/s390/configure.ac b/sysdeps/s390/configure.ac
+ index 3ed5a8ef87..5c3479e8cf 100644
+ --- a/sysdeps/s390/configure.ac
+ +++ b/sysdeps/s390/configure.ac
+ @@ -88,7 +88,9 @@ void testinsn (char *buf)
+ __asm__ (".machine \"arch13\" \n\t"
+ ".machinemode \"zarch_nohighgprs\" \n\t"
+ "lghi %%r0,16 \n\t"
+ - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
+ + "mvcrl 0(%0),32(%0) \n\t"
+ + "vstrs %%v20,%%v20,%%v20,%%v20,0,2"
+ + : : "a" (buf) : "memory", "r0");
+ }
+ EOF
+ dnl test, if assembler supports S390 arch13 instructions
+ @@ -195,7 +197,9 @@ cat > conftest.c <<\EOF
+ void testinsn (char *buf)
+ {
+ __asm__ ("lghi %%r0,16 \n\t"
+ - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
+ + "mvcrl 0(%0),32(%0) \n\t"
+ + "vstrs %%v20,%%v20,%%v20,%%v20,0,2"
+ + : : "a" (buf) : "memory", "r0");
+ }
+ EOF
+ dnl test, if assembler supports S390 arch13 zarch instructions as default
+ diff --git a/sysdeps/s390/memmove.c b/sysdeps/s390/memmove.c
+ index 5fc85e129f..ee59b5de14 100644
+ --- a/sysdeps/s390/memmove.c
+ +++ b/sysdeps/s390/memmove.c
+ @@ -43,7 +43,7 @@ extern __typeof (__redirect_memmove) MEMMOVE_ARCH13 attribute_hidden;
+ s390_libc_ifunc_expr (__redirect_memmove, memmove,
+ ({
+ s390_libc_ifunc_expr_stfle_init ();
+ - (HAVE_MEMMOVE_ARCH13
+ + (HAVE_MEMMOVE_ARCH13 && (hwcap & HWCAP_S390_VXRS_EXT2)
+ && S390_IS_ARCH13_MIE3 (stfle_bits))
+ ? MEMMOVE_ARCH13
+ : (HAVE_MEMMOVE_Z13 && (hwcap & HWCAP_S390_VX))
+ diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c
+ index e6195c6e26..17c0cc3952 100644
+ --- a/sysdeps/s390/multiarch/ifunc-impl-list.c
+ +++ b/sysdeps/s390/multiarch/ifunc-impl-list.c
+ @@ -171,7 +171,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL (i, name, memmove,
+ # if HAVE_MEMMOVE_ARCH13
+ IFUNC_IMPL_ADD (array, i, memmove,
+ - S390_IS_ARCH13_MIE3 (stfle_bits),
+ + ((dl_hwcap & HWCAP_S390_VXRS_EXT2)
+ + && S390_IS_ARCH13_MIE3 (stfle_bits)),
+ MEMMOVE_ARCH13)
+ # endif
+ # if HAVE_MEMMOVE_Z13
+ diff --git a/sysdeps/sh/be/sh4/fpu/Implies b/sysdeps/sh/be/sh4/fpu/Implies
+ new file mode 100644
+ index 0000000000..71b28ee1a4
+ --- /dev/null
+ +++ b/sysdeps/sh/be/sh4/fpu/Implies
+ @@ -0,0 +1 @@
+ +sh/sh4/fpu
+ diff --git a/sysdeps/sh/le/sh4/fpu/Implies b/sysdeps/sh/le/sh4/fpu/Implies
+ new file mode 100644
+ index 0000000000..71b28ee1a4
+ --- /dev/null
+ +++ b/sysdeps/sh/le/sh4/fpu/Implies
+ @@ -0,0 +1 @@
+ +sh/sh4/fpu
+ diff --git a/sysdeps/unix/make-syscalls.sh b/sysdeps/unix/make-syscalls.sh
+ index c07626677f..4f6c3490a2 100644
+ --- a/sysdeps/unix/make-syscalls.sh
+ +++ b/sysdeps/unix/make-syscalls.sh
+ @@ -30,6 +30,7 @@
+ # P: optionally-NULL pointer to typed object (e.g., 3rd argument to sigaction)
+ # s: non-NULL string (e.g., 1st arg to open)
+ # S: optionally-NULL string (e.g., 1st arg to acct)
+ +# U: unsigned long int (32-bit types are zero-extended to 64-bit types)
+ # v: vararg scalar (e.g., optional 3rd arg to open)
+ # V: byte-per-page vector (3rd arg to mincore)
+ # W: wait status, optionally-NULL pointer to int (e.g., 2nd arg of wait4)
+ @@ -184,6 +185,27 @@ while read file srcfile caller syscall args strong weak; do
+ ?:?????????) nargs=9;;
+ esac
- #define internal_syscall0(v0_init, input, number, err, dummy...) \
- ({ \
- - long _sys_result; \
- + long int _sys_result; \
- \
- { \
- - register long long __s0 asm ("$16") __attribute__ ((unused)) \
- + register long long int __s0 asm ("$16") __attribute__ ((unused))\
- = (number); \
- - register long long __v0 asm ("$2"); \
- - register long long __a3 asm ("$7"); \
- + register long long int __v0 asm ("$2"); \
- + register long long int __a3 asm ("$7"); \
- __asm__ volatile ( \
- ".set\tnoreorder\n\t" \
+ + # Derive the unsigned long int arguments from the argument signature
+ + ulong_arg_1=0
+ + ulong_arg_2=0
+ + ulong_count=0
+ + for U in $(echo $args | sed -e "s/.*:/:/" | grep -ob U)
+ + do
+ + ulong_count=$(expr $ulong_count + 1)
+ + ulong_arg=$(echo $U | sed -e "s/:U//")
+ + case $ulong_count in
+ + 1)
+ + ulong_arg_1=$ulong_arg
+ + ;;
+ + 2)
+ + ulong_arg_2=$ulong_arg
+ + ;;
+ + *)
+ + echo >&2 "$0: Too many unsigned long int arguments for syscall ($strong $weak)"
+ + exit 2
+ + esac
+ + done
+ +
+ # Make sure only the first syscall rule is used, if multiple dirs
+ # define the same syscall.
+ echo ''
+ @@ -245,6 +267,8 @@ while read file srcfile caller syscall args strong weak; do
+ \$(make-target-directory)
+ (echo '#define SYSCALL_NAME $syscall'; \\
+ echo '#define SYSCALL_NARGS $nargs'; \\
+ + echo '#define SYSCALL_ULONG_ARG_1 $ulong_arg_1'; \\
+ + echo '#define SYSCALL_ULONG_ARG_2 $ulong_arg_2'; \\
+ echo '#define SYSCALL_SYMBOL $strong'; \\
+ echo '#define SYSCALL_NOERRNO $noerrno'; \\
+ echo '#define SYSCALL_ERRVAL $errval'; \\
+ diff --git a/sysdeps/unix/syscall-template.S b/sysdeps/unix/syscall-template.S
+ index cf6c7a58fb..f807a8603f 100644
+ --- a/sysdeps/unix/syscall-template.S
+ +++ b/sysdeps/unix/syscall-template.S
+ @@ -25,6 +25,12 @@
+ defining a few macros:
+ SYSCALL_NAME syscall name
+ SYSCALL_NARGS number of arguments this call takes
+ + SYSCALL_ULONG_ARG_1 the first unsigned long int argument this
+ + call takes. 0 means that there are no
+ + unsigned long int arguments.
+ + SYSCALL_ULONG_ARG_2 the second unsigned long int argument this
+ + call takes. 0 means that there is at most
+ + one unsigned long int argument.
+ SYSCALL_SYMBOL primary symbol name
+ SYSCALL_NOERRNO 1 to define a no-errno version (see below)
+ SYSCALL_ERRVAL 1 to define an error-value version (see below)
+ @@ -44,9 +50,31 @@
+ /* This indirection is needed so that SYMBOL gets macro-expanded. */
+ #define syscall_hidden_def(SYMBOL) hidden_def (SYMBOL)
+
+ -#define T_PSEUDO(SYMBOL, NAME, N) PSEUDO (SYMBOL, NAME, N)
+ -#define T_PSEUDO_NOERRNO(SYMBOL, NAME, N) PSEUDO_NOERRNO (SYMBOL, NAME, N)
+ -#define T_PSEUDO_ERRVAL(SYMBOL, NAME, N) PSEUDO_ERRVAL (SYMBOL, NAME, N)
+ +/* If PSEUDOS_HAVE_ULONG_INDICES is defined, PSEUDO and T_PSEUDO macros
+ + have 2 extra arguments for unsigned long int arguments:
+ + Extra argument 1: Position of the first unsigned long int argument.
+ + Extra argument 2: Position of the second unsigned long int argument.
+ + */
+ +#ifndef PSEUDOS_HAVE_ULONG_INDICES
+ +# undef SYSCALL_ULONG_ARG_1
+ +# define SYSCALL_ULONG_ARG_1 0
+ +#endif
+ +
+ +#if SYSCALL_ULONG_ARG_1
+ +# define T_PSEUDO(SYMBOL, NAME, N, U1, U2) \
+ + PSEUDO (SYMBOL, NAME, N, U1, U2)
+ +# define T_PSEUDO_NOERRNO(SYMBOL, NAME, N, U1, U2) \
+ + PSEUDO_NOERRNO (SYMBOL, NAME, N, U1, U2)
+ +# define T_PSEUDO_ERRVAL(SYMBOL, NAME, N, U1, U2) \
+ + PSEUDO_ERRVAL (SYMBOL, NAME, N, U1, U2)
+ +#else
+ +# define T_PSEUDO(SYMBOL, NAME, N) \
+ + PSEUDO (SYMBOL, NAME, N)
+ +# define T_PSEUDO_NOERRNO(SYMBOL, NAME, N) \
+ + PSEUDO_NOERRNO (SYMBOL, NAME, N)
+ +# define T_PSEUDO_ERRVAL(SYMBOL, NAME, N) \
+ + PSEUDO_ERRVAL (SYMBOL, NAME, N)
+ +#endif
+ #define T_PSEUDO_END(SYMBOL) PSEUDO_END (SYMBOL)
+ #define T_PSEUDO_END_NOERRNO(SYMBOL) PSEUDO_END_NOERRNO (SYMBOL)
+ #define T_PSEUDO_END_ERRVAL(SYMBOL) PSEUDO_END_ERRVAL (SYMBOL)
+ @@ -56,7 +84,12 @@
+ /* This kind of system call stub never returns an error.
+ We return the return value register to the caller unexamined. */
+
+ +# if SYSCALL_ULONG_ARG_1
+ +T_PSEUDO_NOERRNO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS,
+ + SYSCALL_ULONG_ARG_1, SYSCALL_ULONG_ARG_2)
+ +# else
+ T_PSEUDO_NOERRNO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS)
+ +# endif
+ ret_NOERRNO
+ T_PSEUDO_END_NOERRNO (SYSCALL_SYMBOL)
+
+ @@ -66,7 +99,12 @@ T_PSEUDO_END_NOERRNO (SYSCALL_SYMBOL)
+ value, or zero for success. We may massage the kernel's return value
+ to meet that ABI, but we never set errno here. */
+
+ +# if SYSCALL_ULONG_ARG_1
+ +T_PSEUDO_ERRVAL (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS,
+ + SYSCALL_ULONG_ARG_1, SYSCALL_ULONG_ARG_2)
+ +# else
+ T_PSEUDO_ERRVAL (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS)
+ +# endif
+ ret_ERRVAL
+ T_PSEUDO_END_ERRVAL (SYSCALL_SYMBOL)
+
+ @@ -75,7 +113,12 @@ T_PSEUDO_END_ERRVAL (SYSCALL_SYMBOL)
+ /* This is a "normal" system call stub: if there is an error,
+ it returns -1 and sets errno. */
+
+ +# if SYSCALL_ULONG_ARG_1
+ +T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS,
+ + SYSCALL_ULONG_ARG_1, SYSCALL_ULONG_ARG_2)
+ +# else
+ T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS)
+ +# endif
+ ret
+ T_PSEUDO_END (SYSCALL_SYMBOL)
+
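
How the pieces above fit together, as an illustrative sketch (not part of the
patch): for a syscalls.list entry whose signature marks an argument with `U`,
make-syscalls.sh emits the two new position macros, and the template forwards
them to PSEUDO when PSEUDOS_HAVE_ULONG_INDICES is defined.  For
`munmap - munmap i:aU __munmap munmap` the generated stub would look roughly
like this (the NOERRNO/ERRVAL values and the final include are assumptions
about the generated file, not shown in the hunks):

    /* Roughly what make-syscalls.sh generates for munmap (i:aU):
       the length, argument 2, is an unsigned long int.  */
    #define SYSCALL_NAME munmap
    #define SYSCALL_NARGS 2
    #define SYSCALL_ULONG_ARG_1 2   /* first unsigned long int argument */
    #define SYSCALL_ULONG_ARG_2 0   /* no second one */
    #define SYSCALL_SYMBOL __munmap
    #define SYSCALL_NOERRNO 0       /* assumed: plain errno-setting stub */
    #define SYSCALL_ERRVAL 0
    #include <syscall-template.S>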
+ diff --git a/sysdeps/unix/syscalls.list b/sysdeps/unix/syscalls.list
+ index e28e801c7a..6b22b2cb45 100644
+ --- a/sysdeps/unix/syscalls.list
+ +++ b/sysdeps/unix/syscalls.list
+ @@ -39,27 +39,27 @@ kill - kill i:ii __kill kill
+ link - link i:ss __link link
+ listen - listen i:ii __listen listen
+ lseek - lseek i:iii __libc_lseek __lseek lseek
+ -madvise - madvise i:pii __madvise madvise
+ +madvise - madvise i:pUi __madvise madvise
+ mkdir - mkdir i:si __mkdir mkdir
+ -mmap - mmap b:aniiii __mmap mmap
+ -mprotect - mprotect i:aii __mprotect mprotect
+ -munmap - munmap i:ai __munmap munmap
+ +mmap - mmap b:aUiiii __mmap mmap
+ +mprotect - mprotect i:aUi __mprotect mprotect
+ +munmap - munmap i:aU __munmap munmap
+ open - open Ci:siv __libc_open __open open
+ profil - profil i:piii __profil profil
+ ptrace - ptrace i:iiii ptrace
+ -read - read Ci:ibn __libc_read __read read
+ -readlink - readlink i:spi __readlink readlink
+ +read - read Ci:ibU __libc_read __read read
+ +readlink - readlink i:spU __readlink readlink
+ readv - readv Ci:ipi __readv readv
+ reboot - reboot i:i reboot
+ -recv - recv Ci:ibni __libc_recv recv
+ -recvfrom - recvfrom Ci:ibniBN __libc_recvfrom __recvfrom recvfrom
+ +recv - recv Ci:ibUi __libc_recv recv
+ +recvfrom - recvfrom Ci:ibUiBN __libc_recvfrom __recvfrom recvfrom
+ recvmsg - recvmsg Ci:ipi __libc_recvmsg __recvmsg recvmsg
+ rename - rename i:ss rename
+ rmdir - rmdir i:s __rmdir rmdir
+ select - select Ci:iPPPP __select __libc_select select
+ -send - send Ci:ibni __libc_send __send send
+ +send - send Ci:ibUi __libc_send __send send
+ sendmsg - sendmsg Ci:ipi __libc_sendmsg __sendmsg sendmsg
+ -sendto - sendto Ci:ibnibn __libc_sendto __sendto sendto
+ +sendto - sendto Ci:ibUibn __libc_sendto __sendto sendto
+ setdomain - setdomainname i:si setdomainname
+ setegid - setegid i:i __setegid setegid
+ seteuid - seteuid i:i __seteuid seteuid
+ @@ -94,5 +94,5 @@ uname - uname i:p __uname uname
+ unlink - unlink i:s __unlink unlink
+ utimes - utimes i:sp __utimes utimes
+ vhangup - vhangup i:i vhangup
+ -write - write Ci:ibn __libc_write __write write
+ +write - write Ci:ibU __libc_write __write write
+ writev - writev Ci:ipi __writev writev
+ diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
+ index f12b7b1a2d..0a0da00151 100644
+ --- a/sysdeps/unix/sysv/linux/Makefile
+ +++ b/sysdeps/unix/sysv/linux/Makefile
+ @@ -60,7 +60,9 @@ sysdep_routines += adjtimex clone umount umount2 readahead \
+ setfsuid setfsgid epoll_pwait signalfd \
+ eventfd eventfd_read eventfd_write prlimit \
+ personality epoll_wait tee vmsplice splice \
+ - open_by_handle_at mlock2 pkey_mprotect pkey_set pkey_get
+ + open_by_handle_at mlock2 pkey_mprotect pkey_set pkey_get \
+ + prctl \
+ + process_vm_readv process_vm_writev
+
+ CFLAGS-gethostid.c = -fexceptions
+ CFLAGS-tee.c = -fexceptions -fasynchronous-unwind-tables
+ @@ -273,7 +275,7 @@ sysdep_routines += xstatconv internal_statvfs internal_statvfs64 \
+
+ sysdep_headers += bits/fcntl-linux.h
+
+ -tests += tst-fallocate tst-fallocate64
+ +tests += tst-fallocate tst-fallocate64 tst-getcwd-smallbuff
+ endif
+
+ ifeq ($(subdir),elf)
+ diff --git a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h
+ index 9378387747..c8471947b9 100644
+ --- a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h
+ +++ b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h
+ @@ -17,6 +17,7 @@
+ #define __NR_clock_nanosleep 115
+ #define __NR_clock_settime 112
+ #define __NR_clone 220
+ +#define __NR_clone3 435
+ #define __NR_close 57
+ #define __NR_connect 203
+ #define __NR_copy_file_range 285
+ diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
+ index 1389cea1b3..346d045fb4 100644
+ --- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
+ +++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
+ @@ -51,8 +51,12 @@
+
+ #define IS_PHECDA(midr) (MIDR_IMPLEMENTOR(midr) == 'h' \
+ && MIDR_PARTNUM(midr) == 0x000)
+ -#define IS_ARES(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \
+ - && MIDR_PARTNUM(midr) == 0xd0c)
+ +#define IS_NEOVERSE_N1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \
+ + && MIDR_PARTNUM(midr) == 0xd0c)
+ +#define IS_NEOVERSE_N2(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \
+ + && MIDR_PARTNUM(midr) == 0xd49)
+ +#define IS_NEOVERSE_V1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \
+ + && MIDR_PARTNUM(midr) == 0xd40)
+
+ #define IS_EMAG(midr) (MIDR_IMPLEMENTOR(midr) == 'P' \
+ && MIDR_PARTNUM(midr) == 0x000)
+ diff --git a/sysdeps/unix/sysv/linux/aarch64/localplt.data b/sysdeps/unix/sysv/linux/aarch64/localplt.data
+ index a60053b914..08af68b5e8 100644
+ --- a/sysdeps/unix/sysv/linux/aarch64/localplt.data
+ +++ b/sysdeps/unix/sysv/linux/aarch64/localplt.data
+ @@ -7,6 +7,9 @@ libc.so: malloc
+ libc.so: memalign
+ libc.so: realloc
+ libm.so: matherr
+ +# If outline atomics are used, libgcc (built outside of glibc) may
+ +# call __getauxval using the PLT.
+ +libc.so: __getauxval ?
+ # The dynamic loader needs __tls_get_addr for TLS.
+ ld.so: __tls_get_addr
+ # The main malloc is interposed into the dynamic linker, for
+ diff --git a/sysdeps/unix/sysv/linux/getpt.c b/sysdeps/unix/sysv/linux/getpt.c
+ index 1803b232c9..3cc745e11a 100644
+ --- a/sysdeps/unix/sysv/linux/getpt.c
+ +++ b/sysdeps/unix/sysv/linux/getpt.c
+ @@ -16,69 +16,18 @@
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+ -#include <errno.h>
+ #include <fcntl.h>
+ -#include <stdlib.h>
+ #include <unistd.h>
+ #include <paths.h>
+ -#include <sys/statfs.h>
+ -
+ -#include "linux_fsinfo.h"
+
+ /* Path to the master pseudo terminal cloning device. */
+ #define _PATH_DEVPTMX _PATH_DEV "ptmx"
+ -/* Directory containing the UNIX98 pseudo terminals. */
+ -#define _PATH_DEVPTS _PATH_DEV "pts"
+ -
+ -/* Prototype for function that opens BSD-style master pseudo-terminals. */
+ -extern int __bsd_getpt (void) attribute_hidden;
+
+ /* Open a master pseudo terminal and return its file descriptor. */
+ int
+ __posix_openpt (int oflag)
+ {
+ - static int have_no_dev_ptmx;
+ - int fd;
+ -
+ - if (!have_no_dev_ptmx)
+ - {
+ - fd = __open (_PATH_DEVPTMX, oflag);
+ - if (fd != -1)
+ - {
+ - struct statfs fsbuf;
+ - static int devpts_mounted;
+ -
+ - /* Check that the /dev/pts filesystem is mounted
+ - or if /dev is a devfs filesystem (this implies /dev/pts). */
+ - if (devpts_mounted
+ - || (__statfs (_PATH_DEVPTS, &fsbuf) == 0
+ - && fsbuf.f_type == DEVPTS_SUPER_MAGIC)
+ - || (__statfs (_PATH_DEV, &fsbuf) == 0
+ - && fsbuf.f_type == DEVFS_SUPER_MAGIC))
+ - {
+ - /* Everything is ok. */
+ - devpts_mounted = 1;
+ - return fd;
+ - }
+ -
+ - /* If /dev/pts is not mounted then the UNIX98 pseudo terminals
+ - are not usable. */
+ - __close (fd);
+ - have_no_dev_ptmx = 1;
+ - __set_errno (ENOENT);
+ - }
+ - else
+ - {
+ - if (errno == ENOENT || errno == ENODEV)
+ - have_no_dev_ptmx = 1;
+ - else
+ - return -1;
+ - }
+ - }
+ - else
+ - __set_errno (ENOENT);
+ -
+ - return -1;
+ + return __open (_PATH_DEVPTMX, oflag);
+ }
+ weak_alias (__posix_openpt, posix_openpt)
+
+ @@ -86,16 +35,6 @@ weak_alias (__posix_openpt, posix_openpt)
+ int
+ __getpt (void)
+ {
+ - int fd = __posix_openpt (O_RDWR);
+ - if (fd == -1)
+ - fd = __bsd_getpt ();
+ - return fd;
+ + return __posix_openpt (O_RDWR);
+ }
+ -
+ -
+ -#define PTYNAME1 "pqrstuvwxyzabcde";
+ -#define PTYNAME2 "0123456789abcdef";
+ -
+ -#define __getpt __bsd_getpt
+ -#define HAVE_POSIX_OPENPT
+ -#include <sysdeps/unix/bsd/getpt.c>
+ +weak_alias (__getpt, getpt)
+ diff --git a/sysdeps/unix/sysv/linux/grantpt.c b/sysdeps/unix/sysv/linux/grantpt.c
+ index 2030e07fa6..43122f9a76 100644
+ --- a/sysdeps/unix/sysv/linux/grantpt.c
+ +++ b/sysdeps/unix/sysv/linux/grantpt.c
+ @@ -1,44 +1,41 @@
+ -#include <assert.h>
+ -#include <ctype.h>
+ -#include <dirent.h>
+ -#include <errno.h>
+ -#include <fcntl.h>
+ -#include <paths.h>
+ -#include <stdlib.h>
+ -#include <unistd.h>
+ +/* grantpt implementation for Linux.
+ + Copyright (C) 1998-2020 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ + Contributed by Zack Weinberg <zack@rabi.phys.columbia.edu>, 1998.
+
+ -#include <not-cancel.h>
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+
+ -#include "pty-private.h"
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+
+ -#if HAVE_PT_CHOWN
+ -/* Close all file descriptors except the one specified. */
+ -static void
+ -close_all_fds (void)
+ -{
+ - DIR *dir = __opendir ("/proc/self/fd");
+ - if (dir != NULL)
+ - {
+ - struct dirent64 *d;
+ - while ((d = __readdir64 (dir)) != NULL)
+ - if (isdigit (d->d_name[0]))
+ - {
+ - char *endp;
+ - long int fd = strtol (d->d_name, &endp, 10);
+ - if (*endp == '\0' && fd != PTY_FILENO && fd != dirfd (dir))
+ - __close_nocancel_nostatus (fd);
+ - }
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#include <errno.h>
+ +#include <stdlib.h>
+ +#include <sys/ioctl.h>
+ +#include <termios.h>
+
+ - __closedir (dir);
+ +int
+ +grantpt (int fd)
+ +{
+ + /* Without pt_chown on Linux, we have delegated the creation of the
+ + pty node with the right group and permission mode to the kernel, and
+ + non-root users are unlikely to be able to change it. Therefore let's
+ + consider that POSIX enforcement is the responsibility of the whole
+ + system and not only the GNU libc. */
+
+ - int nullfd = __open_nocancel (_PATH_DEVNULL, O_RDONLY);
+ - assert (nullfd == STDIN_FILENO);
+ - nullfd = __open_nocancel (_PATH_DEVNULL, O_WRONLY);
+ - assert (nullfd == STDOUT_FILENO);
+ - __dup2 (STDOUT_FILENO, STDERR_FILENO);
+ - }
+ + /* Verify that fd refers to a ptmx descriptor. */
+ + unsigned int ptyno;
+ + int ret = __ioctl (fd, TIOCGPTN, &ptyno);
+ + if (ret != 0 && errno == ENOTTY)
+ + /* POSIX requires EINVAL instead of ENOTTY provided by the kernel. */
+ + __set_errno (EINVAL);
+ + return ret;
+ }
+ -# define CLOSE_ALL_FDS() close_all_fds()
+ -#endif
+ -
+ -#include <sysdeps/unix/grantpt.c>
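[Editorial note] The rewritten grantpt only verifies that the descriptor really is a ptmx master, translating the kernel's ENOTTY from TIOCGPTN into the EINVAL that POSIX specifies. A minimal editorial check of that behaviour, assuming nothing beyond the public API:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>

int
main (void)
{
  int not_a_pty = open ("/dev/null", O_RDWR);
  if (not_a_pty != -1 && grantpt (not_a_pty) == -1 && errno == EINVAL)
    puts ("non-pty descriptor rejected with EINVAL");

  int master = posix_openpt (O_RDWR);
  if (master != -1 && grantpt (master) == 0)
    puts ("ptmx descriptor accepted");
  return 0;
}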
+ diff --git a/sysdeps/unix/sysv/linux/hppa/atomic-machine.h b/sysdeps/unix/sysv/linux/hppa/atomic-machine.h
+ index 9d8ffbe860..bf61b66b70 100644
+ --- a/sysdeps/unix/sysv/linux/hppa/atomic-machine.h
+ +++ b/sysdeps/unix/sysv/linux/hppa/atomic-machine.h
+ @@ -36,9 +36,37 @@ typedef uintptr_t uatomicptr_t;
+ typedef intmax_t atomic_max_t;
+ typedef uintmax_t uatomic_max_t;
+
+ +#define atomic_full_barrier() __sync_synchronize ()
+ +
+ #define __HAVE_64B_ATOMICS 0
+ #define USE_ATOMIC_COMPILER_BUILTINS 0
+
+ +/* We use the compiler atomic load and store builtins as the generic
+ + defines are not atomic. In particular, we need to use compare and
+ + exchange for stores as the implementation is synthesized. */
+ +void __atomic_link_error (void);
+ +#define __atomic_check_size_ls(mem) \
+ + if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && sizeof (*mem) != 4) \
+ + __atomic_link_error ();
+ +
+ +#define atomic_load_relaxed(mem) \
+ + ({ __atomic_check_size_ls((mem)); \
+ + __atomic_load_n ((mem), __ATOMIC_RELAXED); })
+ +#define atomic_load_acquire(mem) \
+ + ({ __atomic_check_size_ls((mem)); \
+ + __atomic_load_n ((mem), __ATOMIC_ACQUIRE); })
+ +
+ +#define atomic_store_relaxed(mem, val) \
+ + do { \
+ + __atomic_check_size_ls((mem)); \
+ + __atomic_store_n ((mem), (val), __ATOMIC_RELAXED); \
+ + } while (0)
+ +#define atomic_store_release(mem, val) \
+ + do { \
+ + __atomic_check_size_ls((mem)); \
+ + __atomic_store_n ((mem), (val), __ATOMIC_RELEASE); \
+ + } while (0)
+ +
+ /* XXX Is this actually correct? */
+ #define ATOMIC_EXCHANGE_USES_CAS 1
+
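[Editorial note] The size check above turns an unsupported atomic width into a link failure instead of a silently non-atomic access: for 1-, 2- and 4-byte objects the call to __atomic_link_error is statically dead and disappears, while an 8-byte object leaves an unresolved reference, consistent with __HAVE_64B_ATOMICS being 0. A rough sketch, assuming this atomic-machine.h is in effect:

int ready;
long long int big;   /* 8 bytes: no atomic load/store available here.  */

int
reader (void)
{
  /* 4 bytes: the size check folds away and this maps to
     __atomic_load_n (&ready, __ATOMIC_ACQUIRE).  */
  int r = atomic_load_acquire (&ready);

  /* atomic_load_acquire (&big) would keep the __atomic_link_error
     reference and fail at link time.  */
  return r;
}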
+ diff --git a/sysdeps/unix/sysv/linux/microblaze/sysdep.h b/sysdeps/unix/sysv/linux/microblaze/sysdep.h
+ index ed873d9dd4..796663a23a 100644
+ --- a/sysdeps/unix/sysv/linux/microblaze/sysdep.h
+ +++ b/sysdeps/unix/sysv/linux/microblaze/sysdep.h
+ @@ -209,8 +209,8 @@ SYSCALL_ERROR_LABEL_DCL: \
+
+ # define inline_syscall0(name,dummy) \
+ ({ \
+ - register long __ret __asm__("r3"); \
+ - register long __r12 __asm__("r12") = name; \
+ + register long int __ret __asm__("r3"); \
+ + register long int __r12 __asm__("r12") = name; \
+ __asm__ __volatile__( "brki r14,8; nop;" \
+ : "=r"(__ret) \
+ : "r"(__r12) \
+ @@ -219,9 +219,10 @@ SYSCALL_ERROR_LABEL_DCL: \
+
+ # define inline_syscall1(name,arg1) \
+ ({ \
+ - register long __ret __asm__("r3"); \
+ - register long __r12 __asm__("r12") = name; \
+ - register long __r5 __asm__("r5") = (long)(arg1); \
+ + long int __arg1 = (long int) (arg1); \
+ + register long int __ret __asm__("r3"); \
+ + register long int __r12 __asm__("r12") = name; \
+ + register long int __r5 __asm__("r5") = __arg1; \
+ __asm__ __volatile__( "brki r14,8; nop;" \
+ : "=r"(__ret) \
+ : "r"(__r5), "r"(__r12) \
+ @@ -230,10 +231,12 @@ SYSCALL_ERROR_LABEL_DCL: \
+
+ # define inline_syscall2(name,arg1,arg2) \
+ ({ \
+ - register long __ret __asm__("r3"); \
+ - register long __r12 __asm__("r12") = name; \
+ - register long __r5 __asm__("r5") = (long)(arg1); \
+ - register long __r6 __asm__("r6") = (long)(arg2); \
+ + long int __arg1 = (long int) (arg1); \
+ + long int __arg2 = (long int) (arg2); \
+ + register long int __ret __asm__("r3"); \
+ + register long int __r12 __asm__("r12") = name; \
+ + register long int __r5 __asm__("r5") = __arg1; \
+ + register long int __r6 __asm__("r6") = __arg2; \
+ __asm__ __volatile__( "brki r14,8; nop;" \
+ : "=r"(__ret) \
+ : "r"(__r5), "r"(__r6), "r"(__r12) \
+ @@ -243,11 +246,14 @@ SYSCALL_ERROR_LABEL_DCL: \
+
+ # define inline_syscall3(name,arg1,arg2,arg3) \
+ ({ \
+ - register long __ret __asm__("r3"); \
+ - register long __r12 __asm__("r12") = name; \
+ - register long __r5 __asm__("r5") = (long)(arg1); \
+ - register long __r6 __asm__("r6") = (long)(arg2); \
+ - register long __r7 __asm__("r7") = (long)(arg3); \
+ + long int __arg1 = (long int) (arg1); \
+ + long int __arg2 = (long int) (arg2); \
+ + long int __arg3 = (long int) (arg3); \
+ + register long int __ret __asm__("r3"); \
+ + register long int __r12 __asm__("r12") = name; \
+ + register long int __r5 __asm__("r5") = __arg1; \
+ + register long int __r6 __asm__("r6") = __arg2; \
+ + register long int __r7 __asm__("r7") = __arg3; \
+ __asm__ __volatile__( "brki r14,8; nop;" \
+ : "=r"(__ret) \
+ : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r12) \
+ @@ -257,12 +263,16 @@ SYSCALL_ERROR_LABEL_DCL: \
+
+ # define inline_syscall4(name,arg1,arg2,arg3,arg4) \
+ ({ \
+ - register long __ret __asm__("r3"); \
+ - register long __r12 __asm__("r12") = name; \
+ - register long __r5 __asm__("r5") = (long)(arg1); \
+ - register long __r6 __asm__("r6") = (long)(arg2); \
+ - register long __r7 __asm__("r7") = (long)(arg3); \
+ - register long __r8 __asm__("r8") = (long)(arg4); \
+ + long int __arg1 = (long int) (arg1); \
+ + long int __arg2 = (long int) (arg2); \
+ + long int __arg3 = (long int) (arg3); \
+ + long int __arg4 = (long int) (arg4); \
+ + register long int __ret __asm__("r3"); \
+ + register long int __r12 __asm__("r12") = name; \
+ + register long int __r5 __asm__("r5") = __arg1; \
+ + register long int __r6 __asm__("r6") = __arg2; \
+ + register long int __r7 __asm__("r7") = __arg3; \
+ + register long int __r8 __asm__("r8") = __arg4; \
+ __asm__ __volatile__( "brki r14,8; nop;" \
+ : "=r"(__ret) \
+ : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r8),"r"(__r12) \
+ @@ -272,13 +282,18 @@ SYSCALL_ERROR_LABEL_DCL: \
+
+ # define inline_syscall5(name,arg1,arg2,arg3,arg4,arg5) \
+ ({ \
+ - register long __ret __asm__("r3"); \
+ - register long __r12 __asm__("r12") = name; \
+ - register long __r5 __asm__("r5") = (long)(arg1); \
+ - register long __r6 __asm__("r6") = (long)(arg2); \
+ - register long __r7 __asm__("r7") = (long)(arg3); \
+ - register long __r8 __asm__("r8") = (long)(arg4); \
+ - register long __r9 __asm__("r9") = (long)(arg5); \
+ + long int __arg1 = (long int) (arg1); \
+ + long int __arg2 = (long int) (arg2); \
+ + long int __arg3 = (long int) (arg3); \
+ + long int __arg4 = (long int) (arg4); \
+ + long int __arg5 = (long int) (arg5); \
+ + register long int __ret __asm__("r3"); \
+ + register long int __r12 __asm__("r12") = name; \
+ + register long int __r5 __asm__("r5") = __arg1; \
+ + register long int __r6 __asm__("r6") = __arg2; \
+ + register long int __r7 __asm__("r7") = __arg3; \
+ + register long int __r8 __asm__("r8") = __arg4; \
+ + register long int __r9 __asm__("r9") = __arg5; \
+ __asm__ __volatile__( "brki r14,8; nop;" \
+ : "=r"(__ret) \
+ : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r8),"r"(__r9), "r"(__r12) \
+ @@ -288,14 +303,20 @@ SYSCALL_ERROR_LABEL_DCL: \
+
+ # define inline_syscall6(name,arg1,arg2,arg3,arg4,arg5,arg6) \
+ ({ \
+ - register long __ret __asm__("r3"); \
+ - register long __r12 __asm__("r12") = name; \
+ - register long __r5 __asm__("r5") = (long)(arg1); \
+ - register long __r6 __asm__("r6") = (long)(arg2); \
+ - register long __r7 __asm__("r7") = (long)(arg3); \
+ - register long __r8 __asm__("r8") = (long)(arg4); \
+ - register long __r9 __asm__("r9") = (long)(arg5); \
+ - register long __r10 __asm__("r10") = (long)(arg6); \
+ + long int __arg1 = (long int) (arg1); \
+ + long int __arg2 = (long int) (arg2); \
+ + long int __arg3 = (long int) (arg3); \
+ + long int __arg4 = (long int) (arg4); \
+ + long int __arg5 = (long int) (arg5); \
+ + long int __arg6 = (long int) (arg6); \
+ + register long int __ret __asm__("r3"); \
+ + register long int __r12 __asm__("r12") = name; \
+ + register long int __r5 __asm__("r5") = __arg1; \
+ + register long int __r6 __asm__("r6") = __arg2; \
+ + register long int __r7 __asm__("r7") = __arg3; \
+ + register long int __r8 __asm__("r8") = __arg4; \
+ + register long int __r9 __asm__("r9") = __arg5; \
+ + register long int __r10 __asm__("r10") = __arg6; \
+ __asm__ __volatile__( "brki r14,8; nop;" \
+ : "=r"(__ret) \
+ : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r8),"r"(__r9), "r"(__r10), \
+ diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S
+ index b2bbf10181..ff445a5406 100644
+ --- a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S
+ +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S
+ @@ -22,9 +22,9 @@
+ .text
+ .set nomips16
+
+ -/* long long __mips_syscall5 (long arg1, long arg2, long arg3, long arg4,
+ - long arg5,
+ - long number) */
+ +/* long long int __mips_syscall5 (long int arg1, long int arg2, long int arg3,
+ + long int arg4, long int arg5,
+ + long int number) */
+
+ ENTRY(__mips_syscall5)
+ lw v0, 20(sp)
+ diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S
+ index 572d7c1137..2b4a3117d1 100644
+ --- a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S
+ +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S
+ @@ -22,9 +22,9 @@
+ .text
+ .set nomips16
+
+ -/* long long __mips_syscall6 (long arg1, long arg2, long arg3, long arg4,
+ - long arg5, long arg6,
+ - long number) */
+ +/* long long int __mips_syscall6 (long int arg1, long int arg2, long int arg3,
+ + long int arg4, long int arg5, long int arg6,
+ + long int number) */
+
+ ENTRY(__mips_syscall6)
+ lw v0, 24(sp)
+ diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S
+ index 05164cb253..2723bbb138 100644
+ --- a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S
+ +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S
+ @@ -22,9 +22,10 @@
+ .text
+ .set nomips16
+
+ -/* long long __mips_syscall7 (long arg1, long arg2, long arg3, long arg4,
+ - long arg5, long arg6, long arg7,
+ - long number) */
+ +/* long long int __mips_syscall7 (long int arg1, long int arg2, long int arg3,
+ + long int arg4, long int arg5, long int arg6,
+ + long int arg7,
+ + long int number) */
+
+ ENTRY(__mips_syscall7)
+ lw v0, 28(sp)
+ diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h
+ index 9bf551ace8..f23ede0259 100644
+ --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h
+ +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h
+ @@ -19,51 +19,57 @@
+ #ifndef MIPS16_SYSCALL_H
+ #define MIPS16_SYSCALL_H 1
+
+ -long long __nomips16 __mips16_syscall0 (long number);
+ +long long int __nomips16 __mips16_syscall0 (long int number);
+ #define __mips16_syscall0(dummy, number) \
+ - __mips16_syscall0 ((long) (number))
+ + __mips16_syscall0 ((long int) (number))
+
+ -long long __nomips16 __mips16_syscall1 (long a0,
+ - long number);
+ +long long int __nomips16 __mips16_syscall1 (long int a0,
+ + long int number);
+ #define __mips16_syscall1(a0, number) \
+ - __mips16_syscall1 ((long) (a0), \
+ - (long) (number))
+ + __mips16_syscall1 ((long int) (a0), \
+ + (long int) (number))
+
+ -long long __nomips16 __mips16_syscall2 (long a0, long a1,
+ - long number);
+ +long long int __nomips16 __mips16_syscall2 (long int a0, long int a1,
+ + long int number);
+ #define __mips16_syscall2(a0, a1, number) \
+ - __mips16_syscall2 ((long) (a0), (long) (a1), \
+ - (long) (number))
+ + __mips16_syscall2 ((long int) (a0), (long int) (a1), \
+ + (long int) (number))
+
+ -long long __nomips16 __mips16_syscall3 (long a0, long a1, long a2,
+ - long number);
+ +long long int __nomips16 __mips16_syscall3 (long int a0, long int a1,
+ + long int a2,
+ + long int number);
+ #define __mips16_syscall3(a0, a1, a2, number) \
+ - __mips16_syscall3 ((long) (a0), (long) (a1), (long) (a2), \
+ - (long) (number))
+ + __mips16_syscall3 ((long int) (a0), (long int) (a1), \
+ + (long int) (a2), \
+ + (long int) (number))
+
+ -long long __nomips16 __mips16_syscall4 (long a0, long a1, long a2, long a3,
+ - long number);
+ +long long int __nomips16 __mips16_syscall4 (long int a0, long int a1,
+ + long int a2, long int a3,
+ + long int number);
+ #define __mips16_syscall4(a0, a1, a2, a3, number) \
+ - __mips16_syscall4 ((long) (a0), (long) (a1), (long) (a2), \
+ - (long) (a3), \
+ - (long) (number))
+ + __mips16_syscall4 ((long int) (a0), (long int) (a1), \
+ + (long int) (a2), (long int) (a3), \
+ + (long int) (number))
+
+ /* The remaining ones use regular MIPS wrappers. */
+
+ #define __mips16_syscall5(a0, a1, a2, a3, a4, number) \
+ - __mips_syscall5 ((long) (a0), (long) (a1), (long) (a2), \
+ - (long) (a3), (long) (a4), \
+ - (long) (number))
+ + __mips_syscall5 ((long int) (a0), (long int) (a1), \
+ + (long int) (a2), (long int) (a3), \
+ + (long int) (a4), \
+ + (long int) (number))
+
+ #define __mips16_syscall6(a0, a1, a2, a3, a4, a5, number) \
+ - __mips_syscall6 ((long) (a0), (long) (a1), (long) (a2), \
+ - (long) (a3), (long) (a4), (long) (a5), \
+ - (long) (number))
+ + __mips_syscall6 ((long int) (a0), (long int) (a1), \
+ + (long int) (a2), (long int) (a3), \
+ + (long int) (a4), (long int) (a5), \
+ + (long int) (number))
+
+ #define __mips16_syscall7(a0, a1, a2, a3, a4, a5, a6, number) \
+ - __mips_syscall7 ((long) (a0), (long) (a1), (long) (a2), \
+ - (long) (a3), (long) (a4), (long) (a5), \
+ - (long) (a6), \
+ - (long) (number))
+ + __mips_syscall7 ((long int) (a0), (long int) (a1), \
+ + (long int) (a2), (long int) (a3), \
+ + (long int) (a4), (long int) (a5), \
+ + (long int) (a6), \
+ + (long int) (number))
+
+ #endif
+ diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c
+ index 92f16e2724..43c05f8050 100644
+ --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c
+ +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c
+ @@ -20,8 +20,8 @@
+
+ #undef __mips16_syscall0
+
+ -long long __nomips16
+ -__mips16_syscall0 (long number)
+ +long long int __nomips16
+ +__mips16_syscall0 (long int number)
+ {
+ union __mips_syscall_return ret;
+ ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 0);
+ diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c
+ index fa985a96e5..16a567e834 100644
+ --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c
+ +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c
+ @@ -20,9 +20,9 @@
+
+ #undef __mips16_syscall1
+
+ -long long __nomips16
+ -__mips16_syscall1 (long a0,
+ - long number)
+ +long long int __nomips16
+ +__mips16_syscall1 (long int a0,
+ + long int number)
+ {
+ union __mips_syscall_return ret;
+ ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 1,
+ diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c
+ index f042ac815d..c0a856c344 100644
+ --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c
+ +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c
+ @@ -20,9 +20,9 @@
+
+ #undef __mips16_syscall2
+
+ -long long __nomips16
+ -__mips16_syscall2 (long a0, long a1,
+ - long number)
+ +long long int __nomips16
+ +__mips16_syscall2 (long int a0, long int a1,
+ + long int number)
+ {
+ union __mips_syscall_return ret;
+ ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 2,
+ diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c
+ index dfe2f7feb5..042768ebf2 100644
+ --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c
+ +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c
+ @@ -20,9 +20,9 @@
+
+ #undef __mips16_syscall3
+
+ -long long __nomips16
+ -__mips16_syscall3 (long a0, long a1, long a2,
+ - long number)
+ +long long int __nomips16
+ +__mips16_syscall3 (long int a0, long int a1, long int a2,
+ + long int number)
+ {
+ union __mips_syscall_return ret;
+ ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 3,
+ diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c
+ index 39de510357..8658d822ab 100644
+ --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c
+ +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c
+ @@ -20,9 +20,9 @@
+
+ #undef __mips16_syscall4
+
+ -long long __nomips16
+ -__mips16_syscall4 (long a0, long a1, long a2, long a3,
+ - long number)
+ +long long int __nomips16
+ +__mips16_syscall4 (long int a0, long int a1, long int a2, long int a3,
+ + long int number)
+ {
+ union __mips_syscall_return ret;
+ ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 4,
+ diff --git a/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h
+ index beefcf284b..0c6a83e9b3 100644
+ --- a/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h
+ +++ b/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h
+ @@ -52,7 +52,7 @@
+ #undef INLINE_SYSCALL
+ #define INLINE_SYSCALL(name, nr, args...) \
+ ({ INTERNAL_SYSCALL_DECL (_sc_err); \
+ - long result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \
+ + long int result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \
+ if ( INTERNAL_SYSCALL_ERROR_P (result_var, _sc_err) ) \
+ { \
+ __set_errno (INTERNAL_SYSCALL_ERRNO (result_var, _sc_err)); \
+ @@ -61,10 +61,10 @@
+ result_var; })
+
+ #undef INTERNAL_SYSCALL_DECL
+ -#define INTERNAL_SYSCALL_DECL(err) long err __attribute__ ((unused))
+ +#define INTERNAL_SYSCALL_DECL(err) long int err __attribute__ ((unused))
+
+ #undef INTERNAL_SYSCALL_ERROR_P
+ -#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long) (err))
+ +#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long int) (err))
+
+ #undef INTERNAL_SYSCALL_ERRNO
+ #define INTERNAL_SYSCALL_ERRNO(val, err) ((void) (err), val)
+ @@ -103,11 +103,11 @@
+
+ union __mips_syscall_return
+ {
+ - long long val;
+ + long long int val;
+ struct
+ {
+ - long v0;
+ - long v1;
+ + long int v0;
+ + long int v1;
+ }
+ reg;
+ };
+ @@ -152,13 +152,13 @@ union __mips_syscall_return
+
+ #define internal_syscall0(v0_init, input, number, err, dummy...) \
+ ({ \
+ - long _sys_result; \
+ + long int _sys_result; \
+ \
+ { \
+ - register long __s0 asm ("$16") __attribute__ ((unused)) \
+ + register long int __s0 asm ("$16") __attribute__ ((unused)) \
+ = (number); \
+ - register long __v0 asm ("$2"); \
+ - register long __a3 asm ("$7"); \
+ + register long int __v0 asm ("$2"); \
+ + register long int __a3 asm ("$7"); \
+ __asm__ volatile ( \
+ ".set\tnoreorder\n\t" \
+ v0_init \
+ @@ -175,14 +175,15 @@ union __mips_syscall_return
+
+ #define internal_syscall1(v0_init, input, number, err, arg1) \
+ ({ \
+ - long _sys_result; \
+ + long int _sys_result; \
+ \
+ { \
+ - register long __s0 asm ("$16") __attribute__ ((unused)) \
+ + long int _arg1 = (long int) (arg1); \
+ + register long int __s0 asm ("$16") __attribute__ ((unused)) \
+ = (number); \
+ - register long __v0 asm ("$2"); \
+ - register long __a0 asm ("$4") = (long) (arg1); \
+ - register long __a3 asm ("$7"); \
+ + register long int __v0 asm ("$2"); \
+ + register long int __a0 asm ("$4") = _arg1; \
+ + register long int __a3 asm ("$7"); \
+ __asm__ volatile ( \
+ ".set\tnoreorder\n\t" \
+ v0_init \
+ @@ -199,15 +200,17 @@ union __mips_syscall_return
+
+ #define internal_syscall2(v0_init, input, number, err, arg1, arg2) \
+ ({ \
+ - long _sys_result; \
+ + long int _sys_result; \
+ \
+ { \
+ - register long __s0 asm ("$16") __attribute__ ((unused)) \
+ + long int _arg1 = (long int) (arg1); \
+ + long int _arg2 = (long int) (arg2); \
+ + register long int __s0 asm ("$16") __attribute__ ((unused)) \
+ = (number); \
+ - register long __v0 asm ("$2"); \
+ - register long __a0 asm ("$4") = (long) (arg1); \
+ - register long __a1 asm ("$5") = (long) (arg2); \
+ - register long __a3 asm ("$7"); \
+ + register long int __v0 asm ("$2"); \
+ + register long int __a0 asm ("$4") = _arg1; \
+ + register long int __a1 asm ("$5") = _arg2; \
+ + register long int __a3 asm ("$7"); \
+ __asm__ volatile ( \
+ ".set\tnoreorder\n\t" \
+ v0_init \
+ @@ -225,16 +228,19 @@ union __mips_syscall_return
+ #define internal_syscall3(v0_init, input, number, err, \
+ arg1, arg2, arg3) \
+ ({ \
+ - long _sys_result; \
+ + long int _sys_result; \
+ \
+ { \
+ - register long __s0 asm ("$16") __attribute__ ((unused)) \
+ + long int _arg1 = (long int) (arg1); \
+ + long int _arg2 = (long int) (arg2); \
+ + long int _arg3 = (long int) (arg3); \
+ + register long int __s0 asm ("$16") __attribute__ ((unused)) \
+ = (number); \
+ - register long __v0 asm ("$2"); \
+ - register long __a0 asm ("$4") = (long) (arg1); \
+ - register long __a1 asm ("$5") = (long) (arg2); \
+ - register long __a2 asm ("$6") = (long) (arg3); \
+ - register long __a3 asm ("$7"); \
+ + register long int __v0 asm ("$2"); \
+ + register long int __a0 asm ("$4") = _arg1; \
+ + register long int __a1 asm ("$5") = _arg2; \
+ + register long int __a2 asm ("$6") = _arg3; \
+ + register long int __a3 asm ("$7"); \
+ __asm__ volatile ( \
+ ".set\tnoreorder\n\t" \
+ v0_init \
+ @@ -252,16 +258,20 @@ union __mips_syscall_return
+ #define internal_syscall4(v0_init, input, number, err, \
+ arg1, arg2, arg3, arg4) \
+ ({ \
+ - long _sys_result; \
+ + long int _sys_result; \
+ \
+ { \
+ - register long __s0 asm ("$16") __attribute__ ((unused)) \
+ + long int _arg1 = (long int) (arg1); \
+ + long int _arg2 = (long int) (arg2); \
+ + long int _arg3 = (long int) (arg3); \
+ + long int _arg4 = (long int) (arg4); \
+ + register long int __s0 asm ("$16") __attribute__ ((unused)) \
+ = (number); \
+ - register long __v0 asm ("$2"); \
+ - register long __a0 asm ("$4") = (long) (arg1); \
+ - register long __a1 asm ("$5") = (long) (arg2); \
+ - register long __a2 asm ("$6") = (long) (arg3); \
+ - register long __a3 asm ("$7") = (long) (arg4); \
+ + register long int __v0 asm ("$2"); \
+ + register long int __a0 asm ("$4") = _arg1; \
+ + register long int __a1 asm ("$5") = _arg2; \
+ + register long int __a2 asm ("$6") = _arg3; \
+ + register long int __a3 asm ("$7") = _arg4; \
+ __asm__ volatile ( \
+ ".set\tnoreorder\n\t" \
+ v0_init \
+ @@ -285,63 +295,66 @@ union __mips_syscall_return
+ compiler specifics required for the stack arguments to be pushed,
+ which would be the case if these syscalls were inlined. */
+
+ -long long __nomips16 __mips_syscall5 (long arg1, long arg2, long arg3,
+ - long arg4, long arg5,
+ - long number);
+ +long long int __nomips16 __mips_syscall5 (long int arg1, long int arg2,
+ + long int arg3, long int arg4,
+ + long int arg5,
+ + long int number);
+ libc_hidden_proto (__mips_syscall5, nomips16)
+
+ #define internal_syscall5(v0_init, input, number, err, \
+ arg1, arg2, arg3, arg4, arg5) \
+ ({ \
+ union __mips_syscall_return _sc_ret; \
+ - _sc_ret.val = __mips_syscall5 ((long) (arg1), \
+ - (long) (arg2), \
+ - (long) (arg3), \
+ - (long) (arg4), \
+ - (long) (arg5), \
+ - (long) (number)); \
+ + _sc_ret.val = __mips_syscall5 ((long int) (arg1), \
+ + (long int) (arg2), \
+ + (long int) (arg3), \
+ + (long int) (arg4), \
+ + (long int) (arg5), \
+ + (long int) (number)); \
+ err = _sc_ret.reg.v1; \
+ _sc_ret.reg.v0; \
+ })
+
+ -long long __nomips16 __mips_syscall6 (long arg1, long arg2, long arg3,
+ - long arg4, long arg5, long arg6,
+ - long number);
+ +long long int __nomips16 __mips_syscall6 (long int arg1, long int arg2,
+ + long int arg3, long int arg4,
+ + long int arg5, long int arg6,
+ + long int number);
+ libc_hidden_proto (__mips_syscall6, nomips16)
+
+ #define internal_syscall6(v0_init, input, number, err, \
+ arg1, arg2, arg3, arg4, arg5, arg6) \
+ ({ \
+ union __mips_syscall_return _sc_ret; \
+ - _sc_ret.val = __mips_syscall6 ((long) (arg1), \
+ - (long) (arg2), \
+ - (long) (arg3), \
+ - (long) (arg4), \
+ - (long) (arg5), \
+ - (long) (arg6), \
+ - (long) (number)); \
+ + _sc_ret.val = __mips_syscall6 ((long int) (arg1), \
+ + (long int) (arg2), \
+ + (long int) (arg3), \
+ + (long int) (arg4), \
+ + (long int) (arg5), \
+ + (long int) (arg6), \
+ + (long int) (number)); \
+ err = _sc_ret.reg.v1; \
+ _sc_ret.reg.v0; \
+ })
+
+ -long long __nomips16 __mips_syscall7 (long arg1, long arg2, long arg3,
+ - long arg4, long arg5, long arg6,
+ - long arg7,
+ - long number);
+ +long long int __nomips16 __mips_syscall7 (long int arg1, long int arg2,
+ + long int arg3, long int arg4,
+ + long int arg5, long int arg6,
+ + long int arg7,
+ + long int number);
+ libc_hidden_proto (__mips_syscall7, nomips16)
+
+ #define internal_syscall7(v0_init, input, number, err, \
+ arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
+ ({ \
+ union __mips_syscall_return _sc_ret; \
+ - _sc_ret.val = __mips_syscall7 ((long) (arg1), \
+ - (long) (arg2), \
+ - (long) (arg3), \
+ - (long) (arg4), \
+ - (long) (arg5), \
+ - (long) (arg6), \
+ - (long) (arg7), \
+ - (long) (number)); \
+ + _sc_ret.val = __mips_syscall7 ((long int) (arg1), \
+ + (long int) (arg2), \
+ + (long int) (arg3), \
+ + (long int) (arg4), \
+ + (long int) (arg5), \
+ + (long int) (arg6), \
+ + (long int) (arg7), \
+ + (long int) (number)); \
+ err = _sc_ret.reg.v1; \
+ _sc_ret.reg.v0; \
+ })
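[Editorial note] For readability, here is the pattern the internal_syscall5/6/7 macros above implement, restated as a plain function (syscall5_sketch is a hypothetical name): the out-of-line asm helper returns both result registers packed into one 64-bit value, and union __mips_syscall_return splits them back into the result (v0) and the error flag (v1, carrying what the inline variants read from $a3).

static long int
syscall5_sketch (long int a0, long int a1, long int a2,
                 long int a3, long int a4, long int nr)
{
  union __mips_syscall_return rv;
  rv.val = __mips_syscall5 (a0, a1, a2, a3, a4, nr);
  /* v1 nonzero means the kernel flagged an error and v0 holds the
     errno value; otherwise v0 is the syscall result.  */
  return rv.reg.v1 ? -rv.reg.v0 : rv.reg.v0;
}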
+ diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h
+ index f96636538a..4a9d7054f9 100644
+ --- a/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h
+ +++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h
+ @@ -47,14 +47,14 @@
+
+ /* Convert X to a long long, without losing any bits if it is one
+ already or warning if it is a 32-bit pointer. */
+ -#define ARGIFY(X) ((long long) (__typeof__ ((X) - (X))) (X))
+ +#define ARGIFY(X) ((long long int) (__typeof__ ((X) - (X))) (X))
+
+ /* Define a macro which expands into the inline wrapper code for a system
+ call. */
+ #undef INLINE_SYSCALL
+ #define INLINE_SYSCALL(name, nr, args...) \
+ ({ INTERNAL_SYSCALL_DECL (_sc_err); \
+ - long result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \
+ + long int result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \
+ if ( INTERNAL_SYSCALL_ERROR_P (result_var, _sc_err) ) \
+ { \
+ __set_errno (INTERNAL_SYSCALL_ERRNO (result_var, _sc_err)); \
+ @@ -63,10 +63,10 @@
+ result_var; })
+
+ #undef INTERNAL_SYSCALL_DECL
+ -#define INTERNAL_SYSCALL_DECL(err) long err __attribute__ ((unused))
+ +#define INTERNAL_SYSCALL_DECL(err) long int err __attribute__ ((unused))
+
+ #undef INTERNAL_SYSCALL_ERROR_P
+ -#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long) (err))
+ +#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long int) (err))
+
+ #undef INTERNAL_SYSCALL_ERRNO
+ #define INTERNAL_SYSCALL_ERRNO(val, err) ((void) (err), val)
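[Editorial note] ARGIFY's double cast is worth unpacking. Per the comment above, it must widen n32's 32-bit pointers into 64-bit register arguments without the diagnostic a direct pointer-to-long-long cast can provoke, and it must not truncate values that are already 64-bit. An illustration (argify_sketch and its parameters are hypothetical):

/* __typeof__ ((X) - (X)) is ptrdiff_t for a pointer and the promoted
   type of X for an integer, so a pointer is first converted to a
   same-sized integer and only then widened to long long int.  */
static void
argify_sketch (char *buf, unsigned int flags, long long int off)
{
  long long int a0 = ARGIFY (buf);    /* pointer -> ptrdiff_t -> widened.  */
  long long int a1 = ARGIFY (flags);  /* 32-bit value, no truncation.  */
  long long int a2 = ARGIFY (off);    /* already 64-bit, no bits lost.  */
  (void) a0; (void) a1; (void) a2;
}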
+ @@ -112,13 +112,13 @@
+
+ #define internal_syscall0(v0_init, input, number, err, dummy...) \
+ ({ \
+ - long _sys_result; \
+ + long int _sys_result; \
+ \
+ { \
+ - register long long __s0 asm ("$16") __attribute__ ((unused)) \
+ + register long long int __s0 asm ("$16") __attribute__ ((unused))\
+ = (number); \
+ - register long long __v0 asm ("$2"); \
+ - register long long __a3 asm ("$7"); \
+ + register long long int __v0 asm ("$2"); \
+ + register long long int __a3 asm ("$7"); \
+ __asm__ volatile ( \
+ ".set\tnoreorder\n\t" \
+ v0_init \
+ @@ -135,14 +135,15 @@
+
+ #define internal_syscall1(v0_init, input, number, err, arg1) \
+ ({ \
+ - long _sys_result; \
+ + long int _sys_result; \
+ \
+ { \
+ - register long long __s0 asm ("$16") __attribute__ ((unused)) \
+ + long long int _arg1 = ARGIFY (arg1); \
+ + register long long int __s0 asm ("$16") __attribute__ ((unused))\
+ = (number); \
+ - register long long __v0 asm ("$2"); \
+ - register long long __a0 asm ("$4") = ARGIFY (arg1); \
+ - register long long __a3 asm ("$7"); \
+ + register long long int __v0 asm ("$2"); \
+ + register long long int __a0 asm ("$4") = _arg1; \
+ + register long long int __a3 asm ("$7"); \
+ __asm__ volatile ( \
+ ".set\tnoreorder\n\t" \
+ v0_init \
+ @@ -159,15 +160,17 @@
+
+ #define internal_syscall2(v0_init, input, number, err, arg1, arg2) \
+ ({ \
+ - long _sys_result; \
+ + long int _sys_result; \
+ \
+ { \
+ - register long long __s0 asm ("$16") __attribute__ ((unused)) \
+ + long long int _arg1 = ARGIFY (arg1); \
+ + long long int _arg2 = ARGIFY (arg2); \
+ + register long long int __s0 asm ("$16") __attribute__ ((unused))\
+ = (number); \
+ - register long long __v0 asm ("$2"); \
+ - register long long __a0 asm ("$4") = ARGIFY (arg1); \
+ - register long long __a1 asm ("$5") = ARGIFY (arg2); \
+ - register long long __a3 asm ("$7"); \
+ + register long long int __v0 asm ("$2"); \
+ + register long long int __a0 asm ("$4") = _arg1; \
+ + register long long int __a1 asm ("$5") = _arg2; \
+ + register long long int __a3 asm ("$7"); \
+ __asm__ volatile ( \
+ ".set\tnoreorder\n\t" \
+ v0_init \
+ @@ -185,16 +188,19 @@
+ #define internal_syscall3(v0_init, input, number, err, \
+ arg1, arg2, arg3) \
+ ({ \
+ - long _sys_result; \
+ + long int _sys_result; \
+ \
+ { \
+ - register long long __s0 asm ("$16") __attribute__ ((unused)) \
+ + long long int _arg1 = ARGIFY (arg1); \
+ + long long int _arg2 = ARGIFY (arg2); \
+ + long long int _arg3 = ARGIFY (arg3); \
+ + register long long int __s0 asm ("$16") __attribute__ ((unused))\
+ = (number); \
+ - register long long __v0 asm ("$2"); \
+ - register long long __a0 asm ("$4") = ARGIFY (arg1); \
+ - register long long __a1 asm ("$5") = ARGIFY (arg2); \
+ - register long long __a2 asm ("$6") = ARGIFY (arg3); \
+ - register long long __a3 asm ("$7"); \
+ + register long long int __v0 asm ("$2"); \
+ + register long long int __a0 asm ("$4") = _arg1; \
+ + register long long int __a1 asm ("$5") = _arg2; \
+ + register long long int __a2 asm ("$6") = _arg3; \
+ + register long long int __a3 asm ("$7"); \
+ __asm__ volatile ( \
+ ".set\tnoreorder\n\t" \
+ v0_init \
+ @@ -212,16 +218,20 @@
+ #define internal_syscall4(v0_init, input, number, err, \
+ arg1, arg2, arg3, arg4) \
+ ({ \
+ - long _sys_result; \
+ + long int _sys_result; \
+ \
+ { \
+ - register long long __s0 asm ("$16") __attribute__ ((unused)) \
+ + long long int _arg1 = ARGIFY (arg1); \
+ + long long int _arg2 = ARGIFY (arg2); \
+ + long long int _arg3 = ARGIFY (arg3); \
+ + long long int _arg4 = ARGIFY (arg4); \
+ + register long long int __s0 asm ("$16") __attribute__ ((unused))\
+ = (number); \
+ - register long long __v0 asm ("$2"); \
+ - register long long __a0 asm ("$4") = ARGIFY (arg1); \
+ - register long long __a1 asm ("$5") = ARGIFY (arg2); \
+ - register long long __a2 asm ("$6") = ARGIFY (arg3); \
+ - register long long __a3 asm ("$7") = ARGIFY (arg4); \
+ + register long long int __v0 asm ("$2"); \
+ + register long long int __a0 asm ("$4") = _arg1; \
+ + register long long int __a1 asm ("$5") = _arg2; \
+ + register long long int __a2 asm ("$6") = _arg3; \
+ + register long long int __a3 asm ("$7") = _arg4; \
+ __asm__ volatile ( \
+ ".set\tnoreorder\n\t" \
+ v0_init \
+ @@ -239,17 +249,22 @@
+ #define internal_syscall5(v0_init, input, number, err, \
+ arg1, arg2, arg3, arg4, arg5) \
+ ({ \
+ - long _sys_result; \
+ + long int _sys_result; \
+ \
+ { \
+ - register long long __s0 asm ("$16") __attribute__ ((unused)) \
+ + long long int _arg1 = ARGIFY (arg1); \
+ + long long int _arg2 = ARGIFY (arg2); \
+ + long long int _arg3 = ARGIFY (arg3); \
+ + long long int _arg4 = ARGIFY (arg4); \
+ + long long int _arg5 = ARGIFY (arg5); \
+ + register long long int __s0 asm ("$16") __attribute__ ((unused))\
+ = (number); \
+ - register long long __v0 asm ("$2"); \
+ - register long long __a0 asm ("$4") = ARGIFY (arg1); \
+ - register long long __a1 asm ("$5") = ARGIFY (arg2); \
+ - register long long __a2 asm ("$6") = ARGIFY (arg3); \
+ - register long long __a3 asm ("$7") = ARGIFY (arg4); \
+ - register long long __a4 asm ("$8") = ARGIFY (arg5); \
+ + register long long int __v0 asm ("$2"); \
+ + register long long int __a0 asm ("$4") = _arg1; \
+ + register long long int __a1 asm ("$5") = _arg2; \
+ + register long long int __a2 asm ("$6") = _arg3; \
+ + register long long int __a3 asm ("$7") = _arg4; \
+ + register long long int __a4 asm ("$8") = _arg5; \
+ __asm__ volatile ( \
+ ".set\tnoreorder\n\t" \
v0_init \
- @@ -135,14 +135,15 @@
+ @@ -267,18 +282,24 @@
+ #define internal_syscall6(v0_init, input, number, err, \
+ arg1, arg2, arg3, arg4, arg5, arg6) \
+ ({ \
+ - long _sys_result; \
+ + long int _sys_result; \
+ \
+ { \
+ - register long long __s0 asm ("$16") __attribute__ ((unused)) \
+ + long long int _arg1 = ARGIFY (arg1); \
+ + long long int _arg2 = ARGIFY (arg2); \
+ + long long int _arg3 = ARGIFY (arg3); \
+ + long long int _arg4 = ARGIFY (arg4); \
+ + long long int _arg5 = ARGIFY (arg5); \
+ + long long int _arg6 = ARGIFY (arg6); \
+ + register long long int __s0 asm ("$16") __attribute__ ((unused))\
+ = (number); \
+ - register long long __v0 asm ("$2"); \
+ - register long long __a0 asm ("$4") = ARGIFY (arg1); \
+ - register long long __a1 asm ("$5") = ARGIFY (arg2); \
+ - register long long __a2 asm ("$6") = ARGIFY (arg3); \
+ - register long long __a3 asm ("$7") = ARGIFY (arg4); \
+ - register long long __a4 asm ("$8") = ARGIFY (arg5); \
+ - register long long __a5 asm ("$9") = ARGIFY (arg6); \
+ + register long long int __v0 asm ("$2"); \
+ + register long long int __a0 asm ("$4") = _arg1; \
+ + register long long int __a1 asm ("$5") = _arg2; \
+ + register long long int __a2 asm ("$6") = _arg3; \
+ + register long long int __a3 asm ("$7") = _arg4; \
+ + register long long int __a4 asm ("$8") = _arg5; \
+ + register long long int __a5 asm ("$9") = _arg6; \
+ __asm__ volatile ( \
+ ".set\tnoreorder\n\t" \
+ v0_init \
+ diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h
+ index 9d30291f84..3e1f1cc3c5 100644
+ --- a/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h
+ +++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h
+ @@ -50,7 +50,7 @@
+ #undef INLINE_SYSCALL
+ #define INLINE_SYSCALL(name, nr, args...) \
+ ({ INTERNAL_SYSCALL_DECL (_sc_err); \
+ - long result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \
+ + long int result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \
+ if ( INTERNAL_SYSCALL_ERROR_P (result_var, _sc_err) ) \
+ { \
+ __set_errno (INTERNAL_SYSCALL_ERRNO (result_var, _sc_err)); \
+ @@ -59,10 +59,10 @@
+ result_var; })
+
+ #undef INTERNAL_SYSCALL_DECL
+ -#define INTERNAL_SYSCALL_DECL(err) long err __attribute__ ((unused))
+ +#define INTERNAL_SYSCALL_DECL(err) long int err __attribute__ ((unused))
+
+ #undef INTERNAL_SYSCALL_ERROR_P
+ -#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long) (err))
+ +#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long int) (err))
+
+ #undef INTERNAL_SYSCALL_ERRNO
+ #define INTERNAL_SYSCALL_ERRNO(val, err) ((void) (err), val)
+ @@ -108,13 +108,13 @@
+
+ #define internal_syscall0(v0_init, input, number, err, dummy...) \
+ ({ \
+ - long _sys_result; \
+ + long int _sys_result; \
+ \
+ { \
+ - register long __s0 asm ("$16") __attribute__ ((unused)) \
+ + register long int __s0 asm ("$16") __attribute__ ((unused)) \
+ = (number); \
+ - register long __v0 asm ("$2"); \
+ - register long __a3 asm ("$7"); \
+ + register long int __v0 asm ("$2"); \
+ + register long int __a3 asm ("$7"); \
+ __asm__ volatile ( \
+ ".set\tnoreorder\n\t" \
+ v0_init \
+ @@ -131,14 +131,15 @@
+
+ #define internal_syscall1(v0_init, input, number, err, arg1) \
+ ({ \
+ - long _sys_result; \
+ + long int _sys_result; \
+ \
+ { \
+ - register long __s0 asm ("$16") __attribute__ ((unused)) \
+ + long int _arg1 = (long int) (arg1); \
+ + register long int __s0 asm ("$16") __attribute__ ((unused)) \
+ = (number); \
+ - register long __v0 asm ("$2"); \
+ - register long __a0 asm ("$4") = (long) (arg1); \
+ - register long __a3 asm ("$7"); \
+ + register long int __v0 asm ("$2"); \
+ + register long int __a0 asm ("$4") = _arg1; \
+ + register long int __a3 asm ("$7"); \
+ __asm__ volatile ( \
+ ".set\tnoreorder\n\t" \
+ v0_init \
+ @@ -155,15 +156,17 @@
+
+ #define internal_syscall2(v0_init, input, number, err, arg1, arg2) \
+ ({ \
+ - long _sys_result; \
+ + long int _sys_result; \
+ \
+ { \
+ - register long __s0 asm ("$16") __attribute__ ((unused)) \
+ + long int _arg1 = (long int) (arg1); \
+ + long int _arg2 = (long int) (arg2); \
+ + register long int __s0 asm ("$16") __attribute__ ((unused)) \
+ = (number); \
+ - register long __v0 asm ("$2"); \
+ - register long __a0 asm ("$4") = (long) (arg1); \
+ - register long __a1 asm ("$5") = (long) (arg2); \
+ - register long __a3 asm ("$7"); \
+ + register long int __v0 asm ("$2"); \
+ + register long int __a0 asm ("$4") = _arg1; \
+ + register long int __a1 asm ("$5") = _arg2; \
+ + register long int __a3 asm ("$7"); \
+ __asm__ volatile ( \
+ ".set\tnoreorder\n\t" \
+ v0_init \
+ @@ -181,16 +184,19 @@
+ #define internal_syscall3(v0_init, input, number, err, \
+ arg1, arg2, arg3) \
+ ({ \
+ - long _sys_result; \
+ + long int _sys_result; \
+ \
+ { \
+ - register long __s0 asm ("$16") __attribute__ ((unused)) \
+ + long int _arg1 = (long int) (arg1); \
+ + long int _arg2 = (long int) (arg2); \
+ + long int _arg3 = (long int) (arg3); \
+ + register long int __s0 asm ("$16") __attribute__ ((unused)) \
+ = (number); \
+ - register long __v0 asm ("$2"); \
+ - register long __a0 asm ("$4") = (long) (arg1); \
+ - register long __a1 asm ("$5") = (long) (arg2); \
+ - register long __a2 asm ("$6") = (long) (arg3); \
+ - register long __a3 asm ("$7"); \
+ + register long int __v0 asm ("$2"); \
+ + register long int __a0 asm ("$4") = _arg1; \
+ + register long int __a1 asm ("$5") = _arg2; \
+ + register long int __a2 asm ("$6") = _arg3; \
+ + register long int __a3 asm ("$7"); \
+ __asm__ volatile ( \
+ ".set\tnoreorder\n\t" \
+ v0_init \
+ @@ -208,16 +214,20 @@
+ #define internal_syscall4(v0_init, input, number, err, \
+ arg1, arg2, arg3, arg4) \
+ ({ \
+ - long _sys_result; \
+ + long int _sys_result; \
+ \
+ { \
+ - register long __s0 asm ("$16") __attribute__ ((unused)) \
+ + long int _arg1 = (long int) (arg1); \
+ + long int _arg2 = (long int) (arg2); \
+ + long int _arg3 = (long int) (arg3); \
+ + long int _arg4 = (long int) (arg4); \
+ + register long int __s0 asm ("$16") __attribute__ ((unused)) \
+ = (number); \
+ - register long __v0 asm ("$2"); \
+ - register long __a0 asm ("$4") = (long) (arg1); \
+ - register long __a1 asm ("$5") = (long) (arg2); \
+ - register long __a2 asm ("$6") = (long) (arg3); \
+ - register long __a3 asm ("$7") = (long) (arg4); \
+ + register long int __v0 asm ("$2"); \
+ + register long int __a0 asm ("$4") = _arg1; \
+ + register long int __a1 asm ("$5") = _arg2; \
+ + register long int __a2 asm ("$6") = _arg3; \
+ + register long int __a3 asm ("$7") = _arg4; \
+ __asm__ volatile ( \
+ ".set\tnoreorder\n\t" \
+ v0_init \
+ @@ -235,17 +245,22 @@
+ #define internal_syscall5(v0_init, input, number, err, \
+ arg1, arg2, arg3, arg4, arg5) \
+ ({ \
+ - long _sys_result; \
+ + long int _sys_result; \
+ \
+ { \
+ - register long __s0 asm ("$16") __attribute__ ((unused)) \
+ + long int _arg1 = (long int) (arg1); \
+ + long int _arg2 = (long int) (arg2); \
+ + long int _arg3 = (long int) (arg3); \
+ + long int _arg4 = (long int) (arg4); \
+ + long int _arg5 = (long int) (arg5); \
+ + register long int __s0 asm ("$16") __attribute__ ((unused)) \
+ = (number); \
+ - register long __v0 asm ("$2"); \
+ - register long __a0 asm ("$4") = (long) (arg1); \
+ - register long __a1 asm ("$5") = (long) (arg2); \
+ - register long __a2 asm ("$6") = (long) (arg3); \
+ - register long __a3 asm ("$7") = (long) (arg4); \
+ - register long __a4 asm ("$8") = (long) (arg5); \
+ + register long int __v0 asm ("$2"); \
+ + register long int __a0 asm ("$4") = _arg1; \
+ + register long int __a1 asm ("$5") = _arg2; \
+ + register long int __a2 asm ("$6") = _arg3; \
+ + register long int __a3 asm ("$7") = _arg4; \
+ + register long int __a4 asm ("$8") = _arg5; \
+ __asm__ volatile ( \
+ ".set\tnoreorder\n\t" \
+ v0_init \
+ @@ -263,18 +278,24 @@
+ #define internal_syscall6(v0_init, input, number, err, \
+ arg1, arg2, arg3, arg4, arg5, arg6) \
+ ({ \
+ - long _sys_result; \
+ + long int _sys_result; \
+ \
+ { \
+ - register long __s0 asm ("$16") __attribute__ ((unused)) \
+ + long int _arg1 = (long int) (arg1); \
+ + long int _arg2 = (long int) (arg2); \
+ + long int _arg3 = (long int) (arg3); \
+ + long int _arg4 = (long int) (arg4); \
+ + long int _arg5 = (long int) (arg5); \
+ + long int _arg6 = (long int) (arg6); \
+ + register long int __s0 asm ("$16") __attribute__ ((unused)) \
+ = (number); \
+ - register long __v0 asm ("$2"); \
+ - register long __a0 asm ("$4") = (long) (arg1); \
+ - register long __a1 asm ("$5") = (long) (arg2); \
+ - register long __a2 asm ("$6") = (long) (arg3); \
+ - register long __a3 asm ("$7") = (long) (arg4); \
+ - register long __a4 asm ("$8") = (long) (arg5); \
+ - register long __a5 asm ("$9") = (long) (arg6); \
+ + register long int __v0 asm ("$2"); \
+ + register long int __a0 asm ("$4") = _arg1; \
+ + register long int __a1 asm ("$5") = _arg2; \
+ + register long int __a2 asm ("$6") = _arg3; \
+ + register long int __a3 asm ("$7") = _arg4; \
+ + register long int __a4 asm ("$8") = _arg5; \
+ + register long int __a5 asm ("$9") = _arg6; \
+ __asm__ volatile ( \
+ ".set\tnoreorder\n\t" \
+ v0_init \
+ diff --git a/sysdeps/unix/sysv/linux/mips/mips64/syscall.S b/sysdeps/unix/sysv/linux/mips/mips64/syscall.S
+ index 26adf2cd04..a9baff3c17 100644
+ --- a/sysdeps/unix/sysv/linux/mips/mips64/syscall.S
+ +++ b/sysdeps/unix/sysv/linux/mips/mips64/syscall.S
+ @@ -20,7 +20,7 @@
+ #include <sys/asm.h>
+
+ /* Usage:
+ - long syscall (syscall_number, arg1, arg2, arg3, arg4, arg5, arg6, arg7)
+ + long int syscall (syscall_number, arg1, arg2, arg3, arg4, arg5, arg6, arg7)
+
+ We need to do some arg shifting, syscall_number will be in v0. */
+
+ diff --git a/sysdeps/unix/sysv/linux/mips/sysdep.h b/sysdeps/unix/sysv/linux/mips/sysdep.h
+ index cdfc0b1b58..a4cf1540fe 100644
+ --- a/sysdeps/unix/sysv/linux/mips/sysdep.h
+ +++ b/sysdeps/unix/sysv/linux/mips/sysdep.h
+ @@ -36,8 +36,8 @@
+ the INTERNAL_SYSCALL_{ERROR_P,ERRNO} macros work correctly. */
+ #define INTERNAL_VSYSCALL_CALL(funcptr, err, nr, args...) \
+ ({ \
+ - long _ret = funcptr (args); \
+ - err = ((unsigned long) (_ret) >= (unsigned long) -4095L); \
+ + long int _ret = funcptr (args); \
+ + err = ((unsigned long int) (_ret) >= (unsigned long int) -4095L); \
+ if (err) \
+ _ret = -_ret; \
+ _ret; \
+ diff --git a/sysdeps/unix/sysv/linux/mips/unwind-arch.h b/sysdeps/unix/sysv/linux/mips/unwind-arch.h
+ new file mode 100644
+ index 0000000000..a009899983
+ --- /dev/null
+ +++ b/sysdeps/unix/sysv/linux/mips/unwind-arch.h
+ @@ -0,0 +1,67 @@
+ +/* Return backtrace of current program state. Arch-specific bits.
+ + Copyright (C) 2020 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#ifndef _UNWIND_ARCH_H
+ +#define _UNWIND_ARCH_H
+ +
+ +#include <stdint.h>
+ +
+ +/* MIPS fallback code handles a frame whose FDE cannot be obtained
+ +   (for instance a signal frame) by reading the kernel-allocated signal frame
+ +   and adding '2' to the value of 'sc_pc' [1].  The added value is used to
+ +   recognize the end of an EH region on mips16 [2].
+ +
+ +   The idea here is to adjust the obtained signal frame ADDR value and remove
+ +   the value libgcc added, by checking whether the previous frame is a signal
+ +   frame.
+ +
+ + [1] libgcc/config/mips/linux-unwind.h from gcc code.
+ + [2] gcc/config/mips/mips.h from gcc code. */
+ +
+ +static inline void *
+ +unwind_arch_adjustment (void *prev, void *addr)
+ +{
+ + uint32_t *pc = (uint32_t *) prev;
+ +
+ + if (pc == NULL)
+ + return addr;
+ +
+ + /* For MIPS16 or microMIPS frame libgcc makes no adjustment. */
+ + if ((uintptr_t) pc & 0x3)
+ + return addr;
+ +
+ +  /* The vDSO contains either
+ +
+ + 24021061 li v0, 0x1061 (rt_sigreturn)
+ + 0000000c syscall
+ + or
+ + 24021017 li v0, 0x1017 (sigreturn)
+ + 0000000c syscall */
+ + if (pc[1] != 0x0000000c)
+ + return addr;
+ +#if _MIPS_SIM == _ABIO32
+ + if (pc[0] == (0x24020000 | __NR_sigreturn))
+ + return (void *) ((uintptr_t) addr - 2);
+ +#endif
+ + if (pc[0] == (0x24020000 | __NR_rt_sigreturn))
+ + return (void *) ((uintptr_t) addr - 2);
+ +
+ + return addr;
+ +}
+ +
+ +#endif
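[Editorial note] The two instruction checks above work because 0x24020000 is the fixed encoding of "li v0, 0" (addiu v0, zero, imm with a zero immediate), so OR-ing in the syscall number reproduces the exact word in the vDSO trampoline quoted in the comment. A small restatement of that test, not part of the patch:

#include <stdint.h>

/* Editorial helper: does INSN encode "li v0, NR"?  */
static inline int
is_li_v0 (uint32_t insn, uint32_t nr)
{
  return insn == (0x24020000u | nr);
}

/* For instance, is_li_v0 (0x24021061, 0x1061) is nonzero, matching the
   o32 rt_sigreturn case shown in the comment above.  */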
+ diff --git a/sysdeps/unix/sysv/linux/msgctl.c b/sysdeps/unix/sysv/linux/msgctl.c
+ index 27879e76cd..fd46aec1a0 100644
+ --- a/sysdeps/unix/sysv/linux/msgctl.c
+ +++ b/sysdeps/unix/sysv/linux/msgctl.c
+ @@ -21,6 +21,7 @@
+ #include <sysdep.h>
+ #include <shlib-compat.h>
+ #include <errno.h>
+ +#include <linux/posix_types.h> /* For __kernel_mode_t. */
+
+ #ifndef DEFAULT_VERSION
+ # ifndef __ASSUME_SYSVIPC_BROKEN_MODE_T
+ @@ -61,7 +62,6 @@ __new_msgctl (int msqid, int cmd, struct msqid_ds *buf)
+
+ int ret = msgctl_syscall (msqid, cmd, buf);
+
+ -#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T
+ if (ret >= 0)
+ {
+ switch (cmd)
+ @@ -69,10 +69,16 @@ __new_msgctl (int msqid, int cmd, struct msqid_ds *buf)
+ case IPC_STAT:
+ case MSG_STAT:
+ case MSG_STAT_ANY:
+ +#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T
+ buf->msg_perm.mode >>= 16;
+ +#else
+ + /* Old Linux kernel versions might not clear the mode padding. */
+ + if (sizeof ((struct msqid_ds){0}.msg_perm.mode)
+ + != sizeof (__kernel_mode_t))
+ + buf->msg_perm.mode &= 0xFFFF;
+ +#endif
+ }
+ }
+ -#endif
+
+ return ret;
+ }
+ diff --git a/sysdeps/unix/sysv/linux/nios2/kernel-features.h b/sysdeps/unix/sysv/linux/nios2/kernel-features.h
+ deleted file mode 100644
+ index d68d114981..0000000000
+ --- a/sysdeps/unix/sysv/linux/nios2/kernel-features.h
+ +++ /dev/null
+ @@ -1,22 +0,0 @@
+ -/* Set flags signalling availability of kernel features based on given
+ - kernel version number. NIOS2 version.
+ - Copyright (C) 2019-2020 Free Software Foundation, Inc.
+ - This file is part of the GNU C Library.
+ -
+ - The GNU C Library is free software; you can redistribute it and/or
+ - modify it under the terms of the GNU Lesser General Public
+ - License as published by the Free Software Foundation; either
+ - version 2.1 of the License, or (at your option) any later version.
+ -
+ - The GNU C Library is distributed in the hope that it will be useful,
+ - but WITHOUT ANY WARRANTY; without even the implied warranty of
+ - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ - Lesser General Public License for more details.
+ -
+ - You should have received a copy of the GNU Lesser General Public
+ - License along with the GNU C Library; if not, see
+ - <https://www.gnu.org/licenses/>. */
+ -
+ -#include_next <kernel-features.h>
+ -
+ -#undef __ASSUME_SYSVIPC_DEFAULT_IPC_64
+ diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h b/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h
+ index 725dfafde8..ffc150851e 100644
+ --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h
+ +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h
+ @@ -134,47 +134,47 @@
+ # define LOADARGS_0(name, dummy) \
+ r0 = name
+ # define LOADARGS_1(name, __arg1) \
+ - long int arg1 = (long int) (__arg1); \
+ + long int _arg1 = (long int) (__arg1); \
+ LOADARGS_0(name, 0); \
+ extern void __illegally_sized_syscall_arg1 (void); \
+ if (__builtin_classify_type (__arg1) != 5 && sizeof (__arg1) > 4) \
+ __illegally_sized_syscall_arg1 (); \
+ - r3 = arg1
+ + r3 = _arg1
+ # define LOADARGS_2(name, __arg1, __arg2) \
+ - long int arg2 = (long int) (__arg2); \
+ + long int _arg2 = (long int) (__arg2); \
+ LOADARGS_1(name, __arg1); \
+ extern void __illegally_sized_syscall_arg2 (void); \
+ if (__builtin_classify_type (__arg2) != 5 && sizeof (__arg2) > 4) \
+ __illegally_sized_syscall_arg2 (); \
+ - r4 = arg2
+ + r4 = _arg2
+ # define LOADARGS_3(name, __arg1, __arg2, __arg3) \
+ - long int arg3 = (long int) (__arg3); \
+ + long int _arg3 = (long int) (__arg3); \
+ LOADARGS_2(name, __arg1, __arg2); \
+ extern void __illegally_sized_syscall_arg3 (void); \
+ if (__builtin_classify_type (__arg3) != 5 && sizeof (__arg3) > 4) \
+ __illegally_sized_syscall_arg3 (); \
+ - r5 = arg3
+ + r5 = _arg3
+ # define LOADARGS_4(name, __arg1, __arg2, __arg3, __arg4) \
+ - long int arg4 = (long int) (__arg4); \
+ + long int _arg4 = (long int) (__arg4); \
+ LOADARGS_3(name, __arg1, __arg2, __arg3); \
+ extern void __illegally_sized_syscall_arg4 (void); \
+ if (__builtin_classify_type (__arg4) != 5 && sizeof (__arg4) > 4) \
+ __illegally_sized_syscall_arg4 (); \
+ - r6 = arg4
+ + r6 = _arg4
+ # define LOADARGS_5(name, __arg1, __arg2, __arg3, __arg4, __arg5) \
+ - long int arg5 = (long int) (__arg5); \
+ + long int _arg5 = (long int) (__arg5); \
+ LOADARGS_4(name, __arg1, __arg2, __arg3, __arg4); \
+ extern void __illegally_sized_syscall_arg5 (void); \
+ if (__builtin_classify_type (__arg5) != 5 && sizeof (__arg5) > 4) \
+ __illegally_sized_syscall_arg5 (); \
+ - r7 = arg5
+ + r7 = _arg5
+ # define LOADARGS_6(name, __arg1, __arg2, __arg3, __arg4, __arg5, __arg6) \
+ - long int arg6 = (long int) (__arg6); \
+ + long int _arg6 = (long int) (__arg6); \
+ LOADARGS_5(name, __arg1, __arg2, __arg3, __arg4, __arg5); \
+ extern void __illegally_sized_syscall_arg6 (void); \
+ if (__builtin_classify_type (__arg6) != 5 && sizeof (__arg6) > 4) \
+ __illegally_sized_syscall_arg6 (); \
+ - r8 = arg6
+ + r8 = _arg6
+
+ # define ASM_INPUT_0 "0" (r0)
+ # define ASM_INPUT_1 ASM_INPUT_0, "1" (r3)
+ diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h b/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h
+ index ee7f43653d..8a3f1c43e4 100644
+ --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h
+ +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h
+ @@ -139,47 +139,47 @@
+ #define LOADARGS_0(name, dummy) \
+ r0 = name
+ #define LOADARGS_1(name, __arg1) \
+ - long int arg1 = (long int) (__arg1); \
+ + long int _arg1 = (long int) (__arg1); \
+ LOADARGS_0(name, 0); \
+ extern void __illegally_sized_syscall_arg1 (void); \
+ if (__builtin_classify_type (__arg1) != 5 && sizeof (__arg1) > 8) \
+ __illegally_sized_syscall_arg1 (); \
+ - r3 = arg1
+ + r3 = _arg1
+ #define LOADARGS_2(name, __arg1, __arg2) \
+ - long int arg2 = (long int) (__arg2); \
+ + long int _arg2 = (long int) (__arg2); \
+ LOADARGS_1(name, __arg1); \
+ extern void __illegally_sized_syscall_arg2 (void); \
+ if (__builtin_classify_type (__arg2) != 5 && sizeof (__arg2) > 8) \
+ __illegally_sized_syscall_arg2 (); \
+ - r4 = arg2
+ + r4 = _arg2
+ #define LOADARGS_3(name, __arg1, __arg2, __arg3) \
+ - long int arg3 = (long int) (__arg3); \
+ + long int _arg3 = (long int) (__arg3); \
+ LOADARGS_2(name, __arg1, __arg2); \
+ extern void __illegally_sized_syscall_arg3 (void); \
+ if (__builtin_classify_type (__arg3) != 5 && sizeof (__arg3) > 8) \
+ __illegally_sized_syscall_arg3 (); \
+ - r5 = arg3
+ + r5 = _arg3
+ #define LOADARGS_4(name, __arg1, __arg2, __arg3, __arg4) \
+ - long int arg4 = (long int) (__arg4); \
+ + long int _arg4 = (long int) (__arg4); \
+ LOADARGS_3(name, __arg1, __arg2, __arg3); \
+ extern void __illegally_sized_syscall_arg4 (void); \
+ if (__builtin_classify_type (__arg4) != 5 && sizeof (__arg4) > 8) \
+ __illegally_sized_syscall_arg4 (); \
+ - r6 = arg4
+ + r6 = _arg4
+ #define LOADARGS_5(name, __arg1, __arg2, __arg3, __arg4, __arg5) \
+ - long int arg5 = (long int) (__arg5); \
+ + long int _arg5 = (long int) (__arg5); \
+ LOADARGS_4(name, __arg1, __arg2, __arg3, __arg4); \
+ extern void __illegally_sized_syscall_arg5 (void); \
+ if (__builtin_classify_type (__arg5) != 5 && sizeof (__arg5) > 8) \
+ __illegally_sized_syscall_arg5 (); \
+ - r7 = arg5
+ + r7 = _arg5
+ #define LOADARGS_6(name, __arg1, __arg2, __arg3, __arg4, __arg5, __arg6) \
+ - long int arg6 = (long int) (__arg6); \
+ + long int _arg6 = (long int) (__arg6); \
+ LOADARGS_5(name, __arg1, __arg2, __arg3, __arg4, __arg5); \
+ extern void __illegally_sized_syscall_arg6 (void); \
+ if (__builtin_classify_type (__arg6) != 5 && sizeof (__arg6) > 8) \
+ __illegally_sized_syscall_arg6 (); \
+ - r8 = arg6
+ + r8 = _arg6
+
+ #define ASM_INPUT_0 "0" (r0)
+ #define ASM_INPUT_1 ASM_INPUT_0, "1" (r3)
+ diff --git a/sysdeps/unix/sysv/linux/prctl.c b/sysdeps/unix/sysv/linux/prctl.c
+ new file mode 100644
+ index 0000000000..d5725f14cf
+ --- /dev/null
+ +++ b/sysdeps/unix/sysv/linux/prctl.c
+ @@ -0,0 +1,42 @@
+ +/* prctl - Linux specific syscall.
+ + Copyright (C) 2020 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#include <sysdep.h>
+ +#include <stdarg.h>
+ +#include <sys/prctl.h>
+ +
+ +/* Unconditionally read all potential arguments. This may pass
+ + garbage values to the kernel, but avoids the need for teaching
+ + glibc the argument counts of individual options (including ones
+ + that are added to the kernel in the future). */
+ +
+ +int
+ +__prctl (int option, ...)
+ +{
+ + va_list arg;
+ + va_start (arg, option);
+ + unsigned long int arg2 = va_arg (arg, unsigned long int);
+ + unsigned long int arg3 = va_arg (arg, unsigned long int);
+ + unsigned long int arg4 = va_arg (arg, unsigned long int);
+ + unsigned long int arg5 = va_arg (arg, unsigned long int);
+ + va_end (arg);
+ + return INLINE_SYSCALL_CALL (prctl, option, arg2, arg3, arg4, arg5);
+ +}
+ +
+ +libc_hidden_def (__prctl)
+ +weak_alias (__prctl, prctl)
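[Editorial note] Callers keep the familiar variadic interface; the wrapper just forwards four unsigned long values whether or not the option uses them, as the comment above explains. A quick usage sketch (not part of the patch) with two well-known options:

#include <stdio.h>
#include <sys/prctl.h>

int
main (void)
{
  /* Only one real argument here; the extra values the wrapper reads
     are the "garbage" the comment above deems acceptable.  */
  if (prctl (PR_SET_NAME, "demo") != 0)
    perror ("prctl (PR_SET_NAME)");

  char name[16];
  if (prctl (PR_GET_NAME, name) == 0)
    printf ("thread name: %s\n", name);
  return 0;
}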
+ diff --git a/sysdeps/unix/sysv/linux/process_vm_readv.c b/sysdeps/unix/sysv/linux/process_vm_readv.c
+ new file mode 100644
+ index 0000000000..e1377f7e50
+ --- /dev/null
+ +++ b/sysdeps/unix/sysv/linux/process_vm_readv.c
+ @@ -0,0 +1,32 @@
+ +/* process_vm_readv - Linux specific syscall.
+ + Copyright (C) 2020 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#include <unistd.h>
+ +#include <sysdep.h>
+ +#include <errno.h>
+ +#include <sys/uio.h>
+ +
+ +ssize_t
+ +process_vm_readv (pid_t pid, const struct iovec *local_iov,
+ + unsigned long int liovcnt,
+ + const struct iovec *remote_iov,
+ + unsigned long int riovcnt, unsigned long int flags)
+ +{
+ + return INLINE_SYSCALL_CALL (process_vm_readv, pid, local_iov,
+ + liovcnt, remote_iov, riovcnt, flags);
+ +}
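
process_vm_readv copies data straight from another process's address space into local iovecs, without going through ptrace PEEKDATA. A small, hedged usage sketch of the wrapper added above; it assumes the caller has the same permissions over the target pid that ptrace would require.

/* Usage sketch for process_vm_readv; not part of the patch.  */
#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/uio.h>

/* Read LEN bytes at REMOTE_ADDR inside process PID into LOCAL_BUF.
   Returns the number of bytes transferred, or -1 with errno set.  */
ssize_t
read_remote (pid_t pid, void *remote_addr, void *local_buf, size_t len)
{
  struct iovec local = { .iov_base = local_buf, .iov_len = len };
  struct iovec remote = { .iov_base = remote_addr, .iov_len = len };
  return process_vm_readv (pid, &local, 1, &remote, 1, 0);
}
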
+ diff --git a/sysdeps/unix/sysv/linux/process_vm_writev.c b/sysdeps/unix/sysv/linux/process_vm_writev.c
+ new file mode 100644
+ index 0000000000..944ab9b7f1
+ --- /dev/null
+ +++ b/sysdeps/unix/sysv/linux/process_vm_writev.c
+ @@ -0,0 +1,32 @@
+ +/* process_vm_writev - Linux specific syscall.
+ + Copyright (C) 2020 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#include <unistd.h>
+ +#include <sysdep.h>
+ +#include <errno.h>
+ +#include <sys/uio.h>
+ +
+ +ssize_t
+ +process_vm_writev (pid_t pid, const struct iovec *local_iov,
+ + unsigned long int liovcnt,
+ + const struct iovec *remote_iov,
+ + unsigned long int riovcnt, unsigned long int flags)
+ +{
+ + return INLINE_SYSCALL_CALL (process_vm_writev, pid, local_iov,
+ + liovcnt, remote_iov, riovcnt, flags);
+ +}
+ diff --git a/sysdeps/unix/sysv/linux/ptsname.c b/sysdeps/unix/sysv/linux/ptsname.c
+ index 81d9d26f1e..3e9be3f0d4 100644
+ --- a/sysdeps/unix/sysv/linux/ptsname.c
+ +++ b/sysdeps/unix/sysv/linux/ptsname.c
+ @@ -21,39 +21,14 @@
+ #include <stdlib.h>
+ #include <string.h>
+ #include <sys/ioctl.h>
+ -#include <sys/stat.h>
+ -#include <sys/sysmacros.h>
+ #include <termios.h>
+ #include <unistd.h>
+
+ #include <_itoa.h>
+
+ -/* Check if DEV corresponds to a master pseudo terminal device. */
+ -#define MASTER_P(Dev) \
+ - (__gnu_dev_major ((Dev)) == 2 \
+ - || (__gnu_dev_major ((Dev)) == 4 \
+ - && __gnu_dev_minor ((Dev)) >= 128 && __gnu_dev_minor ((Dev)) < 192) \
+ - || (__gnu_dev_major ((Dev)) >= 128 && __gnu_dev_major ((Dev)) < 136))
+ -
+ -/* Check if DEV corresponds to a slave pseudo terminal device. */
+ -#define SLAVE_P(Dev) \
+ - (__gnu_dev_major ((Dev)) == 3 \
+ - || (__gnu_dev_major ((Dev)) == 4 \
+ - && __gnu_dev_minor ((Dev)) >= 192 && __gnu_dev_minor ((Dev)) < 256) \
+ - || (__gnu_dev_major ((Dev)) >= 136 && __gnu_dev_major ((Dev)) < 144))
+ -
+ -/* Note that major number 4 corresponds to the old BSD style pseudo
+ - terminal devices. As of Linux 2.1.115 these are no longer
+ - supported. They have been replaced by major numbers 2 (masters)
+ - and 3 (slaves). */
+ -
+ /* Directory where we can find the slave pty nodes. */
+ #define _PATH_DEVPTS "/dev/pts/"
+
+ -/* The are declared in getpt.c. */
+ -extern const char __libc_ptyname1[] attribute_hidden;
+ -extern const char __libc_ptyname2[] attribute_hidden;
+ -
+ /* Static buffer for `ptsname'. */
+ static char buffer[sizeof (_PATH_DEVPTS) + 20];
+
+ @@ -68,19 +43,15 @@ ptsname (int fd)
+ }
+
+
+ +/* Store in BUF at most BUFLEN characters of the pathname of the slave
+ +   pseudo terminal associated with the master pseudo terminal that FD
+ +   is open on.  Return 0 on success, otherwise an error number.  */
+ int
+ -__ptsname_internal (int fd, char *buf, size_t buflen, struct stat64 *stp)
+ +__ptsname_r (int fd, char *buf, size_t buflen)
+ {
+ int save_errno = errno;
+ unsigned int ptyno;
+
+ - if (!__isatty (fd))
+ - {
+ - __set_errno (ENOTTY);
+ - return ENOTTY;
+ - }
+ -
+ -#ifdef TIOCGPTN
+ if (__ioctl (fd, TIOCGPTN, &ptyno) == 0)
+ {
+ /* Buffer we use to print the number in. For a maximum size for
+ @@ -101,67 +72,11 @@ __ptsname_internal (int fd, char *buf, size_t buflen, struct stat64 *stp)
+
+ memcpy (__stpcpy (buf, devpts), p, &numbuf[sizeof (numbuf)] - p);
+ }
+ - else if (errno != EINVAL)
+ - return errno;
+ else
+ -#endif
+ - {
+ - char *p;
+ -
+ - if (buflen < strlen (_PATH_TTY) + 3)
+ - {
+ - __set_errno (ERANGE);
+ - return ERANGE;
+ - }
+ -
+ - if (__fxstat64 (_STAT_VER, fd, stp) < 0)
+ - return errno;
+ -
+ - /* Check if FD really is a master pseudo terminal. */
+ - if (! MASTER_P (stp->st_rdev))
+ - {
+ - __set_errno (ENOTTY);
+ - return ENOTTY;
+ - }
+ -
+ - ptyno = __gnu_dev_minor (stp->st_rdev);
+ -
+ - if (ptyno / 16 >= strlen (__libc_ptyname1))
+ - {
+ - __set_errno (ENOTTY);
+ - return ENOTTY;
+ - }
+ -
+ - p = __stpcpy (buf, _PATH_TTY);
+ - p[0] = __libc_ptyname1[ptyno / 16];
+ - p[1] = __libc_ptyname2[ptyno % 16];
+ - p[2] = '\0';
+ - }
+ -
+ - if (__xstat64 (_STAT_VER, buf, stp) < 0)
+ + /* Bad file descriptor, or not a ptmx descriptor. */
+ return errno;
+
+ - /* Check if the name we're about to return really corresponds to a
+ - slave pseudo terminal. */
+ - if (! S_ISCHR (stp->st_mode) || ! SLAVE_P (stp->st_rdev))
+ - {
+ - /* This really is a configuration problem. */
+ - __set_errno (ENOTTY);
+ - return ENOTTY;
+ - }
+ -
+ __set_errno (save_errno);
+ return 0;
+ }
+ -
+ -
+ -/* Store at most BUFLEN characters of the pathname of the slave pseudo
+ - terminal associated with the master FD is open on in BUF.
+ - Return 0 on success, otherwise an error number. */
+ -int
+ -__ptsname_r (int fd, char *buf, size_t buflen)
+ -{
+ - struct stat64 st;
+ - return __ptsname_internal (fd, buf, buflen, &st);
+ -}
+ weak_alias (__ptsname_r, ptsname_r)
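
After this change __ptsname_r is only the TIOCGPTN path; the BSD-pty fallback and the stat-based sanity checks are gone, but the public interface is unchanged. A hedged sketch of typical use follows; posix_openpt and ptsname_r are the standard entry points and nothing here is specific to this patch.

/* Usage sketch for ptsname_r on a freshly opened master pty;
   not part of the patch.  */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>

int
main (void)
{
  char slave[64];
  int master = posix_openpt (O_RDWR | O_NOCTTY);
  if (master < 0)
    {
      perror ("posix_openpt");
      return EXIT_FAILURE;
    }
  /* On Linux this goes through the TIOCGPTN ioctl shown above and
     yields a /dev/pts/N name.  */
  if (ptsname_r (master, slave, sizeof slave) != 0)
    {
      perror ("ptsname_r");
      return EXIT_FAILURE;
    }
  printf ("slave pty: %s\n", slave);
  return 0;
}
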
+ diff --git a/sysdeps/unix/sysv/linux/riscv/sysdep.h b/sysdeps/unix/sysv/linux/riscv/sysdep.h
+ index 201bf9a91b..2bd9b16f32 100644
+ --- a/sysdeps/unix/sysv/linux/riscv/sysdep.h
+ +++ b/sysdeps/unix/sysv/linux/riscv/sysdep.h
+ @@ -176,10 +176,11 @@
+ # define internal_syscall1(number, err, arg0) \
+ ({ \
+ long int _sys_result; \
+ + long int _arg0 = (long int) (arg0); \
+ \
+ { \
+ register long int __a7 asm ("a7") = number; \
+ - register long int __a0 asm ("a0") = (long int) (arg0); \
+ + register long int __a0 asm ("a0") = _arg0; \
+ __asm__ volatile ( \
+ "scall\n\t" \
+ : "+r" (__a0) \
+ @@ -193,11 +194,13 @@
+ # define internal_syscall2(number, err, arg0, arg1) \
+ ({ \
+ long int _sys_result; \
+ + long int _arg0 = (long int) (arg0); \
+ + long int _arg1 = (long int) (arg1); \
+ \
+ { \
+ register long int __a7 asm ("a7") = number; \
+ - register long int __a0 asm ("a0") = (long int) (arg0); \
+ - register long int __a1 asm ("a1") = (long int) (arg1); \
+ + register long int __a0 asm ("a0") = _arg0; \
+ + register long int __a1 asm ("a1") = _arg1; \
+ __asm__ volatile ( \
+ "scall\n\t" \
+ : "+r" (__a0) \
+ @@ -211,12 +214,15 @@
+ # define internal_syscall3(number, err, arg0, arg1, arg2) \
+ ({ \
+ long int _sys_result; \
+ + long int _arg0 = (long int) (arg0); \
+ + long int _arg1 = (long int) (arg1); \
+ + long int _arg2 = (long int) (arg2); \
+ \
+ { \
+ register long int __a7 asm ("a7") = number; \
+ - register long int __a0 asm ("a0") = (long int) (arg0); \
+ - register long int __a1 asm ("a1") = (long int) (arg1); \
+ - register long int __a2 asm ("a2") = (long int) (arg2); \
+ + register long int __a0 asm ("a0") = _arg0; \
+ + register long int __a1 asm ("a1") = _arg1; \
+ + register long int __a2 asm ("a2") = _arg2; \
+ __asm__ volatile ( \
+ "scall\n\t" \
+ : "+r" (__a0) \
+ @@ -230,13 +236,17 @@
+ # define internal_syscall4(number, err, arg0, arg1, arg2, arg3) \
+ ({ \
+ long int _sys_result; \
+ + long int _arg0 = (long int) (arg0); \
+ + long int _arg1 = (long int) (arg1); \
+ + long int _arg2 = (long int) (arg2); \
+ + long int _arg3 = (long int) (arg3); \
+ \
+ { \
+ register long int __a7 asm ("a7") = number; \
+ - register long int __a0 asm ("a0") = (long int) (arg0); \
+ - register long int __a1 asm ("a1") = (long int) (arg1); \
+ - register long int __a2 asm ("a2") = (long int) (arg2); \
+ - register long int __a3 asm ("a3") = (long int) (arg3); \
+ + register long int __a0 asm ("a0") = _arg0; \
+ + register long int __a1 asm ("a1") = _arg1; \
+ + register long int __a2 asm ("a2") = _arg2; \
+ + register long int __a3 asm ("a3") = _arg3; \
+ __asm__ volatile ( \
+ "scall\n\t" \
+ : "+r" (__a0) \
+ @@ -250,14 +260,19 @@
+ # define internal_syscall5(number, err, arg0, arg1, arg2, arg3, arg4) \
+ ({ \
+ long int _sys_result; \
+ + long int _arg0 = (long int) (arg0); \
+ + long int _arg1 = (long int) (arg1); \
+ + long int _arg2 = (long int) (arg2); \
+ + long int _arg3 = (long int) (arg3); \
+ + long int _arg4 = (long int) (arg4); \
+ \
+ { \
+ register long int __a7 asm ("a7") = number; \
+ - register long int __a0 asm ("a0") = (long int) (arg0); \
+ - register long int __a1 asm ("a1") = (long int) (arg1); \
+ - register long int __a2 asm ("a2") = (long int) (arg2); \
+ - register long int __a3 asm ("a3") = (long int) (arg3); \
+ - register long int __a4 asm ("a4") = (long int) (arg4); \
+ + register long int __a0 asm ("a0") = _arg0; \
+ + register long int __a1 asm ("a1") = _arg1; \
+ + register long int __a2 asm ("a2") = _arg2; \
+ + register long int __a3 asm ("a3") = _arg3; \
+ + register long int __a4 asm ("a4") = _arg4; \
+ __asm__ volatile ( \
+ "scall\n\t" \
+ : "+r" (__a0) \
+ @@ -271,15 +286,21 @@
+ # define internal_syscall6(number, err, arg0, arg1, arg2, arg3, arg4, arg5) \
+ ({ \
+ long int _sys_result; \
+ + long int _arg0 = (long int) (arg0); \
+ + long int _arg1 = (long int) (arg1); \
+ + long int _arg2 = (long int) (arg2); \
+ + long int _arg3 = (long int) (arg3); \
+ + long int _arg4 = (long int) (arg4); \
+ + long int _arg5 = (long int) (arg5); \
+ \
+ { \
+ register long int __a7 asm ("a7") = number; \
+ - register long int __a0 asm ("a0") = (long int) (arg0); \
+ - register long int __a1 asm ("a1") = (long int) (arg1); \
+ - register long int __a2 asm ("a2") = (long int) (arg2); \
+ - register long int __a3 asm ("a3") = (long int) (arg3); \
+ - register long int __a4 asm ("a4") = (long int) (arg4); \
+ - register long int __a5 asm ("a5") = (long int) (arg5); \
+ + register long int __a0 asm ("a0") = _arg0; \
+ + register long int __a1 asm ("a1") = _arg1; \
+ + register long int __a2 asm ("a2") = _arg2; \
+ + register long int __a3 asm ("a3") = _arg3; \
+ + register long int __a4 asm ("a4") = _arg4; \
+ + register long int __a5 asm ("a5") = _arg5; \
+ __asm__ volatile ( \
+ "scall\n\t" \
+ : "+r" (__a0) \
+ @@ -294,16 +315,23 @@
+ # define internal_syscall7(number, err, arg0, arg1, arg2, arg3, arg4, arg5, arg6) \
+ ({ \
+ long int _sys_result; \
+ + long int _arg0 = (long int) (arg0); \
+ + long int _arg1 = (long int) (arg1); \
+ + long int _arg2 = (long int) (arg2); \
+ + long int _arg3 = (long int) (arg3); \
+ + long int _arg4 = (long int) (arg4); \
+ + long int _arg5 = (long int) (arg5); \
+ + long int _arg6 = (long int) (arg6); \
+ \
+ { \
+ register long int __a7 asm ("a7") = number; \
+ - register long int __a0 asm ("a0") = (long int) (arg0); \
+ - register long int __a1 asm ("a1") = (long int) (arg1); \
+ - register long int __a2 asm ("a2") = (long int) (arg2); \
+ - register long int __a3 asm ("a3") = (long int) (arg3); \
+ - register long int __a4 asm ("a4") = (long int) (arg4); \
+ - register long int __a5 asm ("a5") = (long int) (arg5); \
+ - register long int __a6 asm ("a6") = (long int) (arg6); \
+ + register long int __a0 asm ("a0") = _arg0; \
+ + register long int __a1 asm ("a1") = _arg1; \
+ + register long int __a2 asm ("a2") = _arg2; \
+ + register long int __a3 asm ("a3") = _arg3; \
+ + register long int __a4 asm ("a4") = _arg4; \
+ + register long int __a5 asm ("a5") = _arg5; \
+ + register long int __a6 asm ("a6") = _arg6; \
+ __asm__ volatile ( \
+ "scall\n\t" \
+ : "+r" (__a0) \
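
The RISC-V change is purely about evaluation order: every argN expression is computed into an ordinary local before any register ... asm ("aN") variable is bound, so an argument that expands to a function call cannot clobber syscall registers that were already loaded. A hedged sketch of the hazard and the fix, written as plain functions rather than the real macros; helper, syscall2_risky and syscall2_safe are illustrative names.

/* Sketch of the evaluation-order hazard the RISC-V change avoids;
   these are plain functions, not the real glibc macros.  */

static long
helper (void)          /* a call like this may clobber a0-a7 */
{
  return 42;
}

/* Risky ordering: a0 already holds X when helper () runs for the
   second argument, and the call is free to clobber a0.  */
static inline long
syscall2_risky (long nr, long x)
{
  register long a7 __asm__ ("a7") = nr;
  register long a0 __asm__ ("a0") = x;
  register long a1 __asm__ ("a1") = helper ();
  __asm__ volatile ("scall" : "+r" (a0) : "r" (a7), "r" (a1) : "memory");
  return a0;
}

/* Safe ordering (what the patch does): evaluate everything into
   ordinary locals first, then bind the register variables.  */
static inline long
syscall2_safe (long nr, long x)
{
  long arg0 = x;
  long arg1 = helper ();
  register long a7 __asm__ ("a7") = nr;
  register long a0 __asm__ ("a0") = arg0;
  register long a1 __asm__ ("a1") = arg1;
  __asm__ volatile ("scall" : "+r" (a0) : "r" (a7), "r" (a1) : "memory");
  return a0;
}
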
+ diff --git a/sysdeps/unix/sysv/linux/semctl.c b/sysdeps/unix/sysv/linux/semctl.c
+ index 0c3eb0932f..30571af49f 100644
+ --- a/sysdeps/unix/sysv/linux/semctl.c
+ +++ b/sysdeps/unix/sysv/linux/semctl.c
+ @@ -22,6 +22,7 @@
+ #include <sysdep.h>
+ #include <shlib-compat.h>
+ #include <errno.h>
+ +#include <linux/posix_types.h> /* For __kernel_mode_t. */
+
+ /* Define a `union semun' suitable for Linux here. */
+ union semun
+ @@ -92,7 +93,6 @@ __new_semctl (int semid, int semnum, int cmd, ...)
+
+ int ret = semctl_syscall (semid, semnum, cmd, arg);
+
+ -#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T
+ if (ret >= 0)
+ {
+ switch (cmd)
+ @@ -100,10 +100,16 @@ __new_semctl (int semid, int semnum, int cmd, ...)
+ case IPC_STAT:
+ case SEM_STAT:
+ case SEM_STAT_ANY:
+ +#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T
+ arg.buf->sem_perm.mode >>= 16;
+ +#else
+ + /* Old Linux kernel versions might not clear the mode padding. */
+ + if (sizeof ((struct semid_ds){0}.sem_perm.mode)
+ + != sizeof (__kernel_mode_t))
+ + arg.buf->sem_perm.mode &= 0xFFFF;
+ +#endif
+ }
+ }
+ -#endif
+
+ return ret;
+ }
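
The semctl/shmctl hunks replace a compile-time-only assumption with a runtime-visible size check: when glibc's ipc_perm mode field is wider than the kernel's __kernel_mode_t, old kernels may leave the upper 16 bits as uninitialized padding, so they are masked off after IPC_STAT-style commands. A hedged stand-alone sketch of the same sizeof-on-compound-literal comparison; the struct names are invented for illustration.

/* Sketch of the mode-padding fix-up; the struct names are invented.  */
#include <stdint.h>

struct kernel_like_perm { uint16_t mode; };  /* stand-in for __kernel_mode_t */
struct glibc_like_perm  { uint32_t mode; };  /* stand-in for the semid_ds field */

uint32_t
sanitize_mode (uint32_t mode)
{
  /* Mask only when the userspace field is wider than the kernel's,
     i.e. when its upper half may be uninitialized padding.  */
  if (sizeof ((struct glibc_like_perm){0}.mode)
      != sizeof ((struct kernel_like_perm){0}.mode))
    mode &= 0xFFFF;
  return mode;
}
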
+ diff --git a/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies b/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies
+ new file mode 100644
+ index 0000000000..7eeaf15a5a
+ --- /dev/null
+ +++ b/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies
+ @@ -0,0 +1 @@
+ +unix/sysv/linux/sh/sh4/fpu
+ diff --git a/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies b/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies
+ new file mode 100644
+ index 0000000000..7eeaf15a5a
+ --- /dev/null
+ +++ b/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies
+ @@ -0,0 +1 @@
+ +unix/sysv/linux/sh/sh4/fpu
+ diff --git a/sysdeps/unix/sysv/linux/shmctl.c b/sysdeps/unix/sysv/linux/shmctl.c
+ index 39fa861e17..f41b359b8b 100644
+ --- a/sysdeps/unix/sysv/linux/shmctl.c
+ +++ b/sysdeps/unix/sysv/linux/shmctl.c
+ @@ -22,6 +22,7 @@
+ #include <sysdep.h>
+ #include <shlib-compat.h>
+ #include <errno.h>
+ +#include <linux/posix_types.h> /* For __kernel_mode_t. */
+
+ #ifndef DEFAULT_VERSION
+ # ifndef __ASSUME_SYSVIPC_BROKEN_MODE_T
+ @@ -63,7 +64,6 @@ __new_shmctl (int shmid, int cmd, struct shmid_ds *buf)
+
+ int ret = shmctl_syscall (shmid, cmd, buf);
+
+ -#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T
+ if (ret >= 0)
+ {
+ switch (cmd)
+ @@ -71,10 +71,16 @@ __new_shmctl (int shmid, int cmd, struct shmid_ds *buf)
+ case IPC_STAT:
+ case SHM_STAT:
+ case SHM_STAT_ANY:
+ +#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T
+ buf->shm_perm.mode >>= 16;
+ +#else
+ + /* Old Linux kernel versions might not clear the mode padding. */
+ + if (sizeof ((struct shmid_ds){0}.shm_perm.mode)
+ + != sizeof (__kernel_mode_t))
+ + buf->shm_perm.mode &= 0xFFFF;
+ +#endif
+ }
+ }
+ -#endif
+
+ return ret;
+ }
+ diff --git a/sysdeps/unix/sysv/linux/sparc/Makefile b/sysdeps/unix/sysv/linux/sparc/Makefile
+ index b0d182a439..1475039677 100644
+ --- a/sysdeps/unix/sysv/linux/sparc/Makefile
+ +++ b/sysdeps/unix/sysv/linux/sparc/Makefile
+ @@ -11,8 +11,12 @@ ifeq ($(subdir),sysvipc)
+ sysdep_routines += getshmlba
+ endif
+
+ +ifeq ($(subdir),signal)
+ +sysdep_routines += sigreturn_stub
+ +endif
+ +
+ ifeq ($(subdir),nptl)
+ # pull in __syscall_error routine
+ -libpthread-routines += sysdep
+ -libpthread-shared-only-routines += sysdep
+ +libpthread-routines += sysdep sigreturn_stub
+ +libpthread-shared-only-routines += sysdep sigreturn_stub
+ endif
+ diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/sigaction.c b/sysdeps/unix/sysv/linux/sparc/sparc32/sigaction.c
+ index 6b2f664226..938aa7aa8c 100644
+ --- a/sysdeps/unix/sysv/linux/sparc/sparc32/sigaction.c
+ +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/sigaction.c
+ @@ -24,8 +24,8 @@
+ #include <kernel_sigaction.h>
+ #include <sysdep.h>
+
+ -static void __rt_sigreturn_stub (void);
+ -static void __sigreturn_stub (void);
+ +void __rt_sigreturn_stub (void);
+ +void __sigreturn_stub (void);
+
+ #define STUB(act, sigsetsize) \
+ (act) ? ((unsigned long)((act->sa_flags & SA_SIGINFO) \
+ @@ -35,25 +35,3 @@ static void __sigreturn_stub (void);
+ (sigsetsize)
+
+ #include <sysdeps/unix/sysv/linux/sigaction.c>
+ -
+ -static
+ -inhibit_stack_protector
+ -void
+ -__rt_sigreturn_stub (void)
+ -{
+ - __asm__ ("mov %0, %%g1\n\t"
+ - "ta 0x10\n\t"
+ - : /* no outputs */
+ - : "i" (__NR_rt_sigreturn));
+ -}
+ -
+ -static
+ -inhibit_stack_protector
+ -void
+ -__sigreturn_stub (void)
+ -{
+ - __asm__ ("mov %0, %%g1\n\t"
+ - "ta 0x10\n\t"
+ - : /* no outputs */
+ - : "i" (__NR_sigreturn));
+ -}
+ diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/sigreturn_stub.S b/sysdeps/unix/sysv/linux/sparc/sparc32/sigreturn_stub.S
+ new file mode 100644
+ index 0000000000..727cc94737
+ --- /dev/null
+ +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/sigreturn_stub.S
+ @@ -0,0 +1,34 @@
+ +/* Sigreturn stub function used on sa_restore field.
+ + Copyright (C) 2020 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#include <sysdep.h>
+ +
+ +/* These functions must not change the register window or the stack
+ + pointer [1].
+ +
+ + [1] https://lkml.org/lkml/2016/5/27/465 */
+ +
+ +ENTRY (__rt_sigreturn_stub)
+ + mov __NR_rt_sigreturn, %g1
+ + ta 0x10
+ +END (__rt_sigreturn_stub)
+ +
+ +ENTRY (__sigreturn_stub)
+ + mov __NR_sigreturn, %g1
+ + ta 0x10
+ +END (__sigreturn_stub)
+ diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/sigaction.c b/sysdeps/unix/sysv/linux/sparc/sparc64/sigaction.c
+ index 9c0dc2a630..4e26172321 100644
+ --- a/sysdeps/unix/sysv/linux/sparc/sparc64/sigaction.c
+ +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/sigaction.c
+ @@ -22,21 +22,11 @@
+ #include <syscall.h>
+ #include <sysdep.h>
+
+ -static void __rt_sigreturn_stub (void);
+ +/* Defined in sigreturn_stub.S.  */
+ +void __rt_sigreturn_stub (void);
+
+ #define STUB(act, sigsetsize) \
+ (((unsigned long) &__rt_sigreturn_stub) - 8), \
+ (sigsetsize)
+
+ #include <sysdeps/unix/sysv/linux/sigaction.c>
+ -
+ -static
+ -inhibit_stack_protector
+ -void
+ -__rt_sigreturn_stub (void)
+ -{
+ - __asm__ ("mov %0, %%g1\n\t"
+ - "ta 0x6d\n\t"
+ - : /* no outputs */
+ - : "i" (__NR_rt_sigreturn));
+ -}
+ diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/sigreturn_stub.S b/sysdeps/unix/sysv/linux/sparc/sparc64/sigreturn_stub.S
+ new file mode 100644
+ index 0000000000..add4766831
+ --- /dev/null
+ +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/sigreturn_stub.S
+ @@ -0,0 +1,29 @@
+ +/* Sigreturn stub function used on sa_restore field.
+ + Copyright (C) 2020 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#include <sysdep.h>
+ +
+ +/* This function must not change the register window or the stack
+ + pointer [1].
+ +
+ + [1] https://lkml.org/lkml/2016/5/27/465 */
+ +
+ +ENTRY (__rt_sigreturn_stub)
+ + mov __NR_rt_sigreturn, %g1
+ + ta 0x6d
+ +END (__rt_sigreturn_stub)
+ diff --git a/sysdeps/unix/sysv/linux/syscall-names.list b/sysdeps/unix/sysv/linux/syscall-names.list
+ index 36e087d8f4..3d89814003 100644
+ --- a/sysdeps/unix/sysv/linux/syscall-names.list
+ +++ b/sysdeps/unix/sysv/linux/syscall-names.list
+ @@ -21,8 +21,8 @@
+ # This file can list all potential system calls. The names are only
+ # used if the installed kernel headers also provide them.
+
+ -# The list of system calls is current as of Linux 5.4.
+ -kernel 5.4
+ +# The list of system calls is current as of Linux 5.5.
+ +kernel 5.5
+
+ FAST_atomic_update
+ FAST_cmpxchg
+ diff --git a/sysdeps/unix/sysv/linux/syscalls.list b/sysdeps/unix/sysv/linux/syscalls.list
+ index 5f1352ad43..52e6dafc86 100644
+ --- a/sysdeps/unix/sysv/linux/syscalls.list
+ +++ b/sysdeps/unix/sysv/linux/syscalls.list
+ @@ -28,25 +28,24 @@ inotify_add_watch EXTRA inotify_add_watch i:isi inotify_add_watch
+ inotify_init EXTRA inotify_init i: inotify_init
+ inotify_init1 EXTRA inotify_init1 i:I inotify_init1
+ inotify_rm_watch EXTRA inotify_rm_watch i:ii inotify_rm_watch
+ -ioperm - ioperm i:iii ioperm
+ +ioperm - ioperm i:UUi ioperm
+ iopl - iopl i:i iopl
+ klogctl EXTRA syslog i:isi klogctl
+ lchown - lchown i:sii __lchown lchown
+ -mincore - mincore i:anV mincore
+ -mlock - mlock i:bn mlock
+ +mincore - mincore i:aUV mincore
+ +mlock - mlock i:bU mlock
+ mlockall - mlockall i:i mlockall
+ -mount EXTRA mount i:sssip __mount mount
+ -mremap EXTRA mremap b:ainip __mremap mremap
+ -munlock - munlock i:ai munlock
+ +mount EXTRA mount i:sssUp __mount mount
+ +mremap EXTRA mremap b:aUUip __mremap mremap
+ +munlock - munlock i:aU munlock
+ munlockall - munlockall i: munlockall
+ nfsservctl EXTRA nfsservctl i:ipp __compat_nfsservctl nfsservctl@GLIBC_2.0:GLIBC_2.28
+ pipe - pipe i:f __pipe pipe
+ pipe2 - pipe2 i:fi __pipe2 pipe2
+ pivot_root EXTRA pivot_root i:ss pivot_root
+ -prctl EXTRA prctl i:iiiii __prctl prctl
+ query_module EXTRA query_module i:sipip __compat_query_module query_module@GLIBC_2.0:GLIBC_2.23
+ quotactl EXTRA quotactl i:isip quotactl
+ -remap_file_pages - remap_file_pages i:piiii __remap_file_pages remap_file_pages
+ +remap_file_pages - remap_file_pages i:pUiUi __remap_file_pages remap_file_pages
+ sched_getp - sched_getparam i:ip __sched_getparam sched_getparam
+ sched_gets - sched_getscheduler i:i __sched_getscheduler sched_getscheduler
+ sched_primax - sched_get_priority_max i:i __sched_get_priority_max sched_get_priority_max
+ @@ -55,8 +54,8 @@ sched_rr_gi - sched_rr_get_interval i:ip __sched_rr_get_interval sched_rr_get_in
+ sched_setp - sched_setparam i:ip __sched_setparam sched_setparam
+ sched_sets - sched_setscheduler i:iip __sched_setscheduler sched_setscheduler
+ sched_yield - sched_yield i: __sched_yield sched_yield
+ -sendfile - sendfile i:iipi sendfile
+ -sendfile64 - sendfile64 i:iipi sendfile64
+ +sendfile - sendfile i:iipU sendfile
+ +sendfile64 - sendfile64 i:iipU sendfile64
+ setfsgid EXTRA setfsgid i:i setfsgid
+ setfsuid EXTRA setfsuid i:i setfsuid
+ setpgid - setpgid i:ii __setpgid setpgid
+ @@ -73,19 +72,19 @@ chown - chown i:sii __libc_chown __chown chown
+ fchownat - fchownat i:isiii fchownat
+ linkat - linkat i:isisi linkat
+ mkdirat - mkdirat i:isi mkdirat
+ -readlinkat - readlinkat i:issi readlinkat
+ +readlinkat - readlinkat i:issU readlinkat
+ symlinkat - symlinkat i:sis symlinkat
+ unlinkat - unlinkat i:isi unlinkat
+
+ -setxattr - setxattr i:sspii setxattr
+ -lsetxattr - lsetxattr i:sspii lsetxattr
+ -fsetxattr - fsetxattr i:ispii fsetxattr
+ -getxattr - getxattr i:sspi getxattr
+ -lgetxattr - lgetxattr i:sspi lgetxattr
+ -fgetxattr - fgetxattr i:ispi fgetxattr
+ -listxattr - listxattr i:ssi listxattr
+ -llistxattr - llistxattr i:ssi llistxattr
+ -flistxattr - flistxattr i:isi flistxattr
+ +setxattr - setxattr i:sspUi setxattr
+ +lsetxattr - lsetxattr i:sspUi lsetxattr
+ +fsetxattr - fsetxattr i:ispUi fsetxattr
+ +getxattr - getxattr i:sspU getxattr
+ +lgetxattr - lgetxattr i:sspU lgetxattr
+ +fgetxattr - fgetxattr i:ispU fgetxattr
+ +listxattr - listxattr i:ssU listxattr
+ +llistxattr - llistxattr i:ssU llistxattr
+ +flistxattr - flistxattr i:isU flistxattr
+ removexattr - removexattr i:ss removexattr
+ lremovexattr - lremovexattr i:ss lremovexattr
+ fremovexattr - fremovexattr i:is fremovexattr
+ @@ -102,8 +101,6 @@ name_to_handle_at EXTRA name_to_handle_at i:isppi name_to_handle_at
+
+ setns EXTRA setns i:ii setns
+
+ -process_vm_readv EXTRA process_vm_readv i:ipipii process_vm_readv
+ -process_vm_writev EXTRA process_vm_writev i:ipipii process_vm_writev
+ memfd_create EXTRA memfd_create i:si memfd_create
+ pkey_alloc EXTRA pkey_alloc i:ii pkey_alloc
+ pkey_free EXTRA pkey_free i:i pkey_free
+ diff --git a/sysdeps/unix/sysv/linux/tst-getcwd-smallbuff.c b/sysdeps/unix/sysv/linux/tst-getcwd-smallbuff.c
+ new file mode 100644
+ index 0000000000..55362f6060
+ --- /dev/null
+ +++ b/sysdeps/unix/sysv/linux/tst-getcwd-smallbuff.c
+ @@ -0,0 +1,259 @@
+ +/* Verify that getcwd returns ERANGE for size 1 byte and does not underflow
+ + buffer when the CWD is too long and is also a mount target of /. See bug
+ + #28769 or CVE-2021-3999 for more context.
+ + Copyright The GNU Toolchain Authors.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#include <errno.h>
+ +#include <fcntl.h>
+ +#include <intprops.h>
+ +#include <limits.h>
+ +#include <stdio.h>
+ +#include <stdlib.h>
+ +#include <string.h>
+ +#include <sys/mount.h>
+ +#include <sys/stat.h>
+ +#include <sys/types.h>
+ +#include <sys/wait.h>
+ +
+ +#include <sys/socket.h>
+ +#include <sys/un.h>
+ +#include <support/check.h>
+ +#include <support/temp_file.h>
+ +#include <support/test-driver.h>
+ +#include <support/xsched.h>
+ +#include <support/xunistd.h>
+ +
+ +static char *base;
+ +#define BASENAME "tst-getcwd-smallbuff"
+ +#define MOUNT_NAME "mpoint"
+ +static int sockfd[2];
+ +
+ +static void
+ +do_cleanup (void)
+ +{
+ + support_chdir_toolong_temp_directory (base);
+ + TEST_VERIFY_EXIT (rmdir (MOUNT_NAME) == 0);
+ + free (base);
+ +}
+ +
+ +static void
+ +send_fd (const int sock, const int fd)
+ +{
+ + struct msghdr msg = {0};
+ + union
+ + {
+ + struct cmsghdr hdr;
+ + char buf[CMSG_SPACE (sizeof (int))];
+ + } cmsgbuf = {0};
+ + struct cmsghdr *cmsg;
+ + struct iovec vec;
+ + char ch = 'A';
+ + ssize_t n;
+ +
+ + msg.msg_control = &cmsgbuf.buf;
+ + msg.msg_controllen = sizeof (cmsgbuf.buf);
+ +
+ + cmsg = CMSG_FIRSTHDR (&msg);
+ + cmsg->cmsg_len = CMSG_LEN (sizeof (int));
+ + cmsg->cmsg_level = SOL_SOCKET;
+ + cmsg->cmsg_type = SCM_RIGHTS;
+ + memcpy (CMSG_DATA (cmsg), &fd, sizeof (fd));
+ +
+ + vec.iov_base = &ch;
+ + vec.iov_len = 1;
+ + msg.msg_iov = &vec;
+ + msg.msg_iovlen = 1;
+ +
+ + while ((n = sendmsg (sock, &msg, 0)) == -1 && errno == EINTR);
+ +
+ + TEST_VERIFY_EXIT (n == 1);
+ +}
+ +
+ +static int
+ +recv_fd (const int sock)
+ +{
+ + struct msghdr msg = {0};
+ + union
+ + {
+ + struct cmsghdr hdr;
+ + char buf[CMSG_SPACE(sizeof(int))];
+ + } cmsgbuf = {0};
+ + struct cmsghdr *cmsg;
+ + struct iovec vec;
+ + ssize_t n;
+ + char ch = '\0';
+ + int fd = -1;
+ +
+ + vec.iov_base = &ch;
+ + vec.iov_len = 1;
+ + msg.msg_iov = &vec;
+ + msg.msg_iovlen = 1;
+ +
+ + msg.msg_control = &cmsgbuf.buf;
+ + msg.msg_controllen = sizeof (cmsgbuf.buf);
+ +
+ + while ((n = recvmsg (sock, &msg, 0)) == -1 && errno == EINTR);
+ + if (n != 1 || ch != 'A')
+ + return -1;
+ +
+ + cmsg = CMSG_FIRSTHDR (&msg);
+ + if (cmsg == NULL)
+ + return -1;
+ + if (cmsg->cmsg_type != SCM_RIGHTS)
+ + return -1;
+ + memcpy (&fd, CMSG_DATA (cmsg), sizeof (fd));
+ + if (fd < 0)
+ + return -1;
+ + return fd;
+ +}
+ +
+ +static int
+ +child_func (void * const arg)
+ +{
+ + xclose (sockfd[0]);
+ + const int sock = sockfd[1];
+ + char ch;
+ +
+ + TEST_VERIFY_EXIT (read (sock, &ch, 1) == 1);
+ + TEST_VERIFY_EXIT (ch == '1');
+ +
+ + if (mount ("/", MOUNT_NAME, NULL, MS_BIND | MS_REC, NULL))
+ + FAIL_EXIT1 ("mount failed: %m\n");
+ + const int fd = xopen ("mpoint",
+ + O_RDONLY | O_PATH | O_DIRECTORY | O_NOFOLLOW, 0);
+ +
+ + send_fd (sock, fd);
+ + xclose (fd);
+ +
+ + TEST_VERIFY_EXIT (read (sock, &ch, 1) == 1);
+ + TEST_VERIFY_EXIT (ch == 'a');
+ +
+ + xclose (sock);
+ + return 0;
+ +}
+ +
+ +static void
+ +update_map (char * const mapping, const char * const map_file)
+ +{
+ + const size_t map_len = strlen (mapping);
+ +
+ + const int fd = xopen (map_file, O_WRONLY, 0);
+ + xwrite (fd, mapping, map_len);
+ + xclose (fd);
+ +}
+ +
+ +static void
+ +proc_setgroups_write (const long child_pid, const char * const str)
+ +{
+ + const size_t str_len = strlen(str);
+ +
+ + char setgroups_path[sizeof ("/proc//setgroups") + INT_STRLEN_BOUND (long)];
+ +
+ + snprintf (setgroups_path, sizeof (setgroups_path),
+ + "/proc/%ld/setgroups", child_pid);
+ +
+ + const int fd = open (setgroups_path, O_WRONLY);
+ +
+ + if (fd < 0)
+ + {
+ + TEST_VERIFY_EXIT (errno == ENOENT);
+ + FAIL_UNSUPPORTED ("/proc/%ld/setgroups not found\n", child_pid);
+ + }
+ +
+ + xwrite (fd, str, str_len);
+ + xclose(fd);
+ +}
+ +
+ +static char child_stack[1024 * 1024];
+ +
+ +int
+ +do_test (void)
+ +{
+ + base = support_create_and_chdir_toolong_temp_directory (BASENAME);
+ +
+ + xmkdir (MOUNT_NAME, S_IRWXU);
+ + atexit (do_cleanup);
+ +
+ + /* Check whether user namespaces are supported. */
+ + {
+ + pid_t pid = xfork ();
+ + if (pid == 0)
+ + {
+ + if (unshare (CLONE_NEWUSER | CLONE_NEWNS) != 0)
+ + _exit (EXIT_UNSUPPORTED);
+ + else
+ + _exit (0);
+ + }
+ + int status;
+ + xwaitpid (pid, &status, 0);
+ + TEST_VERIFY_EXIT (WIFEXITED (status));
+ + if (WEXITSTATUS (status) != 0)
+ + return WEXITSTATUS (status);
+ + }
+ +
+ + TEST_VERIFY_EXIT (socketpair (AF_UNIX, SOCK_STREAM, 0, sockfd) == 0);
+ + pid_t child_pid = xclone (child_func, NULL, child_stack,
+ + sizeof (child_stack),
+ + CLONE_NEWUSER | CLONE_NEWNS | SIGCHLD);
+ +
+ + xclose (sockfd[1]);
+ + const int sock = sockfd[0];
+ +
+ + char map_path[sizeof ("/proc//uid_map") + INT_STRLEN_BOUND (long)];
+ + char map_buf[sizeof ("0 1") + INT_STRLEN_BOUND (long)];
+ +
+ + snprintf (map_path, sizeof (map_path), "/proc/%ld/uid_map",
+ + (long) child_pid);
+ + snprintf (map_buf, sizeof (map_buf), "0 %ld 1", (long) getuid());
+ + update_map (map_buf, map_path);
+ +
+ + proc_setgroups_write ((long) child_pid, "deny");
+ + snprintf (map_path, sizeof (map_path), "/proc/%ld/gid_map",
+ + (long) child_pid);
+ + snprintf (map_buf, sizeof (map_buf), "0 %ld 1", (long) getgid());
+ + update_map (map_buf, map_path);
+ +
+ + TEST_VERIFY_EXIT (send (sock, "1", 1, MSG_NOSIGNAL) == 1);
+ + const int fd = recv_fd (sock);
+ + TEST_VERIFY_EXIT (fd >= 0);
+ + TEST_VERIFY_EXIT (fchdir (fd) == 0);
+ +
+ + static char buf[2 * 10 + 1];
+ + memset (buf, 'A', sizeof (buf));
+ +
+ + /* Finally, call getcwd and check if it resulted in a buffer underflow. */
+ + char * cwd = getcwd (buf + sizeof (buf) / 2, 1);
+ + TEST_VERIFY (cwd == NULL);
+ + TEST_VERIFY (errno == ERANGE);
+ +
+ + for (int i = 0; i < sizeof (buf); i++)
+ + if (buf[i] != 'A')
+ + {
+ + printf ("buf[%d] = %02x\n", i, (unsigned int) buf[i]);
+ + support_record_failure ();
+ + }
+ +
+ + TEST_VERIFY_EXIT (send (sock, "a", 1, MSG_NOSIGNAL) == 1);
+ + xclose (sock);
+ + TEST_VERIFY_EXIT (xwaitpid (child_pid, NULL, 0) == child_pid);
+ +
+ + return 0;
+ +}
+ +
+ +#define CLEANUP_HANDLER do_cleanup
+ +#include <support/test-driver.c>
+ diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
+ index c2eb37e575..c7f740a1df 100644
+ --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h
+ +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h
+ @@ -61,13 +61,31 @@
+ # define SYSCALL_ERROR_LABEL syscall_error
+ # endif
+
+ +/* PSEUDO and T_PSEUDO macros have 2 extra arguments for unsigned long
+ + int arguments. */
+ +# define PSEUDOS_HAVE_ULONG_INDICES 1
+ +
+ +# ifndef SYSCALL_ULONG_ARG_1
+ +# define SYSCALL_ULONG_ARG_1 0
+ +# define SYSCALL_ULONG_ARG_2 0
+ +# endif
+ +
+ # undef PSEUDO
+ -# define PSEUDO(name, syscall_name, args) \
+ - .text; \
+ - ENTRY (name) \
+ - DO_CALL (syscall_name, args); \
+ - cmpq $-4095, %rax; \
+ +# if SYSCALL_ULONG_ARG_1
+ +# define PSEUDO(name, syscall_name, args, ulong_arg_1, ulong_arg_2) \
+ + .text; \
+ + ENTRY (name) \
+ + DO_CALL (syscall_name, args, ulong_arg_1, ulong_arg_2); \
+ + cmpq $-4095, %rax; \
+ jae SYSCALL_ERROR_LABEL
+ +# else
+ +# define PSEUDO(name, syscall_name, args) \
+ + .text; \
+ + ENTRY (name) \
+ + DO_CALL (syscall_name, args, 0, 0); \
+ + cmpq $-4095, %rax; \
+ + jae SYSCALL_ERROR_LABEL
+ +# endif
+
+ # undef PSEUDO_END
+ # define PSEUDO_END(name) \
+ @@ -75,10 +93,17 @@
+ END (name)
+
+ # undef PSEUDO_NOERRNO
+ -# define PSEUDO_NOERRNO(name, syscall_name, args) \
+ - .text; \
+ - ENTRY (name) \
+ - DO_CALL (syscall_name, args)
+ +# if SYSCALL_ULONG_ARG_1
+ +# define PSEUDO_NOERRNO(name, syscall_name, args, ulong_arg_1, ulong_arg_2) \
+ + .text; \
+ + ENTRY (name) \
+ + DO_CALL (syscall_name, args, ulong_arg_1, ulong_arg_2)
+ +# else
+ +# define PSEUDO_NOERRNO(name, syscall_name, args) \
+ + .text; \
+ + ENTRY (name) \
+ + DO_CALL (syscall_name, args, 0, 0)
+ +# endif
+
+ # undef PSEUDO_END_NOERRNO
+ # define PSEUDO_END_NOERRNO(name) \
+ @@ -87,11 +112,19 @@
+ # define ret_NOERRNO ret
+
+ # undef PSEUDO_ERRVAL
+ -# define PSEUDO_ERRVAL(name, syscall_name, args) \
+ - .text; \
+ - ENTRY (name) \
+ - DO_CALL (syscall_name, args); \
+ +# if SYSCALL_ULONG_ARG_1
+ +# define PSEUDO_ERRVAL(name, syscall_name, args, ulong_arg_1, ulong_arg_2) \
+ + .text; \
+ + ENTRY (name) \
+ + DO_CALL (syscall_name, args, ulong_arg_1, ulong_arg_2); \
+ + negq %rax
+ +# else
+ +# define PSEUDO_ERRVAL(name, syscall_name, args) \
+ + .text; \
+ + ENTRY (name) \
+ + DO_CALL (syscall_name, args, 0, 0); \
+ negq %rax
+ +# endif
+
+ # undef PSEUDO_END_ERRVAL
+ # define PSEUDO_END_ERRVAL(name) \
+ @@ -163,8 +196,10 @@
+ Syscalls of more than 6 arguments are not supported. */
+
+ # undef DO_CALL
+ -# define DO_CALL(syscall_name, args) \
+ +# define DO_CALL(syscall_name, args, ulong_arg_1, ulong_arg_2) \
+ DOARGS_##args \
+ + ZERO_EXTEND_##ulong_arg_1 \
+ + ZERO_EXTEND_##ulong_arg_2 \
+ movl $SYS_ify (syscall_name), %eax; \
+ syscall;
+
+ @@ -176,6 +211,14 @@
+ # define DOARGS_5 DOARGS_4
+ # define DOARGS_6 DOARGS_5
+
+ +# define ZERO_EXTEND_0 /* nothing */
+ +# define ZERO_EXTEND_1 /* nothing */
+ +# define ZERO_EXTEND_2 /* nothing */
+ +# define ZERO_EXTEND_3 /* nothing */
+ +# define ZERO_EXTEND_4 /* nothing */
+ +# define ZERO_EXTEND_5 /* nothing */
+ +# define ZERO_EXTEND_6 /* nothing */
+ +
+ #else /* !__ASSEMBLER__ */
+ /* Define a macro which expands inline into the wrapper code for a system
+ call. */
+ @@ -210,12 +253,15 @@
+ /* Registers clobbered by syscall. */
+ # define REGISTERS_CLOBBERED_BY_SYSCALL "cc", "r11", "cx"
+
+ -/* Create a variable 'name' based on type 'X' to avoid explicit types.
+ - This is mainly used set use 64-bits arguments in x32. */
+ -#define TYPEFY(X, name) __typeof__ ((X) - (X)) name
+ -/* Explicit cast the argument to avoid integer from pointer warning on
+ - x32. */
+ -#define ARGIFY(X) ((__typeof__ ((X) - (X))) (X))
+ +/* NB: This also works when X is an array.  For an array X, the type of
+ +   (X) - (X) is ptrdiff_t, which is signed; since ptrdiff_t has the same
+ +   size as a pointer, the cast is a no-op.  */
+ +#define TYPEFY1(X) __typeof__ ((X) - (X))
+ +/* Explicit cast the argument. */
+ +#define ARGIFY(X) ((TYPEFY1 (X)) (X))
+ +/* Create a variable 'name' based on type of variable 'X' to avoid
+ + explicit types. */
+ +#define TYPEFY(X, name) __typeof__ (ARGIFY (X)) name
+
+ #undef INTERNAL_SYSCALL
+ #define INTERNAL_SYSCALL(name, err, nr, args...) \
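
The ZERO_EXTEND_N hooks are empty on plain x86_64 and only become real instructions on x32 (next hunk), where writing the 32-bit half of a register clears bits 63:32. A hedged sketch of that idiom outside of the syscall macros, using GCC inline asm with the %k operand modifier; illustrative only and x86-64 specific.

/* Sketch of the zero-extension idiom the x32 ZERO_EXTEND_N macros
   expand to; illustrative only.  */
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint64_t reg = 0xdeadbeef00000001ULL;
  /* Writing the 32-bit half of a register ("movl %esi, %esi" style)
     clears bits 63:32 -- exactly what an unsigned long int argument
     needs before the syscall instruction.  */
  __asm__ ("movl %k0, %k0" : "+r" (reg));
  printf ("%#llx\n", (unsigned long long) reg);   /* prints 0x1 */
  return 0;
}
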
+ diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h
+ index 5bf9eed80b..62e6f8fe11 100644
+ --- a/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h
+ +++ b/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h
+ @@ -26,4 +26,39 @@
+ #undef LO_HI_LONG
+ #define LO_HI_LONG(val) (val)
+
+ +#ifdef __ASSEMBLER__
+ +/* Zero-extend 32-bit unsigned long int arguments to 64 bits. */
+ +# undef ZERO_EXTEND_1
+ +# define ZERO_EXTEND_1 movl %edi, %edi;
+ +# undef ZERO_EXTEND_2
+ +# define ZERO_EXTEND_2 movl %esi, %esi;
+ +# undef ZERO_EXTEND_3
+ +# define ZERO_EXTEND_3 movl %edx, %edx;
+ +# if SYSCALL_ULONG_ARG_1 == 4 || SYSCALL_ULONG_ARG_2 == 4
+ +# undef DOARGS_4
+ +# define DOARGS_4 movl %ecx, %r10d;
+ +# else
+ +# undef ZERO_EXTEND_4
+ +# define ZERO_EXTEND_4 movl %r10d, %r10d;
+ +# endif
+ +# undef ZERO_EXTEND_5
+ +# define ZERO_EXTEND_5 movl %r8d, %r8d;
+ +# undef ZERO_EXTEND_6
+ +# define ZERO_EXTEND_6 movl %r9d, %r9d;
+ +#else /* !__ASSEMBLER__ */
+ +# undef ARGIFY
+ +/* Enforce zero-extension for pointers and array system call arguments.
+ + For integer types, extend to int64_t (the full register) using a
+ + regular cast, resulting in zero or sign extension based on the
+ + signedness of the original type. */
+ +# define ARGIFY(X) \
+ + ({ \
+ + _Pragma ("GCC diagnostic push"); \
+ + _Pragma ("GCC diagnostic ignored \"-Wpointer-to-int-cast\""); \
+ + (__builtin_classify_type (X) == 5 \
+ + ? (uintptr_t) (X) : (int64_t) (X)); \
+ + _Pragma ("GCC diagnostic pop"); \
+ + })
+ +#endif /* __ASSEMBLER__ */
+ +
+ #endif /* linux/x86_64/x32/sysdep.h */
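
On x32, user pointers are 32-bit but the kernel reads full 64-bit registers, so the redefined ARGIFY widens pointers and arrays through uintptr_t (zero extension) while ordinary integers go through int64_t and keep their sign; __builtin_classify_type (X) == 5 is the compile-time "is a pointer" test that picks between the two. A hedged sketch of the two widening rules written as plain functions; the names are illustrative.

/* Sketch of the two x32 widening rules; illustrative only.  */
#include <stdint.h>

/* Pointers go through uintptr_t, so the upper 32 bits of the 64-bit
   register end up zero.  */
uint64_t
widen_pointer (void *p)
{
  return (uintptr_t) p;
}

/* Plain integers go through int64_t, so e.g. -1 is sign-extended to
   0xffffffffffffffff, which is what the kernel expects.  */
uint64_t
widen_int (int v)
{
  return (uint64_t) (int64_t) v;
}
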
+ diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
+ index 95182a508c..b7aec5df2b 100644
+ --- a/sysdeps/x86/Makefile
+ +++ b/sysdeps/x86/Makefile
+ @@ -12,6 +12,42 @@ endif
+ ifeq ($(subdir),setjmp)
+ gen-as-const-headers += jmp_buf-ssp.sym
+ sysdep_routines += __longjmp_cancel
+ +ifneq ($(enable-cet),no)
+ +ifneq ($(have-tunables),no)
+ +tests += tst-setjmp-cet
+ +tst-setjmp-cet-ENV = GLIBC_TUNABLES=glibc.cpu.x86_ibt=on:glibc.cpu.x86_shstk=on
+ +endif
+ +endif
+ +endif
+ +
+ +ifeq ($(subdir),string)
+ +sysdep_routines += cacheinfo
+ +
+ +tests += \
+ + tst-memchr-rtm \
+ + tst-memcmp-rtm \
+ + tst-memmove-rtm \
+ + tst-memrchr-rtm \
+ + tst-memset-rtm \
+ + tst-strchr-rtm \
+ + tst-strcpy-rtm \
+ + tst-strlen-rtm \
+ + tst-strncmp-rtm \
+ + tst-strrchr-rtm \
+ + tst-wcsncmp-rtm \
+ +# tests
+ +
+ +CFLAGS-tst-memchr-rtm.c += -mrtm
+ +CFLAGS-tst-memcmp-rtm.c += -mrtm
+ +CFLAGS-tst-memmove-rtm.c += -mrtm
+ +CFLAGS-tst-memrchr-rtm.c += -mrtm
+ +CFLAGS-tst-memset-rtm.c += -mrtm
+ +CFLAGS-tst-strchr-rtm.c += -mrtm
+ +CFLAGS-tst-strcpy-rtm.c += -mrtm
+ +CFLAGS-tst-strlen-rtm.c += -mrtm
+ +CFLAGS-tst-strncmp-rtm.c += -mrtm -Wno-error
+ +CFLAGS-tst-strrchr-rtm.c += -mrtm
+ +CFLAGS-tst-wcsncmp-rtm.c += -mrtm -Wno-error
+ endif
+
+ ifeq ($(enable-cet),yes)
+ diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
+ index e3e8ef27bb..39c13b7195 100644
+ --- a/sysdeps/x86/cacheinfo.c
+ +++ b/sysdeps/x86/cacheinfo.c
+ @@ -722,7 +722,7 @@ intel_bug_no_cache_info:
+ threads = 1 << ((ecx >> 12) & 0x0f);
+ }
+
+ - if (threads == 0)
+ + if (threads == 0 || cpu_features->basic.family >= 0x17)
+ {
+ /* If APIC ID width is not available, use logical
+ processor count. */
+ @@ -737,8 +737,22 @@ intel_bug_no_cache_info:
+ if (threads > 0)
+ shared /= threads;
+
+ - /* Account for exclusive L2 and L3 caches. */
+ - shared += core;
+ +      /* Get the shared cache per CCX for Zen architectures.  */
+ + if (cpu_features->basic.family >= 0x17)
+ + {
+ + unsigned int eax;
+ +
+ +	  /* Get the number of threads that share the L3 cache in a CCX.  */
+ + __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx);
+ +
+ + unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1;
+ + shared *= threads_per_ccx;
+ + }
+ + else
+ + {
+ + /* Account for exclusive L2 and L3 caches. */
+ + shared += core;
+ + }
+ }
+
+ #ifndef DISABLE_PREFETCHW
+ @@ -778,14 +792,20 @@ intel_bug_no_cache_info:
+ __x86_shared_cache_size = shared;
+ }
+
+ - /* The large memcpy micro benchmark in glibc shows that 6 times of
+ - shared cache size is the approximate value above which non-temporal
+ - store becomes faster on a 8-core processor. This is the 3/4 of the
+ - total shared cache size. */
+ + /* The default setting for the non_temporal threshold is 3/4 of one
+ + thread's share of the chip's cache. For most Intel and AMD processors
+ + with an initial release date between 2017 and 2020, a thread's typical
+ + share of the cache is from 500 KBytes to 2 MBytes. Using the 3/4
+ + threshold leaves 125 KBytes to 500 KBytes of the thread's data
+ + in cache after a maximum temporal copy, which will maintain
+ + in cache a reasonable portion of the thread's stack and other
+ + active data. If the threshold is set higher than one thread's
+ + share of the cache, it has a substantial risk of negatively
+ + impacting the performance of other threads running on the chip. */
+ __x86_shared_non_temporal_threshold
+ = (cpu_features->non_temporal_threshold != 0
+ ? cpu_features->non_temporal_threshold
+ - : __x86_shared_cache_size * threads * 3 / 4);
+ + : __x86_shared_cache_size * 3 / 4);
+ }
+
+ #endif
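
A worked example of the new default may help: if one thread's share of the last-level cache is 2 MiB, the non-temporal threshold becomes 1.5 MiB, leaving about 512 KiB of the thread's data resident after a maximal temporal copy; the old formula multiplied by the thread count and could exceed the per-thread share many times over. A tiny sketch of the arithmetic; the numbers are illustrative, not from the patch.

/* Worked example of the new default threshold; numbers illustrative.  */
#include <stdio.h>

int
main (void)
{
  unsigned long share = 2UL * 1024 * 1024;      /* one thread's L3 share */
  unsigned long threshold = share * 3 / 4;      /* 1572864 = 1.5 MiB */
  printf ("non-temporal threshold: %lu bytes\n", threshold);
  printf ("still cacheable after a temporal copy: %lu bytes\n",
          share - threshold);                   /* 524288 = 512 KiB */
  return 0;
}
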
+ diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
+ index 81a170a819..e1c22e3e58 100644
+ --- a/sysdeps/x86/cpu-features.c
+ +++ b/sysdeps/x86/cpu-features.c
+ @@ -333,6 +333,9 @@ init_cpu_features (struct cpu_features *cpu_features)
+
+ get_extended_indices (cpu_features);
+
+ + if (CPU_FEATURES_CPU_P (cpu_features, RTM_ALWAYS_ABORT))
+ + cpu_features->cpuid[index_cpu_RTM].reg_RTM &= ~bit_cpu_RTM;
+ +
+ if (family == 0x06)
+ {
+ model += extended_model;
+ @@ -394,11 +397,42 @@ init_cpu_features (struct cpu_features *cpu_features)
+ break;
+ }
+
+ - /* Disable TSX on some Haswell processors to avoid TSX on kernels that
+ - weren't updated with the latest microcode package (which disables
+ - broken feature by default). */
+ + /* Disable TSX on some processors to avoid TSX on kernels that
+ + weren't updated with the latest microcode package (which
+ + disables broken feature by default). */
+ switch (model)
+ {
+ + case 0x55:
+ + if (stepping <= 5)
+ + goto disable_tsx;
+ + break;
+ + case 0x8e:
+ + /* NB: Although the errata documents that for model == 0x8e,
+ + only 0xb stepping or lower are impacted, the intention of
+ + the errata was to disable TSX on all client processors on
+ + all steppings. Include 0xc stepping which is an Intel
+ + Core i7-8665U, a client mobile processor. */
+ + case 0x9e:
+ + if (stepping > 0xc)
+ + break;
+ + /* Fall through. */
+ + case 0x4e:
+ + case 0x5e:
+ + {
+ + /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
+ + processors listed in:
+ +
+ +https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
+ + */
+ +disable_tsx:
+ + cpu_features->cpuid[index_cpu_HLE].reg_HLE
+ + &= ~bit_cpu_HLE;
+ + cpu_features->cpuid[index_cpu_RTM].reg_RTM
+ + &= ~bit_cpu_RTM;
+ + cpu_features->cpuid[index_cpu_RTM_ALWAYS_ABORT].reg_RTM_ALWAYS_ABORT
+ + |= bit_cpu_RTM_ALWAYS_ABORT;
+ + }
+ + break;
+ case 0x3f:
+ /* Xeon E7 v3 with stepping >= 4 has working TSX. */
+ if (stepping >= 4)
+ @@ -424,8 +458,24 @@ init_cpu_features (struct cpu_features *cpu_features)
+ cpu_features->feature[index_arch_Prefer_No_VZEROUPPER]
+ |= bit_arch_Prefer_No_VZEROUPPER;
+ else
+ - cpu_features->feature[index_arch_Prefer_No_AVX512]
+ - |= bit_arch_Prefer_No_AVX512;
+ + {
+ + cpu_features->feature[index_arch_Prefer_No_AVX512]
+ + |= bit_arch_Prefer_No_AVX512;
+ +
+ + /* Avoid RTM abort triggered by VZEROUPPER inside a
+ + transactionally executing RTM region. */
+ + if (CPU_FEATURES_CPU_P (cpu_features, RTM))
+ + cpu_features->feature[index_arch_Prefer_No_VZEROUPPER]
+ + |= bit_arch_Prefer_No_VZEROUPPER;
+ +
+ +	  /* To compare two 32-byte strings, 256-bit EVEX strcmp requires
+ +	     2 loads, 3 VPCMPs and 2 KORDs, while AVX2 strcmp requires
+ +	     1 load, 2 VPCMPEQs, 1 VPMINU and 1 VPMOVMSKB, so AVX2
+ +	     strcmp is faster than EVEX strcmp.  */
+ + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+ + cpu_features->feature[index_arch_Prefer_AVX2_STRCMP]
+ + |= bit_arch_Prefer_AVX2_STRCMP;
+ + }
+ }
+ /* This spells out "AuthenticAMD" or "HygonGenuine". */
+ else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
+ diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
+ index aea83e6e31..9fb97907b5 100644
+ --- a/sysdeps/x86/cpu-features.h
+ +++ b/sysdeps/x86/cpu-features.h
+ @@ -499,6 +499,7 @@ extern const struct cpu_features *__get_cpu_features (void)
+ #define bit_cpu_AVX512_4VNNIW (1u << 2)
+ #define bit_cpu_AVX512_4FMAPS (1u << 3)
+ #define bit_cpu_FSRM (1u << 4)
+ +#define bit_cpu_RTM_ALWAYS_ABORT (1u << 11)
+ #define bit_cpu_PCONFIG (1u << 18)
+ #define bit_cpu_IBT (1u << 20)
+ #define bit_cpu_IBRS_IBPB (1u << 26)
+ @@ -667,6 +668,7 @@ extern const struct cpu_features *__get_cpu_features (void)
+ #define index_cpu_AVX512_4VNNIW COMMON_CPUID_INDEX_7
+ #define index_cpu_AVX512_4FMAPS COMMON_CPUID_INDEX_7
+ #define index_cpu_FSRM COMMON_CPUID_INDEX_7
+ +#define index_cpu_RTM_ALWAYS_ABORT COMMON_CPUID_INDEX_7
+ #define index_cpu_PCONFIG COMMON_CPUID_INDEX_7
+ #define index_cpu_IBT COMMON_CPUID_INDEX_7
+ #define index_cpu_IBRS_IBPB COMMON_CPUID_INDEX_7
+ @@ -835,6 +837,7 @@ extern const struct cpu_features *__get_cpu_features (void)
+ #define reg_AVX512_4VNNIW edx
+ #define reg_AVX512_4FMAPS edx
+ #define reg_FSRM edx
+ +#define reg_RTM_ALWAYS_ABORT edx
+ #define reg_PCONFIG edx
+ #define reg_IBT edx
+ #define reg_IBRS_IBPB edx
+ @@ -897,6 +900,7 @@ extern const struct cpu_features *__get_cpu_features (void)
+ #define bit_arch_Prefer_FSRM (1u << 13)
+ #define bit_arch_Prefer_No_AVX512 (1u << 14)
+ #define bit_arch_MathVec_Prefer_No_AVX512 (1u << 15)
+ +#define bit_arch_Prefer_AVX2_STRCMP (1u << 16)
+
+ #define index_arch_Fast_Rep_String FEATURE_INDEX_2
+ #define index_arch_Fast_Copy_Backward FEATURE_INDEX_2
+ @@ -914,6 +918,7 @@ extern const struct cpu_features *__get_cpu_features (void)
+ #define index_arch_Prefer_No_AVX512 FEATURE_INDEX_2
+ #define index_arch_MathVec_Prefer_No_AVX512 FEATURE_INDEX_2
+ #define index_arch_Prefer_FSRM FEATURE_INDEX_2
+ +#define index_arch_Prefer_AVX2_STRCMP FEATURE_INDEX_2
+
+ /* XCR0 Feature flags. */
+ #define bit_XMM_state (1u << 1)
+ diff --git a/sysdeps/x86/cpu-tunables.c b/sysdeps/x86/cpu-tunables.c
+ index 861bd7bcaa..cb83ecc3b2 100644
+ --- a/sysdeps/x86/cpu-tunables.c
+ +++ b/sysdeps/x86/cpu-tunables.c
+ @@ -282,6 +282,9 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
+ CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features,
+ Fast_Copy_Backward, disable,
+ 18);
+ + CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH
+ + (n, cpu_features, Prefer_AVX2_STRCMP, AVX2_Usable,
+ + disable, 18);
+ }
+ break;
+ case 19:
+ diff --git a/sysdeps/x86/dl-cet.c b/sysdeps/x86/dl-cet.c
+ index ca3b5849bc..8ffaf94a00 100644
+ --- a/sysdeps/x86/dl-cet.c
+ +++ b/sysdeps/x86/dl-cet.c
+ @@ -105,7 +105,11 @@ dl_cet_check (struct link_map *m, const char *program)
+ /* No legacy object check if both IBT and SHSTK are always on. */
+ if (enable_ibt_type == CET_ALWAYS_ON
+ && enable_shstk_type == CET_ALWAYS_ON)
+ - return;
+ + {
+ + THREAD_SETMEM (THREAD_SELF, header.feature_1,
+ + GL(dl_x86_feature_1)[0]);
+ + return;
+ + }
+
+ /* Check if IBT is enabled by kernel. */
+ bool ibt_enabled
+ diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c
+ index 0f55987ae5..bbb5cd356d 100644
+ --- a/sysdeps/x86/tst-get-cpu-features.c
+ +++ b/sysdeps/x86/tst-get-cpu-features.c
+ @@ -176,6 +176,7 @@ do_test (void)
+ CHECK_CPU_FEATURE (AVX512_4VNNIW);
+ CHECK_CPU_FEATURE (AVX512_4FMAPS);
+ CHECK_CPU_FEATURE (FSRM);
+ + CHECK_CPU_FEATURE (RTM_ALWAYS_ABORT);
+ CHECK_CPU_FEATURE (PCONFIG);
+ CHECK_CPU_FEATURE (IBT);
+ CHECK_CPU_FEATURE (IBRS_IBPB);
+ diff --git a/sysdeps/x86/tst-memchr-rtm.c b/sysdeps/x86/tst-memchr-rtm.c
+ new file mode 100644
+ index 0000000000..e47494011e
+ --- /dev/null
+ +++ b/sysdeps/x86/tst-memchr-rtm.c
+ @@ -0,0 +1,54 @@
+ +/* Test case for memchr inside a transactionally executing RTM region.
+ + Copyright (C) 2021 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#include <tst-string-rtm.h>
+ +
+ +#define LOOP 3000
+ +#define STRING_SIZE 1024
+ +char string1[STRING_SIZE];
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +prepare (void)
+ +{
+ + memset (string1, 'a', STRING_SIZE);
+ + string1[100] = 'c';
+ + string1[STRING_SIZE - 100] = 'c';
+ + char *p = memchr (string1, 'c', STRING_SIZE);
+ + if (p == &string1[100])
+ + return EXIT_SUCCESS;
+ + else
+ + return EXIT_FAILURE;
+ +}
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +function (void)
+ +{
+ + char *p = memchr (string1, 'c', STRING_SIZE);
+ + if (p == &string1[100])
+ + return 0;
+ + else
+ + return 1;
+ +}
+ +
+ +static int
+ +do_test (void)
+ +{
+ + return do_test_1 ("memchr", LOOP, prepare, function);
+ +}
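
All of the tst-*-rtm tests share this shape: prepare () runs once, then a driver from tst-string-rtm.h (not shown in this patch) calls function () repeatedly inside an RTM transaction. A hedged sketch of what such a driver could look like, using the standard _xbegin/_xend intrinsics (built with -mrtm, as the Makefile hunk above arranges); this is an assumption about the helper's shape, not the actual glibc code.

/* Hedged sketch of an RTM test driver in the spirit of do_test_1;
   the real helper lives in tst-string-rtm.h and may differ.  */
#include <immintrin.h>
#include <stdio.h>

int
run_in_rtm (const char *name, int loop,
            int (*prepare) (void), int (*function) (void))
{
  if (prepare () != 0)
    return 1;

  int aborts = 0;
  for (int i = 0; i < loop; i++)
    {
      unsigned int status = _xbegin ();
      if (status == _XBEGIN_STARTED)
        {
          function ();          /* the string routine under test */
          _xend ();
        }
      else
        aborts++;               /* aborted, e.g. by a stray VZEROUPPER */
    }
  printf ("%s: %d of %d transactions aborted\n", name, aborts, loop);
  return 0;
}
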
+ diff --git a/sysdeps/x86/tst-memcmp-rtm.c b/sysdeps/x86/tst-memcmp-rtm.c
+ new file mode 100644
+ index 0000000000..e4c8a623bb
+ --- /dev/null
+ +++ b/sysdeps/x86/tst-memcmp-rtm.c
+ @@ -0,0 +1,52 @@
+ +/* Test case for memcmp inside a transactionally executing RTM region.
+ + Copyright (C) 2021 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#include <tst-string-rtm.h>
+ +
+ +#define LOOP 3000
+ +#define STRING_SIZE 1024
+ +char string1[STRING_SIZE];
+ +char string2[STRING_SIZE];
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +prepare (void)
+ +{
+ + memset (string1, 'a', STRING_SIZE);
+ + memset (string2, 'a', STRING_SIZE);
+ + if (memcmp (string1, string2, STRING_SIZE) == 0)
+ + return EXIT_SUCCESS;
+ + else
+ + return EXIT_FAILURE;
+ +}
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +function (void)
+ +{
+ + if (memcmp (string1, string2, STRING_SIZE) == 0)
+ + return 0;
+ + else
+ + return 1;
+ +}
+ +
+ +static int
+ +do_test (void)
+ +{
+ + return do_test_1 ("memcmp", LOOP, prepare, function);
+ +}
+ diff --git a/sysdeps/x86/tst-memmove-rtm.c b/sysdeps/x86/tst-memmove-rtm.c
+ new file mode 100644
+ index 0000000000..4bf97ef1e3
+ --- /dev/null
+ +++ b/sysdeps/x86/tst-memmove-rtm.c
+ @@ -0,0 +1,53 @@
+ +/* Test case for memmove inside a transactionally executing RTM region.
+ + Copyright (C) 2021 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#include <tst-string-rtm.h>
+ +
+ +#define LOOP 3000
+ +#define STRING_SIZE 1024
+ +char string1[STRING_SIZE];
+ +char string2[STRING_SIZE];
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +prepare (void)
+ +{
+ + memset (string1, 'a', STRING_SIZE);
+ + if (memmove (string2, string1, STRING_SIZE) == string2
+ + && memcmp (string2, string1, STRING_SIZE) == 0)
+ + return EXIT_SUCCESS;
+ + else
+ + return EXIT_FAILURE;
+ +}
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +function (void)
+ +{
+ + if (memmove (string2, string1, STRING_SIZE) == string2
+ + && memcmp (string2, string1, STRING_SIZE) == 0)
+ + return 0;
+ + else
+ + return 1;
+ +}
+ +
+ +static int
+ +do_test (void)
+ +{
+ + return do_test_1 ("memmove", LOOP, prepare, function);
+ +}
+ diff --git a/sysdeps/x86/tst-memrchr-rtm.c b/sysdeps/x86/tst-memrchr-rtm.c
+ new file mode 100644
+ index 0000000000..a57a5a8eb9
+ --- /dev/null
+ +++ b/sysdeps/x86/tst-memrchr-rtm.c
+ @@ -0,0 +1,54 @@
+ +/* Test case for memrchr inside a transactionally executing RTM region.
+ + Copyright (C) 2021 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#include <tst-string-rtm.h>
+ +
+ +#define LOOP 3000
+ +#define STRING_SIZE 1024
+ +char string1[STRING_SIZE];
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +prepare (void)
+ +{
+ + memset (string1, 'a', STRING_SIZE);
+ + string1[100] = 'c';
+ + string1[STRING_SIZE - 100] = 'c';
+ + char *p = memrchr (string1, 'c', STRING_SIZE);
+ + if (p == &string1[STRING_SIZE - 100])
+ + return EXIT_SUCCESS;
+ + else
+ + return EXIT_FAILURE;
+ +}
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +function (void)
+ +{
+ + char *p = memrchr (string1, 'c', STRING_SIZE);
+ + if (p == &string1[STRING_SIZE - 100])
+ + return 0;
+ + else
+ + return 1;
+ +}
+ +
+ +static int
+ +do_test (void)
+ +{
+ + return do_test_1 ("memrchr", LOOP, prepare, function);
+ +}
+ diff --git a/sysdeps/x86/tst-memset-rtm.c b/sysdeps/x86/tst-memset-rtm.c
+ new file mode 100644
+ index 0000000000..bf343a4dad
+ --- /dev/null
+ +++ b/sysdeps/x86/tst-memset-rtm.c
+ @@ -0,0 +1,45 @@
+ +/* Test case for memset inside a transactionally executing RTM region.
+ + Copyright (C) 2021 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#include <tst-string-rtm.h>
+ +
+ +#define LOOP 3000
+ +#define STRING_SIZE 1024
+ +char string1[STRING_SIZE];
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +prepare (void)
+ +{
+ + memset (string1, 'a', STRING_SIZE);
+ + return EXIT_SUCCESS;
+ +}
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +function (void)
+ +{
+ + memset (string1, 'a', STRING_SIZE);
+ + return 0;
+ +}
+ +
+ +static int
+ +do_test (void)
+ +{
+ + return do_test_1 ("memset", LOOP, prepare, function);
+ +}
+ diff --git a/sysdeps/x86/tst-setjmp-cet.c b/sysdeps/x86/tst-setjmp-cet.c
+ new file mode 100644
+ index 0000000000..42c795d2a8
+ --- /dev/null
+ +++ b/sysdeps/x86/tst-setjmp-cet.c
+ @@ -0,0 +1 @@
+ +#include <setjmp/tst-setjmp.c>
+ diff --git a/sysdeps/x86/tst-strchr-rtm.c b/sysdeps/x86/tst-strchr-rtm.c
+ new file mode 100644
+ index 0000000000..a82e29c072
+ --- /dev/null
+ +++ b/sysdeps/x86/tst-strchr-rtm.c
+ @@ -0,0 +1,54 @@
+ +/* Test case for strchr inside a transactionally executing RTM region.
+ + Copyright (C) 2021 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#include <tst-string-rtm.h>
+ +
+ +#define LOOP 3000
+ +#define STRING_SIZE 1024
+ +char string1[STRING_SIZE];
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +prepare (void)
+ +{
+ + memset (string1, 'a', STRING_SIZE - 1);
+ + string1[100] = 'c';
+ + string1[STRING_SIZE - 100] = 'c';
+ + char *p = strchr (string1, 'c');
+ + if (p == &string1[100])
+ + return EXIT_SUCCESS;
+ + else
+ + return EXIT_FAILURE;
+ +}
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +function (void)
+ +{
+ + char *p = strchr (string1, 'c');
+ + if (p == &string1[100])
+ + return 0;
+ + else
+ + return 1;
+ +}
+ +
+ +static int
+ +do_test (void)
+ +{
+ + return do_test_1 ("strchr", LOOP, prepare, function);
+ +}
+ diff --git a/sysdeps/x86/tst-strcpy-rtm.c b/sysdeps/x86/tst-strcpy-rtm.c
+ new file mode 100644
+ index 0000000000..2b2a583fb4
+ --- /dev/null
+ +++ b/sysdeps/x86/tst-strcpy-rtm.c
+ @@ -0,0 +1,53 @@
+ +/* Test case for strcpy inside a transactionally executing RTM region.
+ + Copyright (C) 2021 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#include <tst-string-rtm.h>
+ +
+ +#define LOOP 3000
+ +#define STRING_SIZE 1024
+ +char string1[STRING_SIZE];
+ +char string2[STRING_SIZE];
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +prepare (void)
+ +{
+ + memset (string1, 'a', STRING_SIZE - 1);
+ + if (strcpy (string2, string1) == string2
+ + && strcmp (string2, string1) == 0)
+ + return EXIT_SUCCESS;
+ + else
+ + return EXIT_FAILURE;
+ +}
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +function (void)
+ +{
+ + if (strcpy (string2, string1) == string2
+ + && strcmp (string2, string1) == 0)
+ + return 0;
+ + else
+ + return 1;
+ +}
+ +
+ +static int
+ +do_test (void)
+ +{
+ + return do_test_1 ("strcpy", LOOP, prepare, function);
+ +}
+ diff --git a/sysdeps/x86/tst-string-rtm.h b/sysdeps/x86/tst-string-rtm.h
+ new file mode 100644
+ index 0000000000..6ed9eca017
+ --- /dev/null
+ +++ b/sysdeps/x86/tst-string-rtm.h
+ @@ -0,0 +1,72 @@
+ +/* Test string function in a transactionally executing RTM region.
+ + Copyright (C) 2021 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#include <string.h>
+ +#include <x86intrin.h>
+ +#include <cpu-features.h>
+ +#include <support/check.h>
+ +#include <support/test-driver.h>
+ +
+ +static int
+ +do_test_1 (const char *name, unsigned int loop, int (*prepare) (void),
+ + int (*function) (void))
+ +{
+ + if (!CPU_FEATURE_USABLE (RTM))
+ + return EXIT_UNSUPPORTED;
+ +
+ + int status = prepare ();
+ + if (status != EXIT_SUCCESS)
+ + return status;
+ +
+ + unsigned int i;
+ + unsigned int naborts = 0;
+ + unsigned int failed = 0;
+ + for (i = 0; i < loop; i++)
+ + {
+ + failed |= function ();
+ + if (_xbegin() == _XBEGIN_STARTED)
+ + {
+ + failed |= function ();
+ + _xend();
+ + }
+ + else
+ + {
+ + failed |= function ();
+ + ++naborts;
+ + }
+ + }
+ +
+ + if (failed)
+ + FAIL_EXIT1 ("%s() failed", name);
+ +
+ + if (naborts)
+ + {
+ + /* NB: Low single digit (<= 5%) noise-level aborts are normal for
+ + TSX. */
+ + double rate = 100 * ((double) naborts) / ((double) loop);
+ + if (rate > 5)
+ + FAIL_EXIT1 ("TSX abort rate: %.2f%% (%d out of %d)",
+ + rate, naborts, loop);
+ + }
+ +
+ + return EXIT_SUCCESS;
+ +}
+ +
+ +static int do_test (void);
+ +
+ +#include <support/test-driver.c>
+ diff --git a/sysdeps/x86/tst-strlen-rtm.c b/sysdeps/x86/tst-strlen-rtm.c
+ new file mode 100644
+ index 0000000000..0dcf14db87
+ --- /dev/null
+ +++ b/sysdeps/x86/tst-strlen-rtm.c
+ @@ -0,0 +1,53 @@
+ +/* Test case for strlen inside a transactionally executing RTM region.
+ + Copyright (C) 2021 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#include <tst-string-rtm.h>
+ +
+ +#define LOOP 3000
+ +#define STRING_SIZE 1024
+ +char string1[STRING_SIZE];
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +prepare (void)
+ +{
+ + memset (string1, 'a', STRING_SIZE - 1);
+ + string1[STRING_SIZE - 100] = '\0';
+ + size_t len = strlen (string1);
+ + if (len == STRING_SIZE - 100)
+ + return EXIT_SUCCESS;
+ + else
+ + return EXIT_FAILURE;
+ +}
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +function (void)
+ +{
+ + size_t len = strlen (string1);
+ + if (len == STRING_SIZE - 100)
+ + return 0;
+ + else
+ + return 1;
+ +}
+ +
+ +static int
+ +do_test (void)
+ +{
+ + return do_test_1 ("strlen", LOOP, prepare, function);
+ +}
+ diff --git a/sysdeps/x86/tst-strncmp-rtm.c b/sysdeps/x86/tst-strncmp-rtm.c
+ new file mode 100644
+ index 0000000000..aef9866cf2
+ --- /dev/null
+ +++ b/sysdeps/x86/tst-strncmp-rtm.c
+ @@ -0,0 +1,81 @@
+ +/* Test case for strncmp inside a transactionally executing RTM region.
+ + Copyright (C) 2021 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#include <stdint.h>
+ +#include <tst-string-rtm.h>
+ +
+ +#ifdef WIDE
+ +# define CHAR wchar_t
+ +# define MEMSET wmemset
+ +# define STRNCMP wcsncmp
+ +# define TEST_NAME "wcsncmp"
+ +#else /* !WIDE */
+ +# define CHAR char
+ +# define MEMSET memset
+ +# define STRNCMP strncmp
+ +# define TEST_NAME "strncmp"
+ +#endif /* !WIDE */
+ +
+ +
+ +
+ +#define LOOP 3000
+ +#define STRING_SIZE 1024
+ +CHAR string1[STRING_SIZE];
+ +CHAR string2[STRING_SIZE];
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +prepare (void)
+ +{
+ + MEMSET (string1, 'a', STRING_SIZE - 1);
+ + MEMSET (string2, 'a', STRING_SIZE - 1);
+ + if (STRNCMP (string1, string2, STRING_SIZE) == 0)
+ + return EXIT_SUCCESS;
+ + else
+ + return EXIT_FAILURE;
+ +}
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +function (void)
+ +{
+ + if (STRNCMP (string1, string2, STRING_SIZE) == 0)
+ + return 0;
+ + else
+ + return 1;
+ +}
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +function_overflow (void)
+ +{
+ + if (STRNCMP (string1, string2, SIZE_MAX) == 0)
+ + return 0;
+ + else
+ + return 1;
+ +}
+ +
+ +static int
+ +do_test (void)
+ +{
+ + int status = do_test_1 (TEST_NAME, LOOP, prepare, function);
+ + if (status != EXIT_SUCCESS)
+ + return status;
+ + status = do_test_1 (TEST_NAME, LOOP, prepare, function_overflow);
+ + return status;
+ +}
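
The function_overflow variant above passes SIZE_MAX as the length bound, which exercises strncmp/wcsncmp paths whose internal offset arithmetic could overflow with a huge bound; with equal, null-terminated inputs the comparison must still stop at the terminator and return 0. A minimal standalone sketch of that property (illustrative only, not part of the patch):

#include <assert.h>
#include <stdint.h>
#include <string.h>

int
main (void)
{
  /* Equal, null-terminated strings: the result must be 0 for any
     length bound, including SIZE_MAX, so the bound must never feed
     an overflowing pointer/offset computation.  */
  char a[] = "same";
  char b[] = "same";
  assert (strncmp (a, b, SIZE_MAX) == 0);
  return 0;
}
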
+ diff --git a/sysdeps/x86/tst-strrchr-rtm.c b/sysdeps/x86/tst-strrchr-rtm.c
+ new file mode 100644
+ index 0000000000..e32bfaf5f5
+ --- /dev/null
+ +++ b/sysdeps/x86/tst-strrchr-rtm.c
+ @@ -0,0 +1,53 @@
+ +/* Test case for strrchr inside a transactionally executing RTM region.
+ + Copyright (C) 2021 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#include <tst-string-rtm.h>
+ +
+ +#define LOOP 3000
+ +#define STRING_SIZE 1024
+ +char string1[STRING_SIZE];
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +prepare (void)
+ +{
+ + memset (string1, 'a', STRING_SIZE - 1);
+ + string1[STRING_SIZE - 100] = 'c';
+ + char *p = strrchr (string1, 'c');
+ + if (p == &string1[STRING_SIZE - 100])
+ + return EXIT_SUCCESS;
+ + else
+ + return EXIT_FAILURE;
+ +}
+ +
+ +__attribute__ ((noinline, noclone))
+ +static int
+ +function (void)
+ +{
+ + char *p = strrchr (string1, 'c');
+ + if (p == &string1[STRING_SIZE - 100])
+ + return 0;
+ + else
+ + return 1;
+ +}
+ +
+ +static int
+ +do_test (void)
+ +{
+ + return do_test_1 ("strrchr", LOOP, prepare, function);
+ +}
+ diff --git a/sysdeps/x86/tst-wcsncmp-rtm.c b/sysdeps/x86/tst-wcsncmp-rtm.c
+ new file mode 100644
+ index 0000000000..bad3b86378
+ --- /dev/null
+ +++ b/sysdeps/x86/tst-wcsncmp-rtm.c
+ @@ -0,0 +1,21 @@
+ +/* Test case for wcsncmp inside a transactionally executing RTM region.
+ + Copyright (C) 2022 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#define WIDE 1
+ +#include <wchar.h>
+ +#include "tst-strncmp-rtm.c"
+ diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
+ index d51cf03ac9..b1951adce9 100644
+ --- a/sysdeps/x86_64/Makefile
+ +++ b/sysdeps/x86_64/Makefile
+ @@ -20,6 +20,8 @@ endif
+ ifeq ($(subdir),string)
+ sysdep_routines += cacheinfo strcasecmp_l-nonascii strncase_l-nonascii
+ gen-as-const-headers += locale-defines.sym
+ +tests += \
+ + tst-rsi-strlen
+ endif
+
+ ifeq ($(subdir),elf)
+ @@ -150,6 +152,11 @@ ifeq ($(subdir),csu)
+ gen-as-const-headers += tlsdesc.sym rtld-offsets.sym
+ endif
+
+ +ifeq ($(subdir),wcsmbs)
+ +tests += \
+ + tst-rsi-wcslen
+ +endif
+ +
+ $(objpfx)x86_64/tst-x86_64mod-1.os: $(objpfx)tst-x86_64mod-1.os
+ $(make-target-directory)
+ rm -f $@
+ diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
-old mode 100644
-new mode 100755
+ index 84f82c2406..fc1840e23f
+ --- a/sysdeps/x86_64/configure
+ +++ b/sysdeps/x86_64/configure
+ @@ -107,39 +107,6 @@ if test x"$build_mathvec" = xnotset; then
+ build_mathvec=yes
+ fi
+
+ -if test "$static_pie" = yes; then
+ - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for linker static PIE support" >&5
+ -$as_echo_n "checking for linker static PIE support... " >&6; }
+ -if ${libc_cv_ld_static_pie+:} false; then :
+ - $as_echo_n "(cached) " >&6
+ -else
+ - cat > conftest.s <<\EOF
+ - .text
+ - .global _start
+ - .weak foo
+ -_start:
+ - leaq foo(%rip), %rax
+ -EOF
+ - libc_cv_pie_option="-Wl,-pie"
+ - if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&5'
+ - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ - (eval $ac_try) 2>&5
+ - ac_status=$?
+ - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ - test $ac_status = 0; }; }; then
+ - libc_cv_ld_static_pie=yes
+ - else
+ - libc_cv_ld_static_pie=no
+ - fi
+ -rm -f conftest*
+ -fi
+ -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ld_static_pie" >&5
+ -$as_echo "$libc_cv_ld_static_pie" >&6; }
+ - if test "$libc_cv_ld_static_pie" != yes; then
+ - as_fn_error $? "linker support for static PIE needed" "$LINENO" 5
+ - fi
+ -fi
+ -
+ $as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h
+
+
+ diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
+ index cdaba0c075..611a7d9ba3 100644
+ --- a/sysdeps/x86_64/configure.ac
+ +++ b/sysdeps/x86_64/configure.ac
+ @@ -53,31 +53,6 @@ if test x"$build_mathvec" = xnotset; then
+ build_mathvec=yes
+ fi
+
+ -dnl Check if linker supports static PIE with the fix for
+ -dnl
+ -dnl https://sourceware.org/bugzilla/show_bug.cgi?id=21782
+ -dnl
+ -if test "$static_pie" = yes; then
+ - AC_CACHE_CHECK(for linker static PIE support, libc_cv_ld_static_pie, [dnl
+ -cat > conftest.s <<\EOF
+ - .text
+ - .global _start
+ - .weak foo
+ -_start:
+ - leaq foo(%rip), %rax
+ -EOF
+ - libc_cv_pie_option="-Wl,-pie"
+ - if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&AS_MESSAGE_LOG_FD); then
+ - libc_cv_ld_static_pie=yes
+ - else
+ - libc_cv_ld_static_pie=no
+ - fi
+ -rm -f conftest*])
+ - if test "$libc_cv_ld_static_pie" != yes; then
+ - AC_MSG_ERROR([linker support for static PIE needed])
+ - fi
+ -fi
+ -
+ dnl It is always possible to access static and hidden symbols in an
+ dnl position independent way.
+ AC_DEFINE(PI_STATIC_AND_HIDDEN)
+ diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
+ index 8e9baffeb4..74029871d8 100644
+ --- a/sysdeps/x86_64/dl-machine.h
+ +++ b/sysdeps/x86_64/dl-machine.h
+ @@ -315,16 +315,22 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
+ {
+ # ifndef RTLD_BOOTSTRAP
+ if (sym_map != map
+ - && sym_map->l_type != lt_executable
+ && !sym_map->l_relocated)
+ {
+ const char *strtab
+ = (const char *) D_PTR (map, l_info[DT_STRTAB]);
+ - _dl_error_printf ("\
+ + if (sym_map->l_type == lt_executable)
+ + _dl_fatal_printf ("\
+ +%s: IFUNC symbol '%s' referenced in '%s' is defined in the executable \
+ +and creates an unsatisfiable circular dependency.\n",
+ + RTLD_PROGNAME, strtab + refsym->st_name,
+ + map->l_name);
+ + else
+ + _dl_error_printf ("\
+ %s: Relink `%s' with `%s' for IFUNC symbol `%s'\n",
+ - RTLD_PROGNAME, map->l_name,
+ - sym_map->l_name,
+ - strtab + refsym->st_name);
+ + RTLD_PROGNAME, map->l_name,
+ + sym_map->l_name,
+ + strtab + refsym->st_name);
+ }
+ # endif
+ value = ((ElfW(Addr) (*) (void)) value) ();
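
The new fatal diagnostic above covers IFUNC symbols that are defined in the not-yet-relocated executable but referenced from a dependency's relocations; since dependencies are relocated before the executable, no relocation order can satisfy that. A hypothetical pair of sources that would trigger it (names invented for illustration):

/* prog.c -- the executable defines an IFUNC symbol.  */
static int impl (void) { return 42; }
static void *resolve_foo (void) { return (void *) impl; }
int foo (void) __attribute__ ((ifunc ("resolve_foo")));

/* libdep.c -- a DSO the executable links against references foo, so
   relocating the DSO would have to run the resolver while the
   executable is still unrelocated: an unsatisfiable cycle.  */
extern int foo (void);
int use_foo (void) { return foo (); }
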
+ diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
+ index a5c879d2af..070e5ef90b 100644
+ --- a/sysdeps/x86_64/memchr.S
+ +++ b/sysdeps/x86_64/memchr.S
+ @@ -21,9 +21,11 @@
+ #ifdef USE_AS_WMEMCHR
+ # define MEMCHR wmemchr
+ # define PCMPEQ pcmpeqd
+ +# define CHAR_PER_VEC 4
+ #else
+ # define MEMCHR memchr
+ # define PCMPEQ pcmpeqb
+ +# define CHAR_PER_VEC 16
+ #endif
+
+ /* fast SSE2 version with using pmaxub and 64 byte loop */
+ @@ -33,15 +35,14 @@ ENTRY(MEMCHR)
+ movd %esi, %xmm1
+ mov %edi, %ecx
+
+ +#ifdef __ILP32__
+ + /* Clear the upper 32 bits. */
+ + movl %edx, %edx
+ +#endif
+ #ifdef USE_AS_WMEMCHR
+ test %RDX_LP, %RDX_LP
+ jz L(return_null)
+ - shl $2, %RDX_LP
+ #else
+ -# ifdef __ILP32__
+ - /* Clear the upper 32 bits. */
+ - movl %edx, %edx
+ -# endif
+ punpcklbw %xmm1, %xmm1
+ test %RDX_LP, %RDX_LP
+ jz L(return_null)
+ @@ -60,13 +61,16 @@ ENTRY(MEMCHR)
+ test %eax, %eax
+
+ jnz L(matches_1)
+ - sub $16, %rdx
+ + sub $CHAR_PER_VEC, %rdx
+ jbe L(return_null)
+ add $16, %rdi
+ and $15, %ecx
+ and $-16, %rdi
+ +#ifdef USE_AS_WMEMCHR
+ + shr $2, %ecx
+ +#endif
+ add %rcx, %rdx
+ - sub $64, %rdx
+ + sub $(CHAR_PER_VEC * 4), %rdx
+ jbe L(exit_loop)
+ jmp L(loop_prolog)
+
+ @@ -77,16 +81,21 @@ L(crosscache):
+ movdqa (%rdi), %xmm0
+
+ PCMPEQ %xmm1, %xmm0
+ -/* Check if there is a match. */
+ + /* Check if there is a match. */
+ pmovmskb %xmm0, %eax
+ -/* Remove the leading bytes. */
+ + /* Remove the leading bytes. */
+ sar %cl, %eax
+ test %eax, %eax
+ je L(unaligned_no_match)
+ -/* Check which byte is a match. */
+ + /* Check which byte is a match. */
+ bsf %eax, %eax
+ -
+ +#ifdef USE_AS_WMEMCHR
+ + mov %eax, %esi
+ + shr $2, %esi
+ + sub %rsi, %rdx
+ +#else
+ sub %rax, %rdx
+ +#endif
+ jbe L(return_null)
+ add %rdi, %rax
+ add %rcx, %rax
+ @@ -94,15 +103,18 @@ L(crosscache):
+
+ .p2align 4
+ L(unaligned_no_match):
+ - /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using
+ + /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using
+ "rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to void
+ possible addition overflow. */
+ neg %rcx
+ add $16, %rcx
+ +#ifdef USE_AS_WMEMCHR
+ + shr $2, %ecx
+ +#endif
+ sub %rcx, %rdx
+ jbe L(return_null)
+ add $16, %rdi
+ - sub $64, %rdx
+ + sub $(CHAR_PER_VEC * 4), %rdx
+ jbe L(exit_loop)
+
+ .p2align 4
+ @@ -135,7 +147,7 @@ L(loop_prolog):
+ test $0x3f, %rdi
+ jz L(align64_loop)
+
+ - sub $64, %rdx
+ + sub $(CHAR_PER_VEC * 4), %rdx
+ jbe L(exit_loop)
+
+ movdqa (%rdi), %xmm0
+ @@ -167,11 +179,14 @@ L(loop_prolog):
+ mov %rdi, %rcx
+ and $-64, %rdi
+ and $63, %ecx
+ +#ifdef USE_AS_WMEMCHR
+ + shr $2, %ecx
+ +#endif
+ add %rcx, %rdx
+
+ .p2align 4
+ L(align64_loop):
+ - sub $64, %rdx
+ + sub $(CHAR_PER_VEC * 4), %rdx
+ jbe L(exit_loop)
+ movdqa (%rdi), %xmm0
+ movdqa 16(%rdi), %xmm2
+ @@ -218,7 +233,7 @@ L(align64_loop):
+
+ .p2align 4
+ L(exit_loop):
+ - add $32, %edx
+ + add $(CHAR_PER_VEC * 2), %edx
+ jle L(exit_loop_32)
+
+ movdqa (%rdi), %xmm0
+ @@ -238,7 +253,7 @@ L(exit_loop):
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32_1)
+ - sub $16, %edx
+ + sub $CHAR_PER_VEC, %edx
+ jle L(return_null)
+
+ PCMPEQ 48(%rdi), %xmm1
+ @@ -250,13 +265,13 @@ L(exit_loop):
+
+ .p2align 4
+ L(exit_loop_32):
+ - add $32, %edx
+ + add $(CHAR_PER_VEC * 2), %edx
+ movdqa (%rdi), %xmm0
+ PCMPEQ %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches_1)
+ - sub $16, %edx
+ + sub $CHAR_PER_VEC, %edx
+ jbe L(return_null)
+
+ PCMPEQ 16(%rdi), %xmm1
+ @@ -293,7 +308,13 @@ L(matches32):
+ .p2align 4
+ L(matches_1):
+ bsf %eax, %eax
+ +#ifdef USE_AS_WMEMCHR
+ + mov %eax, %esi
+ + shr $2, %esi
+ + sub %rsi, %rdx
+ +#else
+ sub %rax, %rdx
+ +#endif
+ jbe L(return_null)
+ add %rdi, %rax
+ ret
+ @@ -301,7 +322,13 @@ L(matches_1):
+ .p2align 4
+ L(matches16_1):
+ bsf %eax, %eax
+ +#ifdef USE_AS_WMEMCHR
+ + mov %eax, %esi
+ + shr $2, %esi
+ + sub %rsi, %rdx
+ +#else
+ sub %rax, %rdx
+ +#endif
+ jbe L(return_null)
+ lea 16(%rdi, %rax), %rax
+ ret
+ @@ -309,7 +336,13 @@ L(matches16_1):
+ .p2align 4
+ L(matches32_1):
+ bsf %eax, %eax
+ +#ifdef USE_AS_WMEMCHR
+ + mov %eax, %esi
+ + shr $2, %esi
+ + sub %rsi, %rdx
+ +#else
+ sub %rax, %rdx
+ +#endif
+ jbe L(return_null)
+ lea 32(%rdi, %rax), %rax
+ ret
+ @@ -317,7 +350,13 @@ L(matches32_1):
+ .p2align 4
+ L(matches48_1):
+ bsf %eax, %eax
+ +#ifdef USE_AS_WMEMCHR
+ + mov %eax, %esi
+ + shr $2, %esi
+ + sub %rsi, %rdx
+ +#else
+ sub %rax, %rdx
+ +#endif
+ jbe L(return_null)
+ lea 48(%rdi, %rax), %rax
+ ret
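
The memchr.S changes above fix wmemchr length accounting: for wmemchr the length register counts wchar_t elements (CHAR_PER_VEC is 4 per 16-byte vector rather than 16), while pmovmskb/bsf produce byte offsets, so every byte quantity is shifted right by 2 before being subtracted from the remaining count. A small C model of that bookkeeping (illustrative only, not glibc code):

#include <stddef.h>

/* Decide whether a byte offset returned by bsf still lies within the
   remaining wmemchr length, which is counted in wchar_t elements.
   Mirrors the added "shr $2, %esi; sub %rsi, %rdx; jbe ..." sequence.  */
static inline int
match_in_range (size_t remaining_wchars, unsigned int byte_offset)
{
  return remaining_wchars > (size_t) (byte_offset >> 2);
}
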
+ diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
+ index 395e432c09..da1446d731 100644
+ --- a/sysdeps/x86_64/multiarch/Makefile
+ +++ b/sysdeps/x86_64/multiarch/Makefile
+ @@ -43,7 +43,45 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c \
+ memmove-avx512-unaligned-erms \
+ memset-sse2-unaligned-erms \
+ memset-avx2-unaligned-erms \
+ - memset-avx512-unaligned-erms
+ + memset-avx512-unaligned-erms \
+ + memchr-avx2-rtm \
+ + memcmp-avx2-movbe-rtm \
+ + memmove-avx-unaligned-erms-rtm \
+ + memrchr-avx2-rtm \
+ + memset-avx2-unaligned-erms-rtm \
+ + rawmemchr-avx2-rtm \
+ + strchr-avx2-rtm \
+ + strcmp-avx2-rtm \
+ + strchrnul-avx2-rtm \
+ + stpcpy-avx2-rtm \
+ + stpncpy-avx2-rtm \
+ + strcat-avx2-rtm \
+ + strcpy-avx2-rtm \
+ + strlen-avx2-rtm \
+ + strncat-avx2-rtm \
+ + strncmp-avx2-rtm \
+ + strncpy-avx2-rtm \
+ + strnlen-avx2-rtm \
+ + strrchr-avx2-rtm \
+ + memchr-evex \
+ + memcmp-evex-movbe \
+ + memmove-evex-unaligned-erms \
+ + memrchr-evex \
+ + memset-evex-unaligned-erms \
+ + rawmemchr-evex \
+ + stpcpy-evex \
+ + stpncpy-evex \
+ + strcat-evex \
+ + strchr-evex \
+ + strchrnul-evex \
+ + strcmp-evex \
+ + strcpy-evex \
+ + strlen-evex \
+ + strncat-evex \
+ + strncmp-evex \
+ + strncpy-evex \
+ + strnlen-evex \
+ + strrchr-evex
+ CFLAGS-varshift.c += -msse4
+ CFLAGS-strcspn-c.c += -msse4
+ CFLAGS-strpbrk-c.c += -msse4
+ @@ -59,8 +97,24 @@ sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \
+ wcscpy-ssse3 wcscpy-c \
+ wcschr-sse2 wcschr-avx2 \
+ wcsrchr-sse2 wcsrchr-avx2 \
+ - wcsnlen-sse4_1 wcsnlen-c \
+ - wcslen-sse2 wcslen-avx2 wcsnlen-avx2
+ + wcslen-sse2 wcslen-sse4_1 wcslen-avx2 \
+ + wcsnlen-c wcsnlen-sse4_1 wcsnlen-avx2 \
+ + wcschr-avx2-rtm \
+ + wcscmp-avx2-rtm \
+ + wcslen-avx2-rtm \
+ + wcsncmp-avx2-rtm \
+ + wcsnlen-avx2-rtm \
+ + wcsrchr-avx2-rtm \
+ + wmemchr-avx2-rtm \
+ + wmemcmp-avx2-movbe-rtm \
+ + wcschr-evex \
+ + wcscmp-evex \
+ + wcslen-evex \
+ + wcsncmp-evex \
+ + wcsnlen-evex \
+ + wcsrchr-evex \
+ + wmemchr-evex \
+ + wmemcmp-evex-movbe
+ endif
+
+ ifeq ($(subdir),debug)
+ diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
+ index 69f30398ae..925e5b61eb 100644
+ --- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
+ +++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
+ @@ -21,16 +21,28 @@
+
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
+ static inline void *
+ IFUNC_SELECTOR (void)
+ {
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+ - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+ - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+ + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+ + && CPU_FEATURES_CPU_P (cpu_features, BMI2)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ - return OPTIMIZE (avx2);
+ + {
+ + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)
+ + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable))
+ + return OPTIMIZE (evex);
+ +
+ + if (CPU_FEATURES_CPU_P (cpu_features, RTM))
+ + return OPTIMIZE (avx2_rtm);
+ +
+ + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ + return OPTIMIZE (avx2);
+ + }
+
+ return OPTIMIZE (sse2);
+ }
+ diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+ index ce7eb1eecf..e712b148f5 100644
+ --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+ +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+ @@ -41,8 +41,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ /* Support sysdeps/x86_64/multiarch/memchr.c. */
+ IFUNC_IMPL (i, name, memchr,
+ IFUNC_IMPL_ADD (array, i, memchr,
+ - HAS_ARCH_FEATURE (AVX2_Usable),
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ __memchr_avx2)
+ + IFUNC_IMPL_ADD (array, i, memchr,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __memchr_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, memchr,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ + __memchr_evex)
+ IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/memcmp.c. */
+ @@ -51,6 +62,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ (HAS_ARCH_FEATURE (AVX2_Usable)
+ && HAS_CPU_FEATURE (MOVBE)),
+ __memcmp_avx2_movbe)
+ + IFUNC_IMPL_ADD (array, i, memcmp,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (MOVBE)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __memcmp_avx2_movbe_rtm)
+ + IFUNC_IMPL_ADD (array, i, memcmp,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)
+ + && HAS_CPU_FEATURE (MOVBE)),
+ + __memcmp_evex_movbe)
+ IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSE4_1),
+ __memcmp_sse4_1)
+ IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSSE3),
+ @@ -64,10 +85,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memmove_chk_avx512_no_vzeroupper)
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ - HAS_ARCH_FEATURE (AVX512F_Usable),
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ __memmove_chk_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ - HAS_ARCH_FEATURE (AVX512F_Usable),
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ __memmove_chk_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ HAS_ARCH_FEATURE (AVX_Usable),
+ @@ -75,6 +96,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ HAS_ARCH_FEATURE (AVX_Usable),
+ __memmove_chk_avx_unaligned_erms)
+ + IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ + (HAS_ARCH_FEATURE (AVX_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __memmove_chk_avx_unaligned_rtm)
+ + IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ + (HAS_ARCH_FEATURE (AVX_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __memmove_chk_avx_unaligned_erms_rtm)
+ + IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ + __memmove_chk_evex_unaligned)
+ + IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ + __memmove_chk_evex_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ HAS_CPU_FEATURE (SSSE3),
+ __memmove_chk_ssse3_back)
+ @@ -97,14 +132,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, memmove,
+ HAS_ARCH_FEATURE (AVX_Usable),
+ __memmove_avx_unaligned_erms)
+ + IFUNC_IMPL_ADD (array, i, memmove,
+ + (HAS_ARCH_FEATURE (AVX_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __memmove_avx_unaligned_rtm)
+ + IFUNC_IMPL_ADD (array, i, memmove,
+ + (HAS_ARCH_FEATURE (AVX_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __memmove_avx_unaligned_erms_rtm)
+ + IFUNC_IMPL_ADD (array, i, memmove,
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ + __memmove_evex_unaligned)
+ + IFUNC_IMPL_ADD (array, i, memmove,
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ + __memmove_evex_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memmove,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memmove_avx512_no_vzeroupper)
+ IFUNC_IMPL_ADD (array, i, memmove,
+ - HAS_ARCH_FEATURE (AVX512F_Usable),
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ __memmove_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, memmove,
+ - HAS_ARCH_FEATURE (AVX512F_Usable),
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ __memmove_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
+ __memmove_ssse3_back)
+ @@ -119,8 +168,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ /* Support sysdeps/x86_64/multiarch/memrchr.c. */
+ IFUNC_IMPL (i, name, memrchr,
+ IFUNC_IMPL_ADD (array, i, memrchr,
+ - HAS_ARCH_FEATURE (AVX2_Usable),
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ __memrchr_avx2)
+ + IFUNC_IMPL_ADD (array, i, memrchr,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __memrchr_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, memrchr,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ + __memrchr_evex)
+ +
+ IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_sse2))
+
+ #ifdef SHARED
+ @@ -139,10 +200,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __memset_chk_avx2_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
+ - HAS_ARCH_FEATURE (AVX512F_Usable),
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __memset_chk_avx2_unaligned_rtm)
+ + IFUNC_IMPL_ADD (array, i, __memset_chk,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __memset_chk_avx2_unaligned_erms_rtm)
+ + IFUNC_IMPL_ADD (array, i, __memset_chk,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)),
+ + __memset_chk_evex_unaligned)
+ + IFUNC_IMPL_ADD (array, i, __memset_chk,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)),
+ + __memset_chk_evex_unaligned_erms)
+ + IFUNC_IMPL_ADD (array, i, __memset_chk,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)),
+ __memset_chk_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
+ - HAS_ARCH_FEATURE (AVX512F_Usable),
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)),
+ __memset_chk_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ @@ -164,10 +243,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __memset_avx2_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memset,
+ - HAS_ARCH_FEATURE (AVX512F_Usable),
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __memset_avx2_unaligned_rtm)
+ + IFUNC_IMPL_ADD (array, i, memset,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __memset_avx2_unaligned_erms_rtm)
+ + IFUNC_IMPL_ADD (array, i, memset,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)),
+ + __memset_evex_unaligned)
+ + IFUNC_IMPL_ADD (array, i, memset,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)),
+ + __memset_evex_unaligned_erms)
+ + IFUNC_IMPL_ADD (array, i, memset,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)),
+ __memset_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memset,
+ - HAS_ARCH_FEATURE (AVX512F_Usable),
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)),
+ __memset_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, memset,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ @@ -177,22 +274,55 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ /* Support sysdeps/x86_64/multiarch/rawmemchr.c. */
+ IFUNC_IMPL (i, name, rawmemchr,
+ IFUNC_IMPL_ADD (array, i, rawmemchr,
+ - HAS_ARCH_FEATURE (AVX2_Usable),
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ __rawmemchr_avx2)
+ + IFUNC_IMPL_ADD (array, i, rawmemchr,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __rawmemchr_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, rawmemchr,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ + __rawmemchr_evex)
+ IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/strlen.c. */
+ IFUNC_IMPL (i, name, strlen,
+ IFUNC_IMPL_ADD (array, i, strlen,
+ - HAS_ARCH_FEATURE (AVX2_Usable),
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ __strlen_avx2)
+ + IFUNC_IMPL_ADD (array, i, strlen,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __strlen_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, strlen,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ + __strlen_evex)
+ IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/strnlen.c. */
+ IFUNC_IMPL (i, name, strnlen,
+ IFUNC_IMPL_ADD (array, i, strnlen,
+ - HAS_ARCH_FEATURE (AVX2_Usable),
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ __strnlen_avx2)
+ + IFUNC_IMPL_ADD (array, i, strnlen,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __strnlen_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, strnlen,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ + __strnlen_evex)
+ IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/stpncpy.c. */
+ @@ -201,6 +331,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ __stpncpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, stpncpy, HAS_ARCH_FEATURE (AVX2_Usable),
+ __stpncpy_avx2)
+ + IFUNC_IMPL_ADD (array, i, stpncpy,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __stpncpy_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, stpncpy,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)),
+ + __stpncpy_evex)
+ IFUNC_IMPL_ADD (array, i, stpncpy, 1,
+ __stpncpy_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2))
+ @@ -211,6 +349,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ __stpcpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, stpcpy, HAS_ARCH_FEATURE (AVX2_Usable),
+ __stpcpy_avx2)
+ + IFUNC_IMPL_ADD (array, i, stpcpy,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __stpcpy_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, stpcpy,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)),
+ + __stpcpy_evex)
+ IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2))
+
+ @@ -245,6 +391,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL (i, name, strcat,
+ IFUNC_IMPL_ADD (array, i, strcat, HAS_ARCH_FEATURE (AVX2_Usable),
+ __strcat_avx2)
+ + IFUNC_IMPL_ADD (array, i, strcat,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __strcat_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, strcat,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)),
+ + __strcat_evex)
+ IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSSE3),
+ __strcat_ssse3)
+ IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2_unaligned)
+ @@ -253,23 +407,56 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ /* Support sysdeps/x86_64/multiarch/strchr.c. */
+ IFUNC_IMPL (i, name, strchr,
+ IFUNC_IMPL_ADD (array, i, strchr,
+ - HAS_ARCH_FEATURE (AVX2_Usable),
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ __strchr_avx2)
+ + IFUNC_IMPL_ADD (array, i, strchr,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __strchr_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, strchr,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ + __strchr_evex)
+ IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf)
+ IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/strchrnul.c. */
+ IFUNC_IMPL (i, name, strchrnul,
+ IFUNC_IMPL_ADD (array, i, strchrnul,
+ - HAS_ARCH_FEATURE (AVX2_Usable),
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ __strchrnul_avx2)
+ + IFUNC_IMPL_ADD (array, i, strchrnul,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __strchrnul_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, strchrnul,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ + __strchrnul_evex)
+ IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/strrchr.c. */
+ IFUNC_IMPL (i, name, strrchr,
+ IFUNC_IMPL_ADD (array, i, strrchr,
+ - HAS_ARCH_FEATURE (AVX2_Usable),
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ __strrchr_avx2)
+ + IFUNC_IMPL_ADD (array, i, strrchr,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __strrchr_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, strrchr,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ + __strrchr_evex)
+ IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/strcmp.c. */
+ @@ -277,6 +464,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, strcmp,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __strcmp_avx2)
+ + IFUNC_IMPL_ADD (array, i, strcmp,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __strcmp_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, strcmp,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ + __strcmp_evex)
+ IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2),
+ __strcmp_sse42)
+ IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSSE3),
+ @@ -288,6 +484,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL (i, name, strcpy,
+ IFUNC_IMPL_ADD (array, i, strcpy, HAS_ARCH_FEATURE (AVX2_Usable),
+ __strcpy_avx2)
+ + IFUNC_IMPL_ADD (array, i, strcpy,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __strcpy_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, strcpy,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)),
+ + __strcpy_evex)
+ IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSSE3),
+ __strcpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_sse2_unaligned)
+ @@ -331,6 +535,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL (i, name, strncat,
+ IFUNC_IMPL_ADD (array, i, strncat, HAS_ARCH_FEATURE (AVX2_Usable),
+ __strncat_avx2)
+ + IFUNC_IMPL_ADD (array, i, strncat,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __strncat_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, strncat,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)),
+ + __strncat_evex)
+ IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSSE3),
+ __strncat_ssse3)
+ IFUNC_IMPL_ADD (array, i, strncat, 1,
+ @@ -341,6 +553,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL (i, name, strncpy,
+ IFUNC_IMPL_ADD (array, i, strncpy, HAS_ARCH_FEATURE (AVX2_Usable),
+ __strncpy_avx2)
+ + IFUNC_IMPL_ADD (array, i, strncpy,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __strncpy_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, strncpy,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)),
+ + __strncpy_evex)
+ IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSSE3),
+ __strncpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, strncpy, 1,
+ @@ -368,29 +588,73 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ /* Support sysdeps/x86_64/multiarch/wcschr.c. */
+ IFUNC_IMPL (i, name, wcschr,
+ IFUNC_IMPL_ADD (array, i, wcschr,
+ - HAS_ARCH_FEATURE (AVX2_Usable),
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ __wcschr_avx2)
+ + IFUNC_IMPL_ADD (array, i, wcschr,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __wcschr_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, wcschr,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ + __wcschr_evex)
+ IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/wcsrchr.c. */
+ IFUNC_IMPL (i, name, wcsrchr,
+ IFUNC_IMPL_ADD (array, i, wcsrchr,
+ - HAS_ARCH_FEATURE (AVX2_Usable),
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ __wcsrchr_avx2)
+ + IFUNC_IMPL_ADD (array, i, wcsrchr,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __wcsrchr_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, wcsrchr,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ + __wcsrchr_evex)
+ IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/wcscmp.c. */
+ IFUNC_IMPL (i, name, wcscmp,
+ IFUNC_IMPL_ADD (array, i, wcscmp,
+ - HAS_ARCH_FEATURE (AVX2_Usable),
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ __wcscmp_avx2)
+ + IFUNC_IMPL_ADD (array, i, wcscmp,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __wcscmp_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, wcscmp,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ + __wcscmp_evex)
+ IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/wcsncmp.c. */
+ IFUNC_IMPL (i, name, wcsncmp,
+ IFUNC_IMPL_ADD (array, i, wcsncmp,
+ - HAS_ARCH_FEATURE (AVX2_Usable),
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ __wcsncmp_avx2)
+ + IFUNC_IMPL_ADD (array, i, wcsncmp,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __wcsncmp_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, wcsncmp,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ + __wcsncmp_evex)
+ IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/wcscpy.c. */
+ @@ -402,15 +666,40 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ /* Support sysdeps/x86_64/multiarch/wcslen.c. */
+ IFUNC_IMPL (i, name, wcslen,
+ IFUNC_IMPL_ADD (array, i, wcslen,
+ - HAS_ARCH_FEATURE (AVX2_Usable),
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ __wcslen_avx2)
+ + IFUNC_IMPL_ADD (array, i, wcslen,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __wcslen_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, wcslen,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ + __wcslen_evex)
+ + IFUNC_IMPL_ADD (array, i, wcslen,
+ + CPU_FEATURE_USABLE (SSE4_1),
+ + __wcslen_sse4_1)
+ IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/wcsnlen.c. */
+ IFUNC_IMPL (i, name, wcsnlen,
+ IFUNC_IMPL_ADD (array, i, wcsnlen,
+ - HAS_ARCH_FEATURE (AVX2_Usable),
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ __wcsnlen_avx2)
+ + IFUNC_IMPL_ADD (array, i, wcsnlen,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __wcsnlen_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, wcsnlen,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ + __wcsnlen_evex)
+ IFUNC_IMPL_ADD (array, i, wcsnlen,
+ HAS_CPU_FEATURE (SSE4_1),
+ __wcsnlen_sse4_1)
+ @@ -419,8 +708,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ /* Support sysdeps/x86_64/multiarch/wmemchr.c. */
+ IFUNC_IMPL (i, name, wmemchr,
+ IFUNC_IMPL_ADD (array, i, wmemchr,
+ - HAS_ARCH_FEATURE (AVX2_Usable),
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ __wmemchr_avx2)
+ + IFUNC_IMPL_ADD (array, i, wmemchr,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (BMI2)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __wmemchr_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, wmemchr,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)
+ + && HAS_CPU_FEATURE (BMI2)),
+ + __wmemchr_evex)
+ IFUNC_IMPL_ADD (array, i, wmemchr, 1, __wmemchr_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/wmemcmp.c. */
+ @@ -429,6 +729,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ (HAS_ARCH_FEATURE (AVX2_Usable)
+ && HAS_CPU_FEATURE (MOVBE)),
+ __wmemcmp_avx2_movbe)
+ + IFUNC_IMPL_ADD (array, i, wmemcmp,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (MOVBE)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __wmemcmp_avx2_movbe_rtm)
+ + IFUNC_IMPL_ADD (array, i, wmemcmp,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)
+ + && HAS_CPU_FEATURE (MOVBE)),
+ + __wmemcmp_evex_movbe)
+ IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSE4_1),
+ __wmemcmp_sse4_1)
+ IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSSE3),
+ @@ -443,7 +753,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __wmemset_avx2_unaligned)
+ IFUNC_IMPL_ADD (array, i, wmemset,
+ - HAS_ARCH_FEATURE (AVX512F_Usable),
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __wmemset_avx2_unaligned_rtm)
+ + IFUNC_IMPL_ADD (array, i, wmemset,
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ + __wmemset_evex_unaligned)
+ + IFUNC_IMPL_ADD (array, i, wmemset,
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ __wmemset_avx512_unaligned))
+
+ #ifdef SHARED
+ @@ -453,10 +770,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memcpy_chk_avx512_no_vzeroupper)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ - HAS_ARCH_FEATURE (AVX512F_Usable),
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ __memcpy_chk_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ - HAS_ARCH_FEATURE (AVX512F_Usable),
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ __memcpy_chk_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ HAS_ARCH_FEATURE (AVX_Usable),
+ @@ -464,6 +781,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ HAS_ARCH_FEATURE (AVX_Usable),
+ __memcpy_chk_avx_unaligned_erms)
+ + IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ + (HAS_ARCH_FEATURE (AVX_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __memcpy_chk_avx_unaligned_rtm)
+ + IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ + (HAS_ARCH_FEATURE (AVX_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __memcpy_chk_avx_unaligned_erms_rtm)
+ + IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ + __memcpy_chk_evex_unaligned)
+ + IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ + __memcpy_chk_evex_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ HAS_CPU_FEATURE (SSSE3),
+ __memcpy_chk_ssse3_back)
+ @@ -486,6 +817,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, memcpy,
+ HAS_ARCH_FEATURE (AVX_Usable),
+ __memcpy_avx_unaligned_erms)
+ + IFUNC_IMPL_ADD (array, i, memcpy,
+ + (HAS_ARCH_FEATURE (AVX_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __memcpy_avx_unaligned_rtm)
+ + IFUNC_IMPL_ADD (array, i, memcpy,
+ + (HAS_ARCH_FEATURE (AVX_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __memcpy_avx_unaligned_erms_rtm)
+ + IFUNC_IMPL_ADD (array, i, memcpy,
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ + __memcpy_evex_unaligned)
+ + IFUNC_IMPL_ADD (array, i, memcpy,
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ + __memcpy_evex_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3),
+ __memcpy_ssse3_back)
+ IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3),
+ @@ -494,10 +839,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __memcpy_avx512_no_vzeroupper)
+ IFUNC_IMPL_ADD (array, i, memcpy,
+ - HAS_ARCH_FEATURE (AVX512F_Usable),
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ __memcpy_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, memcpy,
+ - HAS_ARCH_FEATURE (AVX512F_Usable),
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ __memcpy_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, memcpy, 1,
+ @@ -511,10 +856,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __mempcpy_chk_avx512_no_vzeroupper)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ - HAS_ARCH_FEATURE (AVX512F_Usable),
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ __mempcpy_chk_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ - HAS_ARCH_FEATURE (AVX512F_Usable),
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ __mempcpy_chk_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ HAS_ARCH_FEATURE (AVX_Usable),
+ @@ -522,6 +867,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ HAS_ARCH_FEATURE (AVX_Usable),
+ __mempcpy_chk_avx_unaligned_erms)
+ + IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ + (HAS_ARCH_FEATURE (AVX_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __mempcpy_chk_avx_unaligned_rtm)
+ + IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ + (HAS_ARCH_FEATURE (AVX_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __mempcpy_chk_avx_unaligned_erms_rtm)
+ + IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ + __mempcpy_chk_evex_unaligned)
+ + IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ + __mempcpy_chk_evex_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ HAS_CPU_FEATURE (SSSE3),
+ __mempcpy_chk_ssse3_back)
+ @@ -542,10 +901,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __mempcpy_avx512_no_vzeroupper)
+ IFUNC_IMPL_ADD (array, i, mempcpy,
+ - HAS_ARCH_FEATURE (AVX512F_Usable),
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ __mempcpy_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, mempcpy,
+ - HAS_ARCH_FEATURE (AVX512F_Usable),
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ __mempcpy_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, mempcpy,
+ HAS_ARCH_FEATURE (AVX_Usable),
+ @@ -553,6 +912,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, mempcpy,
+ HAS_ARCH_FEATURE (AVX_Usable),
+ __mempcpy_avx_unaligned_erms)
+ + IFUNC_IMPL_ADD (array, i, mempcpy,
+ + (HAS_ARCH_FEATURE (AVX_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __mempcpy_avx_unaligned_rtm)
+ + IFUNC_IMPL_ADD (array, i, mempcpy,
+ + (HAS_ARCH_FEATURE (AVX_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __mempcpy_avx_unaligned_erms_rtm)
+ + IFUNC_IMPL_ADD (array, i, mempcpy,
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ + __mempcpy_evex_unaligned)
+ + IFUNC_IMPL_ADD (array, i, mempcpy,
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ + __mempcpy_evex_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
+ __mempcpy_ssse3_back)
+ IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
+ @@ -568,6 +941,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, strncmp,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __strncmp_avx2)
+ + IFUNC_IMPL_ADD (array, i, strncmp,
+ + (HAS_ARCH_FEATURE (AVX2_Usable)
+ + && HAS_CPU_FEATURE (RTM)),
+ + __strncmp_avx2_rtm)
+ + IFUNC_IMPL_ADD (array, i, strncmp,
+ + (HAS_ARCH_FEATURE (AVX512VL_Usable)
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)),
+ + __strncmp_evex)
+ IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSE4_2),
+ __strncmp_sse42)
+ IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSSE3),
+ @@ -582,6 +963,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, __wmemset_chk,
+ HAS_ARCH_FEATURE (AVX2_Usable),
+ __wmemset_chk_avx2_unaligned)
+ + IFUNC_IMPL_ADD (array, i, __wmemset_chk,
+ + HAS_ARCH_FEATURE (AVX512VL_Usable),
+ + __wmemset_chk_evex_unaligned)
+ IFUNC_IMPL_ADD (array, i, __wmemset_chk,
+ HAS_ARCH_FEATURE (AVX512F_Usable),
+ __wmemset_chk_avx512_unaligned))
+ diff --git a/sysdeps/x86_64/multiarch/ifunc-memcmp.h b/sysdeps/x86_64/multiarch/ifunc-memcmp.h
+ index c14db39cf4..ebbb0c01cf 100644
+ --- a/sysdeps/x86_64/multiarch/ifunc-memcmp.h
+ +++ b/sysdeps/x86_64/multiarch/ifunc-memcmp.h
+ @@ -23,17 +23,28 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe) attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe_rtm) attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_movbe) attribute_hidden;
+
+ static inline void *
+ IFUNC_SELECTOR (void)
+ {
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+ - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+ - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+ + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+ && CPU_FEATURES_CPU_P (cpu_features, MOVBE)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ - return OPTIMIZE (avx2_movbe);
+ + {
+ + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)
+ + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable))
+ + return OPTIMIZE (evex_movbe);
+ +
+ + if (CPU_FEATURES_CPU_P (cpu_features, RTM))
+ + return OPTIMIZE (avx2_movbe_rtm);
+ +
+ + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ + return OPTIMIZE (avx2_movbe);
+ + }
+
+ if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1))
+ return OPTIMIZE (sse4_1);
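The selector headers in this patch (ifunc-memcmp.h above and the ones that follow) all move to the same dispatch order, so it is worth spelling out once. Roughly, and only as a schematic sketch rather than part of the patch (the concrete OPTIMIZE names differ per routine): prefer the EVEX variant when AVX512VL (plus AVX512BW for the string and wide-character routines) is usable, pick an _rtm variant on RTM-capable CPUs, use plain AVX2 only when Prefer_No_VZEROUPPER is not set, and otherwise fall back to the SSE baseline:

    #include <init-arch.h>

    static inline void *
    IFUNC_SELECTOR (void)
    {
      const struct cpu_features* cpu_features = __get_cpu_features ();

      if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
          && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
        {
          /* EVEX variants use only %ymm16-%ymm31, so they never need
             VZEROUPPER (which can abort an RTM transaction).  */
          if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)
              && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable))
            return OPTIMIZE (evex);

          /* The _rtm variants return through an XTEST-guarded sequence
             instead of a bare VZEROUPPER.  */
          if (CPU_FEATURES_CPU_P (cpu_features, RTM))
            return OPTIMIZE (avx2_rtm);

          if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
            return OPTIMIZE (avx2);
        }

      return OPTIMIZE (sse2);
    }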
+ diff --git a/sysdeps/x86_64/multiarch/ifunc-memmove.h b/sysdeps/x86_64/multiarch/ifunc-memmove.h
+ index 81673d2019..dfc5a28487 100644
+ --- a/sysdeps/x86_64/multiarch/ifunc-memmove.h
+ +++ b/sysdeps/x86_64/multiarch/ifunc-memmove.h
+ @@ -29,6 +29,14 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3_back) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms)
+ attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_rtm)
+ + attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms_rtm)
+ + attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
+ + attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
+ + attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
+ attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
+ @@ -48,21 +56,42 @@ IFUNC_SELECTOR (void)
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
+ && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+ {
+ - if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ - return OPTIMIZE (avx512_no_vzeroupper);
+ + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable))
+ + {
+ + if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ + return OPTIMIZE (avx512_unaligned_erms);
+
+ - if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ - return OPTIMIZE (avx512_unaligned_erms);
+ + return OPTIMIZE (avx512_unaligned);
+ + }
+
+ - return OPTIMIZE (avx512_unaligned);
+ + return OPTIMIZE (avx512_no_vzeroupper);
+ }
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ {
+ - if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ - return OPTIMIZE (avx_unaligned_erms);
+ + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable))
+ + {
+ + if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ + return OPTIMIZE (evex_unaligned_erms);
+ +
+ + return OPTIMIZE (evex_unaligned);
+ + }
+ +
+ + if (CPU_FEATURES_CPU_P (cpu_features, RTM))
+ + {
+ + if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ + return OPTIMIZE (avx_unaligned_erms_rtm);
+ +
+ + return OPTIMIZE (avx_unaligned_rtm);
+ + }
+ +
+ + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ + {
+ + if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ + return OPTIMIZE (avx_unaligned_erms);
+
+ - return OPTIMIZE (avx_unaligned);
+ + return OPTIMIZE (avx_unaligned);
+ + }
+ }
+
+ if (!CPU_FEATURES_CPU_P (cpu_features, SSSE3)
+ diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h
+ index d690293385..48fdb24b02 100644
+ --- a/sysdeps/x86_64/multiarch/ifunc-memset.h
+ +++ b/sysdeps/x86_64/multiarch/ifunc-memset.h
+ @@ -27,6 +27,14 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms)
+ attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm)
+ + attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms_rtm)
+ + attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
+ + attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
+ + attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
+ attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
+ @@ -45,21 +53,44 @@ IFUNC_SELECTOR (void)
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
+ && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+ {
+ - if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ - return OPTIMIZE (avx512_no_vzeroupper);
+ + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)
+ + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable))
+ + {
+ + if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ + return OPTIMIZE (avx512_unaligned_erms);
+
+ - if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ - return OPTIMIZE (avx512_unaligned_erms);
+ + return OPTIMIZE (avx512_unaligned);
+ + }
+
+ - return OPTIMIZE (avx512_unaligned);
+ + return OPTIMIZE (avx512_no_vzeroupper);
+ }
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+ {
+ - if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ - return OPTIMIZE (avx2_unaligned_erms);
+ - else
+ - return OPTIMIZE (avx2_unaligned);
+ + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)
+ + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable))
+ + {
+ + if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ + return OPTIMIZE (evex_unaligned_erms);
+ +
+ + return OPTIMIZE (evex_unaligned);
+ + }
+ +
+ + if (CPU_FEATURES_CPU_P (cpu_features, RTM))
+ + {
+ + if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ + return OPTIMIZE (avx2_unaligned_erms_rtm);
+ +
+ + return OPTIMIZE (avx2_unaligned_rtm);
+ + }
+ +
+ + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ + {
+ + if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ + return OPTIMIZE (avx2_unaligned_erms);
+ +
+ + return OPTIMIZE (avx2_unaligned);
+ + }
+ }
+
+ if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
+ index ae4f451803..f38a3b7501 100644
+ --- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h
+ +++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
+ @@ -25,16 +25,27 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
+ attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
+ static inline void *
+ IFUNC_SELECTOR (void)
+ {
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+ - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+ - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+ + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ - return OPTIMIZE (avx2);
+ + {
+ + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)
+ + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable))
+ + return OPTIMIZE (evex);
+ +
+ + if (CPU_FEATURES_CPU_P (cpu_features, RTM))
+ + return OPTIMIZE (avx2_rtm);
+ +
+ + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ + return OPTIMIZE (avx2);
+ + }
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
+ return OPTIMIZE (sse2_unaligned);
+ diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
+ new file mode 100644
+ index 0000000000..564cc8cbec
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
+ @@ -0,0 +1,52 @@
+ +/* Common definition for ifunc selections for wcslen and wcsnlen
+ + All versions must be listed in ifunc-impl-list.c.
+ + Copyright (C) 2017-2021 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#include <init-arch.h>
+ +
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+ +
+ +static inline void *
+ +IFUNC_SELECTOR (void)
+ +{
+ + const struct cpu_features* cpu_features = __get_cpu_features ();
+ +
+ + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+ + && CPU_FEATURES_CPU_P (cpu_features, BMI2)
+ + && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ + {
+ + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)
+ + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable))
+ + return OPTIMIZE (evex);
+ +
+ + if (CPU_FEATURES_CPU_P (cpu_features, RTM))
+ + return OPTIMIZE (avx2_rtm);
+ +
+ + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ + return OPTIMIZE (avx2);
+ + }
+ +
+ + if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1))
+ + return OPTIMIZE (sse4_1);
+ +
+ + return OPTIMIZE (sse2);
+ +}
+ diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
+ index 583f6310a1..0ce29a229d 100644
+ --- a/sysdeps/x86_64/multiarch/ifunc-wmemset.h
+ +++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
+ @@ -20,6 +20,9 @@
+
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm)
+ + attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden;
+
+ static inline void *
+ @@ -27,14 +30,21 @@ IFUNC_SELECTOR (void)
+ {
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+ - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+ - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+ + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ {
+ - if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
+ - && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+ - return OPTIMIZE (avx512_unaligned);
+ - else
+ + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable))
+ + {
+ + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+ + return OPTIMIZE (avx512_unaligned);
+ +
+ + return OPTIMIZE (evex_unaligned);
+ + }
+ +
+ + if (CPU_FEATURES_CPU_P (cpu_features, RTM))
+ + return OPTIMIZE (avx2_unaligned_rtm);
+ +
+ + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ return OPTIMIZE (avx2_unaligned);
+ }
+
+ diff --git a/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S
+ new file mode 100644
+ index 0000000000..87b076c7c4
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S
+ @@ -0,0 +1,12 @@
+ +#ifndef MEMCHR
+ +# define MEMCHR __memchr_avx2_rtm
+ +#endif
+ +
+ +#define ZERO_UPPER_VEC_REGISTERS_RETURN \
+ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
+ +
+ +#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
+ +
+ +#define SECTION(p) p##.avx.rtm
+ +
+ +#include "memchr-avx2.S"
+ diff --git a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S
+ index e5a9abd211..0987616a1b 100644
+ --- a/sysdeps/x86_64/multiarch/memchr-avx2.S
+ +++ b/sysdeps/x86_64/multiarch/memchr-avx2.S
+ @@ -26,319 +26,407 @@
+
+ # ifdef USE_AS_WMEMCHR
+ # define VPCMPEQ vpcmpeqd
+ +# define VPBROADCAST vpbroadcastd
+ +# define CHAR_SIZE 4
+ # else
+ # define VPCMPEQ vpcmpeqb
+ +# define VPBROADCAST vpbroadcastb
+ +# define CHAR_SIZE 1
+ +# endif
+ +
+ +# ifdef USE_AS_RAWMEMCHR
+ +# define ERAW_PTR_REG ecx
+ +# define RRAW_PTR_REG rcx
+ +# define ALGN_PTR_REG rdi
+ +# else
+ +# define ERAW_PTR_REG edi
+ +# define RRAW_PTR_REG rdi
+ +# define ALGN_PTR_REG rcx
+ # endif
+
+ # ifndef VZEROUPPER
+ # define VZEROUPPER vzeroupper
+ # endif
+
+ +# ifndef SECTION
+ +# define SECTION(p) p##.avx
+ +# endif
+ +
+ # define VEC_SIZE 32
+ +# define PAGE_SIZE 4096
+ +# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
+
+ - .section .text.avx,"ax",@progbits
+ + .section SECTION(.text),"ax",@progbits
+ ENTRY (MEMCHR)
+ # ifndef USE_AS_RAWMEMCHR
+ /* Check for zero length. */
+ +# ifdef __ILP32__
+ + /* Clear upper bits. */
+ + and %RDX_LP, %RDX_LP
+ +# else
+ test %RDX_LP, %RDX_LP
+ +# endif
+ jz L(null)
+ # endif
+ - movl %edi, %ecx
+ - /* Broadcast CHAR to YMM0. */
+ +	/* Broadcast CHAR to YMM0.  */
+ vmovd %esi, %xmm0
+ -# ifdef USE_AS_WMEMCHR
+ - shl $2, %RDX_LP
+ - vpbroadcastd %xmm0, %ymm0
+ -# else
+ -# ifdef __ILP32__
+ - /* Clear the upper 32 bits. */
+ - movl %edx, %edx
+ -# endif
+ - vpbroadcastb %xmm0, %ymm0
+ -# endif
+ + VPBROADCAST %xmm0, %ymm0
+ /* Check if we may cross page boundary with one vector load. */
+ - andl $(2 * VEC_SIZE - 1), %ecx
+ - cmpl $VEC_SIZE, %ecx
+ - ja L(cros_page_boundary)
+ + movl %edi, %eax
+ + andl $(PAGE_SIZE - 1), %eax
+ + cmpl $(PAGE_SIZE - VEC_SIZE), %eax
+ + ja L(cross_page_boundary)
+
+ /* Check the first VEC_SIZE bytes. */
+ - VPCMPEQ (%rdi), %ymm0, %ymm1
+ + VPCMPEQ (%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ - testl %eax, %eax
+ -
+ # ifndef USE_AS_RAWMEMCHR
+ - jnz L(first_vec_x0_check)
+ - /* Adjust length and check the end of data. */
+ - subq $VEC_SIZE, %rdx
+ - jbe L(zero)
+ -# else
+ - jnz L(first_vec_x0)
+ + /* If length < CHAR_PER_VEC handle special. */
+ + cmpq $CHAR_PER_VEC, %rdx
+ + jbe L(first_vec_x0)
+ # endif
+ -
+ - /* Align data for aligned loads in the loop. */
+ - addq $VEC_SIZE, %rdi
+ - andl $(VEC_SIZE - 1), %ecx
+ - andq $-VEC_SIZE, %rdi
+ + testl %eax, %eax
+ + jz L(aligned_more)
+ + tzcntl %eax, %eax
+ + addq %rdi, %rax
+ + VZEROUPPER_RETURN
+
+ # ifndef USE_AS_RAWMEMCHR
+ - /* Adjust length. */
+ - addq %rcx, %rdx
+ + .p2align 5
+ +L(first_vec_x0):
+ + /* Check if first match was before length. */
+ + tzcntl %eax, %eax
+ +# ifdef USE_AS_WMEMCHR
+ + /* NB: Multiply length by 4 to get byte count. */
+ + sall $2, %edx
+ +# endif
+ + xorl %ecx, %ecx
+ + cmpl %eax, %edx
+ + leaq (%rdi, %rax), %rax
+ + cmovle %rcx, %rax
+ + VZEROUPPER_RETURN
+
+ - subq $(VEC_SIZE * 4), %rdx
+ - jbe L(last_4x_vec_or_less)
+ +L(null):
+ + xorl %eax, %eax
+ + ret
+ # endif
+ - jmp L(more_4x_vec)
+ -
+ .p2align 4
+ -L(cros_page_boundary):
+ - andl $(VEC_SIZE - 1), %ecx
+ - andq $-VEC_SIZE, %rdi
+ - VPCMPEQ (%rdi), %ymm0, %ymm1
+ +L(cross_page_boundary):
+ + /* Save pointer before aligning as its original value is
+ +	   necessary for computing the return address if a byte is found,
+ +	   or for adjusting the length if it is not and this is memchr.  */
+ + movq %rdi, %rcx
+ + /* Align data to VEC_SIZE - 1. ALGN_PTR_REG is rcx for memchr
+ + and rdi for rawmemchr. */
+ + orq $(VEC_SIZE - 1), %ALGN_PTR_REG
+ + VPCMPEQ -(VEC_SIZE - 1)(%ALGN_PTR_REG), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ +# ifndef USE_AS_RAWMEMCHR
+ + /* Calculate length until end of page (length checked for a
+ + match). */
+ + leaq 1(%ALGN_PTR_REG), %rsi
+ + subq %RRAW_PTR_REG, %rsi
+ +# ifdef USE_AS_WMEMCHR
+ + /* NB: Divide bytes by 4 to get wchar_t count. */
+ + shrl $2, %esi
+ +# endif
+ +# endif
+ /* Remove the leading bytes. */
+ - sarl %cl, %eax
+ - testl %eax, %eax
+ - jz L(aligned_more)
+ - tzcntl %eax, %eax
+ + sarxl %ERAW_PTR_REG, %eax, %eax
+ # ifndef USE_AS_RAWMEMCHR
+ /* Check the end of data. */
+ - cmpq %rax, %rdx
+ - jbe L(zero)
+ + cmpq %rsi, %rdx
+ + jbe L(first_vec_x0)
+ # endif
+ + testl %eax, %eax
+ + jz L(cross_page_continue)
+ + tzcntl %eax, %eax
+ + addq %RRAW_PTR_REG, %rax
+ +L(return_vzeroupper):
+ + ZERO_UPPER_VEC_REGISTERS_RETURN
+ +
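The L(cross_page_boundary) path above never reads past the page containing the start pointer: it rounds the pointer down to a 32-byte boundary, compares the whole aligned vector, and then shifts the match mask right by the misalignment so hits before the real start are discarded. A scalar C model of the idea (illustration only, with a made-up helper name; the in-bounds guarantee comes from staying inside one aligned block, which portable C cannot express for arbitrary buffers):

    #include <stddef.h>
    #include <stdint.h>

    #define VEC_SIZE 32

    /* Model of the first, possibly page-crossing load: the loop stands in
       for VPCMPEQ + vpmovmskb on the aligned block containing S.  */
    static const char *
    cross_page_model (const char *s, int c, size_t n)
    {
      const unsigned char *block =
        (const unsigned char *) ((uintptr_t) s & -(uintptr_t) VEC_SIZE);
      uint32_t mask = 0;
      for (unsigned int i = 0; i < VEC_SIZE; i++)
        if (block[i] == (unsigned char) c)
          mask |= (uint32_t) 1 << i;
      mask >>= (uintptr_t) s % VEC_SIZE;   /* drop matches before S */
      if (mask == 0)
        return NULL;                       /* continue in the aligned main loop */
      size_t off = (size_t) __builtin_ctz (mask);
      return off < n ? s + off : NULL;     /* memchr also honours the length */
    }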
+ + .p2align 4
+ +L(first_vec_x1):
+ + tzcntl %eax, %eax
+ + incq %rdi
+ addq %rdi, %rax
+ - addq %rcx, %rax
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ -L(aligned_more):
+ -# ifndef USE_AS_RAWMEMCHR
+ - /* Calculate "rdx + rcx - VEC_SIZE" with "rdx - (VEC_SIZE - rcx)"
+ - instead of "(rdx + rcx) - VEC_SIZE" to void possible addition
+ - overflow. */
+ - negq %rcx
+ - addq $VEC_SIZE, %rcx
+ +L(first_vec_x2):
+ + tzcntl %eax, %eax
+ + addq $(VEC_SIZE + 1), %rdi
+ + addq %rdi, %rax
+ + VZEROUPPER_RETURN
+
+ - /* Check the end of data. */
+ - subq %rcx, %rdx
+ - jbe L(zero)
+ -# endif
+ + .p2align 4
+ +L(first_vec_x3):
+ + tzcntl %eax, %eax
+ + addq $(VEC_SIZE * 2 + 1), %rdi
+ + addq %rdi, %rax
+ + VZEROUPPER_RETURN
+
+ - addq $VEC_SIZE, %rdi
+
+ -# ifndef USE_AS_RAWMEMCHR
+ - subq $(VEC_SIZE * 4), %rdx
+ - jbe L(last_4x_vec_or_less)
+ -# endif
+ + .p2align 4
+ +L(first_vec_x4):
+ + tzcntl %eax, %eax
+ + addq $(VEC_SIZE * 3 + 1), %rdi
+ + addq %rdi, %rax
+ + VZEROUPPER_RETURN
+
+ -L(more_4x_vec):
+ + .p2align 4
+ +L(aligned_more):
+ /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time
+ since data is only aligned to VEC_SIZE. */
+ - VPCMPEQ (%rdi), %ymm0, %ymm1
+ - vpmovmskb %ymm1, %eax
+ - testl %eax, %eax
+ - jnz L(first_vec_x0)
+
+ - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
+ +# ifndef USE_AS_RAWMEMCHR
+ +L(cross_page_continue):
+ + /* Align data to VEC_SIZE - 1. */
+ + xorl %ecx, %ecx
+ + subl %edi, %ecx
+ + orq $(VEC_SIZE - 1), %rdi
+ + /* esi is for adjusting length to see if near the end. */
+ + leal (VEC_SIZE * 4 + 1)(%rdi, %rcx), %esi
+ +# ifdef USE_AS_WMEMCHR
+ + /* NB: Divide bytes by 4 to get the wchar_t count. */
+ + sarl $2, %esi
+ +# endif
+ +# else
+ + orq $(VEC_SIZE - 1), %rdi
+ +L(cross_page_continue):
+ +# endif
+ + /* Load first VEC regardless. */
+ + VPCMPEQ 1(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ +# ifndef USE_AS_RAWMEMCHR
+ + /* Adjust length. If near end handle specially. */
+ + subq %rsi, %rdx
+ + jbe L(last_4x_vec_or_less)
+ +# endif
+ testl %eax, %eax
+ jnz L(first_vec_x1)
+
+ - VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
+ + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x2)
+
+ - VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
+ + VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x3)
+
+ - addq $(VEC_SIZE * 4), %rdi
+ -
+ -# ifndef USE_AS_RAWMEMCHR
+ - subq $(VEC_SIZE * 4), %rdx
+ - jbe L(last_4x_vec_or_less)
+ -# endif
+ -
+ - /* Align data to 4 * VEC_SIZE. */
+ - movq %rdi, %rcx
+ - andl $(4 * VEC_SIZE - 1), %ecx
+ - andq $-(4 * VEC_SIZE), %rdi
+ + VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1
+ + vpmovmskb %ymm1, %eax
+ + testl %eax, %eax
+ + jnz L(first_vec_x4)
+
+ # ifndef USE_AS_RAWMEMCHR
+ - /* Adjust length. */
+ + /* Check if at last VEC_SIZE * 4 length. */
+ + subq $(CHAR_PER_VEC * 4), %rdx
+ + jbe L(last_4x_vec_or_less_cmpeq)
+ + /* Align data to VEC_SIZE * 4 - 1 for the loop and readjust
+ + length. */
+ + incq %rdi
+ + movl %edi, %ecx
+ + orq $(VEC_SIZE * 4 - 1), %rdi
+ + andl $(VEC_SIZE * 4 - 1), %ecx
+ +# ifdef USE_AS_WMEMCHR
+ + /* NB: Divide bytes by 4 to get the wchar_t count. */
+ + sarl $2, %ecx
+ +# endif
+ addq %rcx, %rdx
+ +# else
+ + /* Align data to VEC_SIZE * 4 - 1 for loop. */
+ + incq %rdi
+ + orq $(VEC_SIZE * 4 - 1), %rdi
+ # endif
+
+ + /* Compare 4 * VEC at a time forward. */
+ .p2align 4
+ L(loop_4x_vec):
+ - /* Compare 4 * VEC at a time forward. */
+ - VPCMPEQ (%rdi), %ymm0, %ymm1
+ - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm2
+ - VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm3
+ - VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm4
+ -
+ + VPCMPEQ 1(%rdi), %ymm0, %ymm1
+ + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm2
+ + VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm3
+ + VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm4
+ vpor %ymm1, %ymm2, %ymm5
+ vpor %ymm3, %ymm4, %ymm6
+ vpor %ymm5, %ymm6, %ymm5
+
+ - vpmovmskb %ymm5, %eax
+ - testl %eax, %eax
+ - jnz L(4x_vec_end)
+ -
+ - addq $(VEC_SIZE * 4), %rdi
+ -
+ + vpmovmskb %ymm5, %ecx
+ # ifdef USE_AS_RAWMEMCHR
+ - jmp L(loop_4x_vec)
+ + subq $-(VEC_SIZE * 4), %rdi
+ + testl %ecx, %ecx
+ + jz L(loop_4x_vec)
+ # else
+ - subq $(VEC_SIZE * 4), %rdx
+ - ja L(loop_4x_vec)
+ + testl %ecx, %ecx
+ + jnz L(loop_4x_vec_end)
+
+ -L(last_4x_vec_or_less):
+ - /* Less than 4 * VEC and aligned to VEC_SIZE. */
+ - addl $(VEC_SIZE * 2), %edx
+ - jle L(last_2x_vec)
+ + subq $-(VEC_SIZE * 4), %rdi
+
+ - VPCMPEQ (%rdi), %ymm0, %ymm1
+ - vpmovmskb %ymm1, %eax
+ - testl %eax, %eax
+ - jnz L(first_vec_x0)
+ + subq $(CHAR_PER_VEC * 4), %rdx
+ + ja L(loop_4x_vec)
+
+ - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
+ +	/* Fall through into the case where fewer than 4 vectors of
+ +	   length remain.  */
+ + VPCMPEQ (VEC_SIZE * 0 + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ + .p2align 4
+ +L(last_4x_vec_or_less):
+ +# ifdef USE_AS_WMEMCHR
+ + /* NB: Multiply length by 4 to get byte count. */
+ + sall $2, %edx
+ +# endif
+ + /* Check if first VEC contained match. */
+ testl %eax, %eax
+ - jnz L(first_vec_x1)
+ + jnz L(first_vec_x1_check)
+
+ - VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
+ - vpmovmskb %ymm1, %eax
+ - testl %eax, %eax
+ + /* If remaining length > VEC_SIZE * 2. */
+ + addl $(VEC_SIZE * 2), %edx
+ + jg L(last_4x_vec)
+
+ - jnz L(first_vec_x2_check)
+ - subl $VEC_SIZE, %edx
+ - jle L(zero)
+ +L(last_2x_vec):
+ + /* If remaining length < VEC_SIZE. */
+ + addl $VEC_SIZE, %edx
+ + jle L(zero_end)
+
+ - VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
+ + /* Check VEC2 and compare any match with remaining length. */
+ + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ - testl %eax, %eax
+ -
+ - jnz L(first_vec_x3_check)
+ - xorl %eax, %eax
+ - VZEROUPPER
+ - ret
+ + tzcntl %eax, %eax
+ + cmpl %eax, %edx
+ + jbe L(set_zero_end)
+ + addq $(VEC_SIZE + 1), %rdi
+ + addq %rdi, %rax
+ +L(zero_end):
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ -L(last_2x_vec):
+ - addl $(VEC_SIZE * 2), %edx
+ - VPCMPEQ (%rdi), %ymm0, %ymm1
+ +L(loop_4x_vec_end):
+ +# endif
+ +	/* rawmemchr will fall through into this if a match was found
+ +	   in the loop.  */
+ +
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ + jnz L(last_vec_x1_return)
+
+ - jnz L(first_vec_x0_check)
+ - subl $VEC_SIZE, %edx
+ - jle L(zero)
+ -
+ - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
+ - vpmovmskb %ymm1, %eax
+ + vpmovmskb %ymm2, %eax
+ testl %eax, %eax
+ - jnz L(first_vec_x1_check)
+ - xorl %eax, %eax
+ - VZEROUPPER
+ - ret
+ + jnz L(last_vec_x2_return)
+
+ - .p2align 4
+ -L(first_vec_x0_check):
+ - tzcntl %eax, %eax
+ - /* Check the end of data. */
+ - cmpq %rax, %rdx
+ - jbe L(zero)
+ + vpmovmskb %ymm3, %eax
+ + /* Combine VEC3 matches (eax) with VEC4 matches (ecx). */
+ + salq $32, %rcx
+ + orq %rcx, %rax
+ + tzcntq %rax, %rax
+ +# ifdef USE_AS_RAWMEMCHR
+ + subq $(VEC_SIZE * 2 - 1), %rdi
+ +# else
+ + subq $-(VEC_SIZE * 2 + 1), %rdi
+ +# endif
+ addq %rdi, %rax
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+ +# ifndef USE_AS_RAWMEMCHR
+
+ .p2align 4
+ L(first_vec_x1_check):
+ tzcntl %eax, %eax
+ - /* Check the end of data. */
+ - cmpq %rax, %rdx
+ - jbe L(zero)
+ - addq $VEC_SIZE, %rax
+ + /* Adjust length. */
+ + subl $-(VEC_SIZE * 4), %edx
+ + /* Check if match within remaining length. */
+ + cmpl %eax, %edx
+ + jbe L(set_zero_end)
+ + incq %rdi
+ addq %rdi, %rax
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+ + .p2align 4
+ +L(set_zero_end):
+ + xorl %eax, %eax
+ + VZEROUPPER_RETURN
+ +# endif
+
+ .p2align 4
+ -L(first_vec_x2_check):
+ +L(last_vec_x1_return):
+ tzcntl %eax, %eax
+ - /* Check the end of data. */
+ - cmpq %rax, %rdx
+ - jbe L(zero)
+ - addq $(VEC_SIZE * 2), %rax
+ +# ifdef USE_AS_RAWMEMCHR
+ + subq $(VEC_SIZE * 4 - 1), %rdi
+ +# else
+ + incq %rdi
+ +# endif
+ addq %rdi, %rax
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ -L(first_vec_x3_check):
+ +L(last_vec_x2_return):
+ tzcntl %eax, %eax
+ - /* Check the end of data. */
+ - cmpq %rax, %rdx
+ - jbe L(zero)
+ - addq $(VEC_SIZE * 3), %rax
+ +# ifdef USE_AS_RAWMEMCHR
+ + subq $(VEC_SIZE * 3 - 1), %rdi
+ +# else
+ + subq $-(VEC_SIZE + 1), %rdi
+ +# endif
+ addq %rdi, %rax
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ +# ifndef USE_AS_RAWMEMCHR
+ .p2align 4
+ -L(zero):
+ - VZEROUPPER
+ -L(null):
+ - xorl %eax, %eax
+ - ret
+ -# endif
+ +L(last_4x_vec_or_less_cmpeq):
+ + VPCMPEQ (VEC_SIZE * 4 + 1)(%rdi), %ymm0, %ymm1
+ + vpmovmskb %ymm1, %eax
+ +# ifdef USE_AS_WMEMCHR
+ + /* NB: Multiply length by 4 to get byte count. */
+ + sall $2, %edx
+ +# endif
+ + subq $-(VEC_SIZE * 4), %rdi
+ + /* Check first VEC regardless. */
+ + testl %eax, %eax
+ + jnz L(first_vec_x1_check)
+
+ + /* If remaining length <= CHAR_PER_VEC * 2. */
+ + addl $(VEC_SIZE * 2), %edx
+ + jle L(last_2x_vec)
+ .p2align 4
+ -L(first_vec_x0):
+ - tzcntl %eax, %eax
+ - addq %rdi, %rax
+ - VZEROUPPER
+ - ret
+ +L(last_4x_vec):
+ + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
+ + vpmovmskb %ymm1, %eax
+ + testl %eax, %eax
+ + jnz L(last_vec_x2_return)
+
+ - .p2align 4
+ -L(first_vec_x1):
+ - tzcntl %eax, %eax
+ - addq $VEC_SIZE, %rax
+ - addq %rdi, %rax
+ - VZEROUPPER
+ - ret
+ + VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1
+ + vpmovmskb %ymm1, %eax
+
+ - .p2align 4
+ -L(first_vec_x2):
+ + /* Create mask for possible matches within remaining length. */
+ + movq $-1, %rcx
+ + bzhiq %rdx, %rcx, %rcx
+ +
+ + /* Test matches in data against length match. */
+ + andl %ecx, %eax
+ + jnz L(last_vec_x3)
+ +
+ +	/* If remaining length <= VEC_SIZE * 3 (note this is after the
+ +	   remaining length was found to be > VEC_SIZE * 2).  */
+ + subl $VEC_SIZE, %edx
+ + jbe L(zero_end2)
+ +
+ + VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1
+ + vpmovmskb %ymm1, %eax
+ + /* Shift remaining length mask for last VEC. */
+ + shrq $32, %rcx
+ + andl %ecx, %eax
+ + jz L(zero_end2)
+ tzcntl %eax, %eax
+ - addq $(VEC_SIZE * 2), %rax
+ + addq $(VEC_SIZE * 3 + 1), %rdi
+ addq %rdi, %rax
+ - VZEROUPPER
+ - ret
+ +L(zero_end2):
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ -L(4x_vec_end):
+ - vpmovmskb %ymm1, %eax
+ - testl %eax, %eax
+ - jnz L(first_vec_x0)
+ - vpmovmskb %ymm2, %eax
+ - testl %eax, %eax
+ - jnz L(first_vec_x1)
+ - vpmovmskb %ymm3, %eax
+ - testl %eax, %eax
+ - jnz L(first_vec_x2)
+ - vpmovmskb %ymm4, %eax
+ - testl %eax, %eax
+ -L(first_vec_x3):
+ +L(last_vec_x3):
+ tzcntl %eax, %eax
+ - addq $(VEC_SIZE * 3), %rax
+ + subq $-(VEC_SIZE * 2 + 1), %rdi
+ addq %rdi, %rax
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+ +# endif
+
+ END (MEMCHR)
+ #endif
+ diff --git a/sysdeps/x86_64/multiarch/memchr-evex.S b/sysdeps/x86_64/multiarch/memchr-evex.S
+ new file mode 100644
+ index 0000000000..f3fdad4fda
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/memchr-evex.S
+ @@ -0,0 +1,478 @@
+ +/* memchr/wmemchr optimized with 256-bit EVEX instructions.
+ + Copyright (C) 2021 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#if IS_IN (libc)
+ +
+ +# include <sysdep.h>
+ +
+ +# ifndef MEMCHR
+ +# define MEMCHR __memchr_evex
+ +# endif
+ +
+ +# ifdef USE_AS_WMEMCHR
+ +# define VPBROADCAST vpbroadcastd
+ +# define VPMINU vpminud
+ +# define VPCMP vpcmpd
+ +# define VPCMPEQ vpcmpeqd
+ +# define CHAR_SIZE 4
+ +# else
+ +# define VPBROADCAST vpbroadcastb
+ +# define VPMINU vpminub
+ +# define VPCMP vpcmpb
+ +# define VPCMPEQ vpcmpeqb
+ +# define CHAR_SIZE 1
+ +# endif
+ +
+ +# ifdef USE_AS_RAWMEMCHR
+ +# define RAW_PTR_REG rcx
+ +# define ALGN_PTR_REG rdi
+ +# else
+ +# define RAW_PTR_REG rdi
+ +# define ALGN_PTR_REG rcx
+ +# endif
+ +
+ +# define XMMZERO xmm23
+ +# define YMMZERO ymm23
+ +# define XMMMATCH xmm16
+ +# define YMMMATCH ymm16
+ +# define YMM1 ymm17
+ +# define YMM2 ymm18
+ +# define YMM3 ymm19
+ +# define YMM4 ymm20
+ +# define YMM5 ymm21
+ +# define YMM6 ymm22
+ +
+ +# define VEC_SIZE 32
+ +# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
+ +# define PAGE_SIZE 4096
+ +
+ + .section .text.evex,"ax",@progbits
+ +ENTRY (MEMCHR)
+ +# ifndef USE_AS_RAWMEMCHR
+ + /* Check for zero length. */
+ + test %RDX_LP, %RDX_LP
+ + jz L(zero)
+ +
+ +# ifdef __ILP32__
+ + /* Clear the upper 32 bits. */
+ + movl %edx, %edx
+ +# endif
+ +# endif
+ + /* Broadcast CHAR to YMMMATCH. */
+ + VPBROADCAST %esi, %YMMMATCH
+ + /* Check if we may cross page boundary with one vector load. */
+ + movl %edi, %eax
+ + andl $(PAGE_SIZE - 1), %eax
+ + cmpl $(PAGE_SIZE - VEC_SIZE), %eax
+ + ja L(cross_page_boundary)
+ +
+ + /* Check the first VEC_SIZE bytes. */
+ + VPCMP $0, (%rdi), %YMMMATCH, %k0
+ + kmovd %k0, %eax
+ +# ifndef USE_AS_RAWMEMCHR
+ + /* If length < CHAR_PER_VEC handle special. */
+ + cmpq $CHAR_PER_VEC, %rdx
+ + jbe L(first_vec_x0)
+ +# endif
+ + testl %eax, %eax
+ + jz L(aligned_more)
+ + tzcntl %eax, %eax
+ +# ifdef USE_AS_WMEMCHR
+ + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
+ + leaq (%rdi, %rax, CHAR_SIZE), %rax
+ +# else
+ + addq %rdi, %rax
+ +# endif
+ + ret
+ +
+ +# ifndef USE_AS_RAWMEMCHR
+ +L(zero):
+ + xorl %eax, %eax
+ + ret
+ +
+ + .p2align 5
+ +L(first_vec_x0):
+ + /* Check if first match was before length. */
+ + tzcntl %eax, %eax
+ + xorl %ecx, %ecx
+ + cmpl %eax, %edx
+ + leaq (%rdi, %rax, CHAR_SIZE), %rax
+ + cmovle %rcx, %rax
+ + ret
+ +# else
+ + /* NB: first_vec_x0 is 17 bytes which will leave
+ + cross_page_boundary (which is relatively cold) close enough
+ + to ideal alignment. So only realign L(cross_page_boundary) if
+ + rawmemchr. */
+ + .p2align 4
+ +# endif
+ +L(cross_page_boundary):
+ + /* Save pointer before aligning as its original value is
+ +	   necessary for computing the return address if a byte is found,
+ +	   or for adjusting the length if it is not and this is memchr.  */
+ + movq %rdi, %rcx
+ + /* Align data to VEC_SIZE. ALGN_PTR_REG is rcx for memchr and rdi
+ + for rawmemchr. */
+ + andq $-VEC_SIZE, %ALGN_PTR_REG
+ + VPCMP $0, (%ALGN_PTR_REG), %YMMMATCH, %k0
+ + kmovd %k0, %r8d
+ +# ifdef USE_AS_WMEMCHR
+ +	/* NB: Divide shift count by 4 since each bit in K0 represents 4
+ +	   bytes.  */
+ + sarl $2, %eax
+ +# endif
+ +# ifndef USE_AS_RAWMEMCHR
+ + movl $(PAGE_SIZE / CHAR_SIZE), %esi
+ + subl %eax, %esi
+ +# endif
+ +# ifdef USE_AS_WMEMCHR
+ + andl $(CHAR_PER_VEC - 1), %eax
+ +# endif
+ + /* Remove the leading bytes. */
+ + sarxl %eax, %r8d, %eax
+ +# ifndef USE_AS_RAWMEMCHR
+ + /* Check the end of data. */
+ + cmpq %rsi, %rdx
+ + jbe L(first_vec_x0)
+ +# endif
+ + testl %eax, %eax
+ + jz L(cross_page_continue)
+ + tzcntl %eax, %eax
+ +# ifdef USE_AS_WMEMCHR
+ + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
+ + leaq (%RAW_PTR_REG, %rax, CHAR_SIZE), %rax
+ +# else
+ + addq %RAW_PTR_REG, %rax
+ +# endif
+ + ret
+ +
+ + .p2align 4
+ +L(first_vec_x1):
+ + tzcntl %eax, %eax
+ + leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax
+ + ret
+ +
+ + .p2align 4
+ +L(first_vec_x2):
+ + tzcntl %eax, %eax
+ + leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
+ + ret
+ +
+ + .p2align 4
+ +L(first_vec_x3):
+ + tzcntl %eax, %eax
+ + leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
+ + ret
+ +
+ + .p2align 4
+ +L(first_vec_x4):
+ + tzcntl %eax, %eax
+ + leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax
+ + ret
+ +
+ + .p2align 5
+ +L(aligned_more):
+ + /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time
+ + since data is only aligned to VEC_SIZE. */
+ +
+ +# ifndef USE_AS_RAWMEMCHR
+ + /* Align data to VEC_SIZE. */
+ +L(cross_page_continue):
+ + xorl %ecx, %ecx
+ + subl %edi, %ecx
+ + andq $-VEC_SIZE, %rdi
+ + /* esi is for adjusting length to see if near the end. */
+ + leal (VEC_SIZE * 5)(%rdi, %rcx), %esi
+ +# ifdef USE_AS_WMEMCHR
+ + /* NB: Divide bytes by 4 to get the wchar_t count. */
+ + sarl $2, %esi
+ +# endif
+ +# else
+ + andq $-VEC_SIZE, %rdi
+ +L(cross_page_continue):
+ +# endif
+ + /* Load first VEC regardless. */
+ + VPCMP $0, (VEC_SIZE)(%rdi), %YMMMATCH, %k0
+ + kmovd %k0, %eax
+ +# ifndef USE_AS_RAWMEMCHR
+ + /* Adjust length. If near end handle specially. */
+ + subq %rsi, %rdx
+ + jbe L(last_4x_vec_or_less)
+ +# endif
+ + testl %eax, %eax
+ + jnz L(first_vec_x1)
+ +
+ + VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0
+ + kmovd %k0, %eax
+ + testl %eax, %eax
+ + jnz L(first_vec_x2)
+ +
+ + VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k0
+ + kmovd %k0, %eax
+ + testl %eax, %eax
+ + jnz L(first_vec_x3)
+ +
+ + VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0
+ + kmovd %k0, %eax
+ + testl %eax, %eax
+ + jnz L(first_vec_x4)
+ +
+ +
+ +# ifndef USE_AS_RAWMEMCHR
+ + /* Check if at last CHAR_PER_VEC * 4 length. */
+ + subq $(CHAR_PER_VEC * 4), %rdx
+ + jbe L(last_4x_vec_or_less_cmpeq)
+ + addq $VEC_SIZE, %rdi
+ +
+ + /* Align data to VEC_SIZE * 4 for the loop and readjust length.
+ + */
+ +# ifdef USE_AS_WMEMCHR
+ + movl %edi, %ecx
+ + andq $-(4 * VEC_SIZE), %rdi
+ + andl $(VEC_SIZE * 4 - 1), %ecx
+ + /* NB: Divide bytes by 4 to get the wchar_t count. */
+ + sarl $2, %ecx
+ + addq %rcx, %rdx
+ +# else
+ + addq %rdi, %rdx
+ + andq $-(4 * VEC_SIZE), %rdi
+ + subq %rdi, %rdx
+ +# endif
+ +# else
+ + addq $VEC_SIZE, %rdi
+ + andq $-(4 * VEC_SIZE), %rdi
+ +# endif
+ +
+ + vpxorq %XMMZERO, %XMMZERO, %XMMZERO
+ +
+ + /* Compare 4 * VEC at a time forward. */
+ + .p2align 4
+ +L(loop_4x_vec):
+ + /* It would be possible to save some instructions using 4x VPCMP
+ +	   but a bottleneck on port 5 makes it not worth it.  */
+ + VPCMP $4, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k1
+ +	/* xor will set bytes that match esi to zero.  */
+ + vpxorq (VEC_SIZE * 5)(%rdi), %YMMMATCH, %YMM2
+ + vpxorq (VEC_SIZE * 6)(%rdi), %YMMMATCH, %YMM3
+ + VPCMP $0, (VEC_SIZE * 7)(%rdi), %YMMMATCH, %k3
+ + /* Reduce VEC2 / VEC3 with min and VEC1 with zero mask. */
+ + VPMINU %YMM2, %YMM3, %YMM3{%k1}{z}
+ + VPCMP $0, %YMM3, %YMMZERO, %k2
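+ +	/* A zero element in YMM3 now marks a match in VEC1, VEC2 or VEC3:
+ +	   the xors above are zero where VEC2/VEC3 match, and the {%k1}{z}
+ +	   zero-mask clears elements where VEC1 matched.  So k2 covers
+ +	   VEC1-3 and k3 covers VEC4.  */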
+ +# ifdef USE_AS_RAWMEMCHR
+ + subq $-(VEC_SIZE * 4), %rdi
+ + kortestd %k2, %k3
+ + jz L(loop_4x_vec)
+ +# else
+ + kortestd %k2, %k3
+ + jnz L(loop_4x_vec_end)
+ +
+ + subq $-(VEC_SIZE * 4), %rdi
+ +
+ + subq $(CHAR_PER_VEC * 4), %rdx
+ + ja L(loop_4x_vec)
+ +
+ +	/* Fall through into the case where fewer than 4 vectors of
+ +	   length remain.  */
+ + VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0
+ + kmovd %k0, %eax
+ + addq $(VEC_SIZE * 3), %rdi
+ + .p2align 4
+ +L(last_4x_vec_or_less):
+ + /* Check if first VEC contained match. */
+ + testl %eax, %eax
+ + jnz L(first_vec_x1_check)
+ +
+ + /* If remaining length > CHAR_PER_VEC * 2. */
+ + addl $(CHAR_PER_VEC * 2), %edx
+ + jg L(last_4x_vec)
+ +
+ +L(last_2x_vec):
+ + /* If remaining length < CHAR_PER_VEC. */
+ + addl $CHAR_PER_VEC, %edx
+ + jle L(zero_end)
+ +
+ + /* Check VEC2 and compare any match with remaining length. */
+ + VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0
+ + kmovd %k0, %eax
+ + tzcntl %eax, %eax
+ + cmpl %eax, %edx
+ + jbe L(set_zero_end)
+ + leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
+ +L(zero_end):
+ + ret
+ +
+ +
+ + .p2align 4
+ +L(first_vec_x1_check):
+ + tzcntl %eax, %eax
+ + /* Adjust length. */
+ + subl $-(CHAR_PER_VEC * 4), %edx
+ + /* Check if match within remaining length. */
+ + cmpl %eax, %edx
+ + jbe L(set_zero_end)
+ + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
+ + leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax
+ + ret
+ +L(set_zero_end):
+ + xorl %eax, %eax
+ + ret
+ +
+ + .p2align 4
+ +L(loop_4x_vec_end):
+ +# endif
+ +	/* rawmemchr will fall through into this if a match was found
+ +	   in the loop.  */
+ +
+ +	/* k1 holds the NOT of the matches with VEC1 (it was set with a
+ +	   not-equal compare).  */
+ + kmovd %k1, %eax
+ +# ifdef USE_AS_WMEMCHR
+ + subl $((1 << CHAR_PER_VEC) - 1), %eax
+ +# else
+ + incl %eax
+ +# endif
+ + jnz L(last_vec_x1_return)
+ +
+ + VPCMP $0, %YMM2, %YMMZERO, %k0
+ + kmovd %k0, %eax
+ + testl %eax, %eax
+ + jnz L(last_vec_x2_return)
+ +
+ + kmovd %k2, %eax
+ + testl %eax, %eax
+ + jnz L(last_vec_x3_return)
+ +
+ + kmovd %k3, %eax
+ + tzcntl %eax, %eax
+ +# ifdef USE_AS_RAWMEMCHR
+ + leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
+ +# else
+ + leaq (VEC_SIZE * 7)(%rdi, %rax, CHAR_SIZE), %rax
+ +# endif
+ + ret
+ +
+ + .p2align 4
+ +L(last_vec_x1_return):
+ + tzcntl %eax, %eax
+ +# ifdef USE_AS_RAWMEMCHR
+ +# ifdef USE_AS_WMEMCHR
+ + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
+ + leaq (%rdi, %rax, CHAR_SIZE), %rax
+ +# else
+ + addq %rdi, %rax
+ +# endif
+ +# else
+ + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
+ + leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax
+ +# endif
+ + ret
+ +
+ + .p2align 4
+ +L(last_vec_x2_return):
+ + tzcntl %eax, %eax
+ +# ifdef USE_AS_RAWMEMCHR
+ + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
+ + leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax
+ +# else
+ + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
+ + leaq (VEC_SIZE * 5)(%rdi, %rax, CHAR_SIZE), %rax
+ +# endif
+ + ret
+ +
+ + .p2align 4
+ +L(last_vec_x3_return):
+ + tzcntl %eax, %eax
+ +# ifdef USE_AS_RAWMEMCHR
+ + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
+ + leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
+ +# else
+ + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
+ + leaq (VEC_SIZE * 6)(%rdi, %rax, CHAR_SIZE), %rax
+ +# endif
+ + ret
+ +
+ +
+ +# ifndef USE_AS_RAWMEMCHR
+ +L(last_4x_vec_or_less_cmpeq):
+ + VPCMP $0, (VEC_SIZE * 5)(%rdi), %YMMMATCH, %k0
+ + kmovd %k0, %eax
+ + subq $-(VEC_SIZE * 4), %rdi
+ + /* Check first VEC regardless. */
+ + testl %eax, %eax
+ + jnz L(first_vec_x1_check)
+ +
+ + /* If remaining length <= CHAR_PER_VEC * 2. */
+ + addl $(CHAR_PER_VEC * 2), %edx
+ + jle L(last_2x_vec)
+ +
+ + .p2align 4
+ +L(last_4x_vec):
+ + VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0
+ + kmovd %k0, %eax
+ + testl %eax, %eax
+ + jnz L(last_vec_x2)
+ +
+ +
+ + VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k0
+ + kmovd %k0, %eax
+ + /* Create mask for possible matches within remaining length. */
+ +# ifdef USE_AS_WMEMCHR
+ + movl $((1 << (CHAR_PER_VEC * 2)) - 1), %ecx
+ + bzhil %edx, %ecx, %ecx
+ +# else
+ + movq $-1, %rcx
+ + bzhiq %rdx, %rcx, %rcx
+ +# endif
+ + /* Test matches in data against length match. */
+ + andl %ecx, %eax
+ + jnz L(last_vec_x3)
+ +
+ +	/* If remaining length <= CHAR_PER_VEC * 3 (note this is after the
+ +	   remaining length was found to be > CHAR_PER_VEC * 2).  */
+ + subl $CHAR_PER_VEC, %edx
+ + jbe L(zero_end2)
+ +
+ +
+ + VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0
+ + kmovd %k0, %eax
+ + /* Shift remaining length mask for last VEC. */
+ +# ifdef USE_AS_WMEMCHR
+ + shrl $CHAR_PER_VEC, %ecx
+ +# else
+ + shrq $CHAR_PER_VEC, %rcx
+ +# endif
+ + andl %ecx, %eax
+ + jz L(zero_end2)
+ + tzcntl %eax, %eax
+ + leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax
+ +L(zero_end2):
+ + ret
+ +
+ +L(last_vec_x2):
+ + tzcntl %eax, %eax
+ + leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
+ + ret
+ +
+ + .p2align 4
+ +L(last_vec_x3):
+ + tzcntl %eax, %eax
+ + leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
+ + ret
+ +# endif
+ +
+ +END (MEMCHR)
+ +#endif
+ diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S
+ new file mode 100644
+ index 0000000000..cf4eff5d4a
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S
+ @@ -0,0 +1,12 @@
+ +#ifndef MEMCMP
+ +# define MEMCMP __memcmp_avx2_movbe_rtm
+ +#endif
+ +
+ +#define ZERO_UPPER_VEC_REGISTERS_RETURN \
+ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
+ +
+ +#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
+ +
+ +#define SECTION(p) p##.avx.rtm
+ +
+ +#include "memcmp-avx2-movbe.S"
+ diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
+ index 67fc575b59..87f9478eaf 100644
+ --- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
+ +++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
+ @@ -47,6 +47,10 @@
+ # define VZEROUPPER vzeroupper
+ # endif
+
+ +# ifndef SECTION
+ +# define SECTION(p) p##.avx
+ +# endif
+ +
+ # define VEC_SIZE 32
+ # define VEC_MASK ((1 << VEC_SIZE) - 1)
+
+ @@ -55,7 +59,7 @@
+ memcmp has to use UNSIGNED comparison for elemnts.
+ */
+
+ - .section .text.avx,"ax",@progbits
+ + .section SECTION(.text),"ax",@progbits
+ ENTRY (MEMCMP)
+ # ifdef USE_AS_WMEMCMP
+ shl $2, %RDX_LP
+ @@ -123,8 +127,8 @@ ENTRY (MEMCMP)
+ vptest %ymm0, %ymm5
+ jnc L(4x_vec_end)
+ xorl %eax, %eax
+ - VZEROUPPER
+ - ret
+ +L(return_vzeroupper):
+ + ZERO_UPPER_VEC_REGISTERS_RETURN
+
+ .p2align 4
+ L(last_2x_vec):
+ @@ -144,8 +148,7 @@ L(last_vec):
+ vpmovmskb %ymm2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ L(first_vec):
+ @@ -164,8 +167,7 @@ L(wmemcmp_return):
+ movzbl (%rsi, %rcx), %edx
+ sub %edx, %eax
+ # endif
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ # ifdef USE_AS_WMEMCMP
+ .p2align 4
+ @@ -367,8 +369,7 @@ L(last_4x_vec):
+ vpmovmskb %ymm2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ L(4x_vec_end):
+ @@ -394,8 +395,7 @@ L(4x_vec_end):
+ movzbl (VEC_SIZE * 3)(%rsi, %rcx), %edx
+ sub %edx, %eax
+ # endif
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ L(first_vec_x1):
+ @@ -410,8 +410,7 @@ L(first_vec_x1):
+ movzbl VEC_SIZE(%rsi, %rcx), %edx
+ sub %edx, %eax
+ # endif
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ L(first_vec_x2):
+ @@ -426,7 +425,6 @@ L(first_vec_x2):
+ movzbl (VEC_SIZE * 2)(%rsi, %rcx), %edx
+ sub %edx, %eax
+ # endif
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+ END (MEMCMP)
+ #endif
+ diff --git a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
+ new file mode 100644
+ index 0000000000..9c093972e1
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
+ @@ -0,0 +1,440 @@
+ +/* memcmp/wmemcmp optimized with 256-bit EVEX instructions.
+ + Copyright (C) 2021 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#if IS_IN (libc)
+ +
+ +/* memcmp/wmemcmp is implemented as:
+ + 1. For size from 2 to 7 bytes, load as big endian with movbe and bswap
+ + to avoid branches.
+ + 2. Use overlapping compare to avoid branch.
+ + 3. Use vector compare when size >= 4 bytes for memcmp or size >= 8
+ + bytes for wmemcmp.
+ + 4. If size is 8 * VEC_SIZE or less, unroll the loop.
+ + 5. Compare 4 * VEC_SIZE at a time with the aligned first memory
+ + area.
+ + 6. Use 2 vector compares when size is 2 * VEC_SIZE or less.
+ + 7. Use 4 vector compares when size is 4 * VEC_SIZE or less.
+ + 8. Use 8 vector compares when size is 8 * VEC_SIZE or less. */
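Point 1 of the list above can be made concrete with a small C model. It is a sketch under the assumption that 4 <= n <= 7, with a made-up helper name; the real code uses movbe/bswap and overlapping loads from both ends, as in L(between_4_7) further down:

    #include <stdint.h>
    #include <string.h>

    /* Big-endian 32-bit load; models a movbe load.  Assumes a
       little-endian host, as on x86-64.  */
    static uint32_t
    load32_be (const unsigned char *p)
    {
      uint32_t v;
      memcpy (&v, p, 4);
      return __builtin_bswap32 (v);
    }

    /* memcmp for 4 <= n <= 7 without branching on n: the head and tail
       loads overlap, and packing the head into the high half makes the
       unsigned 64-bit comparison agree with lexicographic byte order.  */
    static int
    cmp_4_to_7 (const void *a, const void *b, size_t n)
    {
      const unsigned char *p = a, *q = b;
      uint64_t x = ((uint64_t) load32_be (p) << 32) | load32_be (p + n - 4);
      uint64_t y = ((uint64_t) load32_be (q) << 32) | load32_be (q + n - 4);
      return x == y ? 0 : (x < y ? -1 : 1);
    }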
+ +
+ +# include <sysdep.h>
+ +
+ +# ifndef MEMCMP
+ +# define MEMCMP __memcmp_evex_movbe
+ +# endif
+ +
+ +# define VMOVU vmovdqu64
+ +
+ +# ifdef USE_AS_WMEMCMP
+ +# define VPCMPEQ vpcmpeqd
+ +# else
+ +# define VPCMPEQ vpcmpeqb
+ +# endif
+ +
+ +# define XMM1 xmm17
+ +# define XMM2 xmm18
+ +# define YMM1 ymm17
+ +# define YMM2 ymm18
+ +# define YMM3 ymm19
+ +# define YMM4 ymm20
+ +# define YMM5 ymm21
+ +# define YMM6 ymm22
+ +
+ +# define VEC_SIZE 32
+ +# ifdef USE_AS_WMEMCMP
+ +# define VEC_MASK 0xff
+ +# define XMM_MASK 0xf
+ +# else
+ +# define VEC_MASK 0xffffffff
+ +# define XMM_MASK 0xffff
+ +# endif
+ +
+ +/* Warning!
+ + wmemcmp has to use SIGNED comparison for elements.
+ +   memcmp has to use UNSIGNED comparison for elements.
+ +*/
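The distinction matters as soon as the high bit of an element is set; a tiny check (illustrative only, not part of the patch) shows both directions:

    #include <assert.h>
    #include <string.h>
    #include <wchar.h>

    int
    main (void)
    {
      unsigned char a[1] = { 0x80 }, b[1] = { 0x01 };
      wchar_t wa[1] = { (wchar_t) 0x80000000 }, wb[1] = { 1 };

      assert (memcmp (a, b, 1) > 0);      /* bytes compare unsigned */
      assert (wmemcmp (wa, wb, 1) < 0);   /* wchar_t compares signed */
      return 0;
    }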
+ +
+ + .section .text.evex,"ax",@progbits
+ +ENTRY (MEMCMP)
+ +# ifdef USE_AS_WMEMCMP
+ + shl $2, %RDX_LP
+ +# elif defined __ILP32__
+ + /* Clear the upper 32 bits. */
+ + movl %edx, %edx
+ +# endif
+ + cmp $VEC_SIZE, %RDX_LP
+ + jb L(less_vec)
+ +
+ + /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */
+ + VMOVU (%rsi), %YMM2
+ + VPCMPEQ (%rdi), %YMM2, %k1
+ + kmovd %k1, %eax
+ + subl $VEC_MASK, %eax
+ + jnz L(first_vec)
+ +
+ + cmpq $(VEC_SIZE * 2), %rdx
+ + jbe L(last_vec)
+ +
+ + /* More than 2 * VEC. */
+ + cmpq $(VEC_SIZE * 8), %rdx
+ + ja L(more_8x_vec)
+ + cmpq $(VEC_SIZE * 4), %rdx
+ + jb L(last_4x_vec)
+ +
+ + /* From 4 * VEC to 8 * VEC, inclusively. */
+ + VMOVU (%rsi), %YMM1
+ + VPCMPEQ (%rdi), %YMM1, %k1
+ +
+ + VMOVU VEC_SIZE(%rsi), %YMM2
+ + VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2
+ +
+ + VMOVU (VEC_SIZE * 2)(%rsi), %YMM3
+ + VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3
+ +
+ + VMOVU (VEC_SIZE * 3)(%rsi), %YMM4
+ + VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4
+ +
+ + kandd %k1, %k2, %k5
+ + kandd %k3, %k4, %k6
+ + kandd %k5, %k6, %k6
+ +
+ + kmovd %k6, %eax
+ + cmpl $VEC_MASK, %eax
+ + jne L(4x_vec_end)
+ +
+ + leaq -(4 * VEC_SIZE)(%rdi, %rdx), %rdi
+ + leaq -(4 * VEC_SIZE)(%rsi, %rdx), %rsi
+ + VMOVU (%rsi), %YMM1
+ + VPCMPEQ (%rdi), %YMM1, %k1
+ +
+ + VMOVU VEC_SIZE(%rsi), %YMM2
+ + VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2
+ + kandd %k1, %k2, %k5
+ +
+ + VMOVU (VEC_SIZE * 2)(%rsi), %YMM3
+ + VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3
+ + kandd %k3, %k5, %k5
+ +
+ + VMOVU (VEC_SIZE * 3)(%rsi), %YMM4
+ + VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4
+ + kandd %k4, %k5, %k5
+ +
+ + kmovd %k5, %eax
+ + cmpl $VEC_MASK, %eax
+ + jne L(4x_vec_end)
+ + xorl %eax, %eax
+ + ret
+ +
+ + .p2align 4
+ +L(last_2x_vec):
+ + /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */
+ + VMOVU (%rsi), %YMM2
+ + VPCMPEQ (%rdi), %YMM2, %k2
+ + kmovd %k2, %eax
+ + subl $VEC_MASK, %eax
+ + jnz L(first_vec)
+ +
+ +L(last_vec):
+ + /* Use overlapping loads to avoid branches. */
+ + leaq -VEC_SIZE(%rdi, %rdx), %rdi
+ + leaq -VEC_SIZE(%rsi, %rdx), %rsi
+ + VMOVU (%rsi), %YMM2
+ + VPCMPEQ (%rdi), %YMM2, %k2
+ + kmovd %k2, %eax
+ + subl $VEC_MASK, %eax
+ + jnz L(first_vec)
+ + ret
+ +
+ + .p2align 4
+ +L(first_vec):
+ + /* A byte or int32 is different within 16 or 32 bytes. */
+ + tzcntl %eax, %ecx
+ +# ifdef USE_AS_WMEMCMP
+ + xorl %eax, %eax
+ + movl (%rdi, %rcx, 4), %edx
+ + cmpl (%rsi, %rcx, 4), %edx
+ +L(wmemcmp_return):
+ + setl %al
+ + negl %eax
+ + orl $1, %eax
+ +# else
+ + movzbl (%rdi, %rcx), %eax
+ + movzbl (%rsi, %rcx), %edx
+ + sub %edx, %eax
+ +# endif
+ + ret
+ +
+ +# ifdef USE_AS_WMEMCMP
+ + .p2align 4
+ +L(4):
+ + xorl %eax, %eax
+ + movl (%rdi), %edx
+ + cmpl (%rsi), %edx
+ + jne L(wmemcmp_return)
+ + ret
+ +# else
+ + .p2align 4
+ +L(between_4_7):
+ + /* Load as big endian with overlapping movbe to avoid branches. */
+ + movbe (%rdi), %eax
+ + movbe (%rsi), %ecx
+ + shlq $32, %rax
+ + shlq $32, %rcx
+ + movbe -4(%rdi, %rdx), %edi
+ + movbe -4(%rsi, %rdx), %esi
+ + orq %rdi, %rax
+ + orq %rsi, %rcx
+ + subq %rcx, %rax
+ + je L(exit)
+ + sbbl %eax, %eax
+ + orl $1, %eax
+ + ret
+ +
+ + .p2align 4
+ +L(exit):
+ + ret
+ +
+ + .p2align 4
+ +L(between_2_3):
+ + /* Load as big endian to avoid branches. */
+ + movzwl (%rdi), %eax
+ + movzwl (%rsi), %ecx
+ + shll $8, %eax
+ + shll $8, %ecx
+ + bswap %eax
+ + bswap %ecx
+ + movb -1(%rdi, %rdx), %al
+ + movb -1(%rsi, %rdx), %cl
+ + /* Subtraction is okay because the upper 8 bits are zero. */
+ + subl %ecx, %eax
+ + ret
+ +
+ + .p2align 4
+ +L(1):
+ + movzbl (%rdi), %eax
+ + movzbl (%rsi), %ecx
+ + subl %ecx, %eax
+ + ret
+ +# endif
+ +
+ + .p2align 4
+ +L(zero):
+ + xorl %eax, %eax
+ + ret
+ +
+ + .p2align 4
+ +L(less_vec):
+ +# ifdef USE_AS_WMEMCMP
+ + /* It can only be 0, 4, 8, 12, 16, 20, 24, 28 bytes. */
+ + cmpb $4, %dl
+ + je L(4)
+ + jb L(zero)
+ +# else
+ + cmpb $1, %dl
+ + je L(1)
+ + jb L(zero)
+ + cmpb $4, %dl
+ + jb L(between_2_3)
+ + cmpb $8, %dl
+ + jb L(between_4_7)
+ +# endif
+ + cmpb $16, %dl
+ + jae L(between_16_31)
+ + /* It is between 8 and 15 bytes. */
+ + vmovq (%rdi), %XMM1
+ + vmovq (%rsi), %XMM2
+ + VPCMPEQ %XMM1, %XMM2, %k2
+ + kmovw %k2, %eax
+ + subl $XMM_MASK, %eax
+ + jnz L(first_vec)
+ + /* Use overlapping loads to avoid branches. */
+ + leaq -8(%rdi, %rdx), %rdi
+ + leaq -8(%rsi, %rdx), %rsi
+ + vmovq (%rdi), %XMM1
+ + vmovq (%rsi), %XMM2
+ + VPCMPEQ %XMM1, %XMM2, %k2
+ + kmovw %k2, %eax
+ + subl $XMM_MASK, %eax
+ + jnz L(first_vec)
+ + ret
+ +
+ + .p2align 4
+ +L(between_16_31):
+ + /* From 16 to 31 bytes. No branch when size == 16. */
+ + VMOVU (%rsi), %XMM2
+ + VPCMPEQ (%rdi), %XMM2, %k2
+ + kmovw %k2, %eax
+ + subl $XMM_MASK, %eax
+ + jnz L(first_vec)
+ +
+ + /* Use overlapping loads to avoid branches. */
+ + leaq -16(%rdi, %rdx), %rdi
+ + leaq -16(%rsi, %rdx), %rsi
+ + VMOVU (%rsi), %XMM2
+ + VPCMPEQ (%rdi), %XMM2, %k2
+ + kmovw %k2, %eax
+ + subl $XMM_MASK, %eax
+ + jnz L(first_vec)
+ + ret
+ +
+ + .p2align 4
+ +L(more_8x_vec):
+ + /* More than 8 * VEC. Check the first VEC. */
+ + VMOVU (%rsi), %YMM2
+ + VPCMPEQ (%rdi), %YMM2, %k2
+ + kmovd %k2, %eax
+ + subl $VEC_MASK, %eax
+ + jnz L(first_vec)
+ +
+ + /* Align the first memory area for aligned loads in the loop.
+ + Compute how much the first memory area is misaligned. */
+ + movq %rdi, %rcx
+ + andl $(VEC_SIZE - 1), %ecx
+ + /* Get the negative of offset for alignment. */
+ + subq $VEC_SIZE, %rcx
+ + /* Adjust the second memory area. */
+ + subq %rcx, %rsi
+ + /* Adjust the first memory area which should be aligned now. */
+ + subq %rcx, %rdi
+ + /* Adjust length. */
+ + addq %rcx, %rdx
+ +
+ +L(loop_4x_vec):
+ + /* Compare 4 * VEC at a time forward. */
+ + VMOVU (%rsi), %YMM1
+ + VPCMPEQ (%rdi), %YMM1, %k1
+ +
+ + VMOVU VEC_SIZE(%rsi), %YMM2
+ + VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2
+ + kandd %k2, %k1, %k5
+ +
+ + VMOVU (VEC_SIZE * 2)(%rsi), %YMM3
+ + VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3
+ + kandd %k3, %k5, %k5
+ +
+ + VMOVU (VEC_SIZE * 3)(%rsi), %YMM4
+ + VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4
+ + kandd %k4, %k5, %k5
+ +
+ + kmovd %k5, %eax
+ + cmpl $VEC_MASK, %eax
+ + jne L(4x_vec_end)
+ +
+ + addq $(VEC_SIZE * 4), %rdi
+ + addq $(VEC_SIZE * 4), %rsi
+ +
+ + subq $(VEC_SIZE * 4), %rdx
+ + cmpq $(VEC_SIZE * 4), %rdx
+ + jae L(loop_4x_vec)
+ +
+ + /* Less than 4 * VEC. */
+ + cmpq $VEC_SIZE, %rdx
+ + jbe L(last_vec)
+ + cmpq $(VEC_SIZE * 2), %rdx
+ + jbe L(last_2x_vec)
+ +
+ +L(last_4x_vec):
+ + /* From 2 * VEC to 4 * VEC. */
+ + VMOVU (%rsi), %YMM2
+ + VPCMPEQ (%rdi), %YMM2, %k2
+ + kmovd %k2, %eax
+ + subl $VEC_MASK, %eax
+ + jnz L(first_vec)
+ +
+ + addq $VEC_SIZE, %rdi
+ + addq $VEC_SIZE, %rsi
+ + VMOVU (%rsi), %YMM2
+ + VPCMPEQ (%rdi), %YMM2, %k2
+ + kmovd %k2, %eax
+ + subl $VEC_MASK, %eax
+ + jnz L(first_vec)
+ +
+ + /* Use overlapping loads to avoid branches. */
+ + leaq -(3 * VEC_SIZE)(%rdi, %rdx), %rdi
+ + leaq -(3 * VEC_SIZE)(%rsi, %rdx), %rsi
+ + VMOVU (%rsi), %YMM2
+ + VPCMPEQ (%rdi), %YMM2, %k2
+ + kmovd %k2, %eax
+ + subl $VEC_MASK, %eax
+ + jnz L(first_vec)
+ +
+ + addq $VEC_SIZE, %rdi
+ + addq $VEC_SIZE, %rsi
+ + VMOVU (%rsi), %YMM2
+ + VPCMPEQ (%rdi), %YMM2, %k2
+ + kmovd %k2, %eax
+ + subl $VEC_MASK, %eax
+ + jnz L(first_vec)
+ + ret
+ +
+ + .p2align 4
+ +L(4x_vec_end):
+ + kmovd %k1, %eax
+ + subl $VEC_MASK, %eax
+ + jnz L(first_vec)
+ + kmovd %k2, %eax
+ + subl $VEC_MASK, %eax
+ + jnz L(first_vec_x1)
+ + kmovd %k3, %eax
+ + subl $VEC_MASK, %eax
+ + jnz L(first_vec_x2)
+ + kmovd %k4, %eax
+ + subl $VEC_MASK, %eax
+ + tzcntl %eax, %ecx
+ +# ifdef USE_AS_WMEMCMP
+ + xorl %eax, %eax
+ + movl (VEC_SIZE * 3)(%rdi, %rcx, 4), %edx
+ + cmpl (VEC_SIZE * 3)(%rsi, %rcx, 4), %edx
+ + jmp L(wmemcmp_return)
+ +# else
+ + movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax
+ + movzbl (VEC_SIZE * 3)(%rsi, %rcx), %edx
+ + sub %edx, %eax
+ +# endif
+ + ret
+ +
+ + .p2align 4
+ +L(first_vec_x1):
+ + tzcntl %eax, %ecx
+ +# ifdef USE_AS_WMEMCMP
+ + xorl %eax, %eax
+ + movl VEC_SIZE(%rdi, %rcx, 4), %edx
+ + cmpl VEC_SIZE(%rsi, %rcx, 4), %edx
+ + jmp L(wmemcmp_return)
+ +# else
+ + movzbl VEC_SIZE(%rdi, %rcx), %eax
+ + movzbl VEC_SIZE(%rsi, %rcx), %edx
+ + sub %edx, %eax
+ +# endif
+ + ret
+ +
+ + .p2align 4
+ +L(first_vec_x2):
+ + tzcntl %eax, %ecx
+ +# ifdef USE_AS_WMEMCMP
+ + xorl %eax, %eax
+ + movl (VEC_SIZE * 2)(%rdi, %rcx, 4), %edx
+ + cmpl (VEC_SIZE * 2)(%rsi, %rcx, 4), %edx
+ + jmp L(wmemcmp_return)
+ +# else
+ + movzbl (VEC_SIZE * 2)(%rdi, %rcx), %eax
+ + movzbl (VEC_SIZE * 2)(%rsi, %rcx), %edx
+ + sub %edx, %eax
+ +# endif
+ + ret
+ +END (MEMCMP)
+ +#endif
+ diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S
+ new file mode 100644
+ index 0000000000..1ec1962e86
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S
+ @@ -0,0 +1,17 @@
+ +#if IS_IN (libc)
+ +# define VEC_SIZE 32
+ +# define VEC(i) ymm##i
+ +# define VMOVNT vmovntdq
+ +# define VMOVU vmovdqu
+ +# define VMOVA vmovdqa
+ +
+ +# define ZERO_UPPER_VEC_REGISTERS_RETURN \
+ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
+ +
+ +# define VZEROUPPER_RETURN jmp L(return)
+ +
+ +# define SECTION(p) p##.avx.rtm
+ +# define MEMMOVE_SYMBOL(p,s) p##_avx_##s##_rtm
+ +
+ +# include "memmove-vec-unaligned-erms.S"
+ +#endif
+ diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
+ index aac1515cf6..7dad1ad74c 100644
+ --- a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
+ +++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
+ @@ -1,11 +1,25 @@
+ #if IS_IN (libc)
+ # define VEC_SIZE 64
+ -# define VEC(i) zmm##i
+ +# define XMM0 xmm16
+ +# define XMM1 xmm17
+ +# define YMM0 ymm16
+ +# define YMM1 ymm17
+ +# define VEC0 zmm16
+ +# define VEC1 zmm17
+ +# define VEC2 zmm18
+ +# define VEC3 zmm19
+ +# define VEC4 zmm20
+ +# define VEC5 zmm21
+ +# define VEC6 zmm22
+ +# define VEC7 zmm23
+ +# define VEC8 zmm24
+ +# define VEC(i) VEC##i
+ # define VMOVNT vmovntdq
+ # define VMOVU vmovdqu64
+ # define VMOVA vmovdqa64
+ +# define VZEROUPPER
+
+ -# define SECTION(p) p##.avx512
+ +# define SECTION(p) p##.evex512
+ # define MEMMOVE_SYMBOL(p,s) p##_avx512_##s
+
+ # include "memmove-vec-unaligned-erms.S"
+ diff --git a/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S
+ new file mode 100644
+ index 0000000000..b879007e89
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S
+ @@ -0,0 +1,26 @@
+ +#if IS_IN (libc)
+ +# define VEC_SIZE 32
+ +# define XMM0 xmm16
+ +# define XMM1 xmm17
+ +# define YMM0 ymm16
+ +# define YMM1 ymm17
+ +# define VEC0 ymm16
+ +# define VEC1 ymm17
+ +# define VEC2 ymm18
+ +# define VEC3 ymm19
+ +# define VEC4 ymm20
+ +# define VEC5 ymm21
+ +# define VEC6 ymm22
+ +# define VEC7 ymm23
+ +# define VEC8 ymm24
+ +# define VEC(i) VEC##i
+ +# define VMOVNT vmovntdq
+ +# define VMOVU vmovdqu64
+ +# define VMOVA vmovdqa64
+ +# define VZEROUPPER
+ +
+ +# define SECTION(p) p##.evex
+ +# define MEMMOVE_SYMBOL(p,s) p##_evex_##s
+ +
+ +# include "memmove-vec-unaligned-erms.S"
+ +#endif
+ diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+ index c763b7d871..d13d23d6ce 100644
+ --- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+ +++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+ @@ -48,6 +48,14 @@
+ # define MEMMOVE_CHK_SYMBOL(p,s) MEMMOVE_SYMBOL(p, s)
+ #endif
+
+ +#ifndef XMM0
+ +# define XMM0 xmm0
+ +#endif
+ +
+ +#ifndef YMM0
+ +# define YMM0 ymm0
+ +#endif
+ +
+ #ifndef VZEROUPPER
+ # if VEC_SIZE > 16
+ # define VZEROUPPER vzeroupper
+ @@ -67,6 +75,13 @@
+ # define REP_MOVSB_THRESHOLD (2048 * (VEC_SIZE / 16))
+ #endif
+
+ +/* Avoid short distance rep movsb only with non-SSE vector. */
+ +#ifndef AVOID_SHORT_DISTANCE_REP_MOVSB
+ +# define AVOID_SHORT_DISTANCE_REP_MOVSB (VEC_SIZE > 16)
+ +#else
+ +# define AVOID_SHORT_DISTANCE_REP_MOVSB 0
+ +#endif
+ +
+ #ifndef PREFETCH
+ # define PREFETCH(addr) prefetcht0 addr
+ #endif
+ @@ -143,11 +158,12 @@ L(last_2x_vec):
+ VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(1)
+ VMOVU %VEC(0), (%rdi)
+ VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx)
+ - VZEROUPPER
+ #if !defined USE_MULTIARCH || !IS_IN (libc)
+ L(nop):
+ -#endif
+ ret
+ +#else
+ + VZEROUPPER_RETURN
+ +#endif
+ #if defined USE_MULTIARCH && IS_IN (libc)
+ END (MEMMOVE_SYMBOL (__memmove, unaligned))
+
+ @@ -240,11 +256,14 @@ L(last_2x_vec):
+ VMOVU %VEC(0), (%rdi)
+ VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx)
+ L(return):
+ - VZEROUPPER
+ +#if VEC_SIZE > 16
+ + ZERO_UPPER_VEC_REGISTERS_RETURN
+ +#else
+ ret
+ +#endif
+
+ L(movsb):
+ - cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
+ + cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP
+ jae L(more_8x_vec)
+ cmpq %rsi, %rdi
+ jb 1f
+ @@ -257,7 +276,21 @@ L(movsb):
+ # error Unsupported REP_MOVSB_THRESHOLD and VEC_SIZE!
+ # endif
+ jb L(more_8x_vec_backward)
+ +# if AVOID_SHORT_DISTANCE_REP_MOVSB
+ + movq %rdi, %rcx
+ + subq %rsi, %rcx
+ + jmp 2f
+ +# endif
+ 1:
+ +# if AVOID_SHORT_DISTANCE_REP_MOVSB
+ + movq %rsi, %rcx
+ + subq %rdi, %rcx
+ +2:
+ +/* Avoid "rep movsb" if RCX, the distance between source and destination,
+ + is N*4GB + [1..63] with N >= 0. */
+ + cmpl $63, %ecx
+ + jbe L(more_2x_vec) /* Avoid "rep movsb" if ECX <= 63. */
+ +# endif
+ mov %RDX_LP, %RCX_LP
+ rep movsb
+ L(nop):
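
The hunk above skips "rep movsb" whenever the low 32 bits of the distance between destination and source (dst - src on the forward path, src - dst on the backward path) are 63 or less, and routes such copies to the vector loop instead. A minimal C sketch of the same condition, illustrative only and not part of the patch:

#include <stdint.h>

/* Sketch: nonzero when "rep movsb" should be avoided because the low
   32 bits of the source/destination distance are in [0..63].  */
static int
short_distance (const void *src, void *dst)
{
  uint32_t dist = (uint32_t) ((uintptr_t) dst - (uintptr_t) src);
  return dist <= 63;
}
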
+ @@ -291,21 +324,20 @@ L(less_vec):
+ #if VEC_SIZE > 32
+ L(between_32_63):
+ /* From 32 to 63. No branch when size == 32. */
+ - vmovdqu (%rsi), %ymm0
+ - vmovdqu -32(%rsi,%rdx), %ymm1
+ - vmovdqu %ymm0, (%rdi)
+ - vmovdqu %ymm1, -32(%rdi,%rdx)
+ - VZEROUPPER
+ - ret
+ + VMOVU (%rsi), %YMM0
+ + VMOVU -32(%rsi,%rdx), %YMM1
+ + VMOVU %YMM0, (%rdi)
+ + VMOVU %YMM1, -32(%rdi,%rdx)
+ + VZEROUPPER_RETURN
+ #endif
+ #if VEC_SIZE > 16
+ /* From 16 to 31. No branch when size == 16. */
+ L(between_16_31):
+ - vmovdqu (%rsi), %xmm0
+ - vmovdqu -16(%rsi,%rdx), %xmm1
+ - vmovdqu %xmm0, (%rdi)
+ - vmovdqu %xmm1, -16(%rdi,%rdx)
+ - ret
+ + VMOVU (%rsi), %XMM0
+ + VMOVU -16(%rsi,%rdx), %XMM1
+ + VMOVU %XMM0, (%rdi)
+ + VMOVU %XMM1, -16(%rdi,%rdx)
+ + VZEROUPPER_RETURN
+ #endif
+ L(between_8_15):
+ /* From 8 to 15. No branch when size == 8. */
+ @@ -358,8 +390,7 @@ L(more_2x_vec):
+ VMOVU %VEC(5), -(VEC_SIZE * 2)(%rdi,%rdx)
+ VMOVU %VEC(6), -(VEC_SIZE * 3)(%rdi,%rdx)
+ VMOVU %VEC(7), -(VEC_SIZE * 4)(%rdi,%rdx)
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+ L(last_4x_vec):
+ /* Copy from 2 * VEC to 4 * VEC. */
+ VMOVU (%rsi), %VEC(0)
+ @@ -370,8 +401,7 @@ L(last_4x_vec):
+ VMOVU %VEC(1), VEC_SIZE(%rdi)
+ VMOVU %VEC(2), -VEC_SIZE(%rdi,%rdx)
+ VMOVU %VEC(3), -(VEC_SIZE * 2)(%rdi,%rdx)
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ L(more_8x_vec):
+ cmpq %rsi, %rdi
+ @@ -402,7 +432,7 @@ L(more_8x_vec):
+ addq %r8, %rdx
+ #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
+ /* Check non-temporal store threshold. */
+ - cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
+ + cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP
+ ja L(large_forward)
+ #endif
+ L(loop_4x_vec_forward):
+ @@ -427,8 +457,7 @@ L(loop_4x_vec_forward):
+ VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx)
+ /* Store the first VEC. */
+ VMOVU %VEC(4), (%r11)
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ L(more_8x_vec_backward):
+ /* Load the first 4 * VEC and last VEC to support overlapping
+ @@ -454,7 +483,7 @@ L(more_8x_vec_backward):
+ subq %r8, %rdx
+ #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
+ /* Check non-temporal store threshold. */
+ - cmpq __x86_shared_non_temporal_threshold(%rip), %rdx
+ + cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP
+ ja L(large_backward)
+ #endif
+ L(loop_4x_vec_backward):
+ @@ -479,8 +508,7 @@ L(loop_4x_vec_backward):
+ VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi)
+ /* Store the last VEC. */
+ VMOVU %VEC(8), (%r11)
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
+ L(large_forward):
+ @@ -515,8 +543,7 @@ L(loop_large_forward):
+ VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx)
+ /* Store the first VEC. */
+ VMOVU %VEC(4), (%r11)
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ L(large_backward):
+ /* Don't use non-temporal store if there is overlap between
+ @@ -550,8 +577,7 @@ L(loop_large_backward):
+ VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi)
+ /* Store the last VEC. */
+ VMOVU %VEC(8), (%r11)
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+ #endif
+ END (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
+
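
The L(last_2x_vec), L(between_32_63) and L(between_16_31) paths rewritten above keep the usual small-copy idiom: load the first and the last vector of the range (the two may overlap), then store both, so every length in the range is handled without branching on the exact size, and loading both before storing preserves memmove semantics for overlapping buffers. A rough C equivalent of the 32-63 byte case, with memcpy standing in for the two unaligned YMM loads and stores (illustrative only):

#include <string.h>

static void
copy_32_to_63 (void *dst, const void *src, size_t n)  /* assumes 32 <= n <= 63 */
{
  unsigned char head[32], tail[32];
  memcpy (head, src, 32);                                   /* first 32 bytes */
  memcpy (tail, (const unsigned char *) src + n - 32, 32);  /* last 32 bytes  */
  memcpy (dst, head, 32);
  memcpy ((unsigned char *) dst + n - 32, tail, 32);
}
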
+ diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S
+ new file mode 100644
+ index 0000000000..cea2d2a72d
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S
+ @@ -0,0 +1,12 @@
+ +#ifndef MEMRCHR
+ +# define MEMRCHR __memrchr_avx2_rtm
+ +#endif
+ +
+ +#define ZERO_UPPER_VEC_REGISTERS_RETURN \
+ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
+ +
+ +#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
+ +
+ +#define SECTION(p) p##.avx.rtm
+ +
+ +#include "memrchr-avx2.S"
+ diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S
+ index f5437b54de..c8d54c08d6 100644
+ --- a/sysdeps/x86_64/multiarch/memrchr-avx2.S
+ +++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S
+ @@ -20,14 +20,22 @@
+
+ # include <sysdep.h>
+
+ +# ifndef MEMRCHR
+ +# define MEMRCHR __memrchr_avx2
+ +# endif
+ +
+ # ifndef VZEROUPPER
+ # define VZEROUPPER vzeroupper
+ # endif
+
+ +# ifndef SECTION
+ +# define SECTION(p) p##.avx
+ +# endif
+ +
+ # define VEC_SIZE 32
+
+ - .section .text.avx,"ax",@progbits
+ -ENTRY (__memrchr_avx2)
+ + .section SECTION(.text),"ax",@progbits
+ +ENTRY (MEMRCHR)
+ /* Broadcast CHAR to YMM0. */
+ vmovd %esi, %xmm0
+ vpbroadcastb %xmm0, %ymm0
+ @@ -134,8 +142,8 @@ L(loop_4x_vec):
+ vpmovmskb %ymm1, %eax
+ bsrl %eax, %eax
+ addq %rdi, %rax
+ - VZEROUPPER
+ - ret
+ +L(return_vzeroupper):
+ + ZERO_UPPER_VEC_REGISTERS_RETURN
+
+ .p2align 4
+ L(last_4x_vec_or_less):
+ @@ -169,8 +177,7 @@ L(last_4x_vec_or_less):
+ addq %rax, %rdx
+ jl L(zero)
+ addq %rdi, %rax
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ L(last_2x_vec):
+ @@ -191,31 +198,27 @@ L(last_2x_vec):
+ jl L(zero)
+ addl $(VEC_SIZE * 2), %eax
+ addq %rdi, %rax
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ L(last_vec_x0):
+ bsrl %eax, %eax
+ addq %rdi, %rax
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ L(last_vec_x1):
+ bsrl %eax, %eax
+ addl $VEC_SIZE, %eax
+ addq %rdi, %rax
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ L(last_vec_x2):
+ bsrl %eax, %eax
+ addl $(VEC_SIZE * 2), %eax
+ addq %rdi, %rax
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ L(last_vec_x3):
+ @@ -232,8 +235,7 @@ L(last_vec_x1_check):
+ jl L(zero)
+ addl $VEC_SIZE, %eax
+ addq %rdi, %rax
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ L(last_vec_x3_check):
+ @@ -243,12 +245,14 @@ L(last_vec_x3_check):
+ jl L(zero)
+ addl $(VEC_SIZE * 3), %eax
+ addq %rdi, %rax
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ L(zero):
+ - VZEROUPPER
+ + xorl %eax, %eax
+ + VZEROUPPER_RETURN
+ +
+ + .p2align 4
+ L(null):
+ xorl %eax, %eax
+ ret
+ @@ -273,8 +277,7 @@ L(last_vec_or_less_aligned):
+
+ bsrl %eax, %eax
+ addq %rdi, %rax
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ L(last_vec_or_less):
+ @@ -315,8 +318,7 @@ L(last_vec_or_less):
+ bsrl %eax, %eax
+ addq %rdi, %rax
+ addq %r8, %rax
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ L(last_vec_2x_aligned):
+ @@ -353,7 +355,6 @@ L(last_vec_2x_aligned):
+ bsrl %eax, %eax
+ addq %rdi, %rax
+ addq %r8, %rax
+ - VZEROUPPER
+ - ret
+ -END (__memrchr_avx2)
+ + VZEROUPPER_RETURN
+ +END (MEMRCHR)
+ #endif
+ diff --git a/sysdeps/x86_64/multiarch/memrchr-evex.S b/sysdeps/x86_64/multiarch/memrchr-evex.S
+ new file mode 100644
+ index 0000000000..16bf8e02b1
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/memrchr-evex.S
+ @@ -0,0 +1,337 @@
+ +/* memrchr optimized with 256-bit EVEX instructions.
+ + Copyright (C) 2021 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#if IS_IN (libc)
+ +
+ +# include <sysdep.h>
+ +
+ +# define VMOVA vmovdqa64
+ +
+ +# define YMMMATCH ymm16
+ +
+ +# define VEC_SIZE 32
+ +
+ + .section .text.evex,"ax",@progbits
+ +ENTRY (__memrchr_evex)
+ + /* Broadcast CHAR to YMMMATCH. */
+ + vpbroadcastb %esi, %YMMMATCH
+ +
+ + sub $VEC_SIZE, %RDX_LP
+ + jbe L(last_vec_or_less)
+ +
+ + add %RDX_LP, %RDI_LP
+ +
+ + /* Check the last VEC_SIZE bytes. */
+ + vpcmpb $0, (%rdi), %YMMMATCH, %k1
+ + kmovd %k1, %eax
+ + testl %eax, %eax
+ + jnz L(last_vec_x0)
+ +
+ + subq $(VEC_SIZE * 4), %rdi
+ + movl %edi, %ecx
+ + andl $(VEC_SIZE - 1), %ecx
+ + jz L(aligned_more)
+ +
+ + /* Align data for aligned loads in the loop. */
+ + addq $VEC_SIZE, %rdi
+ + addq $VEC_SIZE, %rdx
+ + andq $-VEC_SIZE, %rdi
+ + subq %rcx, %rdx
+ +
+ + .p2align 4
+ +L(aligned_more):
+ + subq $(VEC_SIZE * 4), %rdx
+ + jbe L(last_4x_vec_or_less)
+ +
+ + /* Check the last 4 * VEC_SIZE. Only one VEC_SIZE at a time
+ + since data is only aligned to VEC_SIZE. */
+ + vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1
+ + kmovd %k1, %eax
+ + testl %eax, %eax
+ + jnz L(last_vec_x3)
+ +
+ + vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k2
+ + kmovd %k2, %eax
+ + testl %eax, %eax
+ + jnz L(last_vec_x2)
+ +
+ + vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k3
+ + kmovd %k3, %eax
+ + testl %eax, %eax
+ + jnz L(last_vec_x1)
+ +
+ + vpcmpb $0, (%rdi), %YMMMATCH, %k4
+ + kmovd %k4, %eax
+ + testl %eax, %eax
+ + jnz L(last_vec_x0)
+ +
+ + /* Align data to 4 * VEC_SIZE for loop with fewer branches.
+ + There are some overlaps with above if data isn't aligned
+ + to 4 * VEC_SIZE. */
+ + movl %edi, %ecx
+ + andl $(VEC_SIZE * 4 - 1), %ecx
+ + jz L(loop_4x_vec)
+ +
+ + addq $(VEC_SIZE * 4), %rdi
+ + addq $(VEC_SIZE * 4), %rdx
+ + andq $-(VEC_SIZE * 4), %rdi
+ + subq %rcx, %rdx
+ +
+ + .p2align 4
+ +L(loop_4x_vec):
+ + /* Compare 4 * VEC at a time forward. */
+ + subq $(VEC_SIZE * 4), %rdi
+ + subq $(VEC_SIZE * 4), %rdx
+ + jbe L(last_4x_vec_or_less)
+ +
+ + vpcmpb $0, (%rdi), %YMMMATCH, %k1
+ + vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k2
+ + kord %k1, %k2, %k5
+ + vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k3
+ + vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k4
+ +
+ + kord %k3, %k4, %k6
+ + kortestd %k5, %k6
+ + jz L(loop_4x_vec)
+ +
+ + /* There is a match. */
+ + kmovd %k4, %eax
+ + testl %eax, %eax
+ + jnz L(last_vec_x3)
+ +
+ + kmovd %k3, %eax
+ + testl %eax, %eax
+ + jnz L(last_vec_x2)
+ +
+ + kmovd %k2, %eax
+ + testl %eax, %eax
+ + jnz L(last_vec_x1)
+ +
+ + kmovd %k1, %eax
+ + bsrl %eax, %eax
+ + addq %rdi, %rax
+ + ret
+ +
+ + .p2align 4
+ +L(last_4x_vec_or_less):
+ + addl $(VEC_SIZE * 4), %edx
+ + cmpl $(VEC_SIZE * 2), %edx
+ + jbe L(last_2x_vec)
+ +
+ + vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1
+ + kmovd %k1, %eax
+ + testl %eax, %eax
+ + jnz L(last_vec_x3)
+ +
+ + vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k2
+ + kmovd %k2, %eax
+ + testl %eax, %eax
+ + jnz L(last_vec_x2)
+ +
+ + vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k3
+ + kmovd %k3, %eax
+ + testl %eax, %eax
+ + jnz L(last_vec_x1_check)
+ + cmpl $(VEC_SIZE * 3), %edx
+ + jbe L(zero)
+ +
+ + vpcmpb $0, (%rdi), %YMMMATCH, %k4
+ + kmovd %k4, %eax
+ + testl %eax, %eax
+ + jz L(zero)
+ + bsrl %eax, %eax
+ + subq $(VEC_SIZE * 4), %rdx
+ + addq %rax, %rdx
+ + jl L(zero)
+ + addq %rdi, %rax
+ + ret
+ +
+ + .p2align 4
+ +L(last_2x_vec):
+ + vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1
+ + kmovd %k1, %eax
+ + testl %eax, %eax
+ + jnz L(last_vec_x3_check)
+ + cmpl $VEC_SIZE, %edx
+ + jbe L(zero)
+ +
+ + vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k1
+ + kmovd %k1, %eax
+ + testl %eax, %eax
+ + jz L(zero)
+ + bsrl %eax, %eax
+ + subq $(VEC_SIZE * 2), %rdx
+ + addq %rax, %rdx
+ + jl L(zero)
+ + addl $(VEC_SIZE * 2), %eax
+ + addq %rdi, %rax
+ + ret
+ +
+ + .p2align 4
+ +L(last_vec_x0):
+ + bsrl %eax, %eax
+ + addq %rdi, %rax
+ + ret
+ +
+ + .p2align 4
+ +L(last_vec_x1):
+ + bsrl %eax, %eax
+ + addl $VEC_SIZE, %eax
+ + addq %rdi, %rax
+ + ret
+ +
+ + .p2align 4
+ +L(last_vec_x2):
+ + bsrl %eax, %eax
+ + addl $(VEC_SIZE * 2), %eax
+ + addq %rdi, %rax
+ + ret
+ +
+ + .p2align 4
+ +L(last_vec_x3):
+ + bsrl %eax, %eax
+ + addl $(VEC_SIZE * 3), %eax
+ + addq %rdi, %rax
+ + ret
+ +
+ + .p2align 4
+ +L(last_vec_x1_check):
+ + bsrl %eax, %eax
+ + subq $(VEC_SIZE * 3), %rdx
+ + addq %rax, %rdx
+ + jl L(zero)
+ + addl $VEC_SIZE, %eax
+ + addq %rdi, %rax
+ + ret
+ +
+ + .p2align 4
+ +L(last_vec_x3_check):
+ + bsrl %eax, %eax
+ + subq $VEC_SIZE, %rdx
+ + addq %rax, %rdx
+ + jl L(zero)
+ + addl $(VEC_SIZE * 3), %eax
+ + addq %rdi, %rax
+ + ret
+ +
+ + .p2align 4
+ +L(zero):
+ + xorl %eax, %eax
+ + ret
+ +
+ + .p2align 4
+ +L(last_vec_or_less_aligned):
+ + movl %edx, %ecx
+ +
+ + vpcmpb $0, (%rdi), %YMMMATCH, %k1
+ +
+ + movl $1, %edx
+ + /* Support rdx << 32. */
+ + salq %cl, %rdx
+ + subq $1, %rdx
+ +
+ + kmovd %k1, %eax
+ +
+ + /* Remove the trailing bytes. */
+ + andl %edx, %eax
+ + testl %eax, %eax
+ + jz L(zero)
+ +
+ + bsrl %eax, %eax
+ + addq %rdi, %rax
+ + ret
+ +
+ + .p2align 4
+ +L(last_vec_or_less):
+ + addl $VEC_SIZE, %edx
+ +
+ + /* Check for zero length. */
+ + testl %edx, %edx
+ + jz L(zero)
+ +
+ + movl %edi, %ecx
+ + andl $(VEC_SIZE - 1), %ecx
+ + jz L(last_vec_or_less_aligned)
+ +
+ + movl %ecx, %esi
+ + movl %ecx, %r8d
+ + addl %edx, %esi
+ + andq $-VEC_SIZE, %rdi
+ +
+ + subl $VEC_SIZE, %esi
+ + ja L(last_vec_2x_aligned)
+ +
+ + /* Check the last VEC. */
+ + vpcmpb $0, (%rdi), %YMMMATCH, %k1
+ + kmovd %k1, %eax
+ +
+ + /* Remove the leading and trailing bytes. */
+ + sarl %cl, %eax
+ + movl %edx, %ecx
+ +
+ + movl $1, %edx
+ + sall %cl, %edx
+ + subl $1, %edx
+ +
+ + andl %edx, %eax
+ + testl %eax, %eax
+ + jz L(zero)
+ +
+ + bsrl %eax, %eax
+ + addq %rdi, %rax
+ + addq %r8, %rax
+ + ret
+ +
+ + .p2align 4
+ +L(last_vec_2x_aligned):
+ + movl %esi, %ecx
+ +
+ + /* Check the last VEC. */
+ + vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k1
+ +
+ + movl $1, %edx
+ + sall %cl, %edx
+ + subl $1, %edx
+ +
+ + kmovd %k1, %eax
+ +
+ + /* Remove the trailing bytes. */
+ + andl %edx, %eax
+ +
+ + testl %eax, %eax
+ + jnz L(last_vec_x1)
+ +
+ + /* Check the second last VEC. */
+ + vpcmpb $0, (%rdi), %YMMMATCH, %k1
+ +
+ + movl %r8d, %ecx
+ +
+ + kmovd %k1, %eax
+ +
+ + /* Remove the leading bytes. Must use unsigned right shift for
+ + bsrl below. */
+ + shrl %cl, %eax
+ + testl %eax, %eax
+ + jz L(zero)
+ +
+ + bsrl %eax, %eax
+ + addq %rdi, %rax
+ + addq %r8, %rax
+ + ret
+ +END (__memrchr_evex)
+ +#endif
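
In the unaligned tails above, __memrchr_evex trims the match mask with a computed bitmask: it builds (1 << len) - 1 using a 64-bit shift so that len == 32 still yields an all-ones 32-bit mask (the "Support rdx << 32" comment), then ANDs it with the kmovd result. The same operation as a hedged C sketch, not the library code:

#include <stdint.h>

/* Keep only the low LEN bits of a 32-bit match mask, LEN in [1..32].  */
static uint32_t
trim_match_mask (uint32_t match, unsigned int len)
{
  uint64_t keep = ((uint64_t) 1 << len) - 1;   /* 64-bit shift so len == 32 works */
  return match & (uint32_t) keep;
}
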
+ diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S
+ new file mode 100644
+ index 0000000000..8ac3e479bb
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S
+ @@ -0,0 +1,10 @@
+ +#define ZERO_UPPER_VEC_REGISTERS_RETURN \
+ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
+ +
+ +#define VZEROUPPER_RETURN jmp L(return)
+ +
+ +#define SECTION(p) p##.avx.rtm
+ +#define MEMSET_SYMBOL(p,s) p##_avx2_##s##_rtm
+ +#define WMEMSET_SYMBOL(p,s) p##_avx2_##s##_rtm
+ +
+ +#include "memset-avx2-unaligned-erms.S"
+ diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
+ index 7ab3d89849..ae0860f36a 100644
+ --- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
+ +++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
+ @@ -14,9 +14,15 @@
+ movq r, %rax; \
+ vpbroadcastd %xmm0, %ymm0
+
+ -# define SECTION(p) p##.avx
+ -# define MEMSET_SYMBOL(p,s) p##_avx2_##s
+ -# define WMEMSET_SYMBOL(p,s) p##_avx2_##s
+ +# ifndef SECTION
+ +# define SECTION(p) p##.avx
+ +# endif
+ +# ifndef MEMSET_SYMBOL
+ +# define MEMSET_SYMBOL(p,s) p##_avx2_##s
+ +# endif
+ +# ifndef WMEMSET_SYMBOL
+ +# define WMEMSET_SYMBOL(p,s) p##_avx2_##s
+ +# endif
+
+ # include "memset-vec-unaligned-erms.S"
+ #endif
+ diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
+ index 0783979ca5..22e7b187c8 100644
+ --- a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
+ +++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
+ @@ -1,22 +1,22 @@
+ #if IS_IN (libc)
+ # define VEC_SIZE 64
+ -# define VEC(i) zmm##i
+ +# define XMM0 xmm16
+ +# define YMM0 ymm16
+ +# define VEC0 zmm16
+ +# define VEC(i) VEC##i
+ # define VMOVU vmovdqu64
+ # define VMOVA vmovdqa64
+ +# define VZEROUPPER
+
+ # define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+ - vmovd d, %xmm0; \
+ movq r, %rax; \
+ - vpbroadcastb %xmm0, %xmm0; \
+ - vpbroadcastq %xmm0, %zmm0
+ + vpbroadcastb d, %VEC0
+
+ # define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+ - vmovd d, %xmm0; \
+ movq r, %rax; \
+ - vpbroadcastd %xmm0, %xmm0; \
+ - vpbroadcastq %xmm0, %zmm0
+ + vpbroadcastd d, %VEC0
+
+ -# define SECTION(p) p##.avx512
+ +# define SECTION(p) p##.evex512
+ # define MEMSET_SYMBOL(p,s) p##_avx512_##s
+ # define WMEMSET_SYMBOL(p,s) p##_avx512_##s
+
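
The AVX-512 memset change above also shortens the fill-value broadcast: the byte or dword is broadcast straight from the general-purpose register into zmm16 (vpbroadcastb/vpbroadcastd with a register source, available with AVX512BW for the byte form), replacing the old vmovd-to-xmm0 plus double-vpbroadcast sequence and keeping xmm0-xmm15 untouched. In intrinsics terms the broadcast is roughly the following sketch; the exact instruction chosen depends on the compiler and flags:

#include <immintrin.h>

/* Broadcast the memset fill byte to a 512-bit vector; with AVX512BW the
   compiler can emit a single vpbroadcastb from the GPR holding C.  */
static __m512i
broadcast_byte (int c)
{
  return _mm512_set1_epi8 ((char) c);
}
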
+ diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
+ new file mode 100644
+ index 0000000000..ae0a4d6e46
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
+ @@ -0,0 +1,24 @@
+ +#if IS_IN (libc)
+ +# define VEC_SIZE 32
+ +# define XMM0 xmm16
+ +# define YMM0 ymm16
+ +# define VEC0 ymm16
+ +# define VEC(i) VEC##i
+ +# define VMOVU vmovdqu64
+ +# define VMOVA vmovdqa64
+ +# define VZEROUPPER
+ +
+ +# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+ + movq r, %rax; \
+ + vpbroadcastb d, %VEC0
+ +
+ +# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+ + movq r, %rax; \
+ + vpbroadcastd d, %VEC0
+ +
+ +# define SECTION(p) p##.evex
+ +# define MEMSET_SYMBOL(p,s) p##_evex_##s
+ +# define WMEMSET_SYMBOL(p,s) p##_evex_##s
+ +
+ +# include "memset-vec-unaligned-erms.S"
+ +#endif
+ diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+ index af2299709c..16bed6ec11 100644
+ --- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+ +++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+ @@ -34,20 +34,25 @@
+ # define WMEMSET_CHK_SYMBOL(p,s) WMEMSET_SYMBOL(p, s)
+ #endif
+
+ +#ifndef XMM0
+ +# define XMM0 xmm0
+ +#endif
+ +
+ +#ifndef YMM0
+ +# define YMM0 ymm0
+ +#endif
+ +
+ #ifndef VZEROUPPER
+ # if VEC_SIZE > 16
+ # define VZEROUPPER vzeroupper
+ +# define VZEROUPPER_SHORT_RETURN vzeroupper; ret
+ # else
+ # define VZEROUPPER
+ # endif
+ #endif
+
+ #ifndef VZEROUPPER_SHORT_RETURN
+ -# if VEC_SIZE > 16
+ -# define VZEROUPPER_SHORT_RETURN vzeroupper
+ -# else
+ -# define VZEROUPPER_SHORT_RETURN rep
+ -# endif
+ +# define VZEROUPPER_SHORT_RETURN rep; ret
+ #endif
+
+ #ifndef MOVQ
+ @@ -77,7 +82,7 @@
+ ENTRY (__bzero)
+ mov %RDI_LP, %RAX_LP /* Set return value. */
+ mov %RSI_LP, %RDX_LP /* Set n. */
+ - pxor %xmm0, %xmm0
+ + pxor %XMM0, %XMM0
+ jmp L(entry_from_bzero)
+ END (__bzero)
+ weak_alias (__bzero, bzero)
+ @@ -119,8 +124,7 @@ L(entry_from_bzero):
+ /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */
+ VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx)
+ VMOVU %VEC(0), (%rdi)
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+ #if defined USE_MULTIARCH && IS_IN (libc)
+ END (MEMSET_SYMBOL (__memset, unaligned))
+
+ @@ -143,14 +147,12 @@ ENTRY (__memset_erms)
+ ENTRY (MEMSET_SYMBOL (__memset, erms))
+ # endif
+ L(stosb):
+ - /* Issue vzeroupper before rep stosb. */
+ - VZEROUPPER
+ mov %RDX_LP, %RCX_LP
+ movzbl %sil, %eax
+ mov %RDI_LP, %RDX_LP
+ rep stosb
+ mov %RDX_LP, %RAX_LP
+ - ret
+ + VZEROUPPER_RETURN
+ # if VEC_SIZE == 16
+ END (__memset_erms)
+ # else
+ @@ -177,8 +179,7 @@ ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms))
+ /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */
+ VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx)
+ VMOVU %VEC(0), (%rdi)
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ L(stosb_more_2x_vec):
+ cmpq $REP_STOSB_THRESHOLD, %rdx
+ @@ -192,8 +193,11 @@ L(more_2x_vec):
+ VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx)
+ VMOVU %VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx)
+ L(return):
+ - VZEROUPPER
+ +#if VEC_SIZE > 16
+ + ZERO_UPPER_VEC_REGISTERS_RETURN
+ +#else
+ ret
+ +#endif
+
+ L(loop_start):
+ leaq (VEC_SIZE * 4)(%rdi), %rcx
+ @@ -219,7 +223,6 @@ L(loop):
+ cmpq %rcx, %rdx
+ jne L(loop)
+ VZEROUPPER_SHORT_RETURN
+ - ret
+ L(less_vec):
+ /* Less than 1 VEC. */
+ # if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
+ @@ -233,7 +236,7 @@ L(less_vec):
+ cmpb $16, %dl
+ jae L(between_16_31)
+ # endif
+ - MOVQ %xmm0, %rcx
+ + MOVQ %XMM0, %rcx
+ cmpb $8, %dl
+ jae L(between_8_15)
+ cmpb $4, %dl
+ @@ -243,40 +246,34 @@ L(less_vec):
+ jb 1f
+ movb %cl, (%rdi)
+ 1:
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+ # if VEC_SIZE > 32
+ /* From 32 to 63. No branch when size == 32. */
+ L(between_32_63):
+ - vmovdqu %ymm0, -32(%rdi,%rdx)
+ - vmovdqu %ymm0, (%rdi)
+ - VZEROUPPER
+ - ret
+ + VMOVU %YMM0, -32(%rdi,%rdx)
+ + VMOVU %YMM0, (%rdi)
+ + VZEROUPPER_RETURN
+ # endif
+ # if VEC_SIZE > 16
+ /* From 16 to 31. No branch when size == 16. */
+ L(between_16_31):
+ - vmovdqu %xmm0, -16(%rdi,%rdx)
+ - vmovdqu %xmm0, (%rdi)
+ - VZEROUPPER
+ - ret
+ + VMOVU %XMM0, -16(%rdi,%rdx)
+ + VMOVU %XMM0, (%rdi)
+ + VZEROUPPER_RETURN
+ # endif
+ /* From 8 to 15. No branch when size == 8. */
+ L(between_8_15):
+ movq %rcx, -8(%rdi,%rdx)
+ movq %rcx, (%rdi)
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+ L(between_4_7):
+ /* From 4 to 7. No branch when size == 4. */
+ movl %ecx, -4(%rdi,%rdx)
+ movl %ecx, (%rdi)
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+ L(between_2_3):
+ /* From 2 to 3. No branch when size == 2. */
+ movw %cx, -2(%rdi,%rdx)
+ movw %cx, (%rdi)
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+ END (MEMSET_SYMBOL (__memset, unaligned_erms))
+ diff --git a/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S
+ new file mode 100644
+ index 0000000000..acc5f6e2fb
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S
+ @@ -0,0 +1,4 @@
+ +#define MEMCHR __rawmemchr_avx2_rtm
+ +#define USE_AS_RAWMEMCHR 1
+ +
+ +#include "memchr-avx2-rtm.S"
+ diff --git a/sysdeps/x86_64/multiarch/rawmemchr-evex.S b/sysdeps/x86_64/multiarch/rawmemchr-evex.S
+ new file mode 100644
+ index 0000000000..ec942b77ba
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/rawmemchr-evex.S
+ @@ -0,0 +1,4 @@
+ +#define MEMCHR __rawmemchr_evex
+ +#define USE_AS_RAWMEMCHR 1
+ +
+ +#include "memchr-evex.S"
+ diff --git a/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S
+ new file mode 100644
+ index 0000000000..2b9c07a59f
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S
+ @@ -0,0 +1,3 @@
+ +#define USE_AS_STPCPY
+ +#define STRCPY __stpcpy_avx2_rtm
+ +#include "strcpy-avx2-rtm.S"
+ diff --git a/sysdeps/x86_64/multiarch/stpcpy-evex.S b/sysdeps/x86_64/multiarch/stpcpy-evex.S
+ new file mode 100644
+ index 0000000000..7c6f26cd98
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/stpcpy-evex.S
+ @@ -0,0 +1,3 @@
+ +#define USE_AS_STPCPY
+ +#define STRCPY __stpcpy_evex
+ +#include "strcpy-evex.S"
+ diff --git a/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S
+ new file mode 100644
+ index 0000000000..60a2ccfe53
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S
+ @@ -0,0 +1,4 @@
+ +#define USE_AS_STPCPY
+ +#define USE_AS_STRNCPY
+ +#define STRCPY __stpncpy_avx2_rtm
+ +#include "strcpy-avx2-rtm.S"
+ diff --git a/sysdeps/x86_64/multiarch/stpncpy-evex.S b/sysdeps/x86_64/multiarch/stpncpy-evex.S
+ new file mode 100644
+ index 0000000000..1570014d1c
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/stpncpy-evex.S
+ @@ -0,0 +1,4 @@
+ +#define USE_AS_STPCPY
+ +#define USE_AS_STRNCPY
+ +#define STRCPY __stpncpy_evex
+ +#include "strcpy-evex.S"
+ diff --git a/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S
+ new file mode 100644
+ index 0000000000..637fb557c4
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S
+ @@ -0,0 +1,12 @@
+ +#ifndef STRCAT
+ +# define STRCAT __strcat_avx2_rtm
+ +#endif
+ +
+ +#define ZERO_UPPER_VEC_REGISTERS_RETURN \
+ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
+ +
+ +#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
+ +
+ +#define SECTION(p) p##.avx.rtm
+ +
+ +#include "strcat-avx2.S"
+ diff --git a/sysdeps/x86_64/multiarch/strcat-avx2.S b/sysdeps/x86_64/multiarch/strcat-avx2.S
+ index a4143bf8f5..1e6d4827ee 100644
+ --- a/sysdeps/x86_64/multiarch/strcat-avx2.S
+ +++ b/sysdeps/x86_64/multiarch/strcat-avx2.S
+ @@ -30,7 +30,11 @@
+ /* Number of bytes in a vector register */
+ # define VEC_SIZE 32
+
+ - .section .text.avx,"ax",@progbits
+ +# ifndef SECTION
+ +# define SECTION(p) p##.avx
+ +# endif
+ +
+ + .section SECTION(.text),"ax",@progbits
+ ENTRY (STRCAT)
+ mov %rdi, %r9
+ # ifdef USE_AS_STRNCAT
+ diff --git a/sysdeps/x86_64/multiarch/strcat-evex.S b/sysdeps/x86_64/multiarch/strcat-evex.S
+ new file mode 100644
+ index 0000000000..97c3d85b6d
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/strcat-evex.S
+ @@ -0,0 +1,283 @@
+ +/* strcat with 256-bit EVEX instructions.
+ + Copyright (C) 2021 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#if IS_IN (libc)
+ +
+ +# include <sysdep.h>
+ +
+ +# ifndef STRCAT
+ +# define STRCAT __strcat_evex
+ +# endif
+ +
+ +# define VMOVU vmovdqu64
+ +# define VMOVA vmovdqa64
+ +
+ +/* zero register */
+ +# define XMMZERO xmm16
+ +# define YMMZERO ymm16
+ +# define YMM0 ymm17
+ +# define YMM1 ymm18
+ +
+ +# define USE_AS_STRCAT
+ +
+ +/* Number of bytes in a vector register */
+ +# define VEC_SIZE 32
+ +
+ + .section .text.evex,"ax",@progbits
+ +ENTRY (STRCAT)
+ + mov %rdi, %r9
+ +# ifdef USE_AS_STRNCAT
+ + mov %rdx, %r8
+ +# endif
+ +
+ + xor %eax, %eax
+ + mov %edi, %ecx
+ + and $((VEC_SIZE * 4) - 1), %ecx
+ + vpxorq %XMMZERO, %XMMZERO, %XMMZERO
+ + cmp $(VEC_SIZE * 3), %ecx
+ + ja L(fourth_vector_boundary)
+ + vpcmpb $0, (%rdi), %YMMZERO, %k0
+ + kmovd %k0, %edx
+ + test %edx, %edx
+ + jnz L(exit_null_on_first_vector)
+ + mov %rdi, %rax
+ + and $-VEC_SIZE, %rax
+ + jmp L(align_vec_size_start)
+ +L(fourth_vector_boundary):
+ + mov %rdi, %rax
+ + and $-VEC_SIZE, %rax
+ + vpcmpb $0, (%rax), %YMMZERO, %k0
+ + mov $-1, %r10d
+ + sub %rax, %rcx
+ + shl %cl, %r10d
+ + kmovd %k0, %edx
+ + and %r10d, %edx
+ + jnz L(exit)
+ +
+ +L(align_vec_size_start):
+ + vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0
+ + kmovd %k0, %edx
+ + test %edx, %edx
+ + jnz L(exit_null_on_second_vector)
+ +
+ + vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1
+ + kmovd %k1, %edx
+ + test %edx, %edx
+ + jnz L(exit_null_on_third_vector)
+ +
+ + vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2
+ + kmovd %k2, %edx
+ + test %edx, %edx
+ + jnz L(exit_null_on_fourth_vector)
+ +
+ + vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3
+ + kmovd %k3, %edx
+ + test %edx, %edx
+ + jnz L(exit_null_on_fifth_vector)
+ +
+ + vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4
+ + add $(VEC_SIZE * 4), %rax
+ + kmovd %k4, %edx
+ + test %edx, %edx
+ + jnz L(exit_null_on_second_vector)
+ +
+ + vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1
+ + kmovd %k1, %edx
+ + test %edx, %edx
+ + jnz L(exit_null_on_third_vector)
+ +
+ + vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2
+ + kmovd %k2, %edx
+ + test %edx, %edx
+ + jnz L(exit_null_on_fourth_vector)
+ +
+ + vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3
+ + kmovd %k3, %edx
+ + test %edx, %edx
+ + jnz L(exit_null_on_fifth_vector)
+ +
+ + vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4
+ + kmovd %k4, %edx
+ + add $(VEC_SIZE * 4), %rax
+ + test %edx, %edx
+ + jnz L(exit_null_on_second_vector)
+ +
+ + vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1
+ + kmovd %k1, %edx
+ + test %edx, %edx
+ + jnz L(exit_null_on_third_vector)
+ +
+ + vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2
+ + kmovd %k2, %edx
+ + test %edx, %edx
+ + jnz L(exit_null_on_fourth_vector)
+ +
+ + vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3
+ + kmovd %k3, %edx
+ + test %edx, %edx
+ + jnz L(exit_null_on_fifth_vector)
+ +
+ + vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4
+ + add $(VEC_SIZE * 4), %rax
+ + kmovd %k4, %edx
+ + test %edx, %edx
+ + jnz L(exit_null_on_second_vector)
+ +
+ + vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1
+ + kmovd %k1, %edx
+ + test %edx, %edx
+ + jnz L(exit_null_on_third_vector)
+ +
+ + vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2
+ + kmovd %k2, %edx
+ + test %edx, %edx
+ + jnz L(exit_null_on_fourth_vector)
+ +
+ + vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3
+ + kmovd %k3, %edx
+ + test %edx, %edx
+ + jnz L(exit_null_on_fifth_vector)
+ +
+ + test $((VEC_SIZE * 4) - 1), %rax
+ + jz L(align_four_vec_loop)
+ +
+ + vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4
+ + add $(VEC_SIZE * 5), %rax
+ + kmovd %k4, %edx
+ + test %edx, %edx
+ + jnz L(exit)
+ +
+ + test $((VEC_SIZE * 4) - 1), %rax
+ + jz L(align_four_vec_loop)
+ +
+ + vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0
+ + add $VEC_SIZE, %rax
+ + kmovd %k0, %edx
+ + test %edx, %edx
+ + jnz L(exit)
+ +
+ + test $((VEC_SIZE * 4) - 1), %rax
+ + jz L(align_four_vec_loop)
+ +
+ + vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0
+ + add $VEC_SIZE, %rax
+ + kmovd %k0, %edx
+ + test %edx, %edx
+ + jnz L(exit)
+ +
+ + test $((VEC_SIZE * 4) - 1), %rax
+ + jz L(align_four_vec_loop)
+ +
+ + vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k1
+ + add $VEC_SIZE, %rax
+ + kmovd %k1, %edx
+ + test %edx, %edx
+ + jnz L(exit)
+ +
+ + add $VEC_SIZE, %rax
+ +
+ + .p2align 4
+ +L(align_four_vec_loop):
+ + VMOVA (%rax), %YMM0
+ + VMOVA (VEC_SIZE * 2)(%rax), %YMM1
+ + vpminub VEC_SIZE(%rax), %YMM0, %YMM0
+ + vpminub (VEC_SIZE * 3)(%rax), %YMM1, %YMM1
+ + vpminub %YMM0, %YMM1, %YMM0
+ + /* If K0 != 0, there is a null byte. */
+ + vpcmpb $0, %YMM0, %YMMZERO, %k0
+ + add $(VEC_SIZE * 4), %rax
+ + ktestd %k0, %k0
+ + jz L(align_four_vec_loop)
+ +
+ + vpcmpb $0, -(VEC_SIZE * 4)(%rax), %YMMZERO, %k0
+ + sub $(VEC_SIZE * 5), %rax
+ + kmovd %k0, %edx
+ + test %edx, %edx
+ + jnz L(exit_null_on_second_vector)
+ +
+ + vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1
+ + kmovd %k1, %edx
+ + test %edx, %edx
+ + jnz L(exit_null_on_third_vector)
+ +
+ + vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2
+ + kmovd %k2, %edx
+ + test %edx, %edx
+ + jnz L(exit_null_on_fourth_vector)
+ +
+ + vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3
+ + kmovd %k3, %edx
+ + sub %rdi, %rax
+ + bsf %rdx, %rdx
+ + add %rdx, %rax
+ + add $(VEC_SIZE * 4), %rax
+ + jmp L(StartStrcpyPart)
+ +
+ + .p2align 4
+ +L(exit):
+ + sub %rdi, %rax
+ +L(exit_null_on_first_vector):
+ + bsf %rdx, %rdx
+ + add %rdx, %rax
+ + jmp L(StartStrcpyPart)
+ +
+ + .p2align 4
+ +L(exit_null_on_second_vector):
+ + sub %rdi, %rax
+ + bsf %rdx, %rdx
+ + add %rdx, %rax
+ + add $VEC_SIZE, %rax
+ + jmp L(StartStrcpyPart)
+ +
+ + .p2align 4
+ +L(exit_null_on_third_vector):
+ + sub %rdi, %rax
+ + bsf %rdx, %rdx
+ + add %rdx, %rax
+ + add $(VEC_SIZE * 2), %rax
+ + jmp L(StartStrcpyPart)
+ +
+ + .p2align 4
+ +L(exit_null_on_fourth_vector):
+ + sub %rdi, %rax
+ + bsf %rdx, %rdx
+ + add %rdx, %rax
+ + add $(VEC_SIZE * 3), %rax
+ + jmp L(StartStrcpyPart)
+ +
+ + .p2align 4
+ +L(exit_null_on_fifth_vector):
+ + sub %rdi, %rax
+ + bsf %rdx, %rdx
+ + add %rdx, %rax
+ + add $(VEC_SIZE * 4), %rax
+ +
+ + .p2align 4
+ +L(StartStrcpyPart):
+ + lea (%r9, %rax), %rdi
+ + mov %rsi, %rcx
+ + mov %r9, %rax /* save result */
+ +
+ +# ifdef USE_AS_STRNCAT
+ + test %r8, %r8
+ + jz L(ExitZero)
+ +# define USE_AS_STRNCPY
+ +# endif
+ +
+ +# include "strcpy-evex.S"
+ +#endif
+ diff --git a/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S
+ new file mode 100644
+ index 0000000000..81f20d1d8e
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S
+ @@ -0,0 +1,12 @@
+ +#ifndef STRCHR
+ +# define STRCHR __strchr_avx2_rtm
+ +#endif
+ +
+ +#define ZERO_UPPER_VEC_REGISTERS_RETURN \
+ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
+ +
+ +#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
+ +
+ +#define SECTION(p) p##.avx.rtm
+ +
+ +#include "strchr-avx2.S"
+ diff --git a/sysdeps/x86_64/multiarch/strchr-avx2.S b/sysdeps/x86_64/multiarch/strchr-avx2.S
+ index 39fc69da7b..0a5217514a 100644
+ --- a/sysdeps/x86_64/multiarch/strchr-avx2.S
+ +++ b/sysdeps/x86_64/multiarch/strchr-avx2.S
+ @@ -38,9 +38,13 @@
+ # define VZEROUPPER vzeroupper
+ # endif
+
+ +# ifndef SECTION
+ +# define SECTION(p) p##.avx
+ +# endif
+ +
+ # define VEC_SIZE 32
+
+ - .section .text.avx,"ax",@progbits
+ + .section SECTION(.text),"ax",@progbits
+ ENTRY (STRCHR)
+ movl %edi, %ecx
+ /* Broadcast CHAR to YMM0. */
+ @@ -93,8 +97,8 @@ L(cros_page_boundary):
+ cmp (%rax), %CHAR_REG
+ cmovne %rdx, %rax
+ # endif
+ - VZEROUPPER
+ - ret
+ +L(return_vzeroupper):
+ + ZERO_UPPER_VEC_REGISTERS_RETURN
+
+ .p2align 4
+ L(aligned_more):
+ @@ -190,8 +194,7 @@ L(first_vec_x0):
+ cmp (%rax), %CHAR_REG
+ cmovne %rdx, %rax
+ # endif
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
- #define internal_syscall1(v0_init, input, number, err, arg1) \
- ({ \
- - long _sys_result; \
- + long int _sys_result; \
- \
- { \
- - register long long __s0 asm ("$16") __attribute__ ((unused)) \
- + long long int _arg1 = ARGIFY (arg1); \
- + register long long int __s0 asm ("$16") __attribute__ ((unused))\
- = (number); \
- - register long long __v0 asm ("$2"); \
- - register long long __a0 asm ("$4") = ARGIFY (arg1); \
- - register long long __a3 asm ("$7"); \
- + register long long int __v0 asm ("$2"); \
- + register long long int __a0 asm ("$4") = _arg1; \
- + register long long int __a3 asm ("$7"); \
- __asm__ volatile ( \
- ".set\tnoreorder\n\t" \
- v0_init \
- @@ -159,15 +160,17 @@
+ .p2align 4
+ L(first_vec_x1):
+ @@ -205,8 +208,7 @@ L(first_vec_x1):
+ cmp (%rax), %CHAR_REG
+ cmovne %rdx, %rax
+ # endif
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
- #define internal_syscall2(v0_init, input, number, err, arg1, arg2) \
- ({ \
- - long _sys_result; \
- + long int _sys_result; \
- \
- { \
- - register long long __s0 asm ("$16") __attribute__ ((unused)) \
- + long long int _arg1 = ARGIFY (arg1); \
- + long long int _arg2 = ARGIFY (arg2); \
- + register long long int __s0 asm ("$16") __attribute__ ((unused))\
- = (number); \
- - register long long __v0 asm ("$2"); \
- - register long long __a0 asm ("$4") = ARGIFY (arg1); \
- - register long long __a1 asm ("$5") = ARGIFY (arg2); \
- - register long long __a3 asm ("$7"); \
- + register long long int __v0 asm ("$2"); \
- + register long long int __a0 asm ("$4") = _arg1; \
- + register long long int __a1 asm ("$5") = _arg2; \
- + register long long int __a3 asm ("$7"); \
- __asm__ volatile ( \
- ".set\tnoreorder\n\t" \
- v0_init \
- @@ -185,16 +188,19 @@
- #define internal_syscall3(v0_init, input, number, err, \
- arg1, arg2, arg3) \
- ({ \
- - long _sys_result; \
- + long int _sys_result; \
- \
- { \
- - register long long __s0 asm ("$16") __attribute__ ((unused)) \
- + long long int _arg1 = ARGIFY (arg1); \
- + long long int _arg2 = ARGIFY (arg2); \
- + long long int _arg3 = ARGIFY (arg3); \
- + register long long int __s0 asm ("$16") __attribute__ ((unused))\
- = (number); \
- - register long long __v0 asm ("$2"); \
- - register long long __a0 asm ("$4") = ARGIFY (arg1); \
- - register long long __a1 asm ("$5") = ARGIFY (arg2); \
- - register long long __a2 asm ("$6") = ARGIFY (arg3); \
- - register long long __a3 asm ("$7"); \
- + register long long int __v0 asm ("$2"); \
- + register long long int __a0 asm ("$4") = _arg1; \
- + register long long int __a1 asm ("$5") = _arg2; \
- + register long long int __a2 asm ("$6") = _arg3; \
- + register long long int __a3 asm ("$7"); \
- __asm__ volatile ( \
- ".set\tnoreorder\n\t" \
- v0_init \
- @@ -212,16 +218,20 @@
- #define internal_syscall4(v0_init, input, number, err, \
- arg1, arg2, arg3, arg4) \
- ({ \
- - long _sys_result; \
- + long int _sys_result; \
- \
- { \
- - register long long __s0 asm ("$16") __attribute__ ((unused)) \
- + long long int _arg1 = ARGIFY (arg1); \
- + long long int _arg2 = ARGIFY (arg2); \
- + long long int _arg3 = ARGIFY (arg3); \
- + long long int _arg4 = ARGIFY (arg4); \
- + register long long int __s0 asm ("$16") __attribute__ ((unused))\
- = (number); \
- - register long long __v0 asm ("$2"); \
- - register long long __a0 asm ("$4") = ARGIFY (arg1); \
- - register long long __a1 asm ("$5") = ARGIFY (arg2); \
- - register long long __a2 asm ("$6") = ARGIFY (arg3); \
- - register long long __a3 asm ("$7") = ARGIFY (arg4); \
- + register long long int __v0 asm ("$2"); \
- + register long long int __a0 asm ("$4") = _arg1; \
- + register long long int __a1 asm ("$5") = _arg2; \
- + register long long int __a2 asm ("$6") = _arg3; \
- + register long long int __a3 asm ("$7") = _arg4; \
- __asm__ volatile ( \
- ".set\tnoreorder\n\t" \
- v0_init \
- @@ -239,17 +249,22 @@
- #define internal_syscall5(v0_init, input, number, err, \
- arg1, arg2, arg3, arg4, arg5) \
- ({ \
- - long _sys_result; \
- + long int _sys_result; \
- \
- { \
- - register long long __s0 asm ("$16") __attribute__ ((unused)) \
- + long long int _arg1 = ARGIFY (arg1); \
- + long long int _arg2 = ARGIFY (arg2); \
- + long long int _arg3 = ARGIFY (arg3); \
- + long long int _arg4 = ARGIFY (arg4); \
- + long long int _arg5 = ARGIFY (arg5); \
- + register long long int __s0 asm ("$16") __attribute__ ((unused))\
- = (number); \
- - register long long __v0 asm ("$2"); \
- - register long long __a0 asm ("$4") = ARGIFY (arg1); \
- - register long long __a1 asm ("$5") = ARGIFY (arg2); \
- - register long long __a2 asm ("$6") = ARGIFY (arg3); \
- - register long long __a3 asm ("$7") = ARGIFY (arg4); \
- - register long long __a4 asm ("$8") = ARGIFY (arg5); \
- + register long long int __v0 asm ("$2"); \
- + register long long int __a0 asm ("$4") = _arg1; \
- + register long long int __a1 asm ("$5") = _arg2; \
- + register long long int __a2 asm ("$6") = _arg3; \
- + register long long int __a3 asm ("$7") = _arg4; \
- + register long long int __a4 asm ("$8") = _arg5; \
- __asm__ volatile ( \
- ".set\tnoreorder\n\t" \
- v0_init \
- @@ -267,18 +282,24 @@
- #define internal_syscall6(v0_init, input, number, err, \
- arg1, arg2, arg3, arg4, arg5, arg6) \
- ({ \
- - long _sys_result; \
- + long int _sys_result; \
- \
- { \
- - register long long __s0 asm ("$16") __attribute__ ((unused)) \
- + long long int _arg1 = ARGIFY (arg1); \
- + long long int _arg2 = ARGIFY (arg2); \
- + long long int _arg3 = ARGIFY (arg3); \
- + long long int _arg4 = ARGIFY (arg4); \
- + long long int _arg5 = ARGIFY (arg5); \
- + long long int _arg6 = ARGIFY (arg6); \
- + register long long int __s0 asm ("$16") __attribute__ ((unused))\
- = (number); \
- - register long long __v0 asm ("$2"); \
- - register long long __a0 asm ("$4") = ARGIFY (arg1); \
- - register long long __a1 asm ("$5") = ARGIFY (arg2); \
- - register long long __a2 asm ("$6") = ARGIFY (arg3); \
- - register long long __a3 asm ("$7") = ARGIFY (arg4); \
- - register long long __a4 asm ("$8") = ARGIFY (arg5); \
- - register long long __a5 asm ("$9") = ARGIFY (arg6); \
- + register long long int __v0 asm ("$2"); \
- + register long long int __a0 asm ("$4") = _arg1; \
- + register long long int __a1 asm ("$5") = _arg2; \
- + register long long int __a2 asm ("$6") = _arg3; \
- + register long long int __a3 asm ("$7") = _arg4; \
- + register long long int __a4 asm ("$8") = _arg5; \
- + register long long int __a5 asm ("$9") = _arg6; \
- __asm__ volatile ( \
- ".set\tnoreorder\n\t" \
- v0_init \
- diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h
- index 9d30291f84..3e1f1cc3c5 100644
- --- a/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h
- +++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h
- @@ -50,7 +50,7 @@
- #undef INLINE_SYSCALL
- #define INLINE_SYSCALL(name, nr, args...) \
- ({ INTERNAL_SYSCALL_DECL (_sc_err); \
- - long result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \
- + long int result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \
- if ( INTERNAL_SYSCALL_ERROR_P (result_var, _sc_err) ) \
- { \
- __set_errno (INTERNAL_SYSCALL_ERRNO (result_var, _sc_err)); \
- @@ -59,10 +59,10 @@
- result_var; })
+ .p2align 4
+ L(first_vec_x2):
+ @@ -220,8 +222,7 @@ L(first_vec_x2):
+ cmp (%rax), %CHAR_REG
+ cmovne %rdx, %rax
+ # endif
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ L(4x_vec_end):
+ @@ -247,8 +248,7 @@ L(first_vec_x3):
+ cmp (%rax), %CHAR_REG
+ cmovne %rdx, %rax
+ # endif
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ END (STRCHR)
+ #endif
+ diff --git a/sysdeps/x86_64/multiarch/strchr-evex.S b/sysdeps/x86_64/multiarch/strchr-evex.S
+ new file mode 100644
+ index 0000000000..ddc86a7058
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/strchr-evex.S
+ @@ -0,0 +1,335 @@
+ +/* strchr/strchrnul optimized with 256-bit EVEX instructions.
+ + Copyright (C) 2021 Free Software Foundation, Inc.
+ + This file is part of the GNU C Library.
+ +
+ + The GNU C Library is free software; you can redistribute it and/or
+ + modify it under the terms of the GNU Lesser General Public
+ + License as published by the Free Software Foundation; either
+ + version 2.1 of the License, or (at your option) any later version.
+ +
+ + The GNU C Library is distributed in the hope that it will be useful,
+ + but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ + Lesser General Public License for more details.
+ +
+ + You should have received a copy of the GNU Lesser General Public
+ + License along with the GNU C Library; if not, see
+ + <https://www.gnu.org/licenses/>. */
+ +
+ +#if IS_IN (libc)
+ +
+ +# include <sysdep.h>
+ +
+ +# ifndef STRCHR
+ +# define STRCHR __strchr_evex
+ +# endif
+ +
+ +# define VMOVU vmovdqu64
+ +# define VMOVA vmovdqa64
+ +
+ +# ifdef USE_AS_WCSCHR
+ +# define VPBROADCAST vpbroadcastd
+ +# define VPCMP vpcmpd
+ +# define VPMINU vpminud
+ +# define CHAR_REG esi
+ +# define SHIFT_REG r8d
+ +# else
+ +# define VPBROADCAST vpbroadcastb
+ +# define VPCMP vpcmpb
+ +# define VPMINU vpminub
+ +# define CHAR_REG sil
+ +# define SHIFT_REG ecx
+ +# endif
+ +
+ +# define XMMZERO xmm16
+ +
+ +# define YMMZERO ymm16
+ +# define YMM0 ymm17
+ +# define YMM1 ymm18
+ +# define YMM2 ymm19
+ +# define YMM3 ymm20
+ +# define YMM4 ymm21
+ +# define YMM5 ymm22
+ +# define YMM6 ymm23
+ +# define YMM7 ymm24
+ +# define YMM8 ymm25
+ +
+ +# define VEC_SIZE 32
+ +# define PAGE_SIZE 4096
+ +
+ + .section .text.evex,"ax",@progbits
+ +ENTRY (STRCHR)
+ + movl %edi, %ecx
+ +# ifndef USE_AS_STRCHRNUL
+ + xorl %edx, %edx
+ +# endif
+ +
+ + /* Broadcast CHAR to YMM0. */
+ + VPBROADCAST %esi, %YMM0
+ +
+ + vpxorq %XMMZERO, %XMMZERO, %XMMZERO
+ +
+ + /* Check if we cross page boundary with one vector load. */
+ + andl $(PAGE_SIZE - 1), %ecx
+ + cmpl $(PAGE_SIZE - VEC_SIZE), %ecx
+ + ja L(cross_page_boundary)
+ +
+ + /* Check the first VEC_SIZE bytes. Search for both CHAR and the
+ + null bytes. */
+ + VMOVU (%rdi), %YMM1
+ +
+ + /* Leaves only CHARS matching esi as 0. */
+ + vpxorq %YMM1, %YMM0, %YMM2
+ + VPMINU %YMM2, %YMM1, %YMM2
+ + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
+ + VPCMP $0, %YMMZERO, %YMM2, %k0
+ + ktestd %k0, %k0
+ + jz L(more_vecs)
+ + kmovd %k0, %eax
+ + tzcntl %eax, %eax
+ + /* Found CHAR or the null byte. */
+ +# ifdef USE_AS_WCSCHR
+ + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
+ + leaq (%rdi, %rax, 4), %rax
+ +# else
+ + addq %rdi, %rax
+ +# endif
+ +# ifndef USE_AS_STRCHRNUL
+ + cmp (%rax), %CHAR_REG
+ + cmovne %rdx, %rax
+ +# endif
+ + ret
+ +
+ + .p2align 4
+ +L(more_vecs):
+ + /* Align data for aligned loads in the loop. */
+ + andq $-VEC_SIZE, %rdi
+ +L(aligned_more):
+ +
+ + /* Check the next 4 * VEC_SIZE. Only one VEC_SIZE at a time
+ + since data is only aligned to VEC_SIZE. */
+ + VMOVA VEC_SIZE(%rdi), %YMM1
+ + addq $VEC_SIZE, %rdi
+ +
+ + /* Leaves only CHARS matching esi as 0. */
+ + vpxorq %YMM1, %YMM0, %YMM2
+ + VPMINU %YMM2, %YMM1, %YMM2
+ + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
+ + VPCMP $0, %YMMZERO, %YMM2, %k0
+ + kmovd %k0, %eax
+ + testl %eax, %eax
+ + jnz L(first_vec_x0)
+ +
+ + VMOVA VEC_SIZE(%rdi), %YMM1
+ + /* Leaves only CHARS matching esi as 0. */
+ + vpxorq %YMM1, %YMM0, %YMM2
+ + VPMINU %YMM2, %YMM1, %YMM2
+ + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
+ + VPCMP $0, %YMMZERO, %YMM2, %k0
+ + kmovd %k0, %eax
+ + testl %eax, %eax
+ + jnz L(first_vec_x1)
+ +
+ + VMOVA (VEC_SIZE * 2)(%rdi), %YMM1
+ + /* Leaves only CHARS matching esi as 0. */
+ + vpxorq %YMM1, %YMM0, %YMM2
+ + VPMINU %YMM2, %YMM1, %YMM2
+ + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
+ + VPCMP $0, %YMMZERO, %YMM2, %k0
+ + kmovd %k0, %eax
+ + testl %eax, %eax
+ + jnz L(first_vec_x2)
+ +
+ + VMOVA (VEC_SIZE * 3)(%rdi), %YMM1
+ + /* Leaves only CHARS matching esi as 0. */
+ + vpxorq %YMM1, %YMM0, %YMM2
+ + VPMINU %YMM2, %YMM1, %YMM2
+ + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
+ + VPCMP $0, %YMMZERO, %YMM2, %k0
+ + ktestd %k0, %k0
+ + jz L(prep_loop_4x)
+ +
+ + kmovd %k0, %eax
+ + tzcntl %eax, %eax
+ + /* Found CHAR or the null byte. */
+ +# ifdef USE_AS_WCSCHR
+ + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
+ + leaq (VEC_SIZE * 3)(%rdi, %rax, 4), %rax
+ +# else
+ + leaq (VEC_SIZE * 3)(%rdi, %rax), %rax
+ +# endif
+ +# ifndef USE_AS_STRCHRNUL
+ + cmp (%rax), %CHAR_REG
+ + cmovne %rdx, %rax
+ +# endif
+ + ret
+ +
+ + .p2align 4
+ +L(first_vec_x0):
+ + tzcntl %eax, %eax
+ + /* Found CHAR or the null byte. */
+ +# ifdef USE_AS_WCSCHR
+ + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
+ + leaq (%rdi, %rax, 4), %rax
+ +# else
+ + addq %rdi, %rax
+ +# endif
+ +# ifndef USE_AS_STRCHRNUL
+ + cmp (%rax), %CHAR_REG
+ + cmovne %rdx, %rax
+ +# endif
+ + ret
+ +
+ + .p2align 4
+ +L(first_vec_x1):
+ + tzcntl %eax, %eax
+ + /* Found CHAR or the null byte. */
+ +# ifdef USE_AS_WCSCHR
+ + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
+ + leaq VEC_SIZE(%rdi, %rax, 4), %rax
+ +# else
+ + leaq VEC_SIZE(%rdi, %rax), %rax
+ +# endif
+ +# ifndef USE_AS_STRCHRNUL
+ + cmp (%rax), %CHAR_REG
+ + cmovne %rdx, %rax
+ +# endif
+ + ret
+ +
+ + .p2align 4
+ +L(first_vec_x2):
+ + tzcntl %eax, %eax
+ + /* Found CHAR or the null byte. */
+ +# ifdef USE_AS_WCSCHR
+ + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
+ + leaq (VEC_SIZE * 2)(%rdi, %rax, 4), %rax
+ +# else
+ + leaq (VEC_SIZE * 2)(%rdi, %rax), %rax
+ +# endif
+ +# ifndef USE_AS_STRCHRNUL
+ + cmp (%rax), %CHAR_REG
+ + cmovne %rdx, %rax
+ +# endif
+ + ret
+ +
+ +L(prep_loop_4x):
+ + /* Align data to 4 * VEC_SIZE. */
+ + andq $-(VEC_SIZE * 4), %rdi
+ +
+ + .p2align 4
+ +L(loop_4x_vec):
+ + /* Compare 4 * VEC at a time forward. */
+ + VMOVA (VEC_SIZE * 4)(%rdi), %YMM1
+ + VMOVA (VEC_SIZE * 5)(%rdi), %YMM2
+ + VMOVA (VEC_SIZE * 6)(%rdi), %YMM3
+ + VMOVA (VEC_SIZE * 7)(%rdi), %YMM4
+ +
+ + /* Leaves only CHARS matching esi as 0. */
+ + vpxorq %YMM1, %YMM0, %YMM5
+ + vpxorq %YMM2, %YMM0, %YMM6
+ + vpxorq %YMM3, %YMM0, %YMM7
+ + vpxorq %YMM4, %YMM0, %YMM8
+ +
+ + VPMINU %YMM5, %YMM1, %YMM5
+ + VPMINU %YMM6, %YMM2, %YMM6
+ + VPMINU %YMM7, %YMM3, %YMM7
+ + VPMINU %YMM8, %YMM4, %YMM8
+ +
+ + VPMINU %YMM5, %YMM6, %YMM1
+ + VPMINU %YMM7, %YMM8, %YMM2
+ +
+ + VPMINU %YMM1, %YMM2, %YMM1
+ +
+ + /* Each bit in K0 represents a CHAR or a null byte. */
+ + VPCMP $0, %YMMZERO, %YMM1, %k0
+ +
+ + addq $(VEC_SIZE * 4), %rdi
+ +
+ + ktestd %k0, %k0
+ + jz L(loop_4x_vec)
+ +
+ + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
+ + VPCMP $0, %YMMZERO, %YMM5, %k0
+ + kmovd %k0, %eax
+ + testl %eax, %eax
+ + jnz L(first_vec_x0)
+ +
+ + /* Each bit in K1 represents a CHAR or a null byte in YMM2. */
+ + VPCMP $0, %YMMZERO, %YMM6, %k1
+ + kmovd %k1, %eax
+ + testl %eax, %eax
+ + jnz L(first_vec_x1)
+ +
+ + /* Each bit in K2 represents a CHAR or a null byte in YMM3. */
+ + VPCMP $0, %YMMZERO, %YMM7, %k2
+ + /* Each bit in K3 represents a CHAR or a null byte in YMM4. */
+ + VPCMP $0, %YMMZERO, %YMM8, %k3
+ +
+ +# ifdef USE_AS_WCSCHR
+ + /* NB: Each bit in K2/K3 represents 4-byte element. */
+ + kshiftlw $8, %k3, %k1
+ +# else
+ + kshiftlq $32, %k3, %k1
+ +# endif
+ +
+ + /* Each bit in K1 represents a CHAR or a null byte. */
+ + korq %k1, %k2, %k1
+ + kmovq %k1, %rax
+ +
+ + tzcntq %rax, %rax
+ +# ifdef USE_AS_WCSCHR
+ + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
+ + leaq (VEC_SIZE * 2)(%rdi, %rax, 4), %rax
+ +# else
+ + leaq (VEC_SIZE * 2)(%rdi, %rax), %rax
+ +# endif
+ +# ifndef USE_AS_STRCHRNUL
+ + cmp (%rax), %CHAR_REG
+ + cmovne %rdx, %rax
+ +# endif
+ + ret
+ +
+ + /* Cold case for crossing page with first load. */
+ + .p2align 4
+ +L(cross_page_boundary):
+ + andq $-VEC_SIZE, %rdi
+ + andl $(VEC_SIZE - 1), %ecx
+ +
+ + VMOVA (%rdi), %YMM1
+ +
+ + /* Leaves only CHARS matching esi as 0. */
+ + vpxorq %YMM1, %YMM0, %YMM2
+ + VPMINU %YMM2, %YMM1, %YMM2
+ + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
+ + VPCMP $0, %YMMZERO, %YMM2, %k0
+ + kmovd %k0, %eax
+ + testl %eax, %eax
+ +
+ +# ifdef USE_AS_WCSCHR
+ + /* NB: Divide shift count by 4 since each bit in K1 represents 4
+ + bytes. */
+ + movl %ecx, %SHIFT_REG
+ + sarl $2, %SHIFT_REG
+ +# endif
+ +
+ + /* Remove the leading bits. */
+ + sarxl %SHIFT_REG, %eax, %eax
+ + testl %eax, %eax
+ +
+ + jz L(aligned_more)
+ + tzcntl %eax, %eax
+ + addq %rcx, %rdi
+ +# ifdef USE_AS_WCSCHR
+ + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
+ + leaq (%rdi, %rax, 4), %rax
+ +# else
+ + addq %rdi, %rax
+ +# endif
+ +# ifndef USE_AS_STRCHRNUL
+ + cmp (%rax), %CHAR_REG
+ + cmovne %rdx, %rax
+ +# endif
+ + ret
+ +
+ +END (STRCHR)
+ +# endif
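
__strchr_evex finds CHAR and the terminating null in one pass: XORing each lane with the broadcast CHAR gives zero where the lane equals CHAR, and taking the unsigned minimum of that result with the original lane gives zero where the lane equals CHAR or is already zero, so one compare-against-zero mask covers both cases. The per-byte logic behind the "Leaves only CHARS matching esi as 0" comments, as a sketch:

#include <stdint.h>

/* Nonzero iff B equals the searched character C or is the null byte;
   mirrors the vpxorq + vpminub + vpcmpb-against-zero sequence per lane.  */
static int
is_char_or_nul (uint8_t b, uint8_t c)
{
  uint8_t x = (uint8_t) (b ^ c);   /* 0 iff b == c              */
  uint8_t m = x < b ? x : b;       /* 0 iff b == c or b == 0    */
  return m == 0;
}
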
+ diff --git a/sysdeps/x86_64/multiarch/strchr.c b/sysdeps/x86_64/multiarch/strchr.c
+ index f27980dd36..a04ac8eb1d 100644
+ --- a/sysdeps/x86_64/multiarch/strchr.c
+ +++ b/sysdeps/x86_64/multiarch/strchr.c
+ @@ -29,16 +29,28 @@
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
+ +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
+ static inline void *
+ IFUNC_SELECTOR (void)
+ {
+ const struct cpu_features* cpu_features = __get_cpu_features ();
- #undef INTERNAL_SYSCALL_DECL
- -#define INTERNAL_SYSCALL_DECL(err) long err __attribute__ ((unused))
- +#define INTERNAL_SYSCALL_DECL(err) long int err __attribute__ ((unused))
+ - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+ - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+ + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ - return OPTIMIZE (avx2);
+ + {
+ + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)
+ + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)
+ + && CPU_FEATURES_CPU_P (cpu_features, BMI2))
+ + return OPTIMIZE (evex);
+ +
+ + if (CPU_FEATURES_CPU_P (cpu_features, RTM))
+ + return OPTIMIZE (avx2_rtm);
+ +
+ + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ + return OPTIMIZE (avx2);
+ + }
- #undef INTERNAL_SYSCALL_ERROR_P
- -#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long) (err))
- +#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long int) (err))
+ if (CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF))
+ return OPTIMIZE (sse2_no_bsf);
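
The selector change above (and its parallels for the other routines in this series) orders the candidates so that the EVEX build, which only touches ymm16-ymm31 and never needs VZEROUPPER, is picked whenever AVX512VL, AVX512BW and BMI2 are usable; otherwise the RTM-safe AVX2 build on CPUs with RTM; otherwise plain AVX2 unless Prefer_No_VZEROUPPER is set; with SSE2 as the fallback. Restated as a stand-alone function with hypothetical flags and pointers (the real code reads cpu_features as shown, and the Slow_BSF split of the SSE2 fallback is omitted here):

typedef char *(*strchr_impl) (const char *, int);

static strchr_impl
select_strchr (int avx2_usable, int fast_unaligned, int avx512vl,
               int avx512bw, int bmi2, int rtm, int prefer_no_vzeroupper,
               strchr_impl evex, strchr_impl avx2_rtm, strchr_impl avx2,
               strchr_impl sse2)
{
  if (avx2_usable && fast_unaligned)
    {
      if (avx512vl && avx512bw && bmi2)
        return evex;
      if (rtm)
        return avx2_rtm;
      if (!prefer_no_vzeroupper)
        return avx2;
    }
  return sse2;
}
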
+ diff --git a/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S b/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S
+ new file mode 100644
+ index 0000000000..cdcf818b91
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S
+ @@ -0,0 +1,3 @@
+ +#define STRCHR __strchrnul_avx2_rtm
+ +#define USE_AS_STRCHRNUL 1
+ +#include "strchr-avx2-rtm.S"
+ diff --git a/sysdeps/x86_64/multiarch/strchrnul-evex.S b/sysdeps/x86_64/multiarch/strchrnul-evex.S
+ new file mode 100644
+ index 0000000000..064fe7ca9e
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/strchrnul-evex.S
+ @@ -0,0 +1,3 @@
+ +#define STRCHR __strchrnul_evex
+ +#define USE_AS_STRCHRNUL 1
+ +#include "strchr-evex.S"
+ diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S
+ new file mode 100644
+ index 0000000000..aecd30d97f
+ --- /dev/null
+ +++ b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S
+ @@ -0,0 +1,12 @@
+ +#ifndef STRCMP
+ +# define STRCMP __strcmp_avx2_rtm
+ +#endif
+ +
+ +#define ZERO_UPPER_VEC_REGISTERS_RETURN \
+ + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
+ +
+ +#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
+ +
+ +#define SECTION(p) p##.avx.rtm
+ +
+ +#include "strcmp-avx2.S"
+ diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
+ index 48d03a9f46..4d434fd14e 100644
+ --- a/sysdeps/x86_64/multiarch/strcmp-avx2.S
+ +++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
+ @@ -55,6 +55,10 @@
+ # define VZEROUPPER vzeroupper
+ # endif
- #undef INTERNAL_SYSCALL_ERRNO
- #define INTERNAL_SYSCALL_ERRNO(val, err) ((void) (err), val)
- @@ -108,13 +108,13 @@
+ +# ifndef SECTION
+ +# define SECTION(p) p##.avx
+ +# endif
+ +
+ /* Warning!
+ wcscmp/wcsncmp have to use SIGNED comparison for elements.
+ strcmp/strncmp have to use UNSIGNED comparison for elements.
+ @@ -75,7 +79,7 @@
+ the maximum offset is reached before a difference is found, zero is
+ returned. */
+
+ - .section .text.avx,"ax",@progbits
+ + .section SECTION(.text),"ax",@progbits
+ ENTRY (STRCMP)
+ # ifdef USE_AS_STRNCMP
+ /* Check for simple cases (0 or 1) in offset. */
+ @@ -83,6 +87,16 @@ ENTRY (STRCMP)
+ je L(char0)
+ jb L(zero)
+ # ifdef USE_AS_WCSCMP
+ +# ifndef __ILP32__
+ + movq %rdx, %rcx
+ + /* Check if length could overflow when multiplied by
+ + sizeof(wchar_t). Checking top 8 bits will cover all potential
+ + overflow cases as well as redirect cases where it is impossible for
+ + the length to bound a valid memory region. In these cases just use
+ + 'wcscmp'. */
+ + shrq $56, %rcx
+ + jnz OVERFLOW_STRCMP
+ +# endif
+ /* Convert units: from wide to byte char. */
+ shl $2, %RDX_LP
+ # endif
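
For wcsncmp on LP64, the new check above rejects length arguments whose top 8 bits are non-zero before the count is scaled to bytes (shl $2): such a count cannot bound a valid object and multiplying it by sizeof(wchar_t) could overflow, so the code branches to OVERFLOW_STRCMP (the unbounded wide-character compare) instead. A hedged C rendering of that guard, with names chosen for illustration only:

#include <stddef.h>
#include <wchar.h>

/* Sketch of the length sanity check; assumes 64-bit size_t.  Fall back
   to the unbounded compare when LEN cannot describe real memory.  */
static int
wcsncmp_with_guard (const wchar_t *s1, const wchar_t *s2, size_t len)
{
  if (len >> 56)
    return wcscmp (s1, s2);        /* stands in for OVERFLOW_STRCMP */
  return wcsncmp (s1, s2, len);    /* normal length-bounded path    */
}
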
+ @@ -127,8 +141,8 @@ L(return):
+ movzbl (%rsi, %rdx), %edx
+ subl %edx, %eax
+ # endif
+ - VZEROUPPER
+ - ret
+ +L(return_vzeroupper):
+ + ZERO_UPPER_VEC_REGISTERS_RETURN
- #define internal_syscall0(v0_init, input, number, err, dummy...) \
- ({ \
- - long _sys_result; \
- + long int _sys_result; \
- \
- { \
- - register long __s0 asm ("$16") __attribute__ ((unused)) \
- + register long int __s0 asm ("$16") __attribute__ ((unused)) \
- = (number); \
- - register long __v0 asm ("$2"); \
- - register long __a3 asm ("$7"); \
- + register long int __v0 asm ("$2"); \
- + register long int __a3 asm ("$7"); \
- __asm__ volatile ( \
- ".set\tnoreorder\n\t" \
- v0_init \
- @@ -131,14 +131,15 @@
+ .p2align 4
+ L(return_vec_size):
+ @@ -161,8 +175,7 @@ L(return_vec_size):
+ subl %edx, %eax
+ # endif
+ # endif
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
- #define internal_syscall1(v0_init, input, number, err, arg1) \
- ({ \
- - long _sys_result; \
- + long int _sys_result; \
- \
- { \
- - register long __s0 asm ("$16") __attribute__ ((unused)) \
- + long int _arg1 = (long int) (arg1); \
- + register long int __s0 asm ("$16") __attribute__ ((unused)) \
- = (number); \
- - register long __v0 asm ("$2"); \
- - register long __a0 asm ("$4") = (long) (arg1); \
- - register long __a3 asm ("$7"); \
- + register long int __v0 asm ("$2"); \
- + register long int __a0 asm ("$4") = _arg1; \
- + register long int __a3 asm ("$7"); \
- __asm__ volatile ( \
- ".set\tnoreorder\n\t" \
- v0_init \
- @@ -155,15 +156,17 @@
+ .p2align 4
+ L(return_2_vec_size):
+ @@ -195,8 +208,7 @@ L(return_2_vec_size):
+ subl %edx, %eax
+ # endif
+ # endif
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
- #define internal_syscall2(v0_init, input, number, err, arg1, arg2) \
- ({ \
- - long _sys_result; \
- + long int _sys_result; \
- \
- { \
- - register long __s0 asm ("$16") __attribute__ ((unused)) \
- + long int _arg1 = (long int) (arg1); \
- + long int _arg2 = (long int) (arg2); \
- + register long int __s0 asm ("$16") __attribute__ ((unused)) \
- = (number); \
- - register long __v0 asm ("$2"); \
- - register long __a0 asm ("$4") = (long) (arg1); \
- - register long __a1 asm ("$5") = (long) (arg2); \
- - register long __a3 asm ("$7"); \
- + register long int __v0 asm ("$2"); \
- + register long int __a0 asm ("$4") = _arg1; \
- + register long int __a1 asm ("$5") = _arg2; \
- + register long int __a3 asm ("$7"); \
- __asm__ volatile ( \
- ".set\tnoreorder\n\t" \
- v0_init \
- @@ -181,16 +184,19 @@
- #define internal_syscall3(v0_init, input, number, err, \
- arg1, arg2, arg3) \
- ({ \
- - long _sys_result; \
- + long int _sys_result; \
- \
- { \
- - register long __s0 asm ("$16") __attribute__ ((unused)) \
- + long int _arg1 = (long int) (arg1); \
- + long int _arg2 = (long int) (arg2); \
- + long int _arg3 = (long int) (arg3); \
- + register long int __s0 asm ("$16") __attribute__ ((unused)) \
- = (number); \
- - register long __v0 asm ("$2"); \
- - register long __a0 asm ("$4") = (long) (arg1); \
- - register long __a1 asm ("$5") = (long) (arg2); \
- - register long __a2 asm ("$6") = (long) (arg3); \
- - register long __a3 asm ("$7"); \
- + register long int __v0 asm ("$2"); \
- + register long int __a0 asm ("$4") = _arg1; \
- + register long int __a1 asm ("$5") = _arg2; \
- + register long int __a2 asm ("$6") = _arg3; \
- + register long int __a3 asm ("$7"); \
- __asm__ volatile ( \
- ".set\tnoreorder\n\t" \
- v0_init \
- @@ -208,16 +214,20 @@
- #define internal_syscall4(v0_init, input, number, err, \
- arg1, arg2, arg3, arg4) \
- ({ \
- - long _sys_result; \
- + long int _sys_result; \
- \
- { \
- - register long __s0 asm ("$16") __attribute__ ((unused)) \
- + long int _arg1 = (long int) (arg1); \
- + long int _arg2 = (long int) (arg2); \
- + long int _arg3 = (long int) (arg3); \
- + long int _arg4 = (long int) (arg4); \
- + register long int __s0 asm ("$16") __attribute__ ((unused)) \
- = (number); \
- - register long __v0 asm ("$2"); \
- - register long __a0 asm ("$4") = (long) (arg1); \
- - register long __a1 asm ("$5") = (long) (arg2); \
- - register long __a2 asm ("$6") = (long) (arg3); \
- - register long __a3 asm ("$7") = (long) (arg4); \
- + register long int __v0 asm ("$2"); \
- + register long int __a0 asm ("$4") = _arg1; \
- + register long int __a1 asm ("$5") = _arg2; \
- + register long int __a2 asm ("$6") = _arg3; \
- + register long int __a3 asm ("$7") = _arg4; \
- __asm__ volatile ( \
- ".set\tnoreorder\n\t" \
- v0_init \
- @@ -235,17 +245,22 @@
- #define internal_syscall5(v0_init, input, number, err, \
- arg1, arg2, arg3, arg4, arg5) \
- ({ \
- - long _sys_result; \
- + long int _sys_result; \
- \
- { \
- - register long __s0 asm ("$16") __attribute__ ((unused)) \
- + long int _arg1 = (long int) (arg1); \
- + long int _arg2 = (long int) (arg2); \
- + long int _arg3 = (long int) (arg3); \
- + long int _arg4 = (long int) (arg4); \
- + long int _arg5 = (long int) (arg5); \
- + register long int __s0 asm ("$16") __attribute__ ((unused)) \
- = (number); \
- - register long __v0 asm ("$2"); \
- - register long __a0 asm ("$4") = (long) (arg1); \
- - register long __a1 asm ("$5") = (long) (arg2); \
- - register long __a2 asm ("$6") = (long) (arg3); \
- - register long __a3 asm ("$7") = (long) (arg4); \
- - register long __a4 asm ("$8") = (long) (arg5); \
- + register long int __v0 asm ("$2"); \
- + register long int __a0 asm ("$4") = _arg1; \
- + register long int __a1 asm ("$5") = _arg2; \
- + register long int __a2 asm ("$6") = _arg3; \
- + register long int __a3 asm ("$7") = _arg4; \
- + register long int __a4 asm ("$8") = _arg5; \
- __asm__ volatile ( \
- ".set\tnoreorder\n\t" \
- v0_init \
- @@ -263,18 +278,24 @@
- #define internal_syscall6(v0_init, input, number, err, \
- arg1, arg2, arg3, arg4, arg5, arg6) \
- ({ \
- - long _sys_result; \
- + long int _sys_result; \
- \
- { \
- - register long __s0 asm ("$16") __attribute__ ((unused)) \
- + long int _arg1 = (long int) (arg1); \
- + long int _arg2 = (long int) (arg2); \
- + long int _arg3 = (long int) (arg3); \
- + long int _arg4 = (long int) (arg4); \
- + long int _arg5 = (long int) (arg5); \
- + long int _arg6 = (long int) (arg6); \
- + register long int __s0 asm ("$16") __attribute__ ((unused)) \
- = (number); \
- - register long __v0 asm ("$2"); \
- - register long __a0 asm ("$4") = (long) (arg1); \
- - register long __a1 asm ("$5") = (long) (arg2); \
- - register long __a2 asm ("$6") = (long) (arg3); \
- - register long __a3 asm ("$7") = (long) (arg4); \
- - register long __a4 asm ("$8") = (long) (arg5); \
- - register long __a5 asm ("$9") = (long) (arg6); \
- + register long int __v0 asm ("$2"); \
- + register long int __a0 asm ("$4") = _arg1; \
- + register long int __a1 asm ("$5") = _arg2; \
- + register long int __a2 asm ("$6") = _arg3; \
- + register long int __a3 asm ("$7") = _arg4; \
- + register long int __a4 asm ("$8") = _arg5; \
- + register long int __a5 asm ("$9") = _arg6; \
- __asm__ volatile ( \
- ".set\tnoreorder\n\t" \
- v0_init \
- diff --git a/sysdeps/unix/sysv/linux/mips/mips64/syscall.S b/sysdeps/unix/sysv/linux/mips/mips64/syscall.S
- index 26adf2cd04..a9baff3c17 100644
- --- a/sysdeps/unix/sysv/linux/mips/mips64/syscall.S
- +++ b/sysdeps/unix/sysv/linux/mips/mips64/syscall.S
- @@ -20,7 +20,7 @@
- #include <sys/asm.h>
+ .p2align 4
+ L(return_3_vec_size):
+ @@ -229,8 +241,7 @@ L(return_3_vec_size):
+ subl %edx, %eax
+ # endif
+ # endif
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ L(next_3_vectors):
+ @@ -356,8 +367,7 @@ L(back_to_loop):
+ subl %edx, %eax
+ # endif
+ # endif
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ L(test_vec):
+ @@ -400,8 +410,7 @@ L(test_vec):
+ subl %edx, %eax
+ # endif
+ # endif
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ L(test_2_vec):
+ @@ -444,8 +453,7 @@ L(test_2_vec):
+ subl %edx, %eax
+ # endif
+ # endif
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ L(test_3_vec):
+ @@ -486,8 +494,7 @@ L(test_3_vec):
+ subl %edx, %eax
+ # endif
+ # endif
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
- /* Usage:
- - long syscall (syscall_number, arg1, arg2, arg3, arg4, arg5, arg6, arg7)
- + long int syscall (syscall_number, arg1, arg2, arg3, arg4, arg5, arg6, arg7)
+ .p2align 4
+ L(loop_cross_page):
+ @@ -556,8 +563,7 @@ L(loop_cross_page):
+ subl %edx, %eax
+ # endif
+ # endif
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
- We need to do some arg shifting, syscall_number will be in v0. */
+ .p2align 4
+ L(loop_cross_page_2_vec):
+ @@ -591,7 +597,14 @@ L(loop_cross_page_2_vec):
+ movl $(PAGE_SIZE / (VEC_SIZE * 4) - 1), %esi
- diff --git a/sysdeps/unix/sysv/linux/mips/sysdep.h b/sysdeps/unix/sysv/linux/mips/sysdep.h
- index cdfc0b1b58..a4cf1540fe 100644
- --- a/sysdeps/unix/sysv/linux/mips/sysdep.h
- +++ b/sysdeps/unix/sysv/linux/mips/sysdep.h
- @@ -36,8 +36,8 @@
- the INTERNAL_SYSCALL_{ERROR_P,ERRNO} macros work correctly. */
- #define INTERNAL_VSYSCALL_CALL(funcptr, err, nr, args...) \
- ({ \
- - long _ret = funcptr (args); \
- - err = ((unsigned long) (_ret) >= (unsigned long) -4095L); \
- + long int _ret = funcptr (args); \
- + err = ((unsigned long int) (_ret) >= (unsigned long int) -4095L); \
- if (err) \
- _ret = -_ret; \
- _ret; \
- diff --git a/sysdeps/unix/sysv/linux/mips/unwind-arch.h b/sysdeps/unix/sysv/linux/mips/unwind-arch.h
+ testq %rdi, %rdi
+ +# ifdef USE_AS_STRNCMP
+ +	/* At this point, if %rdi is 0, the loop has already tested
+ +	   VEC_SIZE*4+%r10 bytes starting from %rax.  This label checks
+ +	   whether the strncmp maximum offset has been reached.  */
+ + je L(string_nbyte_offset_check)
+ +# else
+ je L(back_to_loop)
+ +# endif
+ tzcntq %rdi, %rcx
+ addq %r10, %rcx
+ /* Adjust for number of bytes skipped. */
+ @@ -624,8 +637,15 @@ L(loop_cross_page_2_vec):
+ subl %edx, %eax
+ # endif
+ # endif
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+ +
+ +# ifdef USE_AS_STRNCMP
+ +L(string_nbyte_offset_check):
+ + leaq (VEC_SIZE * 4)(%r10), %r10
+ + cmpq %r10, %r11
+ + jbe L(zero)
+ + jmp L(back_to_loop)
+ +# endif
+
+ .p2align 4
+ L(cross_page_loop):
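
Read as C, the new L(string_nbyte_offset_check) path added above amounts to the bound check sketched below (variable names are illustrative; the assembly keeps the running offset in %r10 and the strncmp length bound in %r11):

    #include <stdint.h>

    #define VEC_SIZE 32	/* AVX2 vector width in bytes.  */

    /* Reached when the 4*VEC_SIZE block starting at OFFSET compared equal.
       Returns nonzero when the strncmp bound is exhausted, in which case the
       caller returns 0; otherwise it re-enters the main loop.  */
    static inline int
    strncmp_limit_reached (uint64_t offset, uint64_t bound)
    {
      offset += 4 * VEC_SIZE;	/* "leaq (VEC_SIZE * 4)(%r10), %r10"  */
      return bound <= offset;	/* "cmpq %r10, %r11; jbe L(zero)"  */
    }
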
+ @@ -659,8 +679,7 @@ L(cross_page_loop):
+ # ifndef USE_AS_WCSCMP
+ L(different):
+ # endif
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ # ifdef USE_AS_WCSCMP
+ .p2align 4
+ @@ -670,16 +689,14 @@ L(different):
+ setl %al
+ negl %eax
+ orl $1, %eax
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+ # endif
+
+ # ifdef USE_AS_STRNCMP
+ .p2align 4
+ L(zero):
+ xorl %eax, %eax
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ .p2align 4
+ L(char0):
+ @@ -693,8 +710,7 @@ L(char0):
+ movzbl (%rdi), %eax
+ subl %ecx, %eax
+ # endif
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+ # endif
+
+ .p2align 4
+ @@ -719,8 +735,7 @@ L(last_vector):
+ movzbl (%rsi, %rdx), %edx
+ subl %edx, %eax
+ # endif
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+
+ 	/* Comparing on a page boundary region requires special treatment:
+ 	   it must be done one vector at a time, starting with the wider
+ @@ -841,7 +856,6 @@ L(cross_page_4bytes):
+ testl %eax, %eax
+ jne L(cross_page_loop)
+ subl %ecx, %eax
+ - VZEROUPPER
+ - ret
+ + VZEROUPPER_RETURN
+ END (STRCMP)
+ #endif
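
The cross-page handling referred to in the comment a few lines above reduces to a check of this shape (a sketch assuming 4 KiB pages; PAGE_SIZE and VEC_SIZE mirror the constants used by the assembly):

    #include <stdint.h>

    #define PAGE_SIZE 4096
    #define VEC_SIZE 32	/* AVX2 vector width in bytes.  */

    /* A full-width vector load starting at P is safe only if it cannot run
       past the end of P's page; when it would, the code falls back to
       narrower loads until the boundary has been crossed.  */
    static inline int
    vec_load_crosses_page (const void *p)
    {
      return ((uintptr_t) p & (PAGE_SIZE - 1)) > PAGE_SIZE - VEC_SIZE;
    }
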
+ diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S
new file mode 100644
- index 0000000000..a009899983
+ index 0000000000..459eeed09f
--- /dev/null
- +++ b/sysdeps/unix/sysv/linux/mips/unwind-arch.h
- @@ -0,0 +1,67 @@
- +/* Return backtrace of current program state. Arch-specific bits.
- + Copyright (C) 2020 Free Software Foundation, Inc.
+ +++ b/sysdeps/x86_64/multiarch/strcmp-evex.S
+ @@ -0,0 +1,1043 @@
+ +/* strcmp/wcscmp/strncmp/wcsncmp optimized with 256-bit EVEX instructions.
+ + Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or