From: Peter Michael Green
Date: Sat, 20 May 2023 09:21:41 +0000 (+0100)
Subject: Manual merge of version 2.31-13+rpi1+deb11u3 and 2.31-13+deb11u6 to produce 2.31...
X-Git-Tag: archive/raspbian/2.31-13+rpi1+deb11u6^0
X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=68f773c96372ce7cc5f13663033c3eae5754adcd;p=glibc.git

Manual merge of version 2.31-13+rpi1+deb11u3 and 2.31-13+deb11u6 to produce 2.31-13+rpi1+deb11u6
---

68f773c96372ce7cc5f13663033c3eae5754adcd
diff --cc debian/changelog
index 098a25ed5,a12b2b432..157c12b7a
--- a/debian/changelog
+++ b/debian/changelog
@@@ -1,19 -1,48 +1,65 @@@
- glibc (2.31-13+rpi1+deb11u3) bullseye-staging; urgency=medium
++glibc (2.31-13+rpi1+deb11u6) bullseye-staging; urgency=medium
+
+ [changes brought forward from 2.25-2+rpi1 by Peter Michael Green at Wed, 29 Nov 2017 03:00:21 +0000]
+ * Disable testsuite.
+
+ [changes introduced in 2.29-9+rpi1 by Peter Michale Green]
+ * Change mode on scripts/check-obsolete-constructs.py to 644,
+ dgit does not like mode 755 files created by patches and the
+ script does not seem to be used for anything in the Debian
+ package.
+
+ [changes introduced in 2.31-13+rpi1+deb11u3 by Peter Michael Green]
+ * Change mode on sysdeps/x86_64/configure to 644, same dgit issue
+ as above.
+
- -- Peter Michael Green Tue, 29 Mar 2022 22:11:31 +0000
++ -- Peter Michael Green Sat, 20 May 2023 09:21:02 +0000
++
+ glibc (2.31-13+deb11u6) bullseye; urgency=medium
+
+ [ Aurelien Jarno ]
+ * debian/patches/git-updates.diff: update from upstream stable branch:
+ - Drop debian/patches/amd64/local-require-bmi-in-avx2-ifunc.diff
+ (obsolete).
+ - Fix memory leak in printf-family functions with long multibyte strings.
+ - Fix a crash in printf-family due to width/precision-dependent
+ allocations.
+ - Fix a segfault in printf handling thousands separator.
+ - Fix an overflow in the AVX2 implementation of wcsnlen when crossing
+ pages.
+
+ -- Aurelien Jarno Wed, 19 Apr 2023 23:17:51 +0200
+
+ glibc (2.31-13+deb11u5) bullseye; urgency=medium
+
+ * debian/patches/local-require-bmi-in-avx2-ifunc.diff: new patch extracted
+ from an upstream commit, to change the AVX2 ifunc selector to require the
+ BMI2 feature. It happened that the wmemchr and wcslen changes backported
+ in 2.31-13+deb11u4 relied on that commit which got forgotten.
+ Closes: #1019855.
+
+ -- Aurelien Jarno Fri, 14 Oct 2022 21:35:00 +0200
+
+ glibc (2.31-13+deb11u4) bullseye; urgency=medium
+
+ [ Aurelien Jarno ]
+ * debian/debhelper.in/libc-dev.NEWS: New file to explain how to update
+ programs to use the TI-RPC library instead of the Sun RPC one. Closes:
+ #1014735.
+ * debian/patches/git-updates.diff: update from upstream stable branch:
+ - Fix an off-by-one buffer overflow/underflow in getcwd() (CVE-2021-3999).
+ - Fix an overflow bug in the SSE2 and AVX2 implementations of wmemchr.
+ - Fix an overflow bug in the SSE4.1 and AVX2 implementations of wcslen and
+ wcsncat.
+ - Fix an overflow bug in the AVX2 and EVEX implementation of wcsncmp.
+ - Add a few EVEX optimized string functions to fix a performance issue (up
+ to 40%) with Skylake-X processors.
+ - Make grantpt usable after multi-threaded fork. Closes: #1015740.
+ - debian/patches/hurd-i386/git-posix_openpt.diff: rebase.
+ * debian/rules.d/build.mk: pass --with-default-link=no to configure to
+ ensure that libio vtable protection is enabled.
+ + -- Aurelien Jarno Fri, 26 Aug 2022 23:32:46 +0200 glibc (2.31-13+deb11u3) bullseye; urgency=medium diff --cc debian/patches/git-updates.diff index f6c05db17,63246ab1a..cfa38a480 --- a/debian/patches/git-updates.diff +++ b/debian/patches/git-updates.diff @@@ -5888,2070 -7057,10745 +7057,10743 @@@ index 0000000000..48bb6d7ca + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public - + License along with the GNU C Library; if not, see + + License along with the GNU C Library. If not, see + . */ + - +#ifndef _UNWIND_ARCH_H - +#define _UNWIND_ARCH_H + +#include + - +#include + +/* Assumptions: + + * + + * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses. + + * + + */ + - +static inline void * - +unwind_arch_adjustment (void *prev, void *addr) - +{ - + return addr; - +} + +#define dstin x0 + +#define src x1 + +#define count x2 + +#define dst x3 + +#define srcend x4 + +#define dstend x5 + +#define A_l x6 + +#define A_lw w6 + +#define A_h x7 + +#define B_l x8 + +#define B_lw w8 + +#define B_h x9 + +#define C_lw w10 + +#define tmp1 x14 + - +#endif - diff --git a/sysdeps/hppa/dl-fptr.c b/sysdeps/hppa/dl-fptr.c - index 0a37397284..25ca8f8463 100644 - --- a/sysdeps/hppa/dl-fptr.c - +++ b/sysdeps/hppa/dl-fptr.c - @@ -172,8 +172,8 @@ make_fdesc (ElfW(Addr) ip, ElfW(Addr) gp) - } - - install: - - fdesc->ip = ip; - fdesc->gp = gp; - + fdesc->ip = ip; - - return (ElfW(Addr)) fdesc; - } - @@ -350,7 +350,9 @@ ElfW(Addr) - _dl_lookup_address (const void *address) - { - ElfW(Addr) addr = (ElfW(Addr)) address; - - unsigned int *desc, *gptr; - + ElfW(Word) reloc_arg; - + volatile unsigned int *desc; - + unsigned int *gptr; - - /* Return ADDR if the least-significant two bits of ADDR are not consistent - with ADDR being a linker defined function pointer. The normal value for - @@ -367,7 +369,11 @@ _dl_lookup_address (const void *address) - if (!_dl_read_access_allowed (desc)) - return addr; - - - /* Load first word of candidate descriptor. It should be a pointer - + /* First load the relocation offset. */ - + reloc_arg = (ElfW(Word)) desc[1]; - + atomic_full_barrier(); + +#define A_q q0 + +#define B_q q1 + +#define C_q q2 + +#define D_q q3 + +#define E_q q4 + +#define F_q q5 + +#define G_q q6 + +#define H_q q7 + - + /* Then load first word of candidate descriptor. It should be a pointer - with word alignment and point to memory that can be read. */ - gptr = (unsigned int *) desc[0]; - if (((unsigned int) gptr & 3) != 0 - @@ -377,8 +383,8 @@ _dl_lookup_address (const void *address) - /* See if descriptor requires resolution. The following trampoline is - used in each global offset table for function resolution: - - - ldw 0(r20),r22 - - bv r0(r22) - + ldw 0(r20),r21 - + bv r0(r21) - ldw 4(r20),r21 - tramp: b,l .-12,r20 - depwi 0,31,2,r20 - @@ -389,7 +395,15 @@ _dl_lookup_address (const void *address) - if (gptr[0] == 0xea9f1fdd /* b,l .-12,r20 */ - && gptr[1] == 0xd6801c1e /* depwi 0,31,2,r20 */ - && (ElfW(Addr)) gptr[2] == elf_machine_resolve ()) - - _dl_fixup ((struct link_map *) gptr[5], (ElfW(Word)) desc[1]); - + { - + struct link_map *l = (struct link_map *) gptr[5]; + - + /* If gp has been resolved, we need to hunt for relocation offset. */ - + if (!(reloc_arg & PA_GP_RELOC)) - + reloc_arg = _dl_fix_reloc_arg (addr, l); + +/* This implementation supports both memcpy and memmove and shares most code. + + It uses unaligned accesses and branchless sequences to keep the code small, + + simple and improve performance. 
+ - + _dl_fixup (l, reloc_arg); - + } - - return (ElfW(Addr)) desc[0]; - } - diff --git a/sysdeps/hppa/dl-machine.h b/sysdeps/hppa/dl-machine.h - index 9e98366ea3..8ecff97706 100644 - --- a/sysdeps/hppa/dl-machine.h - +++ b/sysdeps/hppa/dl-machine.h - @@ -48,6 +48,14 @@ - #define GOT_FROM_PLT_STUB (4*4) - #define PLT_ENTRY_SIZE (2*4) - - +/* The gp slot in the function descriptor contains the relocation offset - + before resolution. To distinguish between a resolved gp value and an - + unresolved relocation offset we set an unused bit in the relocation - + offset. This would allow us to do a synchronzied two word update - + using this bit (interlocked update), but instead of waiting for the - + update we simply recompute the gp value given that we know the ip. */ - +#define PA_GP_RELOC 1 + + Copies are split into 3 main cases: small copies of up to 32 bytes, medium + + copies of up to 128 bytes, and large copies. The overhead of the overlap + + check in memmove is negligible since it is only required for large copies. + - /* Initialize the function descriptor table before relocations */ - static inline void - __hppa_init_bootstrap_fdesc_table (struct link_map *map) - @@ -117,10 +125,28 @@ elf_machine_fixup_plt (struct link_map *map, lookup_t t, - volatile Elf32_Addr *rfdesc = reloc_addr; - /* map is the link_map for the caller, t is the link_map for the object - being called */ - - rfdesc[1] = value.gp; - - /* Need to ensure that the gp is visible before the code - - entry point is updated */ - - rfdesc[0] = value.ip; + + Large copies use a software pipelined loop processing 64 bytes per + + iteration. The destination pointer is 16-byte aligned to minimize + + unaligned accesses. The loop tail is handled by always copying 64 bytes + + from the end. */ + - + /* We would like the function descriptor to be double word aligned. This - + helps performance (ip and gp then reside on the same cache line) and - + we can update the pair atomically with a single store. The linker - + now ensures this alignment but we still have to handle old code. */ - + if ((unsigned int)reloc_addr & 7) - + { - + /* Need to ensure that the gp is visible before the code - + entry point is updated */ - + rfdesc[1] = value.gp; - + atomic_full_barrier(); - + rfdesc[0] = value.ip; - + } - + else - + { - + /* Update pair atomically with floating point store. */ - + union { ElfW(Word) v[2]; double d; } u; + +ENTRY (__memcpy_simd) + + DELOUSE (0) + + DELOUSE (1) + + DELOUSE (2) + - + u.v[0] = value.ip; - + u.v[1] = value.gp; - + *(volatile double *)rfdesc = u.d; - + } - return value; - } - - @@ -265,7 +291,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) - here. The trampoline code will load the proper - LTP and pass the reloc offset to the fixup - function. */ - - fptr->gp = iplt - jmprel; - + fptr->gp = (iplt - jmprel) | PA_GP_RELOC; - } /* r_sym != 0 */ - else - { - diff --git a/sysdeps/hppa/dl-runtime.c b/sysdeps/hppa/dl-runtime.c - new file mode 100644 - index 0000000000..885a3f1837 - --- /dev/null - +++ b/sysdeps/hppa/dl-runtime.c - @@ -0,0 +1,58 @@ - +/* On-demand PLT fixup for shared objects. HPPA version. - + Copyright (C) 2019 Free Software Foundation, Inc. - + This file is part of the GNU C Library. 
+ + add srcend, src, count + + add dstend, dstin, count + + cmp count, 128 + + b.hi L(copy_long) + + cmp count, 32 + + b.hi L(copy32_128) + - + The GNU C Library is free software; you can redistribute it and/or - + modify it under the terms of the GNU Lesser General Public - + License as published by the Free Software Foundation; either - + version 2.1 of the License, or (at your option) any later version. + + /* Small copies: 0..32 bytes. */ + + cmp count, 16 + + b.lo L(copy16) + + ldr A_q, [src] + + ldr B_q, [srcend, -16] + + str A_q, [dstin] + + str B_q, [dstend, -16] + + ret + - + The GNU C Library is distributed in the hope that it will be useful, - + but WITHOUT ANY WARRANTY; without even the implied warranty of - + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - + Lesser General Public License for more details. + + /* Copy 8-15 bytes. */ + +L(copy16): + + tbz count, 3, L(copy8) + + ldr A_l, [src] + + ldr A_h, [srcend, -8] + + str A_l, [dstin] + + str A_h, [dstend, -8] + + ret + - + You should have received a copy of the GNU Lesser General Public - + License along with the GNU C Library; if not, write to the Free - + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - + 02111-1307 USA. */ + + /* Copy 4-7 bytes. */ + +L(copy8): + + tbz count, 2, L(copy4) + + ldr A_lw, [src] + + ldr B_lw, [srcend, -4] + + str A_lw, [dstin] + + str B_lw, [dstend, -4] + + ret + - +/* Clear PA_GP_RELOC bit in relocation offset. */ - +#define reloc_offset (reloc_arg & ~PA_GP_RELOC) - +#define reloc_index (reloc_arg & ~PA_GP_RELOC) / sizeof (PLTREL) + + /* Copy 0..3 bytes using a branchless sequence. */ + +L(copy4): + + cbz count, L(copy0) + + lsr tmp1, count, 1 + + ldrb A_lw, [src] + + ldrb C_lw, [srcend, -1] + + ldrb B_lw, [src, tmp1] + + strb A_lw, [dstin] + + strb B_lw, [dstin, tmp1] + + strb C_lw, [dstend, -1] + +L(copy0): + + ret + - +#include + + .p2align 4 + + /* Medium copies: 33..128 bytes. */ + +L(copy32_128): + + ldp A_q, B_q, [src] + + ldp C_q, D_q, [srcend, -32] + + cmp count, 64 + + b.hi L(copy128) + + stp A_q, B_q, [dstin] + + stp C_q, D_q, [dstend, -32] + + ret + - +/* The caller has encountered a partially relocated function descriptor. - + The gp of the descriptor has been updated, but not the ip. We find - + the function descriptor again and compute the relocation offset and - + return that to the caller. The caller will continue on to call - + _dl_fixup with the relocation offset. */ + + .p2align 4 + + /* Copy 65..128 bytes. */ + +L(copy128): + + ldp E_q, F_q, [src, 32] + + cmp count, 96 + + b.ls L(copy96) + + ldp G_q, H_q, [srcend, -64] + + stp G_q, H_q, [dstend, -64] + +L(copy96): + + stp A_q, B_q, [dstin] + + stp E_q, F_q, [dstin, 32] + + stp C_q, D_q, [dstend, -32] + + ret + - +ElfW(Word) - +attribute_hidden __attribute ((noinline)) ARCH_FIXUP_ATTRIBUTE - +_dl_fix_reloc_arg (struct fdesc *fptr, struct link_map *l) - +{ - + Elf32_Addr l_addr, iplt, jmprel, end_jmprel, r_type; - + const Elf32_Rela *reloc; + + /* Align loop64 below to 16 bytes. */ + + nop + - + l_addr = l->l_addr; - + jmprel = D_PTR(l, l_info[DT_JMPREL]); - + end_jmprel = jmprel + l->l_info[DT_PLTRELSZ]->d_un.d_val; + + /* Copy more than 128 bytes. */ + +L(copy_long): + + /* Copy 16 bytes and then align src to 16-byte alignment. */ + + ldr D_q, [src] + + and tmp1, src, 15 + + bic src, src, 15 + + sub dst, dstin, tmp1 + + add count, count, tmp1 /* Count is now 16 too large. 
*/ + + ldp A_q, B_q, [src, 16] + + str D_q, [dstin] + + ldp C_q, D_q, [src, 48] + + subs count, count, 128 + 16 /* Test and readjust count. */ + + b.ls L(copy64_from_end) + +L(loop64): + + stp A_q, B_q, [dst, 16] + + ldp A_q, B_q, [src, 80] + + stp C_q, D_q, [dst, 48] + + ldp C_q, D_q, [src, 112] + + add src, src, 64 + + add dst, dst, 64 + + subs count, count, 64 + + b.hi L(loop64) + - + /* Look for the entry... */ - + for (iplt = jmprel; iplt < end_jmprel; iplt += sizeof (Elf32_Rela)) - + { - + reloc = (const Elf32_Rela *) iplt; - + r_type = ELF32_R_TYPE (reloc->r_info); + + /* Write the last iteration and copy 64 bytes from the end. */ + +L(copy64_from_end): + + ldp E_q, F_q, [srcend, -64] + + stp A_q, B_q, [dst, 16] + + ldp A_q, B_q, [srcend, -32] + + stp C_q, D_q, [dst, 48] + + stp E_q, F_q, [dstend, -64] + + stp A_q, B_q, [dstend, -32] + + ret + - + if (__builtin_expect (r_type == R_PARISC_IPLT, 1) - + && fptr == (struct fdesc *) (reloc->r_offset + l_addr)) - + /* Found entry. Return the reloc offset. */ - + return iplt - jmprel; - + } + +END (__memcpy_simd) + +libc_hidden_builtin_def (__memcpy_simd) + - + /* Crash if we weren't passed a valid function pointer. */ - + ABORT_INSTRUCTION; - + return 0; - +} - diff --git a/sysdeps/hppa/dl-trampoline.S b/sysdeps/hppa/dl-trampoline.S - index 0114ca8b19..d0804b30c0 100644 - --- a/sysdeps/hppa/dl-trampoline.S - +++ b/sysdeps/hppa/dl-trampoline.S - @@ -31,7 +31,7 @@ - slow down __cffc when it attempts to call fixup to resolve function - descriptor references. Please refer to gcc/gcc/config/pa/fptr.c - - - Enter with r19 = reloc offset, r20 = got-8, r21 = fixup ltp. */ - + Enter with r19 = reloc offset, r20 = got-8, r21 = fixup ltp, r22 = fp. */ - - /* RELOCATION MARKER: bl to provide gcc's __cffc with fixup loc. */ - .text - @@ -61,17 +61,20 @@ _dl_runtime_resolve: - copy %sp, %r1 /* Copy previous sp */ - /* Save function result address (on entry) */ - stwm %r28,128(%sp) - - /* Fillin some frame info to follow ABI */ - + /* Fill in some frame info to follow ABI */ - stw %r1,-4(%sp) /* Previous sp */ - stw %r21,-32(%sp) /* PIC register value */ - - /* Save input floating point registers. This must be done - in the new frame since the previous frame doesn't have - enough space */ - - ldo -56(%sp),%r1 - + ldo -64(%sp),%r1 - fstd,ma %fr4,-8(%r1) - fstd,ma %fr5,-8(%r1) - fstd,ma %fr6,-8(%r1) + - + /* Test PA_GP_RELOC bit. */ - + bb,>= %r19,31,2f /* branch if not reloc offset */ - fstd,ma %fr7,-8(%r1) - - /* Set up args to fixup func, needs only two arguments */ - @@ -79,7 +82,7 @@ _dl_runtime_resolve: - copy %r19,%r25 /* (2) reloc offset */ - - /* Call the real address resolver. */ - - bl _dl_fixup,%rp - +3: bl _dl_fixup,%rp - copy %r21,%r19 /* set fixup func ltp */ - - /* While the linker will set a function pointer to NULL when it - @@ -102,7 +105,7 @@ _dl_runtime_resolve: - copy %r29, %r19 - - /* Reload arguments fp args */ - - ldo -56(%sp),%r1 - + ldo -64(%sp),%r1 - fldd,ma -8(%r1),%fr4 - fldd,ma -8(%r1),%fr5 - fldd,ma -8(%r1),%fr6 - @@ -129,6 +132,25 @@ _dl_runtime_resolve: - bv %r0(%rp) - ldo -128(%sp),%sp - - +2: - + /* Set up args for _dl_fix_reloc_arg. */ - + copy %r22,%r26 /* (1) function pointer */ - + depi 0,31,2,%r26 /* clear least significant bits */ - + ldw 8+4(%r20),%r25 /* (2) got[1] == struct link_map */ + +ENTRY (__memmove_simd) + + DELOUSE (0) + + DELOUSE (1) + + DELOUSE (2) + - + /* Save ltp and link map arg for _dl_fixup. 
*/ - + stw %r21,-56(%sp) /* ltp */ - + stw %r25,-60(%sp) /* struct link map */ + + add srcend, src, count + + add dstend, dstin, count + + cmp count, 128 + + b.hi L(move_long) + + cmp count, 32 + + b.hi L(copy32_128) + - + /* Find reloc offset. */ - + bl _dl_fix_reloc_arg,%rp - + copy %r21,%r19 /* set func ltp */ + + /* Small moves: 0..32 bytes. */ + + cmp count, 16 + + b.lo L(copy16) + + ldr A_q, [src] + + ldr B_q, [srcend, -16] + + str A_q, [dstin] + + str B_q, [dstend, -16] + + ret + - + /* Set up args for _dl_fixup. */ - + ldw -56(%sp),%r21 /* ltp */ - + ldw -60(%sp),%r26 /* (1) struct link map */ - + b 3b - + copy %ret0,%r25 /* (2) reloc offset */ - .EXIT - .PROCEND - cfi_endproc - @@ -153,7 +175,7 @@ _dl_runtime_profile: - copy %sp, %r1 /* Copy previous sp */ - /* Save function result address (on entry) */ - stwm %r28,192(%sp) - - /* Fillin some frame info to follow ABI */ - + /* Fill in some frame info to follow ABI */ - stw %r1,-4(%sp) /* Previous sp */ - stw %r21,-32(%sp) /* PIC register value */ - - @@ -181,10 +203,11 @@ _dl_runtime_profile: - fstd,ma %fr5,8(%r1) - fstd,ma %fr6,8(%r1) - fstd,ma %fr7,8(%r1) - - /* 32-bit stack pointer and return register */ - - stw %sp,-56(%sp) - - stw %r2,-52(%sp) - - + /* Test PA_GP_RELOC bit. */ - + bb,>= %r19,31,2f /* branch if not reloc offset */ - + /* 32-bit stack pointer */ - + stw %sp,-56(%sp) - - /* Set up args to fixup func, needs five arguments */ - ldw 8+4(%r20),%r26 /* (1) got[1] == struct link_map */ - @@ -197,7 +220,7 @@ _dl_runtime_profile: - stw %r1, -52(%sp) /* (5) long int *framesizep */ - - /* Call the real address resolver. */ - - bl _dl_profile_fixup,%rp - +3: bl _dl_profile_fixup,%rp - copy %r21,%r19 /* set fixup func ltp */ - - /* Load up the returned function descriptor */ - @@ -215,7 +238,9 @@ _dl_runtime_profile: - fldd,ma 8(%r1),%fr5 - fldd,ma 8(%r1),%fr6 - fldd,ma 8(%r1),%fr7 - - ldw -52(%sp),%rp + +L(move_long): + + /* Only use backward copy if there is an overlap. */ + + sub tmp1, dstin, src + + cbz tmp1, L(move0) + + cmp tmp1, count + + b.hs L(copy_long) + - + /* Reload rp register -(192+20) without adjusting stack */ - + ldw -212(%sp),%rp - - /* Reload static link register -(192+16) without adjusting stack */ - ldw -208(%sp),%r29 - @@ -303,6 +328,33 @@ L(cont): - ldw -20(%sp),%rp - /* Return */ - bv,n 0(%r2) - + - +2: - + /* Set up args for _dl_fix_reloc_arg. */ - + copy %r22,%r26 /* (1) function pointer */ - + depi 0,31,2,%r26 /* clear least significant bits */ - + ldw 8+4(%r20),%r25 /* (2) got[1] == struct link_map */ - + - + /* Save ltp and link map arg for _dl_fixup. */ - + stw %r21,-92(%sp) /* ltp */ - + stw %r25,-116(%sp) /* struct link map */ + + /* Large backwards copy for overlapping copies. + + Copy 16 bytes and then align srcend to 16-byte alignment. */ + +L(copy_long_backwards): + + ldr D_q, [srcend, -16] + + and tmp1, srcend, 15 + + bic srcend, srcend, 15 + + sub count, count, tmp1 + + ldp A_q, B_q, [srcend, -32] + + str D_q, [dstend, -16] + + ldp C_q, D_q, [srcend, -64] + + sub dstend, dstend, tmp1 + + subs count, count, 128 + + b.ls L(copy64_from_start) + - + /* Find reloc offset. */ - + bl _dl_fix_reloc_arg,%rp - + copy %r21,%r19 /* set func ltp */ + +L(loop64_backwards): + + str B_q, [dstend, -16] + + str A_q, [dstend, -32] + + ldp A_q, B_q, [srcend, -96] + + str D_q, [dstend, -48] + + str C_q, [dstend, -64]! + + ldp C_q, D_q, [srcend, -128] + + sub srcend, srcend, 64 + + subs count, count, 64 + + b.hi L(loop64_backwards) + - + /* Restore fixup ltp. 
*/ - + ldw -92(%sp),%r21 /* ltp */ + + /* Write the last iteration and copy 64 bytes from the start. */ + +L(copy64_from_start): + + ldp E_q, F_q, [src, 32] + + stp A_q, B_q, [dstend, -32] + + ldp A_q, B_q, [src] + + stp C_q, D_q, [dstend, -64] + + stp E_q, F_q, [dstin, 32] + + stp A_q, B_q, [dstin] + +L(move0): + + ret + - + /* Set up args to fixup func, needs five arguments */ - + ldw -116(%sp),%r26 /* (1) struct link map */ - + copy %ret0,%r25 /* (2) reloc offset */ - + stw %r25,-120(%sp) /* Save reloc offset */ - + ldw -212(%sp),%r24 /* (3) profile_fixup needs rp */ - + ldo -56(%sp),%r23 /* (4) La_hppa_regs */ - + ldo -112(%sp), %r1 - + b 3b - + stw %r1, -52(%sp) /* (5) long int *framesizep */ - .EXIT - .PROCEND - cfi_endproc - diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h - index 8af0789a9c..4334ade2a0 100644 - --- a/sysdeps/i386/dl-machine.h - +++ b/sysdeps/i386/dl-machine.h - @@ -338,16 +338,22 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc, - { - # ifndef RTLD_BOOTSTRAP - if (sym_map != map - - && sym_map->l_type != lt_executable - && !sym_map->l_relocated) - { - const char *strtab - = (const char *) D_PTR (map, l_info[DT_STRTAB]); - - _dl_error_printf ("\ - + if (sym_map->l_type == lt_executable) - + _dl_fatal_printf ("\ - +%s: IFUNC symbol '%s' referenced in '%s' is defined in the executable \ - +and creates an unsatisfiable circular dependency.\n", - + RTLD_PROGNAME, strtab + refsym->st_name, - + map->l_name); - + else - + _dl_error_printf ("\ - %s: Relink `%s' with `%s' for IFUNC symbol `%s'\n", - - RTLD_PROGNAME, map->l_name, - - sym_map->l_name, - - strtab + refsym->st_name); - + RTLD_PROGNAME, map->l_name, - + sym_map->l_name, - + strtab + refsym->st_name); - } - # endif - value = ((Elf32_Addr (*) (void)) value) (); - diff --git a/sysdeps/i386/sysdep.h b/sysdeps/i386/sysdep.h - index b4bcd8fb6c..6094af8fec 100644 - --- a/sysdeps/i386/sysdep.h - +++ b/sysdeps/i386/sysdep.h - @@ -61,7 +61,7 @@ lose: SYSCALL_PIC_SETUP \ + +END (__memmove_simd) + +libc_hidden_builtin_def (__memmove_simd) + diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c + index ed5a47f6f8..46a4cb3a54 100644 + --- a/sysdeps/aarch64/multiarch/memmove.c + +++ b/sysdeps/aarch64/multiarch/memmove.c + @@ -29,6 +29,7 @@ + extern __typeof (__redirect_memmove) __libc_memmove; - # define SETUP_PIC_REG(reg) \ - .ifndef GET_PC_THUNK(reg); \ - - .section .gnu.linkonce.t.GET_PC_THUNK(reg),"ax",@progbits; \ - + .section .text.GET_PC_THUNK(reg),"axG",@progbits,GET_PC_THUNK(reg),comdat; \ - .globl GET_PC_THUNK(reg); \ - .hidden GET_PC_THUNK(reg); \ - .p2align 4; \ - @@ -97,7 +97,8 @@ GET_PC_THUNK(reg): \ + extern __typeof (__redirect_memmove) __memmove_generic attribute_hidden; + +extern __typeof (__redirect_memmove) __memmove_simd attribute_hidden; + extern __typeof (__redirect_memmove) __memmove_thunderx attribute_hidden; + extern __typeof (__redirect_memmove) __memmove_thunderx2 attribute_hidden; + extern __typeof (__redirect_memmove) __memmove_falkor attribute_hidden; + @@ -40,7 +41,10 @@ libc_ifunc (__libc_memmove, + ? __memmove_falkor + : (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr) + ? __memmove_thunderx2 + - : __memmove_generic)))); + + : (IS_NEOVERSE_N1 (midr) || IS_NEOVERSE_N2 (midr) + + || IS_NEOVERSE_V1 (midr) + + ? __memmove_simd + + : __memmove_generic))))); - # define SETUP_PIC_REG_STR(reg) \ - ".ifndef " GET_PC_THUNK_STR (reg) "\n" \ - - ".section .gnu.linkonce.t." GET_PC_THUNK_STR (reg) ",\"ax\",@progbits\n" \ - + ".section .text." 
GET_PC_THUNK_STR (reg) ",\"axG\",@progbits," \ - + GET_PC_THUNK_STR (reg) ",comdat\n" \ - ".globl " GET_PC_THUNK_STR (reg) "\n" \ - ".hidden " GET_PC_THUNK_STR (reg) "\n" \ - ".p2align 4\n" \ - diff --git a/sysdeps/ieee754/ldbl-96/Makefile b/sysdeps/ieee754/ldbl-96/Makefile - index 995e90d6da..6030adf7e7 100644 - --- a/sysdeps/ieee754/ldbl-96/Makefile - +++ b/sysdeps/ieee754/ldbl-96/Makefile - @@ -17,5 +17,8 @@ - # . + # undef memmove + strong_alias (__libc_memmove, memmove); + diff --git a/sysdeps/aarch64/strcpy.S b/sysdeps/aarch64/strcpy.S + index 548130e413..a8ff52c072 100644 + --- a/sysdeps/aarch64/strcpy.S + +++ b/sysdeps/aarch64/strcpy.S + @@ -234,8 +234,13 @@ L(entry_no_page_cross): + #endif + /* calculate the loc value */ + cmeq datav.16b, datav.16b, #0 + +#ifdef __AARCH64EB__ + + mov data1, datav.d[1] + + mov data2, datav.d[0] + +#else + mov data1, datav.d[0] + mov data2, datav.d[1] + +#endif + cmp data1, 0 + csel data1, data1, data2, ne + mov pos, 8 + diff --git a/sysdeps/aarch64/strnlen.S b/sysdeps/aarch64/strnlen.S + index 5981247dd9..086a5c7e99 100644 + --- a/sysdeps/aarch64/strnlen.S + +++ b/sysdeps/aarch64/strnlen.S + @@ -154,8 +154,13 @@ L(loop_end): + byte. */ - ifeq ($(subdir),math) - -tests += test-canonical-ldbl-96 test-totalorderl-ldbl-96 - +tests += test-canonical-ldbl-96 test-totalorderl-ldbl-96 test-sinl-pseudo - +ifeq ($(have-ssp),yes) - +CFLAGS-test-sinl-pseudo.c += -fstack-protector-all - endif - +endif # $(subdir) == math - diff --git a/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c b/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c - index 5f742321ae..bcdf20179f 100644 - --- a/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c - +++ b/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c - @@ -210,6 +210,18 @@ __ieee754_rem_pio2l (long double x, long double *y) - return 0; - } + cmeq datav.16b, datav.16b, #0 + +#ifdef __AARCH64EB__ + + mov data1, datav.d[1] + + mov data2, datav.d[0] + +#else + mov data1, datav.d[0] + mov data2, datav.d[1] + +#endif + cmp data1, 0 + csel data1, data1, data2, ne + sub len, src, srcin + diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h + index 604c489170..f1feb19dc7 100644 + --- a/sysdeps/aarch64/sysdep.h + +++ b/sysdeps/aarch64/sysdep.h + @@ -45,7 +45,7 @@ + #define ENTRY(name) \ + .globl C_SYMBOL_NAME(name); \ + .type C_SYMBOL_NAME(name),%function; \ + - .align 4; \ + + .p2align 6; \ + C_LABEL(name) \ + cfi_startproc; \ + CALL_MCOUNT + diff --git a/sysdeps/arm/armv7/multiarch/memcpy_impl.S b/sysdeps/arm/armv7/multiarch/memcpy_impl.S + index bf4ac7077f..379bb56fc9 100644 + --- a/sysdeps/arm/armv7/multiarch/memcpy_impl.S + +++ b/sysdeps/arm/armv7/multiarch/memcpy_impl.S + @@ -268,7 +268,7 @@ ENTRY(memcpy) - + if ((i0 & 0x80000000) == 0) - + { - + /* Pseudo-zero and unnormal representations are not valid - + representations of long double. We need to avoid stack - + corruption in __kernel_rem_pio2, which expects input in a - + particular normal form, but those representations do not need - + to be consistently handled like any particular floating-point - + value. */ - + y[1] = y[0] = __builtin_nanl (""); - + return 0; - + } - + - /* Split the 64 bits of the mantissa into three 24-bit integers - stored in a double array. */ - exp = j0 - 23; - diff --git a/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c b/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c - new file mode 100644 - index 0000000000..f59b97769d - --- /dev/null - +++ b/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c - @@ -0,0 +1,41 @@ - +/* Test sinl for pseudo-zeros and unnormals for ldbl-96 (bug 25487). 
- + Copyright (C) 2020 Free Software Foundation, Inc. - + This file is part of the GNU C Library. - + - + The GNU C Library is free software; you can redistribute it and/or - + modify it under the terms of the GNU Lesser General Public - + License as published by the Free Software Foundation; either - + version 2.1 of the License, or (at your option) any later version. - + - + The GNU C Library is distributed in the hope that it will be useful, - + but WITHOUT ANY WARRANTY; without even the implied warranty of - + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - + Lesser General Public License for more details. - + - + You should have received a copy of the GNU Lesser General Public - + License along with the GNU C Library; if not, see - + . */ - + - +#include - +#include - +#include - + - +static int - +do_test (void) - +{ - + for (int i = 0; i < 64; i++) - + { - + uint64_t sig = i == 63 ? 0 : 1ULL << i; - + long double ld; - + SET_LDOUBLE_WORDS (ld, 0x4141, - + sig >> 32, sig & 0xffffffffULL); - + /* The requirement is that no stack overflow occurs when the - + pseudo-zero or unnormal goes through range reduction. */ - + volatile long double ldr; - + ldr = sinl (ld); - + (void) ldr; - + } - + return 0; - +} - + - +#include - diff --git a/sysdeps/posix/system.c b/sysdeps/posix/system.c - index e613e6a344..a03f478fc7 100644 - --- a/sysdeps/posix/system.c - +++ b/sysdeps/posix/system.c - @@ -101,7 +101,8 @@ cancel_handler (void *arg) - static int - do_system (const char *line) - { - - int status; - + int status = -1; - + int ret; - pid_t pid; - struct sigaction sa; - #ifndef _LIBC_REENTRANT - @@ -144,14 +145,14 @@ do_system (const char *line) - __posix_spawnattr_setflags (&spawn_attr, - POSIX_SPAWN_SETSIGDEF | POSIX_SPAWN_SETSIGMASK); + mov dst, dstin /* Preserve dstin, we need to return it. */ + cmp count, #64 + - bge .Lcpy_not_short + + bhs .Lcpy_not_short + /* Deal with small copies quickly by dropping straight into the + exit block. */ - - status = __posix_spawn (&pid, SHELL_PATH, 0, &spawn_attr, - - (char *const[]){ (char*) SHELL_NAME, - - (char*) "-c", - - (char *) line, NULL }, - - __environ); - + ret = __posix_spawn (&pid, SHELL_PATH, 0, &spawn_attr, - + (char *const[]){ (char *) SHELL_NAME, - + (char *) "-c", - + (char *) line, NULL }, - + __environ); - __posix_spawnattr_destroy (&spawn_attr); + @@ -351,10 +351,10 @@ ENTRY(memcpy) - - if (status == 0) - + if (ret == 0) - { - /* Cancellation results in cleanup handlers running as exceptions in - the block where they were installed, so it is safe to reference - @@ -186,6 +187,9 @@ do_system (const char *line) - } - DO_UNLOCK (); + 1: + subs tmp2, count, #64 /* Use tmp2 for count. */ + - blt .Ltail63aligned + + blo .Ltail63aligned - + if (ret != 0) - + __set_errno (ret); - + - return status; - } + cmp tmp2, #512 + - bge .Lcpy_body_long + + bhs .Lcpy_body_long - diff --git a/sysdeps/powerpc/powerpc32/sysdep.h b/sysdeps/powerpc/powerpc32/sysdep.h - index 2ba009e919..829eec266a 100644 - --- a/sysdeps/powerpc/powerpc32/sysdep.h - +++ b/sysdeps/powerpc/powerpc32/sysdep.h - @@ -179,8 +179,8 @@ GOT_LABEL: ; \ - #else - /* Position-dependent code does not require access to the GOT. */ - # define __GLRO(rOUT, rGOT, member, offset) \ - - lis rOUT,(member+LOWORD)@ha; \ - - lwz rOUT,(member+LOWORD)@l(rOUT) - + lis rOUT,(member)@ha; \ - + lwz rOUT,(member)@l(rOUT) - #endif /* PIC */ + .Lcpy_body_medium: /* Count in tmp2. 
*/ + #ifdef USE_VFP + @@ -378,7 +378,7 @@ ENTRY(memcpy) + add src, src, #64 + vstr d1, [dst, #56] + add dst, dst, #64 + - bge 1b + + bhs 1b + tst tmp2, #0x3f + beq .Ldone - #endif /* __ASSEMBLER__ */ - diff --git a/sysdeps/powerpc/powerpc64/backtrace.c b/sysdeps/powerpc/powerpc64/backtrace.c - index 8a53a1088f..362a2b713c 100644 - --- a/sysdeps/powerpc/powerpc64/backtrace.c - +++ b/sysdeps/powerpc/powerpc64/backtrace.c - @@ -54,11 +54,22 @@ struct signal_frame_64 { - /* We don't care about the rest, since the IP value is at 'uc' field. */ - }; + @@ -412,7 +412,7 @@ ENTRY(memcpy) + ldrd A_l, A_h, [src, #64]! + strd A_l, A_h, [dst, #64]! + subs tmp2, tmp2, #64 + - bge 1b + + bhs 1b + tst tmp2, #0x3f + bne 1f + ldr tmp2,[sp], #FRAME_SIZE + @@ -482,7 +482,7 @@ ENTRY(memcpy) + add src, src, #32 - +/* Test if the address match to the inside the trampoline code. - + Up to and including kernel 5.8, returning from an interrupt or syscall to a - + signal handler starts execution directly at the handler's entry point, with - + LR set to address of the sigreturn trampoline (the vDSO symbol). - + Newer kernels will branch to signal handler from the trampoline instead, so - + checking the stacktrace against the vDSO entrypoint does not work in such - + case. - + The vDSO branches with a 'bctrl' instruction, so checking either the - + vDSO address itself and the next instruction should cover all kernel - + versions. */ - static inline bool - is_sigtramp_address (void *nip) - { - #ifdef HAVE_SIGTRAMP_RT64 - - if (nip == GLRO (dl_vdso_sigtramp_rt64)) - + if (nip == GLRO (dl_vdso_sigtramp_rt64) || - + nip == GLRO (dl_vdso_sigtramp_rt64) + 4) - return true; + subs tmp2, tmp2, #prefetch_lines * 64 * 2 + - blt 2f + + blo 2f + 1: + cpy_line_vfp d3, 0 + cpy_line_vfp d4, 64 + @@ -494,7 +494,7 @@ ENTRY(memcpy) + add dst, dst, #2 * 64 + add src, src, #2 * 64 + subs tmp2, tmp2, #prefetch_lines * 64 + - bge 1b + + bhs 1b + + 2: + cpy_tail_vfp d3, 0 + @@ -615,8 +615,8 @@ ENTRY(memcpy) + 1: + pld [src, #(3 * 64)] + subs count, count, #64 + - ldrmi tmp2, [sp], #FRAME_SIZE + - bmi .Ltail63unaligned + + ldrlo tmp2, [sp], #FRAME_SIZE + + blo .Ltail63unaligned + pld [src, #(4 * 64)] + + #ifdef USE_NEON + @@ -633,7 +633,7 @@ ENTRY(memcpy) + neon_load_multi d0-d3, src + neon_load_multi d4-d7, src + subs count, count, #64 + - bmi 2f + + blo 2f + 1: + pld [src, #(4 * 64)] + neon_store_multi d0-d3, dst + @@ -641,7 +641,7 @@ ENTRY(memcpy) + neon_store_multi d4-d7, dst + neon_load_multi d4-d7, src + subs count, count, #64 + - bpl 1b + + bhs 1b + 2: + neon_store_multi d0-d3, dst + neon_store_multi d4-d7, dst + diff --git a/sysdeps/arm/be/nofpu/Implies b/sysdeps/arm/be/nofpu/Implies + new file mode 100644 + index 0000000000..c90dd7fd5c + --- /dev/null + +++ b/sysdeps/arm/be/nofpu/Implies + @@ -0,0 +1 @@ + +arm/nofpu + diff --git a/sysdeps/arm/le/nofpu/Implies b/sysdeps/arm/le/nofpu/Implies + new file mode 100644 + index 0000000000..c90dd7fd5c + --- /dev/null + +++ b/sysdeps/arm/le/nofpu/Implies + @@ -0,0 +1 @@ + +arm/nofpu + diff --git a/sysdeps/arm/memcpy.S b/sysdeps/arm/memcpy.S + index 510e8adaf2..bcfbc51d99 100644 + --- a/sysdeps/arm/memcpy.S + +++ b/sysdeps/arm/memcpy.S + @@ -68,7 +68,7 @@ ENTRY(memcpy) + cfi_remember_state + + subs r2, r2, #4 + - blt 8f + + blo 8f + ands ip, r0, #3 + PLD( pld [r1, #0] ) + bne 9f + @@ -82,7 +82,7 @@ ENTRY(memcpy) + cfi_rel_offset (r6, 4) + cfi_rel_offset (r7, 8) + cfi_rel_offset (r8, 12) + - blt 5f + + blo 5f + + CALGN( ands ip, r1, #31 ) + CALGN( rsb r3, ip, #32 ) + @@ -98,9 +98,9 @@ 
ENTRY(memcpy) #endif - return false; - diff --git a/sysdeps/s390/configure b/sysdeps/s390/configure - index fa46e9e351..e7f576338d 100644 - --- a/sysdeps/s390/configure - +++ b/sysdeps/s390/configure - @@ -123,7 +123,9 @@ void testinsn (char *buf) - __asm__ (".machine \"arch13\" \n\t" - ".machinemode \"zarch_nohighgprs\" \n\t" - "lghi %%r0,16 \n\t" - - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); - + "mvcrl 0(%0),32(%0) \n\t" - + "vstrs %%v20,%%v20,%%v20,%%v20,0,2" - + : : "a" (buf) : "memory", "r0"); - } - EOF - if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c - @@ -271,7 +273,9 @@ else - void testinsn (char *buf) - { - __asm__ ("lghi %%r0,16 \n\t" - - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); - + "mvcrl 0(%0),32(%0) \n\t" - + "vstrs %%v20,%%v20,%%v20,%%v20,0,2" - + : : "a" (buf) : "memory", "r0"); - } - EOF - if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c - diff --git a/sysdeps/s390/configure.ac b/sysdeps/s390/configure.ac - index 3ed5a8ef87..5c3479e8cf 100644 - --- a/sysdeps/s390/configure.ac - +++ b/sysdeps/s390/configure.ac - @@ -88,7 +88,9 @@ void testinsn (char *buf) - __asm__ (".machine \"arch13\" \n\t" - ".machinemode \"zarch_nohighgprs\" \n\t" - "lghi %%r0,16 \n\t" - - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); - + "mvcrl 0(%0),32(%0) \n\t" - + "vstrs %%v20,%%v20,%%v20,%%v20,0,2" - + : : "a" (buf) : "memory", "r0"); - } - EOF - dnl test, if assembler supports S390 arch13 instructions - @@ -195,7 +197,9 @@ cat > conftest.c <<\EOF - void testinsn (char *buf) - { - __asm__ ("lghi %%r0,16 \n\t" - - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); - + "mvcrl 0(%0),32(%0) \n\t" - + "vstrs %%v20,%%v20,%%v20,%%v20,0,2" - + : : "a" (buf) : "memory", "r0"); - } - EOF - dnl test, if assembler supports S390 arch13 zarch instructions as default - diff --git a/sysdeps/s390/memmove.c b/sysdeps/s390/memmove.c - index 5fc85e129f..ee59b5de14 100644 - --- a/sysdeps/s390/memmove.c - +++ b/sysdeps/s390/memmove.c - @@ -43,7 +43,7 @@ extern __typeof (__redirect_memmove) MEMMOVE_ARCH13 attribute_hidden; - s390_libc_ifunc_expr (__redirect_memmove, memmove, - ({ - s390_libc_ifunc_expr_stfle_init (); - - (HAVE_MEMMOVE_ARCH13 - + (HAVE_MEMMOVE_ARCH13 && (hwcap & HWCAP_S390_VXRS_EXT2) - && S390_IS_ARCH13_MIE3 (stfle_bits)) - ? 
MEMMOVE_ARCH13 - : (HAVE_MEMMOVE_Z13 && (hwcap & HWCAP_S390_VX)) - diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c - index e6195c6e26..17c0cc3952 100644 - --- a/sysdeps/s390/multiarch/ifunc-impl-list.c - +++ b/sysdeps/s390/multiarch/ifunc-impl-list.c - @@ -171,7 +171,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, - IFUNC_IMPL (i, name, memmove, - # if HAVE_MEMMOVE_ARCH13 - IFUNC_IMPL_ADD (array, i, memmove, - - S390_IS_ARCH13_MIE3 (stfle_bits), - + ((dl_hwcap & HWCAP_S390_VXRS_EXT2) - + && S390_IS_ARCH13_MIE3 (stfle_bits)), - MEMMOVE_ARCH13) - # endif - # if HAVE_MEMMOVE_Z13 - diff --git a/sysdeps/sh/be/sh4/fpu/Implies b/sysdeps/sh/be/sh4/fpu/Implies - new file mode 100644 - index 0000000000..71b28ee1a4 - --- /dev/null - +++ b/sysdeps/sh/be/sh4/fpu/Implies - @@ -0,0 +1 @@ - +sh/sh4/fpu - diff --git a/sysdeps/sh/le/sh4/fpu/Implies b/sysdeps/sh/le/sh4/fpu/Implies - new file mode 100644 - index 0000000000..71b28ee1a4 - --- /dev/null - +++ b/sysdeps/sh/le/sh4/fpu/Implies - @@ -0,0 +1 @@ - +sh/sh4/fpu - diff --git a/sysdeps/unix/make-syscalls.sh b/sysdeps/unix/make-syscalls.sh - index c07626677f..4f6c3490a2 100644 - --- a/sysdeps/unix/make-syscalls.sh - +++ b/sysdeps/unix/make-syscalls.sh - @@ -30,6 +30,7 @@ - # P: optionally-NULL pointer to typed object (e.g., 3rd argument to sigaction) - # s: non-NULL string (e.g., 1st arg to open) - # S: optionally-NULL string (e.g., 1st arg to acct) - +# U: unsigned long int (32-bit types are zero-extended to 64-bit types) - # v: vararg scalar (e.g., optional 3rd arg to open) - # V: byte-per-page vector (3rd arg to mincore) - # W: wait status, optionally-NULL pointer to int (e.g., 2nd arg of wait4) - @@ -184,6 +185,27 @@ while read file srcfile caller syscall args strong weak; do - ?:?????????) nargs=9;; - esac - + # Derive the unsigned long int arguments from the argument signature - + ulong_arg_1=0 - + ulong_arg_2=0 - + ulong_count=0 - + for U in $(echo $args | sed -e "s/.*:/:/" | grep -ob U) - + do - + ulong_count=$(expr $ulong_count + 1) - + ulong_arg=$(echo $U | sed -e "s/:U//") - + case $ulong_count in - + 1) - + ulong_arg_1=$ulong_arg - + ;; - + 2) - + ulong_arg_2=$ulong_arg - + ;; - + *) - + echo >&2 "$0: Too many unsigned long int arguments for syscall ($strong $weak)" - + exit 2 - + esac - + done - + - # Make sure only the first syscall rule is used, if multiple dirs - # define the same syscall. - echo '' - @@ -245,6 +267,8 @@ while read file srcfile caller syscall args strong weak; do - \$(make-target-directory) - (echo '#define SYSCALL_NAME $syscall'; \\ - echo '#define SYSCALL_NARGS $nargs'; \\ - + echo '#define SYSCALL_ULONG_ARG_1 $ulong_arg_1'; \\ - + echo '#define SYSCALL_ULONG_ARG_2 $ulong_arg_2'; \\ - echo '#define SYSCALL_SYMBOL $strong'; \\ - echo '#define SYSCALL_NOERRNO $noerrno'; \\ - echo '#define SYSCALL_ERRVAL $errval'; \\ - diff --git a/sysdeps/unix/syscall-template.S b/sysdeps/unix/syscall-template.S - index cf6c7a58fb..f807a8603f 100644 - --- a/sysdeps/unix/syscall-template.S - +++ b/sysdeps/unix/syscall-template.S - @@ -25,6 +25,12 @@ - defining a few macros: - SYSCALL_NAME syscall name - SYSCALL_NARGS number of arguments this call takes - + SYSCALL_ULONG_ARG_1 the first unsigned long int argument this - + call takes. 0 means that there are no - + unsigned long int arguments. - + SYSCALL_ULONG_ARG_2 the second unsigned long int argument this - + call takes. 0 means that there is at most - + one unsigned long int argument. 
- SYSCALL_SYMBOL primary symbol name - SYSCALL_NOERRNO 1 to define a no-errno version (see below) - SYSCALL_ERRVAL 1 to define an error-value version (see below) - @@ -44,9 +50,31 @@ - /* This indirection is needed so that SYMBOL gets macro-expanded. */ - #define syscall_hidden_def(SYMBOL) hidden_def (SYMBOL) + PLD( pld [r1, #0] ) + -2: PLD( subs r2, r2, #96 ) + +2: PLD( cmp r2, #96 ) + PLD( pld [r1, #28] ) + - PLD( blt 4f ) + + PLD( blo 4f ) + PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) - -#define T_PSEUDO(SYMBOL, NAME, N) PSEUDO (SYMBOL, NAME, N) - -#define T_PSEUDO_NOERRNO(SYMBOL, NAME, N) PSEUDO_NOERRNO (SYMBOL, NAME, N) - -#define T_PSEUDO_ERRVAL(SYMBOL, NAME, N) PSEUDO_ERRVAL (SYMBOL, NAME, N) - +/* If PSEUDOS_HAVE_ULONG_INDICES is defined, PSEUDO and T_PSEUDO macros - + have 2 extra arguments for unsigned long int arguments: - + Extra argument 1: Position of the first unsigned long int argument. - + Extra argument 2: Position of the second unsigned long int argument. - + */ - +#ifndef PSEUDOS_HAVE_ULONG_INDICES - +# undef SYSCALL_ULONG_ARG_1 - +# define SYSCALL_ULONG_ARG_1 0 - +#endif - + - +#if SYSCALL_ULONG_ARG_1 - +# define T_PSEUDO(SYMBOL, NAME, N, U1, U2) \ - + PSEUDO (SYMBOL, NAME, N, U1, U2) - +# define T_PSEUDO_NOERRNO(SYMBOL, NAME, N, U1, U2) \ - + PSEUDO_NOERRNO (SYMBOL, NAME, N, U1, U2) - +# define T_PSEUDO_ERRVAL(SYMBOL, NAME, N, U1, U2) \ - + PSEUDO_ERRVAL (SYMBOL, NAME, N, U1, U2) - +#else - +# define T_PSEUDO(SYMBOL, NAME, N) \ - + PSEUDO (SYMBOL, NAME, N) - +# define T_PSEUDO_NOERRNO(SYMBOL, NAME, N) \ - + PSEUDO_NOERRNO (SYMBOL, NAME, N) - +# define T_PSEUDO_ERRVAL(SYMBOL, NAME, N) \ - + PSEUDO_ERRVAL (SYMBOL, NAME, N) - +#endif - #define T_PSEUDO_END(SYMBOL) PSEUDO_END (SYMBOL) - #define T_PSEUDO_END_NOERRNO(SYMBOL) PSEUDO_END_NOERRNO (SYMBOL) - #define T_PSEUDO_END_ERRVAL(SYMBOL) PSEUDO_END_ERRVAL (SYMBOL) - @@ -56,7 +84,12 @@ - /* This kind of system call stub never returns an error. - We return the return value register to the caller unexamined. */ + @@ -108,9 +108,7 @@ ENTRY(memcpy) + 4: ldmia r1!, {r3, r4, r5, r6, r7, r8, ip, lr} + subs r2, r2, #32 + stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr} + - bge 3b + - PLD( cmn r2, #96 ) + - PLD( bge 4b ) + + bhs 3b - +# if SYSCALL_ULONG_ARG_1 - +T_PSEUDO_NOERRNO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS, - + SYSCALL_ULONG_ARG_1, SYSCALL_ULONG_ARG_2) - +# else - T_PSEUDO_NOERRNO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS) - +# endif - ret_NOERRNO - T_PSEUDO_END_NOERRNO (SYSCALL_SYMBOL) + 5: ands ip, r2, #28 + rsb ip, ip, #32 + @@ -222,7 +220,7 @@ ENTRY(memcpy) + strbge r4, [r0], #1 + subs r2, r2, ip + strb lr, [r0], #1 + - blt 8b + + blo 8b + ands ip, r1, #3 + beq 1b - @@ -66,7 +99,12 @@ T_PSEUDO_END_NOERRNO (SYSCALL_SYMBOL) - value, or zero for success. We may massage the kernel's return value - to meet that ABI, but we never set errno here. */ + @@ -236,7 +234,7 @@ ENTRY(memcpy) + .macro forward_copy_shift pull push - +# if SYSCALL_ULONG_ARG_1 - +T_PSEUDO_ERRVAL (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS, - + SYSCALL_ULONG_ARG_1, SYSCALL_ULONG_ARG_2) - +# else - T_PSEUDO_ERRVAL (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS) - +# endif - ret_ERRVAL - T_PSEUDO_END_ERRVAL (SYSCALL_SYMBOL) + subs r2, r2, #28 + - blt 14f + + blo 14f - @@ -75,7 +113,12 @@ T_PSEUDO_END_ERRVAL (SYSCALL_SYMBOL) - /* This is a "normal" system call stub: if there is an error, - it returns -1 and sets errno. 
*/ + CALGN( ands ip, r1, #31 ) + CALGN( rsb ip, ip, #32 ) + @@ -253,9 +251,9 @@ ENTRY(memcpy) + cfi_rel_offset (r10, 16) - +# if SYSCALL_ULONG_ARG_1 - +T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS, - + SYSCALL_ULONG_ARG_1, SYSCALL_ULONG_ARG_2) - +# else - T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS) - +# endif - ret - T_PSEUDO_END (SYSCALL_SYMBOL) + PLD( pld [r1, #0] ) + - PLD( subs r2, r2, #96 ) + + PLD( cmp r2, #96 ) + PLD( pld [r1, #28] ) + - PLD( blt 13f ) + + PLD( blo 13f ) + PLD( pld [r1, #60] ) + PLD( pld [r1, #92] ) - diff --git a/sysdeps/unix/syscalls.list b/sysdeps/unix/syscalls.list - index e28e801c7a..6b22b2cb45 100644 - --- a/sysdeps/unix/syscalls.list - +++ b/sysdeps/unix/syscalls.list - @@ -39,27 +39,27 @@ kill - kill i:ii __kill kill - link - link i:ss __link link - listen - listen i:ii __listen listen - lseek - lseek i:iii __libc_lseek __lseek lseek - -madvise - madvise i:pii __madvise madvise - +madvise - madvise i:pUi __madvise madvise - mkdir - mkdir i:si __mkdir mkdir - -mmap - mmap b:aniiii __mmap mmap - -mprotect - mprotect i:aii __mprotect mprotect - -munmap - munmap i:ai __munmap munmap - +mmap - mmap b:aUiiii __mmap mmap - +mprotect - mprotect i:aUi __mprotect mprotect - +munmap - munmap i:aU __munmap munmap - open - open Ci:siv __libc_open __open open - profil - profil i:piii __profil profil - ptrace - ptrace i:iiii ptrace - -read - read Ci:ibn __libc_read __read read - -readlink - readlink i:spi __readlink readlink - +read - read Ci:ibU __libc_read __read read - +readlink - readlink i:spU __readlink readlink - readv - readv Ci:ipi __readv readv - reboot - reboot i:i reboot - -recv - recv Ci:ibni __libc_recv recv - -recvfrom - recvfrom Ci:ibniBN __libc_recvfrom __recvfrom recvfrom - +recv - recv Ci:ibUi __libc_recv recv - +recvfrom - recvfrom Ci:ibUiBN __libc_recvfrom __recvfrom recvfrom - recvmsg - recvmsg Ci:ipi __libc_recvmsg __recvmsg recvmsg - rename - rename i:ss rename - rmdir - rmdir i:s __rmdir rmdir - select - select Ci:iPPPP __select __libc_select select - -send - send Ci:ibni __libc_send __send send - +send - send Ci:ibUi __libc_send __send send - sendmsg - sendmsg Ci:ipi __libc_sendmsg __sendmsg sendmsg - -sendto - sendto Ci:ibnibn __libc_sendto __sendto sendto - +sendto - sendto Ci:ibUibn __libc_sendto __sendto sendto - setdomain - setdomainname i:si setdomainname - setegid - setegid i:i __setegid setegid - seteuid - seteuid i:i __seteuid seteuid - @@ -94,5 +94,5 @@ uname - uname i:p __uname uname - unlink - unlink i:s __unlink unlink - utimes - utimes i:sp __utimes utimes - vhangup - vhangup i:i vhangup - -write - write Ci:ibn __libc_write __write write - +write - write Ci:ibU __libc_write __write write - writev - writev Ci:ipi __writev writev - diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile - index f12b7b1a2d..5fbde369c3 100644 - --- a/sysdeps/unix/sysv/linux/Makefile - +++ b/sysdeps/unix/sysv/linux/Makefile - @@ -60,7 +60,9 @@ sysdep_routines += adjtimex clone umount umount2 readahead \ - setfsuid setfsgid epoll_pwait signalfd \ - eventfd eventfd_read eventfd_write prlimit \ - personality epoll_wait tee vmsplice splice \ - - open_by_handle_at mlock2 pkey_mprotect pkey_set pkey_get - + open_by_handle_at mlock2 pkey_mprotect pkey_set pkey_get \ - + prctl \ - + process_vm_readv process_vm_writev + @@ -280,9 +278,7 @@ ENTRY(memcpy) + mov ip, ip, PULL #\pull + orr ip, ip, lr, PUSH #\push + stmia r0!, {r3, r4, r5, r6, r7, r8, r10, ip} + - bge 12b + - PLD( cmn r2, #96 ) + - PLD( bge 13b ) + + bhs 12b 
- CFLAGS-gethostid.c = -fexceptions - CFLAGS-tee.c = -fexceptions -fasynchronous-unwind-tables - diff --git a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h - index 9378387747..c8471947b9 100644 - --- a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h - +++ b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h - @@ -17,6 +17,7 @@ - #define __NR_clock_nanosleep 115 - #define __NR_clock_settime 112 - #define __NR_clone 220 - +#define __NR_clone3 435 - #define __NR_close 57 - #define __NR_connect 203 - #define __NR_copy_file_range 285 - diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h - index 1389cea1b3..346d045fb4 100644 - --- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h - +++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h - @@ -51,8 +51,12 @@ + pop {r5 - r8, r10} + cfi_adjust_cfa_offset (-20) + diff --git a/sysdeps/arm/memmove.S b/sysdeps/arm/memmove.S + index 954037ef3a..0d07b76ee6 100644 + --- a/sysdeps/arm/memmove.S + +++ b/sysdeps/arm/memmove.S + @@ -85,7 +85,7 @@ ENTRY(memmove) + add r1, r1, r2 + add r0, r0, r2 + subs r2, r2, #4 + - blt 8f + + blo 8f + ands ip, r0, #3 + PLD( pld [r1, #-4] ) + bne 9f + @@ -99,7 +99,7 @@ ENTRY(memmove) + cfi_rel_offset (r6, 4) + cfi_rel_offset (r7, 8) + cfi_rel_offset (r8, 12) + - blt 5f + + blo 5f - #define IS_PHECDA(midr) (MIDR_IMPLEMENTOR(midr) == 'h' \ - && MIDR_PARTNUM(midr) == 0x000) - -#define IS_ARES(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ - - && MIDR_PARTNUM(midr) == 0xd0c) - +#define IS_NEOVERSE_N1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ - + && MIDR_PARTNUM(midr) == 0xd0c) - +#define IS_NEOVERSE_N2(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ - + && MIDR_PARTNUM(midr) == 0xd49) - +#define IS_NEOVERSE_V1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ - + && MIDR_PARTNUM(midr) == 0xd40) + CALGN( ands ip, r1, #31 ) + CALGN( sbcsne r4, ip, r2 ) @ C is always set here + @@ -114,9 +114,9 @@ ENTRY(memmove) + #endif - #define IS_EMAG(midr) (MIDR_IMPLEMENTOR(midr) == 'P' \ - && MIDR_PARTNUM(midr) == 0x000) - diff --git a/sysdeps/unix/sysv/linux/aarch64/localplt.data b/sysdeps/unix/sysv/linux/aarch64/localplt.data - index a60053b914..08af68b5e8 100644 - --- a/sysdeps/unix/sysv/linux/aarch64/localplt.data - +++ b/sysdeps/unix/sysv/linux/aarch64/localplt.data - @@ -7,6 +7,9 @@ libc.so: malloc - libc.so: memalign - libc.so: realloc - libm.so: matherr - +# If outline atomics are used, libgcc (built outside of glibc) may - +# call __getauxval using the PLT. - +libc.so: __getauxval ? - # The dynamic loader needs __tls_get_addr for TLS. 
- ld.so: __tls_get_addr - # The main malloc is interposed into the dynamic linker, for - diff --git a/sysdeps/unix/sysv/linux/hppa/atomic-machine.h b/sysdeps/unix/sysv/linux/hppa/atomic-machine.h - index 9d8ffbe860..bf61b66b70 100644 - --- a/sysdeps/unix/sysv/linux/hppa/atomic-machine.h - +++ b/sysdeps/unix/sysv/linux/hppa/atomic-machine.h - @@ -36,9 +36,37 @@ typedef uintptr_t uatomicptr_t; - typedef intmax_t atomic_max_t; - typedef uintmax_t uatomic_max_t; + PLD( pld [r1, #-4] ) + -2: PLD( subs r2, r2, #96 ) + +2: PLD( cmp r2, #96 ) + PLD( pld [r1, #-32] ) + - PLD( blt 4f ) + + PLD( blo 4f ) + PLD( pld [r1, #-64] ) + PLD( pld [r1, #-96] ) - +#define atomic_full_barrier() __sync_synchronize () - + - #define __HAVE_64B_ATOMICS 0 - #define USE_ATOMIC_COMPILER_BUILTINS 0 + @@ -124,9 +124,7 @@ ENTRY(memmove) + 4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} + subs r2, r2, #32 + stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} + - bge 3b + - PLD( cmn r2, #96 ) + - PLD( bge 4b ) + + bhs 3b - +/* We use the compiler atomic load and store builtins as the generic - + defines are not atomic. In particular, we need to use compare and - + exchange for stores as the implementation is synthesized. */ - +void __atomic_link_error (void); - +#define __atomic_check_size_ls(mem) \ - + if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && sizeof (*mem) != 4) \ - + __atomic_link_error (); - + - +#define atomic_load_relaxed(mem) \ - + ({ __atomic_check_size_ls((mem)); \ - + __atomic_load_n ((mem), __ATOMIC_RELAXED); }) - +#define atomic_load_acquire(mem) \ - + ({ __atomic_check_size_ls((mem)); \ - + __atomic_load_n ((mem), __ATOMIC_ACQUIRE); }) - + - +#define atomic_store_relaxed(mem, val) \ - + do { \ - + __atomic_check_size_ls((mem)); \ - + __atomic_store_n ((mem), (val), __ATOMIC_RELAXED); \ - + } while (0) - +#define atomic_store_release(mem, val) \ - + do { \ - + __atomic_check_size_ls((mem)); \ - + __atomic_store_n ((mem), (val), __ATOMIC_RELEASE); \ - + } while (0) - + - /* XXX Is this actually correct? */ - #define ATOMIC_EXCHANGE_USES_CAS 1 + 5: ands ip, r2, #28 + rsb ip, ip, #32 + @@ -237,7 +235,7 @@ ENTRY(memmove) + strbge r4, [r0, #-1]! + subs r2, r2, ip + strb lr, [r0, #-1]! 
+ - blt 8b + + blo 8b + ands ip, r1, #3 + beq 1b - diff --git a/sysdeps/unix/sysv/linux/microblaze/sysdep.h b/sysdeps/unix/sysv/linux/microblaze/sysdep.h - index ed873d9dd4..796663a23a 100644 - --- a/sysdeps/unix/sysv/linux/microblaze/sysdep.h - +++ b/sysdeps/unix/sysv/linux/microblaze/sysdep.h - @@ -209,8 +209,8 @@ SYSCALL_ERROR_LABEL_DCL: \ + @@ -251,7 +249,7 @@ ENTRY(memmove) + .macro backward_copy_shift push pull - # define inline_syscall0(name,dummy) \ - ({ \ - - register long __ret __asm__("r3"); \ - - register long __r12 __asm__("r12") = name; \ - + register long int __ret __asm__("r3"); \ - + register long int __r12 __asm__("r12") = name; \ - __asm__ __volatile__( "brki r14,8; nop;" \ - : "=r"(__ret) \ - : "r"(__r12) \ - @@ -219,9 +219,10 @@ SYSCALL_ERROR_LABEL_DCL: \ + subs r2, r2, #28 + - blt 14f + + blo 14f - # define inline_syscall1(name,arg1) \ - ({ \ - - register long __ret __asm__("r3"); \ - - register long __r12 __asm__("r12") = name; \ - - register long __r5 __asm__("r5") = (long)(arg1); \ - + long int __arg1 = (long int) (arg1); \ - + register long int __ret __asm__("r3"); \ - + register long int __r12 __asm__("r12") = name; \ - + register long int __r5 __asm__("r5") = __arg1; \ - __asm__ __volatile__( "brki r14,8; nop;" \ - : "=r"(__ret) \ - : "r"(__r5), "r"(__r12) \ - @@ -230,10 +231,12 @@ SYSCALL_ERROR_LABEL_DCL: \ + CALGN( ands ip, r1, #31 ) + CALGN( rsb ip, ip, #32 ) + @@ -268,9 +266,9 @@ ENTRY(memmove) + cfi_rel_offset (r10, 16) - # define inline_syscall2(name,arg1,arg2) \ - ({ \ - - register long __ret __asm__("r3"); \ - - register long __r12 __asm__("r12") = name; \ - - register long __r5 __asm__("r5") = (long)(arg1); \ - - register long __r6 __asm__("r6") = (long)(arg2); \ - + long int __arg1 = (long int) (arg1); \ - + long int __arg2 = (long int) (arg2); \ - + register long int __ret __asm__("r3"); \ - + register long int __r12 __asm__("r12") = name; \ - + register long int __r5 __asm__("r5") = __arg1; \ - + register long int __r6 __asm__("r6") = __arg2; \ - __asm__ __volatile__( "brki r14,8; nop;" \ - : "=r"(__ret) \ - : "r"(__r5), "r"(__r6), "r"(__r12) \ - @@ -243,11 +246,14 @@ SYSCALL_ERROR_LABEL_DCL: \ + PLD( pld [r1, #-4] ) + - PLD( subs r2, r2, #96 ) + + PLD( cmp r2, #96 ) + PLD( pld [r1, #-32] ) + - PLD( blt 13f ) + + PLD( blo 13f ) + PLD( pld [r1, #-64] ) + PLD( pld [r1, #-96] ) - # define inline_syscall3(name,arg1,arg2,arg3) \ - ({ \ - - register long __ret __asm__("r3"); \ - - register long __r12 __asm__("r12") = name; \ - - register long __r5 __asm__("r5") = (long)(arg1); \ - - register long __r6 __asm__("r6") = (long)(arg2); \ - - register long __r7 __asm__("r7") = (long)(arg3); \ - + long int __arg1 = (long int) (arg1); \ - + long int __arg2 = (long int) (arg2); \ - + long int __arg3 = (long int) (arg3); \ - + register long int __ret __asm__("r3"); \ - + register long int __r12 __asm__("r12") = name; \ - + register long int __r5 __asm__("r5") = __arg1; \ - + register long int __r6 __asm__("r6") = __arg2; \ - + register long int __r7 __asm__("r7") = __arg3; \ - __asm__ __volatile__( "brki r14,8; nop;" \ - : "=r"(__ret) \ - : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r12) \ - @@ -257,12 +263,16 @@ SYSCALL_ERROR_LABEL_DCL: \ - - # define inline_syscall4(name,arg1,arg2,arg3,arg4) \ - ({ \ - - register long __ret __asm__("r3"); \ - - register long __r12 __asm__("r12") = name; \ - - register long __r5 __asm__("r5") = (long)(arg1); \ - - register long __r6 __asm__("r6") = (long)(arg2); \ - - register long __r7 __asm__("r7") = (long)(arg3); \ - - register long 
__r8 __asm__("r8") = (long)(arg4); \ - + long int __arg1 = (long int) (arg1); \ - + long int __arg2 = (long int) (arg2); \ - + long int __arg3 = (long int) (arg3); \ - + long int __arg4 = (long int) (arg4); \ - + register long int __ret __asm__("r3"); \ - + register long int __r12 __asm__("r12") = name; \ - + register long int __r5 __asm__("r5") = __arg1; \ - + register long int __r6 __asm__("r6") = __arg2; \ - + register long int __r7 __asm__("r7") = __arg3; \ - + register long int __r8 __asm__("r8") = __arg4; \ - __asm__ __volatile__( "brki r14,8; nop;" \ - : "=r"(__ret) \ - : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r8),"r"(__r12) \ - @@ -272,13 +282,18 @@ SYSCALL_ERROR_LABEL_DCL: \ - - # define inline_syscall5(name,arg1,arg2,arg3,arg4,arg5) \ - ({ \ - - register long __ret __asm__("r3"); \ - - register long __r12 __asm__("r12") = name; \ - - register long __r5 __asm__("r5") = (long)(arg1); \ - - register long __r6 __asm__("r6") = (long)(arg2); \ - - register long __r7 __asm__("r7") = (long)(arg3); \ - - register long __r8 __asm__("r8") = (long)(arg4); \ - - register long __r9 __asm__("r9") = (long)(arg5); \ - + long int __arg1 = (long int) (arg1); \ - + long int __arg2 = (long int) (arg2); \ - + long int __arg3 = (long int) (arg3); \ - + long int __arg4 = (long int) (arg4); \ - + long int __arg5 = (long int) (arg5); \ - + register long int __ret __asm__("r3"); \ - + register long int __r12 __asm__("r12") = name; \ - + register long int __r5 __asm__("r5") = __arg1; \ - + register long int __r6 __asm__("r6") = __arg2; \ - + register long int __r7 __asm__("r7") = __arg3; \ - + register long int __r8 __asm__("r8") = __arg4; \ - + register long int __r9 __asm__("r9") = __arg5; \ - __asm__ __volatile__( "brki r14,8; nop;" \ - : "=r"(__ret) \ - : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r8),"r"(__r9), "r"(__r12) \ - @@ -288,14 +303,20 @@ SYSCALL_ERROR_LABEL_DCL: \ - - # define inline_syscall6(name,arg1,arg2,arg3,arg4,arg5,arg6) \ - ({ \ - - register long __ret __asm__("r3"); \ - - register long __r12 __asm__("r12") = name; \ - - register long __r5 __asm__("r5") = (long)(arg1); \ - - register long __r6 __asm__("r6") = (long)(arg2); \ - - register long __r7 __asm__("r7") = (long)(arg3); \ - - register long __r8 __asm__("r8") = (long)(arg4); \ - - register long __r9 __asm__("r9") = (long)(arg5); \ - - register long __r10 __asm__("r10") = (long)(arg6); \ - + long int __arg1 = (long int) (arg1); \ - + long int __arg2 = (long int) (arg2); \ - + long int __arg3 = (long int) (arg3); \ - + long int __arg4 = (long int) (arg4); \ - + long int __arg5 = (long int) (arg5); \ - + long int __arg6 = (long int) (arg6); \ - + register long int __ret __asm__("r3"); \ - + register long int __r12 __asm__("r12") = name; \ - + register long int __r5 __asm__("r5") = __arg1; \ - + register long int __r6 __asm__("r6") = __arg2; \ - + register long int __r7 __asm__("r7") = __arg3; \ - + register long int __r8 __asm__("r8") = __arg4; \ - + register long int __r9 __asm__("r9") = __arg5; \ - + register long int __r10 __asm__("r10") = __arg6; \ - __asm__ __volatile__( "brki r14,8; nop;" \ - : "=r"(__ret) \ - : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r8),"r"(__r9), "r"(__r10), \ - diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S - index b2bbf10181..ff445a5406 100644 - --- a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S - +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S - @@ -22,9 +22,9 @@ - .text - .set nomips16 + @@ -295,9 +293,7 @@ 
ENTRY(memmove) + mov r4, r4, PUSH #\push + orr r4, r4, r3, PULL #\pull + stmdb r0!, {r4 - r8, r10, ip, lr} + - bge 12b + - PLD( cmn r2, #96 ) + - PLD( bge 13b ) + + bhs 12b - -/* long long __mips_syscall5 (long arg1, long arg2, long arg3, long arg4, - - long arg5, - - long number) */ - +/* long long int __mips_syscall5 (long int arg1, long int arg2, long int arg3, - + long int arg4, long int arg5, - + long int number) */ + pop {r5 - r8, r10} + cfi_adjust_cfa_offset (-20) + diff --git a/sysdeps/generic/unwind-arch.h b/sysdeps/generic/unwind-arch.h + new file mode 100644 + index 0000000000..d712e5e11d + --- /dev/null + +++ b/sysdeps/generic/unwind-arch.h + @@ -0,0 +1,30 @@ + +/* Return backtrace of current program state. Arch-specific bits. + + Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#ifndef _UNWIND_ARCH_H + +#define _UNWIND_ARCH_H + + + +#include + + + +static inline void * + +unwind_arch_adjustment (void *prev, void *addr) + +{ + + return addr; + +} + + + +#endif + diff --git a/sysdeps/hppa/dl-fptr.c b/sysdeps/hppa/dl-fptr.c + index 0a37397284..25ca8f8463 100644 + --- a/sysdeps/hppa/dl-fptr.c + +++ b/sysdeps/hppa/dl-fptr.c + @@ -172,8 +172,8 @@ make_fdesc (ElfW(Addr) ip, ElfW(Addr) gp) + } - ENTRY(__mips_syscall5) - lw v0, 20(sp) - diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S - index 572d7c1137..2b4a3117d1 100644 - --- a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S - +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S - @@ -22,9 +22,9 @@ - .text - .set nomips16 + install: + - fdesc->ip = ip; + fdesc->gp = gp; + + fdesc->ip = ip; - -/* long long __mips_syscall6 (long arg1, long arg2, long arg3, long arg4, - - long arg5, long arg6, - - long number) */ - +/* long long int __mips_syscall6 (long int arg1, long int arg2, long int arg3, - + long int arg4, long int arg5, long int arg6, - + long int number) */ + return (ElfW(Addr)) fdesc; + } + @@ -350,7 +350,9 @@ ElfW(Addr) + _dl_lookup_address (const void *address) + { + ElfW(Addr) addr = (ElfW(Addr)) address; + - unsigned int *desc, *gptr; + + ElfW(Word) reloc_arg; + + volatile unsigned int *desc; + + unsigned int *gptr; - ENTRY(__mips_syscall6) - lw v0, 24(sp) - diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S - index 05164cb253..2723bbb138 100644 - --- a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S - +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S - @@ -22,9 +22,10 @@ - .text - .set nomips16 + /* Return ADDR if the least-significant two bits of ADDR are not consistent + with ADDR being a linker defined function pointer. 
The normal value for + @@ -367,7 +369,11 @@ _dl_lookup_address (const void *address) + if (!_dl_read_access_allowed (desc)) + return addr; - -/* long long __mips_syscall7 (long arg1, long arg2, long arg3, long arg4, - - long arg5, long arg6, long arg7, - - long number) */ - +/* long long int __mips_syscall7 (long int arg1, long int arg2, long int arg3, - + long int arg4, long int arg5, long int arg6, - + long int arg7, - + long int number) */ + - /* Load first word of candidate descriptor. It should be a pointer + + /* First load the relocation offset. */ + + reloc_arg = (ElfW(Word)) desc[1]; + + atomic_full_barrier(); + + + + /* Then load first word of candidate descriptor. It should be a pointer + with word alignment and point to memory that can be read. */ + gptr = (unsigned int *) desc[0]; + if (((unsigned int) gptr & 3) != 0 + @@ -377,8 +383,8 @@ _dl_lookup_address (const void *address) + /* See if descriptor requires resolution. The following trampoline is + used in each global offset table for function resolution: - ENTRY(__mips_syscall7) - lw v0, 28(sp) - diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h - index 9bf551ace8..f23ede0259 100644 - --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h - +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h - @@ -19,51 +19,57 @@ - #ifndef MIPS16_SYSCALL_H - #define MIPS16_SYSCALL_H 1 + - ldw 0(r20),r22 + - bv r0(r22) + + ldw 0(r20),r21 + + bv r0(r21) + ldw 4(r20),r21 + tramp: b,l .-12,r20 + depwi 0,31,2,r20 + @@ -389,7 +395,15 @@ _dl_lookup_address (const void *address) + if (gptr[0] == 0xea9f1fdd /* b,l .-12,r20 */ + && gptr[1] == 0xd6801c1e /* depwi 0,31,2,r20 */ + && (ElfW(Addr)) gptr[2] == elf_machine_resolve ()) + - _dl_fixup ((struct link_map *) gptr[5], (ElfW(Word)) desc[1]); + + { + + struct link_map *l = (struct link_map *) gptr[5]; + + + + /* If gp has been resolved, we need to hunt for relocation offset. 
*/ + + if (!(reloc_arg & PA_GP_RELOC)) + + reloc_arg = _dl_fix_reloc_arg (addr, l); + + + + _dl_fixup (l, reloc_arg); + + } - -long long __nomips16 __mips16_syscall0 (long number); - +long long int __nomips16 __mips16_syscall0 (long int number); - #define __mips16_syscall0(dummy, number) \ - - __mips16_syscall0 ((long) (number)) - + __mips16_syscall0 ((long int) (number)) + return (ElfW(Addr)) desc[0]; + } + diff --git a/sysdeps/hppa/dl-machine.h b/sysdeps/hppa/dl-machine.h + index 9e98366ea3..8ecff97706 100644 + --- a/sysdeps/hppa/dl-machine.h + +++ b/sysdeps/hppa/dl-machine.h + @@ -48,6 +48,14 @@ + #define GOT_FROM_PLT_STUB (4*4) + #define PLT_ENTRY_SIZE (2*4) - -long long __nomips16 __mips16_syscall1 (long a0, - - long number); - +long long int __nomips16 __mips16_syscall1 (long int a0, - + long int number); - #define __mips16_syscall1(a0, number) \ - - __mips16_syscall1 ((long) (a0), \ - - (long) (number)) - + __mips16_syscall1 ((long int) (a0), \ - + (long int) (number)) - - -long long __nomips16 __mips16_syscall2 (long a0, long a1, - - long number); - +long long int __nomips16 __mips16_syscall2 (long int a0, long int a1, - + long int number); - #define __mips16_syscall2(a0, a1, number) \ - - __mips16_syscall2 ((long) (a0), (long) (a1), \ - - (long) (number)) - + __mips16_syscall2 ((long int) (a0), (long int) (a1), \ - + (long int) (number)) - - -long long __nomips16 __mips16_syscall3 (long a0, long a1, long a2, - - long number); - +long long int __nomips16 __mips16_syscall3 (long int a0, long int a1, - + long int a2, - + long int number); - #define __mips16_syscall3(a0, a1, a2, number) \ - - __mips16_syscall3 ((long) (a0), (long) (a1), (long) (a2), \ - - (long) (number)) - + __mips16_syscall3 ((long int) (a0), (long int) (a1), \ - + (long int) (a2), \ - + (long int) (number)) - - -long long __nomips16 __mips16_syscall4 (long a0, long a1, long a2, long a3, - - long number); - +long long int __nomips16 __mips16_syscall4 (long int a0, long int a1, - + long int a2, long int a3, - + long int number); - #define __mips16_syscall4(a0, a1, a2, a3, number) \ - - __mips16_syscall4 ((long) (a0), (long) (a1), (long) (a2), \ - - (long) (a3), \ - - (long) (number)) - + __mips16_syscall4 ((long int) (a0), (long int) (a1), \ - + (long int) (a2), (long int) (a3), \ - + (long int) (number)) + +/* The gp slot in the function descriptor contains the relocation offset + + before resolution. To distinguish between a resolved gp value and an + + unresolved relocation offset we set an unused bit in the relocation + + offset. This would allow us to do a synchronzied two word update + + using this bit (interlocked update), but instead of waiting for the + + update we simply recompute the gp value given that we know the ip. */ + +#define PA_GP_RELOC 1 + + + /* Initialize the function descriptor table before relocations */ + static inline void + __hppa_init_bootstrap_fdesc_table (struct link_map *map) + @@ -117,10 +125,28 @@ elf_machine_fixup_plt (struct link_map *map, lookup_t t, + volatile Elf32_Addr *rfdesc = reloc_addr; + /* map is the link_map for the caller, t is the link_map for the object + being called */ + - rfdesc[1] = value.gp; + - /* Need to ensure that the gp is visible before the code + - entry point is updated */ + - rfdesc[0] = value.ip; + + + + /* We would like the function descriptor to be double word aligned. This + + helps performance (ip and gp then reside on the same cache line) and + + we can update the pair atomically with a single store. 
The linker + + now ensures this alignment but we still have to handle old code. */ + + if ((unsigned int)reloc_addr & 7) + + { + + /* Need to ensure that the gp is visible before the code + + entry point is updated */ + + rfdesc[1] = value.gp; + + atomic_full_barrier(); + + rfdesc[0] = value.ip; + + } + + else + + { + + /* Update pair atomically with floating point store. */ + + union { ElfW(Word) v[2]; double d; } u; + + + + u.v[0] = value.ip; + + u.v[1] = value.gp; + + *(volatile double *)rfdesc = u.d; + + } + return value; + } - /* The remaining ones use regular MIPS wrappers. */ + @@ -265,7 +291,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) + here. The trampoline code will load the proper + LTP and pass the reloc offset to the fixup + function. */ + - fptr->gp = iplt - jmprel; + + fptr->gp = (iplt - jmprel) | PA_GP_RELOC; + } /* r_sym != 0 */ + else + { + diff --git a/sysdeps/hppa/dl-runtime.c b/sysdeps/hppa/dl-runtime.c + new file mode 100644 + index 0000000000..885a3f1837 + --- /dev/null + +++ b/sysdeps/hppa/dl-runtime.c + @@ -0,0 +1,58 @@ + +/* On-demand PLT fixup for shared objects. HPPA version. + + Copyright (C) 2019 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, write to the Free + + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + + 02111-1307 USA. */ + + + +/* Clear PA_GP_RELOC bit in relocation offset. */ + +#define reloc_offset (reloc_arg & ~PA_GP_RELOC) + +#define reloc_index (reloc_arg & ~PA_GP_RELOC) / sizeof (PLTREL) + + + +#include + + + +/* The caller has encountered a partially relocated function descriptor. + + The gp of the descriptor has been updated, but not the ip. We find + + the function descriptor again and compute the relocation offset and + + return that to the caller. The caller will continue on to call + + _dl_fixup with the relocation offset. */ + + + +ElfW(Word) + +attribute_hidden __attribute ((noinline)) ARCH_FIXUP_ATTRIBUTE + +_dl_fix_reloc_arg (struct fdesc *fptr, struct link_map *l) + +{ + + Elf32_Addr l_addr, iplt, jmprel, end_jmprel, r_type; + + const Elf32_Rela *reloc; + + + + l_addr = l->l_addr; + + jmprel = D_PTR(l, l_info[DT_JMPREL]); + + end_jmprel = jmprel + l->l_info[DT_PLTRELSZ]->d_un.d_val; + + + + /* Look for the entry... */ + + for (iplt = jmprel; iplt < end_jmprel; iplt += sizeof (Elf32_Rela)) + + { + + reloc = (const Elf32_Rela *) iplt; + + r_type = ELF32_R_TYPE (reloc->r_info); + + + + if (__builtin_expect (r_type == R_PARISC_IPLT, 1) + + && fptr == (struct fdesc *) (reloc->r_offset + l_addr)) + + /* Found entry. Return the reloc offset. */ + + return iplt - jmprel; + + } + + + + /* Crash if we weren't passed a valid function pointer. 
*/ + + ABORT_INSTRUCTION; + + return 0; + +} + diff --git a/sysdeps/hppa/dl-trampoline.S b/sysdeps/hppa/dl-trampoline.S + index 0114ca8b19..d0804b30c0 100644 + --- a/sysdeps/hppa/dl-trampoline.S + +++ b/sysdeps/hppa/dl-trampoline.S + @@ -31,7 +31,7 @@ + slow down __cffc when it attempts to call fixup to resolve function + descriptor references. Please refer to gcc/gcc/config/pa/fptr.c - #define __mips16_syscall5(a0, a1, a2, a3, a4, number) \ - - __mips_syscall5 ((long) (a0), (long) (a1), (long) (a2), \ - - (long) (a3), (long) (a4), \ - - (long) (number)) - + __mips_syscall5 ((long int) (a0), (long int) (a1), \ - + (long int) (a2), (long int) (a3), \ - + (long int) (a4), \ - + (long int) (number)) + - Enter with r19 = reloc offset, r20 = got-8, r21 = fixup ltp. */ + + Enter with r19 = reloc offset, r20 = got-8, r21 = fixup ltp, r22 = fp. */ - #define __mips16_syscall6(a0, a1, a2, a3, a4, a5, number) \ - - __mips_syscall6 ((long) (a0), (long) (a1), (long) (a2), \ - - (long) (a3), (long) (a4), (long) (a5), \ - - (long) (number)) - + __mips_syscall6 ((long int) (a0), (long int) (a1), \ - + (long int) (a2), (long int) (a3), \ - + (long int) (a4), (long int) (a5), \ - + (long int) (number)) + /* RELOCATION MARKER: bl to provide gcc's __cffc with fixup loc. */ + .text + @@ -61,17 +61,20 @@ _dl_runtime_resolve: + copy %sp, %r1 /* Copy previous sp */ + /* Save function result address (on entry) */ + stwm %r28,128(%sp) + - /* Fillin some frame info to follow ABI */ + + /* Fill in some frame info to follow ABI */ + stw %r1,-4(%sp) /* Previous sp */ + stw %r21,-32(%sp) /* PIC register value */ - #define __mips16_syscall7(a0, a1, a2, a3, a4, a5, a6, number) \ - - __mips_syscall7 ((long) (a0), (long) (a1), (long) (a2), \ - - (long) (a3), (long) (a4), (long) (a5), \ - - (long) (a6), \ - - (long) (number)) - + __mips_syscall7 ((long int) (a0), (long int) (a1), \ - + (long int) (a2), (long int) (a3), \ - + (long int) (a4), (long int) (a5), \ - + (long int) (a6), \ - + (long int) (number)) + /* Save input floating point registers. This must be done + in the new frame since the previous frame doesn't have + enough space */ + - ldo -56(%sp),%r1 + + ldo -64(%sp),%r1 + fstd,ma %fr4,-8(%r1) + fstd,ma %fr5,-8(%r1) + fstd,ma %fr6,-8(%r1) + + + + /* Test PA_GP_RELOC bit. */ + + bb,>= %r19,31,2f /* branch if not reloc offset */ + fstd,ma %fr7,-8(%r1) - #endif - diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c - index 92f16e2724..43c05f8050 100644 - --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c - +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c - @@ -20,8 +20,8 @@ + /* Set up args to fixup func, needs only two arguments */ + @@ -79,7 +82,7 @@ _dl_runtime_resolve: + copy %r19,%r25 /* (2) reloc offset */ - #undef __mips16_syscall0 + /* Call the real address resolver. 
*/ + - bl _dl_fixup,%rp + +3: bl _dl_fixup,%rp + copy %r21,%r19 /* set fixup func ltp */ - -long long __nomips16 - -__mips16_syscall0 (long number) - +long long int __nomips16 - +__mips16_syscall0 (long int number) - { - union __mips_syscall_return ret; - ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 0); - diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c - index fa985a96e5..16a567e834 100644 - --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c - +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c - @@ -20,9 +20,9 @@ + /* While the linker will set a function pointer to NULL when it + @@ -102,7 +105,7 @@ _dl_runtime_resolve: + copy %r29, %r19 - #undef __mips16_syscall1 + /* Reload arguments fp args */ + - ldo -56(%sp),%r1 + + ldo -64(%sp),%r1 + fldd,ma -8(%r1),%fr4 + fldd,ma -8(%r1),%fr5 + fldd,ma -8(%r1),%fr6 + @@ -129,6 +132,25 @@ _dl_runtime_resolve: + bv %r0(%rp) + ldo -128(%sp),%sp - -long long __nomips16 - -__mips16_syscall1 (long a0, - - long number) - +long long int __nomips16 - +__mips16_syscall1 (long int a0, - + long int number) - { - union __mips_syscall_return ret; - ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 1, - diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c - index f042ac815d..c0a856c344 100644 - --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c - +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c - @@ -20,9 +20,9 @@ - - #undef __mips16_syscall2 + +2: + + /* Set up args for _dl_fix_reloc_arg. */ + + copy %r22,%r26 /* (1) function pointer */ + + depi 0,31,2,%r26 /* clear least significant bits */ + + ldw 8+4(%r20),%r25 /* (2) got[1] == struct link_map */ + + + + /* Save ltp and link map arg for _dl_fixup. */ + + stw %r21,-56(%sp) /* ltp */ + + stw %r25,-60(%sp) /* struct link map */ + + + + /* Find reloc offset. */ + + bl _dl_fix_reloc_arg,%rp + + copy %r21,%r19 /* set func ltp */ + + + + /* Set up args for _dl_fixup. */ + + ldw -56(%sp),%r21 /* ltp */ + + ldw -60(%sp),%r26 /* (1) struct link map */ + + b 3b + + copy %ret0,%r25 /* (2) reloc offset */ + .EXIT + .PROCEND + cfi_endproc + @@ -153,7 +175,7 @@ _dl_runtime_profile: + copy %sp, %r1 /* Copy previous sp */ + /* Save function result address (on entry) */ + stwm %r28,192(%sp) + - /* Fillin some frame info to follow ABI */ + + /* Fill in some frame info to follow ABI */ + stw %r1,-4(%sp) /* Previous sp */ + stw %r21,-32(%sp) /* PIC register value */ - -long long __nomips16 - -__mips16_syscall2 (long a0, long a1, - - long number) - +long long int __nomips16 - +__mips16_syscall2 (long int a0, long int a1, - + long int number) - { - union __mips_syscall_return ret; - ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 2, - diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c - index dfe2f7feb5..042768ebf2 100644 - --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c - +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c - @@ -20,9 +20,9 @@ + @@ -181,10 +203,11 @@ _dl_runtime_profile: + fstd,ma %fr5,8(%r1) + fstd,ma %fr6,8(%r1) + fstd,ma %fr7,8(%r1) + - /* 32-bit stack pointer and return register */ + - stw %sp,-56(%sp) + - stw %r2,-52(%sp) - #undef __mips16_syscall3 + + /* Test PA_GP_RELOC bit. 
*/ + + bb,>= %r19,31,2f /* branch if not reloc offset */ + + /* 32-bit stack pointer */ + + stw %sp,-56(%sp) - -long long __nomips16 - -__mips16_syscall3 (long a0, long a1, long a2, - - long number) - +long long int __nomips16 - +__mips16_syscall3 (long int a0, long int a1, long int a2, - + long int number) - { - union __mips_syscall_return ret; - ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 3, - diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c - index 39de510357..8658d822ab 100644 - --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c - +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c - @@ -20,9 +20,9 @@ + /* Set up args to fixup func, needs five arguments */ + ldw 8+4(%r20),%r26 /* (1) got[1] == struct link_map */ + @@ -197,7 +220,7 @@ _dl_runtime_profile: + stw %r1, -52(%sp) /* (5) long int *framesizep */ - #undef __mips16_syscall4 + /* Call the real address resolver. */ + - bl _dl_profile_fixup,%rp + +3: bl _dl_profile_fixup,%rp + copy %r21,%r19 /* set fixup func ltp */ - -long long __nomips16 - -__mips16_syscall4 (long a0, long a1, long a2, long a3, - - long number) - +long long int __nomips16 - +__mips16_syscall4 (long int a0, long int a1, long int a2, long int a3, - + long int number) - { - union __mips_syscall_return ret; - ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 4, - diff --git a/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h - index beefcf284b..0c6a83e9b3 100644 - --- a/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h - +++ b/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h - @@ -52,7 +52,7 @@ - #undef INLINE_SYSCALL - #define INLINE_SYSCALL(name, nr, args...) \ - ({ INTERNAL_SYSCALL_DECL (_sc_err); \ - - long result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ - + long int result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ - if ( INTERNAL_SYSCALL_ERROR_P (result_var, _sc_err) ) \ - { \ - __set_errno (INTERNAL_SYSCALL_ERRNO (result_var, _sc_err)); \ - @@ -61,10 +61,10 @@ - result_var; }) + /* Load up the returned function descriptor */ + @@ -215,7 +238,9 @@ _dl_runtime_profile: + fldd,ma 8(%r1),%fr5 + fldd,ma 8(%r1),%fr6 + fldd,ma 8(%r1),%fr7 + - ldw -52(%sp),%rp + + + + /* Reload rp register -(192+20) without adjusting stack */ + + ldw -212(%sp),%rp - #undef INTERNAL_SYSCALL_DECL - -#define INTERNAL_SYSCALL_DECL(err) long err __attribute__ ((unused)) - +#define INTERNAL_SYSCALL_DECL(err) long int err __attribute__ ((unused)) + /* Reload static link register -(192+16) without adjusting stack */ + ldw -208(%sp),%r29 + @@ -303,6 +328,33 @@ L(cont): + ldw -20(%sp),%rp + /* Return */ + bv,n 0(%r2) + + + +2: + + /* Set up args for _dl_fix_reloc_arg. */ + + copy %r22,%r26 /* (1) function pointer */ + + depi 0,31,2,%r26 /* clear least significant bits */ + + ldw 8+4(%r20),%r25 /* (2) got[1] == struct link_map */ + + + + /* Save ltp and link map arg for _dl_fixup. */ + + stw %r21,-92(%sp) /* ltp */ + + stw %r25,-116(%sp) /* struct link map */ + + + + /* Find reloc offset. */ + + bl _dl_fix_reloc_arg,%rp + + copy %r21,%r19 /* set func ltp */ + + + + /* Restore fixup ltp. 
*/ + + ldw -92(%sp),%r21 /* ltp */ + + + + /* Set up args to fixup func, needs five arguments */ + + ldw -116(%sp),%r26 /* (1) struct link map */ + + copy %ret0,%r25 /* (2) reloc offset */ + + stw %r25,-120(%sp) /* Save reloc offset */ + + ldw -212(%sp),%r24 /* (3) profile_fixup needs rp */ + + ldo -56(%sp),%r23 /* (4) La_hppa_regs */ + + ldo -112(%sp), %r1 + + b 3b + + stw %r1, -52(%sp) /* (5) long int *framesizep */ + .EXIT + .PROCEND + cfi_endproc + diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h + index 8af0789a9c..4334ade2a0 100644 + --- a/sysdeps/i386/dl-machine.h + +++ b/sysdeps/i386/dl-machine.h + @@ -338,16 +338,22 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc, + { + # ifndef RTLD_BOOTSTRAP + if (sym_map != map + - && sym_map->l_type != lt_executable + && !sym_map->l_relocated) + { + const char *strtab + = (const char *) D_PTR (map, l_info[DT_STRTAB]); + - _dl_error_printf ("\ + + if (sym_map->l_type == lt_executable) + + _dl_fatal_printf ("\ + +%s: IFUNC symbol '%s' referenced in '%s' is defined in the executable \ + +and creates an unsatisfiable circular dependency.\n", + + RTLD_PROGNAME, strtab + refsym->st_name, + + map->l_name); + + else + + _dl_error_printf ("\ + %s: Relink `%s' with `%s' for IFUNC symbol `%s'\n", + - RTLD_PROGNAME, map->l_name, + - sym_map->l_name, + - strtab + refsym->st_name); + + RTLD_PROGNAME, map->l_name, + + sym_map->l_name, + + strtab + refsym->st_name); + } + # endif + value = ((Elf32_Addr (*) (void)) value) (); + diff --git a/sysdeps/i386/sysdep.h b/sysdeps/i386/sysdep.h + index b4bcd8fb6c..6094af8fec 100644 + --- a/sysdeps/i386/sysdep.h + +++ b/sysdeps/i386/sysdep.h + @@ -61,7 +61,7 @@ lose: SYSCALL_PIC_SETUP \ - #undef INTERNAL_SYSCALL_ERROR_P - -#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long) (err)) - +#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long int) (err)) + # define SETUP_PIC_REG(reg) \ + .ifndef GET_PC_THUNK(reg); \ + - .section .gnu.linkonce.t.GET_PC_THUNK(reg),"ax",@progbits; \ + + .section .text.GET_PC_THUNK(reg),"axG",@progbits,GET_PC_THUNK(reg),comdat; \ + .globl GET_PC_THUNK(reg); \ + .hidden GET_PC_THUNK(reg); \ + .p2align 4; \ + @@ -97,7 +97,8 @@ GET_PC_THUNK(reg): \ - #undef INTERNAL_SYSCALL_ERRNO - #define INTERNAL_SYSCALL_ERRNO(val, err) ((void) (err), val) - @@ -103,11 +103,11 @@ + # define SETUP_PIC_REG_STR(reg) \ + ".ifndef " GET_PC_THUNK_STR (reg) "\n" \ + - ".section .gnu.linkonce.t." GET_PC_THUNK_STR (reg) ",\"ax\",@progbits\n" \ + + ".section .text." GET_PC_THUNK_STR (reg) ",\"axG\",@progbits," \ + + GET_PC_THUNK_STR (reg) ",comdat\n" \ + ".globl " GET_PC_THUNK_STR (reg) "\n" \ + ".hidden " GET_PC_THUNK_STR (reg) "\n" \ + ".p2align 4\n" \ + diff --git a/sysdeps/ieee754/ldbl-96/Makefile b/sysdeps/ieee754/ldbl-96/Makefile + index 995e90d6da..6030adf7e7 100644 + --- a/sysdeps/ieee754/ldbl-96/Makefile + +++ b/sysdeps/ieee754/ldbl-96/Makefile + @@ -17,5 +17,8 @@ + # . 
- union __mips_syscall_return - { - - long long val; - + long long int val; - struct - { - - long v0; - - long v1; - + long int v0; - + long int v1; - } - reg; - }; - @@ -152,13 +152,13 @@ union __mips_syscall_return + ifeq ($(subdir),math) + -tests += test-canonical-ldbl-96 test-totalorderl-ldbl-96 + +tests += test-canonical-ldbl-96 test-totalorderl-ldbl-96 test-sinl-pseudo + +ifeq ($(have-ssp),yes) + +CFLAGS-test-sinl-pseudo.c += -fstack-protector-all + endif + +endif # $(subdir) == math + diff --git a/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c b/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c + index 5f742321ae..bcdf20179f 100644 + --- a/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c + +++ b/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c + @@ -210,6 +210,18 @@ __ieee754_rem_pio2l (long double x, long double *y) + return 0; + } - #define internal_syscall0(v0_init, input, number, err, dummy...) \ - ({ \ - - long _sys_result; \ - + long int _sys_result; \ - \ - { \ - - register long __s0 asm ("$16") __attribute__ ((unused)) \ - + register long int __s0 asm ("$16") __attribute__ ((unused)) \ - = (number); \ - - register long __v0 asm ("$2"); \ - - register long __a3 asm ("$7"); \ - + register long int __v0 asm ("$2"); \ - + register long int __a3 asm ("$7"); \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - v0_init \ - @@ -175,14 +175,15 @@ union __mips_syscall_return - - #define internal_syscall1(v0_init, input, number, err, arg1) \ - ({ \ - - long _sys_result; \ - + long int _sys_result; \ - \ - { \ - - register long __s0 asm ("$16") __attribute__ ((unused)) \ - + long int _arg1 = (long int) (arg1); \ - + register long int __s0 asm ("$16") __attribute__ ((unused)) \ - = (number); \ - - register long __v0 asm ("$2"); \ - - register long __a0 asm ("$4") = (long) (arg1); \ - - register long __a3 asm ("$7"); \ - + register long int __v0 asm ("$2"); \ - + register long int __a0 asm ("$4") = _arg1; \ - + register long int __a3 asm ("$7"); \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - v0_init \ - @@ -199,15 +200,17 @@ union __mips_syscall_return + + if ((i0 & 0x80000000) == 0) + + { + + /* Pseudo-zero and unnormal representations are not valid + + representations of long double. We need to avoid stack + + corruption in __kernel_rem_pio2, which expects input in a + + particular normal form, but those representations do not need + + to be consistently handled like any particular floating-point + + value. */ + + y[1] = y[0] = __builtin_nanl (""); + + return 0; + + } + + + /* Split the 64 bits of the mantissa into three 24-bit integers + stored in a double array. */ + exp = j0 - 23; + diff --git a/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c b/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c + new file mode 100644 + index 0000000000..f59b97769d + --- /dev/null + +++ b/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c + @@ -0,0 +1,41 @@ + +/* Test sinl for pseudo-zeros and unnormals for ldbl-96 (bug 25487). + + Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. 
+ + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#include + +#include + +#include + + + +static int + +do_test (void) + +{ + + for (int i = 0; i < 64; i++) + + { + + uint64_t sig = i == 63 ? 0 : 1ULL << i; + + long double ld; + + SET_LDOUBLE_WORDS (ld, 0x4141, + + sig >> 32, sig & 0xffffffffULL); + + /* The requirement is that no stack overflow occurs when the + + pseudo-zero or unnormal goes through range reduction. */ + + volatile long double ldr; + + ldr = sinl (ld); + + (void) ldr; + + } + + return 0; + +} + + + +#include + diff --git a/sysdeps/posix/getcwd.c b/sysdeps/posix/getcwd.c + index f00b337a13..839d78d7b7 100644 + --- a/sysdeps/posix/getcwd.c + +++ b/sysdeps/posix/getcwd.c + @@ -241,6 +241,14 @@ __getcwd (char *buf, size_t size) + char *path; + #ifndef NO_ALLOCATION + size_t allocated = size; + + + + /* A size of 1 byte is never useful. */ + + if (allocated == 1) + + { + + __set_errno (ERANGE); + + return NULL; + + } + + + if (size == 0) + { + if (buf != NULL) + diff --git a/sysdeps/posix/system.c b/sysdeps/posix/system.c + index e613e6a344..a03f478fc7 100644 + --- a/sysdeps/posix/system.c + +++ b/sysdeps/posix/system.c + @@ -101,7 +101,8 @@ cancel_handler (void *arg) + static int + do_system (const char *line) + { + - int status; + + int status = -1; + + int ret; + pid_t pid; + struct sigaction sa; + #ifndef _LIBC_REENTRANT + @@ -144,14 +145,14 @@ do_system (const char *line) + __posix_spawnattr_setflags (&spawn_attr, + POSIX_SPAWN_SETSIGDEF | POSIX_SPAWN_SETSIGMASK); - #define internal_syscall2(v0_init, input, number, err, arg1, arg2) \ - ({ \ - - long _sys_result; \ - + long int _sys_result; \ - \ - { \ - - register long __s0 asm ("$16") __attribute__ ((unused)) \ - + long int _arg1 = (long int) (arg1); \ - + long int _arg2 = (long int) (arg2); \ - + register long int __s0 asm ("$16") __attribute__ ((unused)) \ - = (number); \ - - register long __v0 asm ("$2"); \ - - register long __a0 asm ("$4") = (long) (arg1); \ - - register long __a1 asm ("$5") = (long) (arg2); \ - - register long __a3 asm ("$7"); \ - + register long int __v0 asm ("$2"); \ - + register long int __a0 asm ("$4") = _arg1; \ - + register long int __a1 asm ("$5") = _arg2; \ - + register long int __a3 asm ("$7"); \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - v0_init \ - @@ -225,16 +228,19 @@ union __mips_syscall_return - #define internal_syscall3(v0_init, input, number, err, \ - arg1, arg2, arg3) \ - ({ \ - - long _sys_result; \ - + long int _sys_result; \ - \ - { \ - - register long __s0 asm ("$16") __attribute__ ((unused)) \ - + long int _arg1 = (long int) (arg1); \ - + long int _arg2 = (long int) (arg2); \ - + long int _arg3 = (long int) (arg3); \ - + register long int __s0 asm ("$16") __attribute__ ((unused)) \ - = (number); \ - - register long __v0 asm ("$2"); \ - - register long __a0 asm ("$4") = (long) (arg1); \ - - register long __a1 asm ("$5") = (long) (arg2); \ - - register long __a2 asm ("$6") = (long) (arg3); \ - - register long __a3 asm ("$7"); \ - + register long int __v0 asm ("$2"); \ - + register long int __a0 asm ("$4") = _arg1; \ - + register long int __a1 asm ("$5") = _arg2; \ - + register long int __a2 asm ("$6") = _arg3; \ - + register long int __a3 asm ("$7"); \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - v0_init \ - @@ -252,16 +258,20 @@ union __mips_syscall_return - #define internal_syscall4(v0_init, input, number, err, \ - arg1, arg2, arg3, arg4) \ - ({ \ - - long _sys_result; \ 
- + long int _sys_result; \ - \ - { \ - - register long __s0 asm ("$16") __attribute__ ((unused)) \ - + long int _arg1 = (long int) (arg1); \ - + long int _arg2 = (long int) (arg2); \ - + long int _arg3 = (long int) (arg3); \ - + long int _arg4 = (long int) (arg4); \ - + register long int __s0 asm ("$16") __attribute__ ((unused)) \ - = (number); \ - - register long __v0 asm ("$2"); \ - - register long __a0 asm ("$4") = (long) (arg1); \ - - register long __a1 asm ("$5") = (long) (arg2); \ - - register long __a2 asm ("$6") = (long) (arg3); \ - - register long __a3 asm ("$7") = (long) (arg4); \ - + register long int __v0 asm ("$2"); \ - + register long int __a0 asm ("$4") = _arg1; \ - + register long int __a1 asm ("$5") = _arg2; \ - + register long int __a2 asm ("$6") = _arg3; \ - + register long int __a3 asm ("$7") = _arg4; \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - v0_init \ - @@ -285,63 +295,66 @@ union __mips_syscall_return - compiler specifics required for the stack arguments to be pushed, - which would be the case if these syscalls were inlined. */ + - status = __posix_spawn (&pid, SHELL_PATH, 0, &spawn_attr, + - (char *const[]){ (char*) SHELL_NAME, + - (char*) "-c", + - (char *) line, NULL }, + - __environ); + + ret = __posix_spawn (&pid, SHELL_PATH, 0, &spawn_attr, + + (char *const[]){ (char *) SHELL_NAME, + + (char *) "-c", + + (char *) line, NULL }, + + __environ); + __posix_spawnattr_destroy (&spawn_attr); - -long long __nomips16 __mips_syscall5 (long arg1, long arg2, long arg3, - - long arg4, long arg5, - - long number); - +long long int __nomips16 __mips_syscall5 (long int arg1, long int arg2, - + long int arg3, long int arg4, - + long int arg5, - + long int number); - libc_hidden_proto (__mips_syscall5, nomips16) + - if (status == 0) + + if (ret == 0) + { + /* Cancellation results in cleanup handlers running as exceptions in + the block where they were installed, so it is safe to reference + @@ -186,6 +187,9 @@ do_system (const char *line) + } + DO_UNLOCK (); - #define internal_syscall5(v0_init, input, number, err, \ - arg1, arg2, arg3, arg4, arg5) \ - ({ \ - union __mips_syscall_return _sc_ret; \ - - _sc_ret.val = __mips_syscall5 ((long) (arg1), \ - - (long) (arg2), \ - - (long) (arg3), \ - - (long) (arg4), \ - - (long) (arg5), \ - - (long) (number)); \ - + _sc_ret.val = __mips_syscall5 ((long int) (arg1), \ - + (long int) (arg2), \ - + (long int) (arg3), \ - + (long int) (arg4), \ - + (long int) (arg5), \ - + (long int) (number)); \ - err = _sc_ret.reg.v1; \ - _sc_ret.reg.v0; \ - }) + + if (ret != 0) + + __set_errno (ret); + + + return status; + } - -long long __nomips16 __mips_syscall6 (long arg1, long arg2, long arg3, - - long arg4, long arg5, long arg6, - - long number); - +long long int __nomips16 __mips_syscall6 (long int arg1, long int arg2, - + long int arg3, long int arg4, - + long int arg5, long int arg6, - + long int number); - libc_hidden_proto (__mips_syscall6, nomips16) - - #define internal_syscall6(v0_init, input, number, err, \ - arg1, arg2, arg3, arg4, arg5, arg6) \ - ({ \ - union __mips_syscall_return _sc_ret; \ - - _sc_ret.val = __mips_syscall6 ((long) (arg1), \ - - (long) (arg2), \ - - (long) (arg3), \ - - (long) (arg4), \ - - (long) (arg5), \ - - (long) (arg6), \ - - (long) (number)); \ - + _sc_ret.val = __mips_syscall6 ((long int) (arg1), \ - + (long int) (arg2), \ - + (long int) (arg3), \ - + (long int) (arg4), \ - + (long int) (arg5), \ - + (long int) (arg6), \ - + (long int) (number)); \ - err = _sc_ret.reg.v1; \ - _sc_ret.reg.v0; \ - }) - - 
-long long __nomips16 __mips_syscall7 (long arg1, long arg2, long arg3, - - long arg4, long arg5, long arg6, - - long arg7, - - long number); - +long long int __nomips16 __mips_syscall7 (long int arg1, long int arg2, - + long int arg3, long int arg4, - + long int arg5, long int arg6, - + long int arg7, - + long int number); - libc_hidden_proto (__mips_syscall7, nomips16) - - #define internal_syscall7(v0_init, input, number, err, \ - arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ - ({ \ - union __mips_syscall_return _sc_ret; \ - - _sc_ret.val = __mips_syscall7 ((long) (arg1), \ - - (long) (arg2), \ - - (long) (arg3), \ - - (long) (arg4), \ - - (long) (arg5), \ - - (long) (arg6), \ - - (long) (arg7), \ - - (long) (number)); \ - + _sc_ret.val = __mips_syscall7 ((long int) (arg1), \ - + (long int) (arg2), \ - + (long int) (arg3), \ - + (long int) (arg4), \ - + (long int) (arg5), \ - + (long int) (arg6), \ - + (long int) (arg7), \ - + (long int) (number)); \ - err = _sc_ret.reg.v1; \ - _sc_ret.reg.v0; \ - }) - diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h - index f96636538a..4a9d7054f9 100644 - --- a/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h - +++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h - @@ -47,14 +47,14 @@ - - /* Convert X to a long long, without losing any bits if it is one - already or warning if it is a 32-bit pointer. */ - -#define ARGIFY(X) ((long long) (__typeof__ ((X) - (X))) (X)) - +#define ARGIFY(X) ((long long int) (__typeof__ ((X) - (X))) (X)) - - /* Define a macro which expands into the inline wrapper code for a system - call. */ - #undef INLINE_SYSCALL - #define INLINE_SYSCALL(name, nr, args...) \ - ({ INTERNAL_SYSCALL_DECL (_sc_err); \ - - long result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ - + long int result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ - if ( INTERNAL_SYSCALL_ERROR_P (result_var, _sc_err) ) \ - { \ - __set_errno (INTERNAL_SYSCALL_ERRNO (result_var, _sc_err)); \ - @@ -63,10 +63,10 @@ - result_var; }) - - #undef INTERNAL_SYSCALL_DECL - -#define INTERNAL_SYSCALL_DECL(err) long err __attribute__ ((unused)) - +#define INTERNAL_SYSCALL_DECL(err) long int err __attribute__ ((unused)) + diff --git a/sysdeps/powerpc/powerpc32/sysdep.h b/sysdeps/powerpc/powerpc32/sysdep.h + index 2ba009e919..829eec266a 100644 + --- a/sysdeps/powerpc/powerpc32/sysdep.h + +++ b/sysdeps/powerpc/powerpc32/sysdep.h + @@ -179,8 +179,8 @@ GOT_LABEL: ; \ + #else + /* Position-dependent code does not require access to the GOT. */ + # define __GLRO(rOUT, rGOT, member, offset) \ + - lis rOUT,(member+LOWORD)@ha; \ + - lwz rOUT,(member+LOWORD)@l(rOUT) + + lis rOUT,(member)@ha; \ + + lwz rOUT,(member)@l(rOUT) + #endif /* PIC */ - #undef INTERNAL_SYSCALL_ERROR_P - -#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long) (err)) - +#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long int) (err)) + #endif /* __ASSEMBLER__ */ + diff --git a/sysdeps/powerpc/powerpc64/backtrace.c b/sysdeps/powerpc/powerpc64/backtrace.c + index 8a53a1088f..362a2b713c 100644 + --- a/sysdeps/powerpc/powerpc64/backtrace.c + +++ b/sysdeps/powerpc/powerpc64/backtrace.c + @@ -54,11 +54,22 @@ struct signal_frame_64 { + /* We don't care about the rest, since the IP value is at 'uc' field. */ + }; - #undef INTERNAL_SYSCALL_ERRNO - #define INTERNAL_SYSCALL_ERRNO(val, err) ((void) (err), val) - @@ -112,13 +112,13 @@ + +/* Test if the address match to the inside the trampoline code. 
+ + Up to and including kernel 5.8, returning from an interrupt or syscall to a + + signal handler starts execution directly at the handler's entry point, with + + LR set to address of the sigreturn trampoline (the vDSO symbol). + + Newer kernels will branch to signal handler from the trampoline instead, so + + checking the stacktrace against the vDSO entrypoint does not work in such + + case. + + The vDSO branches with a 'bctrl' instruction, so checking either the + + vDSO address itself and the next instruction should cover all kernel + + versions. */ + static inline bool + is_sigtramp_address (void *nip) + { + #ifdef HAVE_SIGTRAMP_RT64 + - if (nip == GLRO (dl_vdso_sigtramp_rt64)) + + if (nip == GLRO (dl_vdso_sigtramp_rt64) || + + nip == GLRO (dl_vdso_sigtramp_rt64) + 4) + return true; + #endif + return false; + diff --git a/sysdeps/s390/configure b/sysdeps/s390/configure + index fa46e9e351..e7f576338d 100644 + --- a/sysdeps/s390/configure + +++ b/sysdeps/s390/configure + @@ -123,7 +123,9 @@ void testinsn (char *buf) + __asm__ (".machine \"arch13\" \n\t" + ".machinemode \"zarch_nohighgprs\" \n\t" + "lghi %%r0,16 \n\t" + - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); + + "mvcrl 0(%0),32(%0) \n\t" + + "vstrs %%v20,%%v20,%%v20,%%v20,0,2" + + : : "a" (buf) : "memory", "r0"); + } + EOF + if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c + @@ -271,7 +273,9 @@ else + void testinsn (char *buf) + { + __asm__ ("lghi %%r0,16 \n\t" + - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); + + "mvcrl 0(%0),32(%0) \n\t" + + "vstrs %%v20,%%v20,%%v20,%%v20,0,2" + + : : "a" (buf) : "memory", "r0"); + } + EOF + if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c + diff --git a/sysdeps/s390/configure.ac b/sysdeps/s390/configure.ac + index 3ed5a8ef87..5c3479e8cf 100644 + --- a/sysdeps/s390/configure.ac + +++ b/sysdeps/s390/configure.ac + @@ -88,7 +88,9 @@ void testinsn (char *buf) + __asm__ (".machine \"arch13\" \n\t" + ".machinemode \"zarch_nohighgprs\" \n\t" + "lghi %%r0,16 \n\t" + - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); + + "mvcrl 0(%0),32(%0) \n\t" + + "vstrs %%v20,%%v20,%%v20,%%v20,0,2" + + : : "a" (buf) : "memory", "r0"); + } + EOF + dnl test, if assembler supports S390 arch13 instructions + @@ -195,7 +197,9 @@ cat > conftest.c <<\EOF + void testinsn (char *buf) + { + __asm__ ("lghi %%r0,16 \n\t" + - "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0"); + + "mvcrl 0(%0),32(%0) \n\t" + + "vstrs %%v20,%%v20,%%v20,%%v20,0,2" + + : : "a" (buf) : "memory", "r0"); + } + EOF + dnl test, if assembler supports S390 arch13 zarch instructions as default + diff --git a/sysdeps/s390/memmove.c b/sysdeps/s390/memmove.c + index 5fc85e129f..ee59b5de14 100644 + --- a/sysdeps/s390/memmove.c + +++ b/sysdeps/s390/memmove.c + @@ -43,7 +43,7 @@ extern __typeof (__redirect_memmove) MEMMOVE_ARCH13 attribute_hidden; + s390_libc_ifunc_expr (__redirect_memmove, memmove, + ({ + s390_libc_ifunc_expr_stfle_init (); + - (HAVE_MEMMOVE_ARCH13 + + (HAVE_MEMMOVE_ARCH13 && (hwcap & HWCAP_S390_VXRS_EXT2) + && S390_IS_ARCH13_MIE3 (stfle_bits)) + ? 
MEMMOVE_ARCH13 + : (HAVE_MEMMOVE_Z13 && (hwcap & HWCAP_S390_VX)) + diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c + index e6195c6e26..17c0cc3952 100644 + --- a/sysdeps/s390/multiarch/ifunc-impl-list.c + +++ b/sysdeps/s390/multiarch/ifunc-impl-list.c + @@ -171,7 +171,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL (i, name, memmove, + # if HAVE_MEMMOVE_ARCH13 + IFUNC_IMPL_ADD (array, i, memmove, + - S390_IS_ARCH13_MIE3 (stfle_bits), + + ((dl_hwcap & HWCAP_S390_VXRS_EXT2) + + && S390_IS_ARCH13_MIE3 (stfle_bits)), + MEMMOVE_ARCH13) + # endif + # if HAVE_MEMMOVE_Z13 + diff --git a/sysdeps/sh/be/sh4/fpu/Implies b/sysdeps/sh/be/sh4/fpu/Implies + new file mode 100644 + index 0000000000..71b28ee1a4 + --- /dev/null + +++ b/sysdeps/sh/be/sh4/fpu/Implies + @@ -0,0 +1 @@ + +sh/sh4/fpu + diff --git a/sysdeps/sh/le/sh4/fpu/Implies b/sysdeps/sh/le/sh4/fpu/Implies + new file mode 100644 + index 0000000000..71b28ee1a4 + --- /dev/null + +++ b/sysdeps/sh/le/sh4/fpu/Implies + @@ -0,0 +1 @@ + +sh/sh4/fpu + diff --git a/sysdeps/unix/make-syscalls.sh b/sysdeps/unix/make-syscalls.sh + index c07626677f..4f6c3490a2 100644 + --- a/sysdeps/unix/make-syscalls.sh + +++ b/sysdeps/unix/make-syscalls.sh + @@ -30,6 +30,7 @@ + # P: optionally-NULL pointer to typed object (e.g., 3rd argument to sigaction) + # s: non-NULL string (e.g., 1st arg to open) + # S: optionally-NULL string (e.g., 1st arg to acct) + +# U: unsigned long int (32-bit types are zero-extended to 64-bit types) + # v: vararg scalar (e.g., optional 3rd arg to open) + # V: byte-per-page vector (3rd arg to mincore) + # W: wait status, optionally-NULL pointer to int (e.g., 2nd arg of wait4) + @@ -184,6 +185,27 @@ while read file srcfile caller syscall args strong weak; do + ?:?????????) nargs=9;; + esac - #define internal_syscall0(v0_init, input, number, err, dummy...) \ - ({ \ - - long _sys_result; \ - + long int _sys_result; \ - \ - { \ - - register long long __s0 asm ("$16") __attribute__ ((unused)) \ - + register long long int __s0 asm ("$16") __attribute__ ((unused))\ - = (number); \ - - register long long __v0 asm ("$2"); \ - - register long long __a3 asm ("$7"); \ - + register long long int __v0 asm ("$2"); \ - + register long long int __a3 asm ("$7"); \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ + + # Derive the unsigned long int arguments from the argument signature + + ulong_arg_1=0 + + ulong_arg_2=0 + + ulong_count=0 + + for U in $(echo $args | sed -e "s/.*:/:/" | grep -ob U) + + do + + ulong_count=$(expr $ulong_count + 1) + + ulong_arg=$(echo $U | sed -e "s/:U//") + + case $ulong_count in + + 1) + + ulong_arg_1=$ulong_arg + + ;; + + 2) + + ulong_arg_2=$ulong_arg + + ;; + + *) + + echo >&2 "$0: Too many unsigned long int arguments for syscall ($strong $weak)" + + exit 2 + + esac + + done + + + # Make sure only the first syscall rule is used, if multiple dirs + # define the same syscall. 
+ echo '' + @@ -245,6 +267,8 @@ while read file srcfile caller syscall args strong weak; do + \$(make-target-directory) + (echo '#define SYSCALL_NAME $syscall'; \\ + echo '#define SYSCALL_NARGS $nargs'; \\ + + echo '#define SYSCALL_ULONG_ARG_1 $ulong_arg_1'; \\ + + echo '#define SYSCALL_ULONG_ARG_2 $ulong_arg_2'; \\ + echo '#define SYSCALL_SYMBOL $strong'; \\ + echo '#define SYSCALL_NOERRNO $noerrno'; \\ + echo '#define SYSCALL_ERRVAL $errval'; \\ + diff --git a/sysdeps/unix/syscall-template.S b/sysdeps/unix/syscall-template.S + index cf6c7a58fb..f807a8603f 100644 + --- a/sysdeps/unix/syscall-template.S + +++ b/sysdeps/unix/syscall-template.S + @@ -25,6 +25,12 @@ + defining a few macros: + SYSCALL_NAME syscall name + SYSCALL_NARGS number of arguments this call takes + + SYSCALL_ULONG_ARG_1 the first unsigned long int argument this + + call takes. 0 means that there are no + + unsigned long int arguments. + + SYSCALL_ULONG_ARG_2 the second unsigned long int argument this + + call takes. 0 means that there is at most + + one unsigned long int argument. + SYSCALL_SYMBOL primary symbol name + SYSCALL_NOERRNO 1 to define a no-errno version (see below) + SYSCALL_ERRVAL 1 to define an error-value version (see below) + @@ -44,9 +50,31 @@ + /* This indirection is needed so that SYMBOL gets macro-expanded. */ + #define syscall_hidden_def(SYMBOL) hidden_def (SYMBOL) + + -#define T_PSEUDO(SYMBOL, NAME, N) PSEUDO (SYMBOL, NAME, N) + -#define T_PSEUDO_NOERRNO(SYMBOL, NAME, N) PSEUDO_NOERRNO (SYMBOL, NAME, N) + -#define T_PSEUDO_ERRVAL(SYMBOL, NAME, N) PSEUDO_ERRVAL (SYMBOL, NAME, N) + +/* If PSEUDOS_HAVE_ULONG_INDICES is defined, PSEUDO and T_PSEUDO macros + + have 2 extra arguments for unsigned long int arguments: + + Extra argument 1: Position of the first unsigned long int argument. + + Extra argument 2: Position of the second unsigned long int argument. + + */ + +#ifndef PSEUDOS_HAVE_ULONG_INDICES + +# undef SYSCALL_ULONG_ARG_1 + +# define SYSCALL_ULONG_ARG_1 0 + +#endif + + + +#if SYSCALL_ULONG_ARG_1 + +# define T_PSEUDO(SYMBOL, NAME, N, U1, U2) \ + + PSEUDO (SYMBOL, NAME, N, U1, U2) + +# define T_PSEUDO_NOERRNO(SYMBOL, NAME, N, U1, U2) \ + + PSEUDO_NOERRNO (SYMBOL, NAME, N, U1, U2) + +# define T_PSEUDO_ERRVAL(SYMBOL, NAME, N, U1, U2) \ + + PSEUDO_ERRVAL (SYMBOL, NAME, N, U1, U2) + +#else + +# define T_PSEUDO(SYMBOL, NAME, N) \ + + PSEUDO (SYMBOL, NAME, N) + +# define T_PSEUDO_NOERRNO(SYMBOL, NAME, N) \ + + PSEUDO_NOERRNO (SYMBOL, NAME, N) + +# define T_PSEUDO_ERRVAL(SYMBOL, NAME, N) \ + + PSEUDO_ERRVAL (SYMBOL, NAME, N) + +#endif + #define T_PSEUDO_END(SYMBOL) PSEUDO_END (SYMBOL) + #define T_PSEUDO_END_NOERRNO(SYMBOL) PSEUDO_END_NOERRNO (SYMBOL) + #define T_PSEUDO_END_ERRVAL(SYMBOL) PSEUDO_END_ERRVAL (SYMBOL) + @@ -56,7 +84,12 @@ + /* This kind of system call stub never returns an error. + We return the return value register to the caller unexamined. */ + + +# if SYSCALL_ULONG_ARG_1 + +T_PSEUDO_NOERRNO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS, + + SYSCALL_ULONG_ARG_1, SYSCALL_ULONG_ARG_2) + +# else + T_PSEUDO_NOERRNO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS) + +# endif + ret_NOERRNO + T_PSEUDO_END_NOERRNO (SYSCALL_SYMBOL) + + @@ -66,7 +99,12 @@ T_PSEUDO_END_NOERRNO (SYSCALL_SYMBOL) + value, or zero for success. We may massage the kernel's return value + to meet that ABI, but we never set errno here. 
*/ + + +# if SYSCALL_ULONG_ARG_1 + +T_PSEUDO_ERRVAL (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS, + + SYSCALL_ULONG_ARG_1, SYSCALL_ULONG_ARG_2) + +# else + T_PSEUDO_ERRVAL (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS) + +# endif + ret_ERRVAL + T_PSEUDO_END_ERRVAL (SYSCALL_SYMBOL) + + @@ -75,7 +113,12 @@ T_PSEUDO_END_ERRVAL (SYSCALL_SYMBOL) + /* This is a "normal" system call stub: if there is an error, + it returns -1 and sets errno. */ + + +# if SYSCALL_ULONG_ARG_1 + +T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS, + + SYSCALL_ULONG_ARG_1, SYSCALL_ULONG_ARG_2) + +# else + T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS) + +# endif + ret + T_PSEUDO_END (SYSCALL_SYMBOL) + + diff --git a/sysdeps/unix/syscalls.list b/sysdeps/unix/syscalls.list + index e28e801c7a..6b22b2cb45 100644 + --- a/sysdeps/unix/syscalls.list + +++ b/sysdeps/unix/syscalls.list + @@ -39,27 +39,27 @@ kill - kill i:ii __kill kill + link - link i:ss __link link + listen - listen i:ii __listen listen + lseek - lseek i:iii __libc_lseek __lseek lseek + -madvise - madvise i:pii __madvise madvise + +madvise - madvise i:pUi __madvise madvise + mkdir - mkdir i:si __mkdir mkdir + -mmap - mmap b:aniiii __mmap mmap + -mprotect - mprotect i:aii __mprotect mprotect + -munmap - munmap i:ai __munmap munmap + +mmap - mmap b:aUiiii __mmap mmap + +mprotect - mprotect i:aUi __mprotect mprotect + +munmap - munmap i:aU __munmap munmap + open - open Ci:siv __libc_open __open open + profil - profil i:piii __profil profil + ptrace - ptrace i:iiii ptrace + -read - read Ci:ibn __libc_read __read read + -readlink - readlink i:spi __readlink readlink + +read - read Ci:ibU __libc_read __read read + +readlink - readlink i:spU __readlink readlink + readv - readv Ci:ipi __readv readv + reboot - reboot i:i reboot + -recv - recv Ci:ibni __libc_recv recv + -recvfrom - recvfrom Ci:ibniBN __libc_recvfrom __recvfrom recvfrom + +recv - recv Ci:ibUi __libc_recv recv + +recvfrom - recvfrom Ci:ibUiBN __libc_recvfrom __recvfrom recvfrom + recvmsg - recvmsg Ci:ipi __libc_recvmsg __recvmsg recvmsg + rename - rename i:ss rename + rmdir - rmdir i:s __rmdir rmdir + select - select Ci:iPPPP __select __libc_select select + -send - send Ci:ibni __libc_send __send send + +send - send Ci:ibUi __libc_send __send send + sendmsg - sendmsg Ci:ipi __libc_sendmsg __sendmsg sendmsg + -sendto - sendto Ci:ibnibn __libc_sendto __sendto sendto + +sendto - sendto Ci:ibUibn __libc_sendto __sendto sendto + setdomain - setdomainname i:si setdomainname + setegid - setegid i:i __setegid setegid + seteuid - seteuid i:i __seteuid seteuid + @@ -94,5 +94,5 @@ uname - uname i:p __uname uname + unlink - unlink i:s __unlink unlink + utimes - utimes i:sp __utimes utimes + vhangup - vhangup i:i vhangup + -write - write Ci:ibn __libc_write __write write + +write - write Ci:ibU __libc_write __write write + writev - writev Ci:ipi __writev writev + diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile + index f12b7b1a2d..0a0da00151 100644 + --- a/sysdeps/unix/sysv/linux/Makefile + +++ b/sysdeps/unix/sysv/linux/Makefile + @@ -60,7 +60,9 @@ sysdep_routines += adjtimex clone umount umount2 readahead \ + setfsuid setfsgid epoll_pwait signalfd \ + eventfd eventfd_read eventfd_write prlimit \ + personality epoll_wait tee vmsplice splice \ + - open_by_handle_at mlock2 pkey_mprotect pkey_set pkey_get + + open_by_handle_at mlock2 pkey_mprotect pkey_set pkey_get \ + + prctl \ + + process_vm_readv process_vm_writev + + CFLAGS-gethostid.c = -fexceptions + CFLAGS-tee.c = 
-fexceptions -fasynchronous-unwind-tables + @@ -273,7 +275,7 @@ sysdep_routines += xstatconv internal_statvfs internal_statvfs64 \ + + sysdep_headers += bits/fcntl-linux.h + + -tests += tst-fallocate tst-fallocate64 + +tests += tst-fallocate tst-fallocate64 tst-getcwd-smallbuff + endif + + ifeq ($(subdir),elf) + diff --git a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h + index 9378387747..c8471947b9 100644 + --- a/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h + +++ b/sysdeps/unix/sysv/linux/aarch64/arch-syscall.h + @@ -17,6 +17,7 @@ + #define __NR_clock_nanosleep 115 + #define __NR_clock_settime 112 + #define __NR_clone 220 + +#define __NR_clone3 435 + #define __NR_close 57 + #define __NR_connect 203 + #define __NR_copy_file_range 285 + diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h + index 1389cea1b3..346d045fb4 100644 + --- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h + +++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h + @@ -51,8 +51,12 @@ + + #define IS_PHECDA(midr) (MIDR_IMPLEMENTOR(midr) == 'h' \ + && MIDR_PARTNUM(midr) == 0x000) + -#define IS_ARES(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ + - && MIDR_PARTNUM(midr) == 0xd0c) + +#define IS_NEOVERSE_N1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ + + && MIDR_PARTNUM(midr) == 0xd0c) + +#define IS_NEOVERSE_N2(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ + + && MIDR_PARTNUM(midr) == 0xd49) + +#define IS_NEOVERSE_V1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ + + && MIDR_PARTNUM(midr) == 0xd40) + + #define IS_EMAG(midr) (MIDR_IMPLEMENTOR(midr) == 'P' \ + && MIDR_PARTNUM(midr) == 0x000) + diff --git a/sysdeps/unix/sysv/linux/aarch64/localplt.data b/sysdeps/unix/sysv/linux/aarch64/localplt.data + index a60053b914..08af68b5e8 100644 + --- a/sysdeps/unix/sysv/linux/aarch64/localplt.data + +++ b/sysdeps/unix/sysv/linux/aarch64/localplt.data + @@ -7,6 +7,9 @@ libc.so: malloc + libc.so: memalign + libc.so: realloc + libm.so: matherr + +# If outline atomics are used, libgcc (built outside of glibc) may + +# call __getauxval using the PLT. + +libc.so: __getauxval ? + # The dynamic loader needs __tls_get_addr for TLS. + ld.so: __tls_get_addr + # The main malloc is interposed into the dynamic linker, for + diff --git a/sysdeps/unix/sysv/linux/getpt.c b/sysdeps/unix/sysv/linux/getpt.c + index 1803b232c9..3cc745e11a 100644 + --- a/sysdeps/unix/sysv/linux/getpt.c + +++ b/sysdeps/unix/sysv/linux/getpt.c + @@ -16,69 +16,18 @@ + License along with the GNU C Library; if not, see + . */ + + -#include + #include + -#include + #include + #include + -#include + - + -#include "linux_fsinfo.h" + + /* Path to the master pseudo terminal cloning device. */ + #define _PATH_DEVPTMX _PATH_DEV "ptmx" + -/* Directory containing the UNIX98 pseudo terminals. */ + -#define _PATH_DEVPTS _PATH_DEV "pts" + - + -/* Prototype for function that opens BSD-style master pseudo-terminals. */ + -extern int __bsd_getpt (void) attribute_hidden; + + /* Open a master pseudo terminal and return its file descriptor. */ + int + __posix_openpt (int oflag) + { + - static int have_no_dev_ptmx; + - int fd; + - + - if (!have_no_dev_ptmx) + - { + - fd = __open (_PATH_DEVPTMX, oflag); + - if (fd != -1) + - { + - struct statfs fsbuf; + - static int devpts_mounted; + - + - /* Check that the /dev/pts filesystem is mounted + - or if /dev is a devfs filesystem (this implies /dev/pts). 
*/ + - if (devpts_mounted + - || (__statfs (_PATH_DEVPTS, &fsbuf) == 0 + - && fsbuf.f_type == DEVPTS_SUPER_MAGIC) + - || (__statfs (_PATH_DEV, &fsbuf) == 0 + - && fsbuf.f_type == DEVFS_SUPER_MAGIC)) + - { + - /* Everything is ok. */ + - devpts_mounted = 1; + - return fd; + - } + - + - /* If /dev/pts is not mounted then the UNIX98 pseudo terminals + - are not usable. */ + - __close (fd); + - have_no_dev_ptmx = 1; + - __set_errno (ENOENT); + - } + - else + - { + - if (errno == ENOENT || errno == ENODEV) + - have_no_dev_ptmx = 1; + - else + - return -1; + - } + - } + - else + - __set_errno (ENOENT); + - + - return -1; + + return __open (_PATH_DEVPTMX, oflag); + } + weak_alias (__posix_openpt, posix_openpt) + + @@ -86,16 +35,6 @@ weak_alias (__posix_openpt, posix_openpt) + int + __getpt (void) + { + - int fd = __posix_openpt (O_RDWR); + - if (fd == -1) + - fd = __bsd_getpt (); + - return fd; + + return __posix_openpt (O_RDWR); + } + - + - + -#define PTYNAME1 "pqrstuvwxyzabcde"; + -#define PTYNAME2 "0123456789abcdef"; + - + -#define __getpt __bsd_getpt + -#define HAVE_POSIX_OPENPT + -#include + +weak_alias (__getpt, getpt) + diff --git a/sysdeps/unix/sysv/linux/grantpt.c b/sysdeps/unix/sysv/linux/grantpt.c + index 2030e07fa6..43122f9a76 100644 + --- a/sysdeps/unix/sysv/linux/grantpt.c + +++ b/sysdeps/unix/sysv/linux/grantpt.c + @@ -1,44 +1,41 @@ + -#include + -#include + -#include + -#include + -#include + -#include + -#include + -#include + +/* grantpt implementation for Linux. + + Copyright (C) 1998-2020 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + Contributed by Zack Weinberg , 1998. + + -#include + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + -#include "pty-private.h" + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + -#if HAVE_PT_CHOWN + -/* Close all file descriptors except the one specified. */ + -static void + -close_all_fds (void) + -{ + - DIR *dir = __opendir ("/proc/self/fd"); + - if (dir != NULL) + - { + - struct dirent64 *d; + - while ((d = __readdir64 (dir)) != NULL) + - if (isdigit (d->d_name[0])) + - { + - char *endp; + - long int fd = strtol (d->d_name, &endp, 10); + - if (*endp == '\0' && fd != PTY_FILENO && fd != dirfd (dir)) + - __close_nocancel_nostatus (fd); + - } + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#include + +#include + +#include + +#include + + - __closedir (dir); + +int + +grantpt (int fd) + +{ + + /* Without pt_chown on Linux, we have delegated the creation of the + + pty node with the right group and permission mode to the kernel, and + + non-root users are unlikely to be able to change it. Therefore let's + + consider that POSIX enforcement is the responsibility of the whole + + system and not only the GNU libc. 
*/ + + - int nullfd = __open_nocancel (_PATH_DEVNULL, O_RDONLY); + - assert (nullfd == STDIN_FILENO); + - nullfd = __open_nocancel (_PATH_DEVNULL, O_WRONLY); + - assert (nullfd == STDOUT_FILENO); + - __dup2 (STDOUT_FILENO, STDERR_FILENO); + - } + + /* Verify that fd refers to a ptmx descriptor. */ + + unsigned int ptyno; + + int ret = __ioctl (fd, TIOCGPTN, &ptyno); + + if (ret != 0 && errno == ENOTTY) + + /* POSIX requires EINVAL instead of ENOTTY provided by the kernel. */ + + __set_errno (EINVAL); + + return ret; + } + -# define CLOSE_ALL_FDS() close_all_fds() + -#endif + - + -#include + diff --git a/sysdeps/unix/sysv/linux/hppa/atomic-machine.h b/sysdeps/unix/sysv/linux/hppa/atomic-machine.h + index 9d8ffbe860..bf61b66b70 100644 + --- a/sysdeps/unix/sysv/linux/hppa/atomic-machine.h + +++ b/sysdeps/unix/sysv/linux/hppa/atomic-machine.h + @@ -36,9 +36,37 @@ typedef uintptr_t uatomicptr_t; + typedef intmax_t atomic_max_t; + typedef uintmax_t uatomic_max_t; + + +#define atomic_full_barrier() __sync_synchronize () + + + #define __HAVE_64B_ATOMICS 0 + #define USE_ATOMIC_COMPILER_BUILTINS 0 + + +/* We use the compiler atomic load and store builtins as the generic + + defines are not atomic. In particular, we need to use compare and + + exchange for stores as the implementation is synthesized. */ + +void __atomic_link_error (void); + +#define __atomic_check_size_ls(mem) \ + + if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && sizeof (*mem) != 4) \ + + __atomic_link_error (); + + + +#define atomic_load_relaxed(mem) \ + + ({ __atomic_check_size_ls((mem)); \ + + __atomic_load_n ((mem), __ATOMIC_RELAXED); }) + +#define atomic_load_acquire(mem) \ + + ({ __atomic_check_size_ls((mem)); \ + + __atomic_load_n ((mem), __ATOMIC_ACQUIRE); }) + + + +#define atomic_store_relaxed(mem, val) \ + + do { \ + + __atomic_check_size_ls((mem)); \ + + __atomic_store_n ((mem), (val), __ATOMIC_RELAXED); \ + + } while (0) + +#define atomic_store_release(mem, val) \ + + do { \ + + __atomic_check_size_ls((mem)); \ + + __atomic_store_n ((mem), (val), __ATOMIC_RELEASE); \ + + } while (0) + + + /* XXX Is this actually correct? 
*/ + #define ATOMIC_EXCHANGE_USES_CAS 1 + + diff --git a/sysdeps/unix/sysv/linux/microblaze/sysdep.h b/sysdeps/unix/sysv/linux/microblaze/sysdep.h + index ed873d9dd4..796663a23a 100644 + --- a/sysdeps/unix/sysv/linux/microblaze/sysdep.h + +++ b/sysdeps/unix/sysv/linux/microblaze/sysdep.h + @@ -209,8 +209,8 @@ SYSCALL_ERROR_LABEL_DCL: \ + + # define inline_syscall0(name,dummy) \ + ({ \ + - register long __ret __asm__("r3"); \ + - register long __r12 __asm__("r12") = name; \ + + register long int __ret __asm__("r3"); \ + + register long int __r12 __asm__("r12") = name; \ + __asm__ __volatile__( "brki r14,8; nop;" \ + : "=r"(__ret) \ + : "r"(__r12) \ + @@ -219,9 +219,10 @@ SYSCALL_ERROR_LABEL_DCL: \ + + # define inline_syscall1(name,arg1) \ + ({ \ + - register long __ret __asm__("r3"); \ + - register long __r12 __asm__("r12") = name; \ + - register long __r5 __asm__("r5") = (long)(arg1); \ + + long int __arg1 = (long int) (arg1); \ + + register long int __ret __asm__("r3"); \ + + register long int __r12 __asm__("r12") = name; \ + + register long int __r5 __asm__("r5") = __arg1; \ + __asm__ __volatile__( "brki r14,8; nop;" \ + : "=r"(__ret) \ + : "r"(__r5), "r"(__r12) \ + @@ -230,10 +231,12 @@ SYSCALL_ERROR_LABEL_DCL: \ + + # define inline_syscall2(name,arg1,arg2) \ + ({ \ + - register long __ret __asm__("r3"); \ + - register long __r12 __asm__("r12") = name; \ + - register long __r5 __asm__("r5") = (long)(arg1); \ + - register long __r6 __asm__("r6") = (long)(arg2); \ + + long int __arg1 = (long int) (arg1); \ + + long int __arg2 = (long int) (arg2); \ + + register long int __ret __asm__("r3"); \ + + register long int __r12 __asm__("r12") = name; \ + + register long int __r5 __asm__("r5") = __arg1; \ + + register long int __r6 __asm__("r6") = __arg2; \ + __asm__ __volatile__( "brki r14,8; nop;" \ + : "=r"(__ret) \ + : "r"(__r5), "r"(__r6), "r"(__r12) \ + @@ -243,11 +246,14 @@ SYSCALL_ERROR_LABEL_DCL: \ + + # define inline_syscall3(name,arg1,arg2,arg3) \ + ({ \ + - register long __ret __asm__("r3"); \ + - register long __r12 __asm__("r12") = name; \ + - register long __r5 __asm__("r5") = (long)(arg1); \ + - register long __r6 __asm__("r6") = (long)(arg2); \ + - register long __r7 __asm__("r7") = (long)(arg3); \ + + long int __arg1 = (long int) (arg1); \ + + long int __arg2 = (long int) (arg2); \ + + long int __arg3 = (long int) (arg3); \ + + register long int __ret __asm__("r3"); \ + + register long int __r12 __asm__("r12") = name; \ + + register long int __r5 __asm__("r5") = __arg1; \ + + register long int __r6 __asm__("r6") = __arg2; \ + + register long int __r7 __asm__("r7") = __arg3; \ + __asm__ __volatile__( "brki r14,8; nop;" \ + : "=r"(__ret) \ + : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r12) \ + @@ -257,12 +263,16 @@ SYSCALL_ERROR_LABEL_DCL: \ + + # define inline_syscall4(name,arg1,arg2,arg3,arg4) \ + ({ \ + - register long __ret __asm__("r3"); \ + - register long __r12 __asm__("r12") = name; \ + - register long __r5 __asm__("r5") = (long)(arg1); \ + - register long __r6 __asm__("r6") = (long)(arg2); \ + - register long __r7 __asm__("r7") = (long)(arg3); \ + - register long __r8 __asm__("r8") = (long)(arg4); \ + + long int __arg1 = (long int) (arg1); \ + + long int __arg2 = (long int) (arg2); \ + + long int __arg3 = (long int) (arg3); \ + + long int __arg4 = (long int) (arg4); \ + + register long int __ret __asm__("r3"); \ + + register long int __r12 __asm__("r12") = name; \ + + register long int __r5 __asm__("r5") = __arg1; \ + + register long int __r6 __asm__("r6") = __arg2; \ + + 
register long int __r7 __asm__("r7") = __arg3; \ + + register long int __r8 __asm__("r8") = __arg4; \ + __asm__ __volatile__( "brki r14,8; nop;" \ + : "=r"(__ret) \ + : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r8),"r"(__r12) \ + @@ -272,13 +282,18 @@ SYSCALL_ERROR_LABEL_DCL: \ + + # define inline_syscall5(name,arg1,arg2,arg3,arg4,arg5) \ + ({ \ + - register long __ret __asm__("r3"); \ + - register long __r12 __asm__("r12") = name; \ + - register long __r5 __asm__("r5") = (long)(arg1); \ + - register long __r6 __asm__("r6") = (long)(arg2); \ + - register long __r7 __asm__("r7") = (long)(arg3); \ + - register long __r8 __asm__("r8") = (long)(arg4); \ + - register long __r9 __asm__("r9") = (long)(arg5); \ + + long int __arg1 = (long int) (arg1); \ + + long int __arg2 = (long int) (arg2); \ + + long int __arg3 = (long int) (arg3); \ + + long int __arg4 = (long int) (arg4); \ + + long int __arg5 = (long int) (arg5); \ + + register long int __ret __asm__("r3"); \ + + register long int __r12 __asm__("r12") = name; \ + + register long int __r5 __asm__("r5") = __arg1; \ + + register long int __r6 __asm__("r6") = __arg2; \ + + register long int __r7 __asm__("r7") = __arg3; \ + + register long int __r8 __asm__("r8") = __arg4; \ + + register long int __r9 __asm__("r9") = __arg5; \ + __asm__ __volatile__( "brki r14,8; nop;" \ + : "=r"(__ret) \ + : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r8),"r"(__r9), "r"(__r12) \ + @@ -288,14 +303,20 @@ SYSCALL_ERROR_LABEL_DCL: \ + + # define inline_syscall6(name,arg1,arg2,arg3,arg4,arg5,arg6) \ + ({ \ + - register long __ret __asm__("r3"); \ + - register long __r12 __asm__("r12") = name; \ + - register long __r5 __asm__("r5") = (long)(arg1); \ + - register long __r6 __asm__("r6") = (long)(arg2); \ + - register long __r7 __asm__("r7") = (long)(arg3); \ + - register long __r8 __asm__("r8") = (long)(arg4); \ + - register long __r9 __asm__("r9") = (long)(arg5); \ + - register long __r10 __asm__("r10") = (long)(arg6); \ + + long int __arg1 = (long int) (arg1); \ + + long int __arg2 = (long int) (arg2); \ + + long int __arg3 = (long int) (arg3); \ + + long int __arg4 = (long int) (arg4); \ + + long int __arg5 = (long int) (arg5); \ + + long int __arg6 = (long int) (arg6); \ + + register long int __ret __asm__("r3"); \ + + register long int __r12 __asm__("r12") = name; \ + + register long int __r5 __asm__("r5") = __arg1; \ + + register long int __r6 __asm__("r6") = __arg2; \ + + register long int __r7 __asm__("r7") = __arg3; \ + + register long int __r8 __asm__("r8") = __arg4; \ + + register long int __r9 __asm__("r9") = __arg5; \ + + register long int __r10 __asm__("r10") = __arg6; \ + __asm__ __volatile__( "brki r14,8; nop;" \ + : "=r"(__ret) \ + : "r"(__r5), "r"(__r6), "r"(__r7), "r"(__r8),"r"(__r9), "r"(__r10), \ + diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S + index b2bbf10181..ff445a5406 100644 + --- a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S + +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall5.S + @@ -22,9 +22,9 @@ + .text + .set nomips16 + + -/* long long __mips_syscall5 (long arg1, long arg2, long arg3, long arg4, + - long arg5, + - long number) */ + +/* long long int __mips_syscall5 (long int arg1, long int arg2, long int arg3, + + long int arg4, long int arg5, + + long int number) */ + + ENTRY(__mips_syscall5) + lw v0, 20(sp) + diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S + index 572d7c1137..2b4a3117d1 
100644 + --- a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S + +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall6.S + @@ -22,9 +22,9 @@ + .text + .set nomips16 + + -/* long long __mips_syscall6 (long arg1, long arg2, long arg3, long arg4, + - long arg5, long arg6, + - long number) */ + +/* long long int __mips_syscall6 (long int arg1, long int arg2, long int arg3, + + long int arg4, long int arg5, long int arg6, + + long int number) */ + + ENTRY(__mips_syscall6) + lw v0, 24(sp) + diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S + index 05164cb253..2723bbb138 100644 + --- a/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S + +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips-syscall7.S + @@ -22,9 +22,10 @@ + .text + .set nomips16 + + -/* long long __mips_syscall7 (long arg1, long arg2, long arg3, long arg4, + - long arg5, long arg6, long arg7, + - long number) */ + +/* long long int __mips_syscall7 (long int arg1, long int arg2, long int arg3, + + long int arg4, long int arg5, long int arg6, + + long int arg7, + + long int number) */ + + ENTRY(__mips_syscall7) + lw v0, 28(sp) + diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h + index 9bf551ace8..f23ede0259 100644 + --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h + +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall.h + @@ -19,51 +19,57 @@ + #ifndef MIPS16_SYSCALL_H + #define MIPS16_SYSCALL_H 1 + + -long long __nomips16 __mips16_syscall0 (long number); + +long long int __nomips16 __mips16_syscall0 (long int number); + #define __mips16_syscall0(dummy, number) \ + - __mips16_syscall0 ((long) (number)) + + __mips16_syscall0 ((long int) (number)) + + -long long __nomips16 __mips16_syscall1 (long a0, + - long number); + +long long int __nomips16 __mips16_syscall1 (long int a0, + + long int number); + #define __mips16_syscall1(a0, number) \ + - __mips16_syscall1 ((long) (a0), \ + - (long) (number)) + + __mips16_syscall1 ((long int) (a0), \ + + (long int) (number)) + + -long long __nomips16 __mips16_syscall2 (long a0, long a1, + - long number); + +long long int __nomips16 __mips16_syscall2 (long int a0, long int a1, + + long int number); + #define __mips16_syscall2(a0, a1, number) \ + - __mips16_syscall2 ((long) (a0), (long) (a1), \ + - (long) (number)) + + __mips16_syscall2 ((long int) (a0), (long int) (a1), \ + + (long int) (number)) + + -long long __nomips16 __mips16_syscall3 (long a0, long a1, long a2, + - long number); + +long long int __nomips16 __mips16_syscall3 (long int a0, long int a1, + + long int a2, + + long int number); + #define __mips16_syscall3(a0, a1, a2, number) \ + - __mips16_syscall3 ((long) (a0), (long) (a1), (long) (a2), \ + - (long) (number)) + + __mips16_syscall3 ((long int) (a0), (long int) (a1), \ + + (long int) (a2), \ + + (long int) (number)) + + -long long __nomips16 __mips16_syscall4 (long a0, long a1, long a2, long a3, + - long number); + +long long int __nomips16 __mips16_syscall4 (long int a0, long int a1, + + long int a2, long int a3, + + long int number); + #define __mips16_syscall4(a0, a1, a2, a3, number) \ + - __mips16_syscall4 ((long) (a0), (long) (a1), (long) (a2), \ + - (long) (a3), \ + - (long) (number)) + + __mips16_syscall4 ((long int) (a0), (long int) (a1), \ + + (long int) (a2), (long int) (a3), \ + + (long int) (number)) + + /* The remaining ones use regular MIPS wrappers. 
*/ + + #define __mips16_syscall5(a0, a1, a2, a3, a4, number) \ + - __mips_syscall5 ((long) (a0), (long) (a1), (long) (a2), \ + - (long) (a3), (long) (a4), \ + - (long) (number)) + + __mips_syscall5 ((long int) (a0), (long int) (a1), \ + + (long int) (a2), (long int) (a3), \ + + (long int) (a4), \ + + (long int) (number)) + + #define __mips16_syscall6(a0, a1, a2, a3, a4, a5, number) \ + - __mips_syscall6 ((long) (a0), (long) (a1), (long) (a2), \ + - (long) (a3), (long) (a4), (long) (a5), \ + - (long) (number)) + + __mips_syscall6 ((long int) (a0), (long int) (a1), \ + + (long int) (a2), (long int) (a3), \ + + (long int) (a4), (long int) (a5), \ + + (long int) (number)) + + #define __mips16_syscall7(a0, a1, a2, a3, a4, a5, a6, number) \ + - __mips_syscall7 ((long) (a0), (long) (a1), (long) (a2), \ + - (long) (a3), (long) (a4), (long) (a5), \ + - (long) (a6), \ + - (long) (number)) + + __mips_syscall7 ((long int) (a0), (long int) (a1), \ + + (long int) (a2), (long int) (a3), \ + + (long int) (a4), (long int) (a5), \ + + (long int) (a6), \ + + (long int) (number)) + + #endif + diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c + index 92f16e2724..43c05f8050 100644 + --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c + +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall0.c + @@ -20,8 +20,8 @@ + + #undef __mips16_syscall0 + + -long long __nomips16 + -__mips16_syscall0 (long number) + +long long int __nomips16 + +__mips16_syscall0 (long int number) + { + union __mips_syscall_return ret; + ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 0); + diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c + index fa985a96e5..16a567e834 100644 + --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c + +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall1.c + @@ -20,9 +20,9 @@ + + #undef __mips16_syscall1 + + -long long __nomips16 + -__mips16_syscall1 (long a0, + - long number) + +long long int __nomips16 + +__mips16_syscall1 (long int a0, + + long int number) + { + union __mips_syscall_return ret; + ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 1, + diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c + index f042ac815d..c0a856c344 100644 + --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c + +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall2.c + @@ -20,9 +20,9 @@ + + #undef __mips16_syscall2 + + -long long __nomips16 + -__mips16_syscall2 (long a0, long a1, + - long number) + +long long int __nomips16 + +__mips16_syscall2 (long int a0, long int a1, + + long int number) + { + union __mips_syscall_return ret; + ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 2, + diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c + index dfe2f7feb5..042768ebf2 100644 + --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c + +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall3.c + @@ -20,9 +20,9 @@ + + #undef __mips16_syscall3 + + -long long __nomips16 + -__mips16_syscall3 (long a0, long a1, long a2, + - long number) + +long long int __nomips16 + +__mips16_syscall3 (long int a0, long int a1, long int a2, + + long int number) + { + union __mips_syscall_return 
ret; + ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 3, + diff --git a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c + index 39de510357..8658d822ab 100644 + --- a/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c + +++ b/sysdeps/unix/sysv/linux/mips/mips32/mips16/mips16-syscall4.c + @@ -20,9 +20,9 @@ + + #undef __mips16_syscall4 + + -long long __nomips16 + -__mips16_syscall4 (long a0, long a1, long a2, long a3, + - long number) + +long long int __nomips16 + +__mips16_syscall4 (long int a0, long int a1, long int a2, long int a3, + + long int number) + { + union __mips_syscall_return ret; + ret.reg.v0 = INTERNAL_SYSCALL_MIPS16 (number, ret.reg.v1, 4, + diff --git a/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h + index beefcf284b..0c6a83e9b3 100644 + --- a/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h + +++ b/sysdeps/unix/sysv/linux/mips/mips32/sysdep.h + @@ -52,7 +52,7 @@ + #undef INLINE_SYSCALL + #define INLINE_SYSCALL(name, nr, args...) \ + ({ INTERNAL_SYSCALL_DECL (_sc_err); \ + - long result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ + + long int result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ + if ( INTERNAL_SYSCALL_ERROR_P (result_var, _sc_err) ) \ + { \ + __set_errno (INTERNAL_SYSCALL_ERRNO (result_var, _sc_err)); \ + @@ -61,10 +61,10 @@ + result_var; }) + + #undef INTERNAL_SYSCALL_DECL + -#define INTERNAL_SYSCALL_DECL(err) long err __attribute__ ((unused)) + +#define INTERNAL_SYSCALL_DECL(err) long int err __attribute__ ((unused)) + + #undef INTERNAL_SYSCALL_ERROR_P + -#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long) (err)) + +#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long int) (err)) + + #undef INTERNAL_SYSCALL_ERRNO + #define INTERNAL_SYSCALL_ERRNO(val, err) ((void) (err), val) + @@ -103,11 +103,11 @@ + + union __mips_syscall_return + { + - long long val; + + long long int val; + struct + { + - long v0; + - long v1; + + long int v0; + + long int v1; + } + reg; + }; + @@ -152,13 +152,13 @@ union __mips_syscall_return + + #define internal_syscall0(v0_init, input, number, err, dummy...) 
\ + ({ \ + - long _sys_result; \ + + long int _sys_result; \ + \ + { \ + - register long __s0 asm ("$16") __attribute__ ((unused)) \ + + register long int __s0 asm ("$16") __attribute__ ((unused)) \ + = (number); \ + - register long __v0 asm ("$2"); \ + - register long __a3 asm ("$7"); \ + + register long int __v0 asm ("$2"); \ + + register long int __a3 asm ("$7"); \ + __asm__ volatile ( \ + ".set\tnoreorder\n\t" \ + v0_init \ + @@ -175,14 +175,15 @@ union __mips_syscall_return + + #define internal_syscall1(v0_init, input, number, err, arg1) \ + ({ \ + - long _sys_result; \ + + long int _sys_result; \ + \ + { \ + - register long __s0 asm ("$16") __attribute__ ((unused)) \ + + long int _arg1 = (long int) (arg1); \ + + register long int __s0 asm ("$16") __attribute__ ((unused)) \ + = (number); \ + - register long __v0 asm ("$2"); \ + - register long __a0 asm ("$4") = (long) (arg1); \ + - register long __a3 asm ("$7"); \ + + register long int __v0 asm ("$2"); \ + + register long int __a0 asm ("$4") = _arg1; \ + + register long int __a3 asm ("$7"); \ + __asm__ volatile ( \ + ".set\tnoreorder\n\t" \ + v0_init \ + @@ -199,15 +200,17 @@ union __mips_syscall_return + + #define internal_syscall2(v0_init, input, number, err, arg1, arg2) \ + ({ \ + - long _sys_result; \ + + long int _sys_result; \ + \ + { \ + - register long __s0 asm ("$16") __attribute__ ((unused)) \ + + long int _arg1 = (long int) (arg1); \ + + long int _arg2 = (long int) (arg2); \ + + register long int __s0 asm ("$16") __attribute__ ((unused)) \ + = (number); \ + - register long __v0 asm ("$2"); \ + - register long __a0 asm ("$4") = (long) (arg1); \ + - register long __a1 asm ("$5") = (long) (arg2); \ + - register long __a3 asm ("$7"); \ + + register long int __v0 asm ("$2"); \ + + register long int __a0 asm ("$4") = _arg1; \ + + register long int __a1 asm ("$5") = _arg2; \ + + register long int __a3 asm ("$7"); \ + __asm__ volatile ( \ + ".set\tnoreorder\n\t" \ + v0_init \ + @@ -225,16 +228,19 @@ union __mips_syscall_return + #define internal_syscall3(v0_init, input, number, err, \ + arg1, arg2, arg3) \ + ({ \ + - long _sys_result; \ + + long int _sys_result; \ + \ + { \ + - register long __s0 asm ("$16") __attribute__ ((unused)) \ + + long int _arg1 = (long int) (arg1); \ + + long int _arg2 = (long int) (arg2); \ + + long int _arg3 = (long int) (arg3); \ + + register long int __s0 asm ("$16") __attribute__ ((unused)) \ + = (number); \ + - register long __v0 asm ("$2"); \ + - register long __a0 asm ("$4") = (long) (arg1); \ + - register long __a1 asm ("$5") = (long) (arg2); \ + - register long __a2 asm ("$6") = (long) (arg3); \ + - register long __a3 asm ("$7"); \ + + register long int __v0 asm ("$2"); \ + + register long int __a0 asm ("$4") = _arg1; \ + + register long int __a1 asm ("$5") = _arg2; \ + + register long int __a2 asm ("$6") = _arg3; \ + + register long int __a3 asm ("$7"); \ + __asm__ volatile ( \ + ".set\tnoreorder\n\t" \ + v0_init \ + @@ -252,16 +258,20 @@ union __mips_syscall_return + #define internal_syscall4(v0_init, input, number, err, \ + arg1, arg2, arg3, arg4) \ + ({ \ + - long _sys_result; \ + + long int _sys_result; \ + \ + { \ + - register long __s0 asm ("$16") __attribute__ ((unused)) \ + + long int _arg1 = (long int) (arg1); \ + + long int _arg2 = (long int) (arg2); \ + + long int _arg3 = (long int) (arg3); \ + + long int _arg4 = (long int) (arg4); \ + + register long int __s0 asm ("$16") __attribute__ ((unused)) \ + = (number); \ + - register long __v0 asm ("$2"); \ + - register long __a0 asm ("$4") 
= (long) (arg1); \ + - register long __a1 asm ("$5") = (long) (arg2); \ + - register long __a2 asm ("$6") = (long) (arg3); \ + - register long __a3 asm ("$7") = (long) (arg4); \ + + register long int __v0 asm ("$2"); \ + + register long int __a0 asm ("$4") = _arg1; \ + + register long int __a1 asm ("$5") = _arg2; \ + + register long int __a2 asm ("$6") = _arg3; \ + + register long int __a3 asm ("$7") = _arg4; \ + __asm__ volatile ( \ + ".set\tnoreorder\n\t" \ + v0_init \ + @@ -285,63 +295,66 @@ union __mips_syscall_return + compiler specifics required for the stack arguments to be pushed, + which would be the case if these syscalls were inlined. */ + + -long long __nomips16 __mips_syscall5 (long arg1, long arg2, long arg3, + - long arg4, long arg5, + - long number); + +long long int __nomips16 __mips_syscall5 (long int arg1, long int arg2, + + long int arg3, long int arg4, + + long int arg5, + + long int number); + libc_hidden_proto (__mips_syscall5, nomips16) + + #define internal_syscall5(v0_init, input, number, err, \ + arg1, arg2, arg3, arg4, arg5) \ + ({ \ + union __mips_syscall_return _sc_ret; \ + - _sc_ret.val = __mips_syscall5 ((long) (arg1), \ + - (long) (arg2), \ + - (long) (arg3), \ + - (long) (arg4), \ + - (long) (arg5), \ + - (long) (number)); \ + + _sc_ret.val = __mips_syscall5 ((long int) (arg1), \ + + (long int) (arg2), \ + + (long int) (arg3), \ + + (long int) (arg4), \ + + (long int) (arg5), \ + + (long int) (number)); \ + err = _sc_ret.reg.v1; \ + _sc_ret.reg.v0; \ + }) + + -long long __nomips16 __mips_syscall6 (long arg1, long arg2, long arg3, + - long arg4, long arg5, long arg6, + - long number); + +long long int __nomips16 __mips_syscall6 (long int arg1, long int arg2, + + long int arg3, long int arg4, + + long int arg5, long int arg6, + + long int number); + libc_hidden_proto (__mips_syscall6, nomips16) + + #define internal_syscall6(v0_init, input, number, err, \ + arg1, arg2, arg3, arg4, arg5, arg6) \ + ({ \ + union __mips_syscall_return _sc_ret; \ + - _sc_ret.val = __mips_syscall6 ((long) (arg1), \ + - (long) (arg2), \ + - (long) (arg3), \ + - (long) (arg4), \ + - (long) (arg5), \ + - (long) (arg6), \ + - (long) (number)); \ + + _sc_ret.val = __mips_syscall6 ((long int) (arg1), \ + + (long int) (arg2), \ + + (long int) (arg3), \ + + (long int) (arg4), \ + + (long int) (arg5), \ + + (long int) (arg6), \ + + (long int) (number)); \ + err = _sc_ret.reg.v1; \ + _sc_ret.reg.v0; \ + }) + + -long long __nomips16 __mips_syscall7 (long arg1, long arg2, long arg3, + - long arg4, long arg5, long arg6, + - long arg7, + - long number); + +long long int __nomips16 __mips_syscall7 (long int arg1, long int arg2, + + long int arg3, long int arg4, + + long int arg5, long int arg6, + + long int arg7, + + long int number); + libc_hidden_proto (__mips_syscall7, nomips16) + + #define internal_syscall7(v0_init, input, number, err, \ + arg1, arg2, arg3, arg4, arg5, arg6, arg7) \ + ({ \ + union __mips_syscall_return _sc_ret; \ + - _sc_ret.val = __mips_syscall7 ((long) (arg1), \ + - (long) (arg2), \ + - (long) (arg3), \ + - (long) (arg4), \ + - (long) (arg5), \ + - (long) (arg6), \ + - (long) (arg7), \ + - (long) (number)); \ + + _sc_ret.val = __mips_syscall7 ((long int) (arg1), \ + + (long int) (arg2), \ + + (long int) (arg3), \ + + (long int) (arg4), \ + + (long int) (arg5), \ + + (long int) (arg6), \ + + (long int) (arg7), \ + + (long int) (number)); \ + err = _sc_ret.reg.v1; \ + _sc_ret.reg.v0; \ + }) + diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h 
b/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h + index f96636538a..4a9d7054f9 100644 + --- a/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h + +++ b/sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h + @@ -47,14 +47,14 @@ + + /* Convert X to a long long, without losing any bits if it is one + already or warning if it is a 32-bit pointer. */ + -#define ARGIFY(X) ((long long) (__typeof__ ((X) - (X))) (X)) + +#define ARGIFY(X) ((long long int) (__typeof__ ((X) - (X))) (X)) + + /* Define a macro which expands into the inline wrapper code for a system + call. */ + #undef INLINE_SYSCALL + #define INLINE_SYSCALL(name, nr, args...) \ + ({ INTERNAL_SYSCALL_DECL (_sc_err); \ + - long result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ + + long int result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ + if ( INTERNAL_SYSCALL_ERROR_P (result_var, _sc_err) ) \ + { \ + __set_errno (INTERNAL_SYSCALL_ERRNO (result_var, _sc_err)); \ + @@ -63,10 +63,10 @@ + result_var; }) + + #undef INTERNAL_SYSCALL_DECL + -#define INTERNAL_SYSCALL_DECL(err) long err __attribute__ ((unused)) + +#define INTERNAL_SYSCALL_DECL(err) long int err __attribute__ ((unused)) + + #undef INTERNAL_SYSCALL_ERROR_P + -#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long) (err)) + +#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long int) (err)) + + #undef INTERNAL_SYSCALL_ERRNO + #define INTERNAL_SYSCALL_ERRNO(val, err) ((void) (err), val) + @@ -112,13 +112,13 @@ + + #define internal_syscall0(v0_init, input, number, err, dummy...) \ + ({ \ + - long _sys_result; \ + + long int _sys_result; \ + \ + { \ + - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + + register long long int __s0 asm ("$16") __attribute__ ((unused))\ + = (number); \ + - register long long __v0 asm ("$2"); \ + - register long long __a3 asm ("$7"); \ + + register long long int __v0 asm ("$2"); \ + + register long long int __a3 asm ("$7"); \ + __asm__ volatile ( \ + ".set\tnoreorder\n\t" \ + v0_init \ + @@ -135,14 +135,15 @@ + + #define internal_syscall1(v0_init, input, number, err, arg1) \ + ({ \ + - long _sys_result; \ + + long int _sys_result; \ + \ + { \ + - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + + long long int _arg1 = ARGIFY (arg1); \ + + register long long int __s0 asm ("$16") __attribute__ ((unused))\ + = (number); \ + - register long long __v0 asm ("$2"); \ + - register long long __a0 asm ("$4") = ARGIFY (arg1); \ + - register long long __a3 asm ("$7"); \ + + register long long int __v0 asm ("$2"); \ + + register long long int __a0 asm ("$4") = _arg1; \ + + register long long int __a3 asm ("$7"); \ + __asm__ volatile ( \ + ".set\tnoreorder\n\t" \ + v0_init \ + @@ -159,15 +160,17 @@ + + #define internal_syscall2(v0_init, input, number, err, arg1, arg2) \ + ({ \ + - long _sys_result; \ + + long int _sys_result; \ + \ + { \ + - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + + long long int _arg1 = ARGIFY (arg1); \ + + long long int _arg2 = ARGIFY (arg2); \ + + register long long int __s0 asm ("$16") __attribute__ ((unused))\ + = (number); \ + - register long long __v0 asm ("$2"); \ + - register long long __a0 asm ("$4") = ARGIFY (arg1); \ + - register long long __a1 asm ("$5") = ARGIFY (arg2); \ + - register long long __a3 asm ("$7"); \ + + register long long int __v0 asm ("$2"); \ + + register long long int __a0 asm ("$4") = _arg1; \ + + register long long int __a1 asm ("$5") = _arg2; \ + + register long long int __a3 asm ("$7"); \ + __asm__ volatile ( \ + 
".set\tnoreorder\n\t" \ + v0_init \ + @@ -185,16 +188,19 @@ + #define internal_syscall3(v0_init, input, number, err, \ + arg1, arg2, arg3) \ + ({ \ + - long _sys_result; \ + + long int _sys_result; \ + \ + { \ + - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + + long long int _arg1 = ARGIFY (arg1); \ + + long long int _arg2 = ARGIFY (arg2); \ + + long long int _arg3 = ARGIFY (arg3); \ + + register long long int __s0 asm ("$16") __attribute__ ((unused))\ + = (number); \ + - register long long __v0 asm ("$2"); \ + - register long long __a0 asm ("$4") = ARGIFY (arg1); \ + - register long long __a1 asm ("$5") = ARGIFY (arg2); \ + - register long long __a2 asm ("$6") = ARGIFY (arg3); \ + - register long long __a3 asm ("$7"); \ + + register long long int __v0 asm ("$2"); \ + + register long long int __a0 asm ("$4") = _arg1; \ + + register long long int __a1 asm ("$5") = _arg2; \ + + register long long int __a2 asm ("$6") = _arg3; \ + + register long long int __a3 asm ("$7"); \ + __asm__ volatile ( \ + ".set\tnoreorder\n\t" \ + v0_init \ + @@ -212,16 +218,20 @@ + #define internal_syscall4(v0_init, input, number, err, \ + arg1, arg2, arg3, arg4) \ + ({ \ + - long _sys_result; \ + + long int _sys_result; \ + \ + { \ + - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + + long long int _arg1 = ARGIFY (arg1); \ + + long long int _arg2 = ARGIFY (arg2); \ + + long long int _arg3 = ARGIFY (arg3); \ + + long long int _arg4 = ARGIFY (arg4); \ + + register long long int __s0 asm ("$16") __attribute__ ((unused))\ + = (number); \ + - register long long __v0 asm ("$2"); \ + - register long long __a0 asm ("$4") = ARGIFY (arg1); \ + - register long long __a1 asm ("$5") = ARGIFY (arg2); \ + - register long long __a2 asm ("$6") = ARGIFY (arg3); \ + - register long long __a3 asm ("$7") = ARGIFY (arg4); \ + + register long long int __v0 asm ("$2"); \ + + register long long int __a0 asm ("$4") = _arg1; \ + + register long long int __a1 asm ("$5") = _arg2; \ + + register long long int __a2 asm ("$6") = _arg3; \ + + register long long int __a3 asm ("$7") = _arg4; \ + __asm__ volatile ( \ + ".set\tnoreorder\n\t" \ + v0_init \ + @@ -239,17 +249,22 @@ + #define internal_syscall5(v0_init, input, number, err, \ + arg1, arg2, arg3, arg4, arg5) \ + ({ \ + - long _sys_result; \ + + long int _sys_result; \ + \ + { \ + - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + + long long int _arg1 = ARGIFY (arg1); \ + + long long int _arg2 = ARGIFY (arg2); \ + + long long int _arg3 = ARGIFY (arg3); \ + + long long int _arg4 = ARGIFY (arg4); \ + + long long int _arg5 = ARGIFY (arg5); \ + + register long long int __s0 asm ("$16") __attribute__ ((unused))\ + = (number); \ + - register long long __v0 asm ("$2"); \ + - register long long __a0 asm ("$4") = ARGIFY (arg1); \ + - register long long __a1 asm ("$5") = ARGIFY (arg2); \ + - register long long __a2 asm ("$6") = ARGIFY (arg3); \ + - register long long __a3 asm ("$7") = ARGIFY (arg4); \ + - register long long __a4 asm ("$8") = ARGIFY (arg5); \ + + register long long int __v0 asm ("$2"); \ + + register long long int __a0 asm ("$4") = _arg1; \ + + register long long int __a1 asm ("$5") = _arg2; \ + + register long long int __a2 asm ("$6") = _arg3; \ + + register long long int __a3 asm ("$7") = _arg4; \ + + register long long int __a4 asm ("$8") = _arg5; \ + __asm__ volatile ( \ + ".set\tnoreorder\n\t" \ v0_init \ - @@ -135,14 +135,15 @@ + @@ -267,18 +282,24 @@ + #define internal_syscall6(v0_init, input, number, err, \ + arg1, arg2, arg3, 
arg4, arg5, arg6) \ + ({ \ + - long _sys_result; \ + + long int _sys_result; \ + \ + { \ + - register long long __s0 asm ("$16") __attribute__ ((unused)) \ + + long long int _arg1 = ARGIFY (arg1); \ + + long long int _arg2 = ARGIFY (arg2); \ + + long long int _arg3 = ARGIFY (arg3); \ + + long long int _arg4 = ARGIFY (arg4); \ + + long long int _arg5 = ARGIFY (arg5); \ + + long long int _arg6 = ARGIFY (arg6); \ + + register long long int __s0 asm ("$16") __attribute__ ((unused))\ + = (number); \ + - register long long __v0 asm ("$2"); \ + - register long long __a0 asm ("$4") = ARGIFY (arg1); \ + - register long long __a1 asm ("$5") = ARGIFY (arg2); \ + - register long long __a2 asm ("$6") = ARGIFY (arg3); \ + - register long long __a3 asm ("$7") = ARGIFY (arg4); \ + - register long long __a4 asm ("$8") = ARGIFY (arg5); \ + - register long long __a5 asm ("$9") = ARGIFY (arg6); \ + + register long long int __v0 asm ("$2"); \ + + register long long int __a0 asm ("$4") = _arg1; \ + + register long long int __a1 asm ("$5") = _arg2; \ + + register long long int __a2 asm ("$6") = _arg3; \ + + register long long int __a3 asm ("$7") = _arg4; \ + + register long long int __a4 asm ("$8") = _arg5; \ + + register long long int __a5 asm ("$9") = _arg6; \ + __asm__ volatile ( \ + ".set\tnoreorder\n\t" \ + v0_init \ + diff --git a/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h + index 9d30291f84..3e1f1cc3c5 100644 + --- a/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h + +++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h + @@ -50,7 +50,7 @@ + #undef INLINE_SYSCALL + #define INLINE_SYSCALL(name, nr, args...) \ + ({ INTERNAL_SYSCALL_DECL (_sc_err); \ + - long result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ + + long int result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ + if ( INTERNAL_SYSCALL_ERROR_P (result_var, _sc_err) ) \ + { \ + __set_errno (INTERNAL_SYSCALL_ERRNO (result_var, _sc_err)); \ + @@ -59,10 +59,10 @@ + result_var; }) + + #undef INTERNAL_SYSCALL_DECL + -#define INTERNAL_SYSCALL_DECL(err) long err __attribute__ ((unused)) + +#define INTERNAL_SYSCALL_DECL(err) long int err __attribute__ ((unused)) + + #undef INTERNAL_SYSCALL_ERROR_P + -#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long) (err)) + +#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long int) (err)) + + #undef INTERNAL_SYSCALL_ERRNO + #define INTERNAL_SYSCALL_ERRNO(val, err) ((void) (err), val) + @@ -108,13 +108,13 @@ + + #define internal_syscall0(v0_init, input, number, err, dummy...) 
\ + ({ \ + - long _sys_result; \ + + long int _sys_result; \ + \ + { \ + - register long __s0 asm ("$16") __attribute__ ((unused)) \ + + register long int __s0 asm ("$16") __attribute__ ((unused)) \ + = (number); \ + - register long __v0 asm ("$2"); \ + - register long __a3 asm ("$7"); \ + + register long int __v0 asm ("$2"); \ + + register long int __a3 asm ("$7"); \ + __asm__ volatile ( \ + ".set\tnoreorder\n\t" \ + v0_init \ + @@ -131,14 +131,15 @@ + + #define internal_syscall1(v0_init, input, number, err, arg1) \ + ({ \ + - long _sys_result; \ + + long int _sys_result; \ + \ + { \ + - register long __s0 asm ("$16") __attribute__ ((unused)) \ + + long int _arg1 = (long int) (arg1); \ + + register long int __s0 asm ("$16") __attribute__ ((unused)) \ + = (number); \ + - register long __v0 asm ("$2"); \ + - register long __a0 asm ("$4") = (long) (arg1); \ + - register long __a3 asm ("$7"); \ + + register long int __v0 asm ("$2"); \ + + register long int __a0 asm ("$4") = _arg1; \ + + register long int __a3 asm ("$7"); \ + __asm__ volatile ( \ + ".set\tnoreorder\n\t" \ + v0_init \ + @@ -155,15 +156,17 @@ + + #define internal_syscall2(v0_init, input, number, err, arg1, arg2) \ + ({ \ + - long _sys_result; \ + + long int _sys_result; \ + \ + { \ + - register long __s0 asm ("$16") __attribute__ ((unused)) \ + + long int _arg1 = (long int) (arg1); \ + + long int _arg2 = (long int) (arg2); \ + + register long int __s0 asm ("$16") __attribute__ ((unused)) \ + = (number); \ + - register long __v0 asm ("$2"); \ + - register long __a0 asm ("$4") = (long) (arg1); \ + - register long __a1 asm ("$5") = (long) (arg2); \ + - register long __a3 asm ("$7"); \ + + register long int __v0 asm ("$2"); \ + + register long int __a0 asm ("$4") = _arg1; \ + + register long int __a1 asm ("$5") = _arg2; \ + + register long int __a3 asm ("$7"); \ + __asm__ volatile ( \ + ".set\tnoreorder\n\t" \ + v0_init \ + @@ -181,16 +184,19 @@ + #define internal_syscall3(v0_init, input, number, err, \ + arg1, arg2, arg3) \ + ({ \ + - long _sys_result; \ + + long int _sys_result; \ + \ + { \ + - register long __s0 asm ("$16") __attribute__ ((unused)) \ + + long int _arg1 = (long int) (arg1); \ + + long int _arg2 = (long int) (arg2); \ + + long int _arg3 = (long int) (arg3); \ + + register long int __s0 asm ("$16") __attribute__ ((unused)) \ + = (number); \ + - register long __v0 asm ("$2"); \ + - register long __a0 asm ("$4") = (long) (arg1); \ + - register long __a1 asm ("$5") = (long) (arg2); \ + - register long __a2 asm ("$6") = (long) (arg3); \ + - register long __a3 asm ("$7"); \ + + register long int __v0 asm ("$2"); \ + + register long int __a0 asm ("$4") = _arg1; \ + + register long int __a1 asm ("$5") = _arg2; \ + + register long int __a2 asm ("$6") = _arg3; \ + + register long int __a3 asm ("$7"); \ + __asm__ volatile ( \ + ".set\tnoreorder\n\t" \ + v0_init \ + @@ -208,16 +214,20 @@ + #define internal_syscall4(v0_init, input, number, err, \ + arg1, arg2, arg3, arg4) \ + ({ \ + - long _sys_result; \ + + long int _sys_result; \ + \ + { \ + - register long __s0 asm ("$16") __attribute__ ((unused)) \ + + long int _arg1 = (long int) (arg1); \ + + long int _arg2 = (long int) (arg2); \ + + long int _arg3 = (long int) (arg3); \ + + long int _arg4 = (long int) (arg4); \ + + register long int __s0 asm ("$16") __attribute__ ((unused)) \ + = (number); \ + - register long __v0 asm ("$2"); \ + - register long __a0 asm ("$4") = (long) (arg1); \ + - register long __a1 asm ("$5") = (long) (arg2); \ + - register long __a2 asm ("$6") = 
(long) (arg3); \ + - register long __a3 asm ("$7") = (long) (arg4); \ + + register long int __v0 asm ("$2"); \ + + register long int __a0 asm ("$4") = _arg1; \ + + register long int __a1 asm ("$5") = _arg2; \ + + register long int __a2 asm ("$6") = _arg3; \ + + register long int __a3 asm ("$7") = _arg4; \ + __asm__ volatile ( \ + ".set\tnoreorder\n\t" \ + v0_init \ + @@ -235,17 +245,22 @@ + #define internal_syscall5(v0_init, input, number, err, \ + arg1, arg2, arg3, arg4, arg5) \ + ({ \ + - long _sys_result; \ + + long int _sys_result; \ + \ + { \ + - register long __s0 asm ("$16") __attribute__ ((unused)) \ + + long int _arg1 = (long int) (arg1); \ + + long int _arg2 = (long int) (arg2); \ + + long int _arg3 = (long int) (arg3); \ + + long int _arg4 = (long int) (arg4); \ + + long int _arg5 = (long int) (arg5); \ + + register long int __s0 asm ("$16") __attribute__ ((unused)) \ + = (number); \ + - register long __v0 asm ("$2"); \ + - register long __a0 asm ("$4") = (long) (arg1); \ + - register long __a1 asm ("$5") = (long) (arg2); \ + - register long __a2 asm ("$6") = (long) (arg3); \ + - register long __a3 asm ("$7") = (long) (arg4); \ + - register long __a4 asm ("$8") = (long) (arg5); \ + + register long int __v0 asm ("$2"); \ + + register long int __a0 asm ("$4") = _arg1; \ + + register long int __a1 asm ("$5") = _arg2; \ + + register long int __a2 asm ("$6") = _arg3; \ + + register long int __a3 asm ("$7") = _arg4; \ + + register long int __a4 asm ("$8") = _arg5; \ + __asm__ volatile ( \ + ".set\tnoreorder\n\t" \ + v0_init \ + @@ -263,18 +278,24 @@ + #define internal_syscall6(v0_init, input, number, err, \ + arg1, arg2, arg3, arg4, arg5, arg6) \ + ({ \ + - long _sys_result; \ + + long int _sys_result; \ + \ + { \ + - register long __s0 asm ("$16") __attribute__ ((unused)) \ + + long int _arg1 = (long int) (arg1); \ + + long int _arg2 = (long int) (arg2); \ + + long int _arg3 = (long int) (arg3); \ + + long int _arg4 = (long int) (arg4); \ + + long int _arg5 = (long int) (arg5); \ + + long int _arg6 = (long int) (arg6); \ + + register long int __s0 asm ("$16") __attribute__ ((unused)) \ + = (number); \ + - register long __v0 asm ("$2"); \ + - register long __a0 asm ("$4") = (long) (arg1); \ + - register long __a1 asm ("$5") = (long) (arg2); \ + - register long __a2 asm ("$6") = (long) (arg3); \ + - register long __a3 asm ("$7") = (long) (arg4); \ + - register long __a4 asm ("$8") = (long) (arg5); \ + - register long __a5 asm ("$9") = (long) (arg6); \ + + register long int __v0 asm ("$2"); \ + + register long int __a0 asm ("$4") = _arg1; \ + + register long int __a1 asm ("$5") = _arg2; \ + + register long int __a2 asm ("$6") = _arg3; \ + + register long int __a3 asm ("$7") = _arg4; \ + + register long int __a4 asm ("$8") = _arg5; \ + + register long int __a5 asm ("$9") = _arg6; \ + __asm__ volatile ( \ + ".set\tnoreorder\n\t" \ + v0_init \ + diff --git a/sysdeps/unix/sysv/linux/mips/mips64/syscall.S b/sysdeps/unix/sysv/linux/mips/mips64/syscall.S + index 26adf2cd04..a9baff3c17 100644 + --- a/sysdeps/unix/sysv/linux/mips/mips64/syscall.S + +++ b/sysdeps/unix/sysv/linux/mips/mips64/syscall.S + @@ -20,7 +20,7 @@ + #include + + /* Usage: + - long syscall (syscall_number, arg1, arg2, arg3, arg4, arg5, arg6, arg7) + + long int syscall (syscall_number, arg1, arg2, arg3, arg4, arg5, arg6, arg7) + + We need to do some arg shifting, syscall_number will be in v0. 
*/ + + diff --git a/sysdeps/unix/sysv/linux/mips/sysdep.h b/sysdeps/unix/sysv/linux/mips/sysdep.h + index cdfc0b1b58..a4cf1540fe 100644 + --- a/sysdeps/unix/sysv/linux/mips/sysdep.h + +++ b/sysdeps/unix/sysv/linux/mips/sysdep.h + @@ -36,8 +36,8 @@ + the INTERNAL_SYSCALL_{ERROR_P,ERRNO} macros work correctly. */ + #define INTERNAL_VSYSCALL_CALL(funcptr, err, nr, args...) \ + ({ \ + - long _ret = funcptr (args); \ + - err = ((unsigned long) (_ret) >= (unsigned long) -4095L); \ + + long int _ret = funcptr (args); \ + + err = ((unsigned long int) (_ret) >= (unsigned long int) -4095L); \ + if (err) \ + _ret = -_ret; \ + _ret; \ + diff --git a/sysdeps/unix/sysv/linux/mips/unwind-arch.h b/sysdeps/unix/sysv/linux/mips/unwind-arch.h + new file mode 100644 + index 0000000000..a009899983 + --- /dev/null + +++ b/sysdeps/unix/sysv/linux/mips/unwind-arch.h + @@ -0,0 +1,67 @@ + +/* Return backtrace of current program state. Arch-specific bits. + + Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#ifndef _UNWIND_ARCH_H + +#define _UNWIND_ARCH_H + + + +#include + + + +/* MIPS fallback code handle a frame where its FDE can not be obtained + + (for instance a signal frame) by reading the kernel allocated signal frame + + and adding '2' to the value of 'sc_pc' [1]. The added value is used to + + recognize an end of an EH region on mips16 [2]. + + + + The idea here is to adjust the obtained signal frame ADDR value and remove + + the libgcc added value by checking if the previous frame is a signal frame + + one. + + + + [1] libgcc/config/mips/linux-unwind.h from gcc code. + + [2] gcc/config/mips/mips.h from gcc code. */ + + + +static inline void * + +unwind_arch_adjustment (void *prev, void *addr) + +{ + + uint32_t *pc = (uint32_t *) prev; + + + + if (pc == NULL) + + return addr; + + + + /* For MIPS16 or microMIPS frame libgcc makes no adjustment. */ + + if ((uintptr_t) pc & 0x3) + + return addr; + + + + /* The vDSO containes either + + + + 24021061 li v0, 0x1061 (rt_sigreturn) + + 0000000c syscall + + or + + 24021017 li v0, 0x1017 (sigreturn) + + 0000000c syscall */ + + if (pc[1] != 0x0000000c) + + return addr; + +#if _MIPS_SIM == _ABIO32 + + if (pc[0] == (0x24020000 | __NR_sigreturn)) + + return (void *) ((uintptr_t) addr - 2); + +#endif + + if (pc[0] == (0x24020000 | __NR_rt_sigreturn)) + + return (void *) ((uintptr_t) addr - 2); + + + + return addr; + +} + + + +#endif + diff --git a/sysdeps/unix/sysv/linux/msgctl.c b/sysdeps/unix/sysv/linux/msgctl.c + index 27879e76cd..fd46aec1a0 100644 + --- a/sysdeps/unix/sysv/linux/msgctl.c + +++ b/sysdeps/unix/sysv/linux/msgctl.c + @@ -21,6 +21,7 @@ + #include + #include + #include + +#include /* For __kernel_mode_t. 
*/ + + #ifndef DEFAULT_VERSION + # ifndef __ASSUME_SYSVIPC_BROKEN_MODE_T + @@ -61,7 +62,6 @@ __new_msgctl (int msqid, int cmd, struct msqid_ds *buf) + + int ret = msgctl_syscall (msqid, cmd, buf); + + -#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T + if (ret >= 0) + { + switch (cmd) + @@ -69,10 +69,16 @@ __new_msgctl (int msqid, int cmd, struct msqid_ds *buf) + case IPC_STAT: + case MSG_STAT: + case MSG_STAT_ANY: + +#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T + buf->msg_perm.mode >>= 16; + +#else + + /* Old Linux kernel versions might not clear the mode padding. */ + + if (sizeof ((struct msqid_ds){0}.msg_perm.mode) + + != sizeof (__kernel_mode_t)) + + buf->msg_perm.mode &= 0xFFFF; + +#endif + } + } + -#endif + + return ret; + } + diff --git a/sysdeps/unix/sysv/linux/nios2/kernel-features.h b/sysdeps/unix/sysv/linux/nios2/kernel-features.h + deleted file mode 100644 + index d68d114981..0000000000 + --- a/sysdeps/unix/sysv/linux/nios2/kernel-features.h + +++ /dev/null + @@ -1,22 +0,0 @@ + -/* Set flags signalling availability of kernel features based on given + - kernel version number. NIOS2 version. + - Copyright (C) 2019-2020 Free Software Foundation, Inc. + - This file is part of the GNU C Library. + - + - The GNU C Library is free software; you can redistribute it and/or + - modify it under the terms of the GNU Lesser General Public + - License as published by the Free Software Foundation; either + - version 2.1 of the License, or (at your option) any later version. + - + - The GNU C Library is distributed in the hope that it will be useful, + - but WITHOUT ANY WARRANTY; without even the implied warranty of + - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + - Lesser General Public License for more details. + - + - You should have received a copy of the GNU Lesser General Public + - License along with the GNU C Library; if not, see + - . 
*/ + - + -#include_next + - + -#undef __ASSUME_SYSVIPC_DEFAULT_IPC_64 + diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h b/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h + index 725dfafde8..ffc150851e 100644 + --- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h + +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h + @@ -134,47 +134,47 @@ + # define LOADARGS_0(name, dummy) \ + r0 = name + # define LOADARGS_1(name, __arg1) \ + - long int arg1 = (long int) (__arg1); \ + + long int _arg1 = (long int) (__arg1); \ + LOADARGS_0(name, 0); \ + extern void __illegally_sized_syscall_arg1 (void); \ + if (__builtin_classify_type (__arg1) != 5 && sizeof (__arg1) > 4) \ + __illegally_sized_syscall_arg1 (); \ + - r3 = arg1 + + r3 = _arg1 + # define LOADARGS_2(name, __arg1, __arg2) \ + - long int arg2 = (long int) (__arg2); \ + + long int _arg2 = (long int) (__arg2); \ + LOADARGS_1(name, __arg1); \ + extern void __illegally_sized_syscall_arg2 (void); \ + if (__builtin_classify_type (__arg2) != 5 && sizeof (__arg2) > 4) \ + __illegally_sized_syscall_arg2 (); \ + - r4 = arg2 + + r4 = _arg2 + # define LOADARGS_3(name, __arg1, __arg2, __arg3) \ + - long int arg3 = (long int) (__arg3); \ + + long int _arg3 = (long int) (__arg3); \ + LOADARGS_2(name, __arg1, __arg2); \ + extern void __illegally_sized_syscall_arg3 (void); \ + if (__builtin_classify_type (__arg3) != 5 && sizeof (__arg3) > 4) \ + __illegally_sized_syscall_arg3 (); \ + - r5 = arg3 + + r5 = _arg3 + # define LOADARGS_4(name, __arg1, __arg2, __arg3, __arg4) \ + - long int arg4 = (long int) (__arg4); \ + + long int _arg4 = (long int) (__arg4); \ + LOADARGS_3(name, __arg1, __arg2, __arg3); \ + extern void __illegally_sized_syscall_arg4 (void); \ + if (__builtin_classify_type (__arg4) != 5 && sizeof (__arg4) > 4) \ + __illegally_sized_syscall_arg4 (); \ + - r6 = arg4 + + r6 = _arg4 + # define LOADARGS_5(name, __arg1, __arg2, __arg3, __arg4, __arg5) \ + - long int arg5 = (long int) (__arg5); \ + + long int _arg5 = (long int) (__arg5); \ + LOADARGS_4(name, __arg1, __arg2, __arg3, __arg4); \ + extern void __illegally_sized_syscall_arg5 (void); \ + if (__builtin_classify_type (__arg5) != 5 && sizeof (__arg5) > 4) \ + __illegally_sized_syscall_arg5 (); \ + - r7 = arg5 + + r7 = _arg5 + # define LOADARGS_6(name, __arg1, __arg2, __arg3, __arg4, __arg5, __arg6) \ + - long int arg6 = (long int) (__arg6); \ + + long int _arg6 = (long int) (__arg6); \ + LOADARGS_5(name, __arg1, __arg2, __arg3, __arg4, __arg5); \ + extern void __illegally_sized_syscall_arg6 (void); \ + if (__builtin_classify_type (__arg6) != 5 && sizeof (__arg6) > 4) \ + __illegally_sized_syscall_arg6 (); \ + - r8 = arg6 + + r8 = _arg6 + + # define ASM_INPUT_0 "0" (r0) + # define ASM_INPUT_1 ASM_INPUT_0, "1" (r3) + diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h b/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h + index ee7f43653d..8a3f1c43e4 100644 + --- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h + +++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h + @@ -139,47 +139,47 @@ + #define LOADARGS_0(name, dummy) \ + r0 = name + #define LOADARGS_1(name, __arg1) \ + - long int arg1 = (long int) (__arg1); \ + + long int _arg1 = (long int) (__arg1); \ + LOADARGS_0(name, 0); \ + extern void __illegally_sized_syscall_arg1 (void); \ + if (__builtin_classify_type (__arg1) != 5 && sizeof (__arg1) > 8) \ + __illegally_sized_syscall_arg1 (); \ + - r3 = arg1 + + r3 = _arg1 + #define LOADARGS_2(name, __arg1, __arg2) \ + - long int arg2 = (long int) (__arg2); 
\ + + long int _arg2 = (long int) (__arg2); \ + LOADARGS_1(name, __arg1); \ + extern void __illegally_sized_syscall_arg2 (void); \ + if (__builtin_classify_type (__arg2) != 5 && sizeof (__arg2) > 8) \ + __illegally_sized_syscall_arg2 (); \ + - r4 = arg2 + + r4 = _arg2 + #define LOADARGS_3(name, __arg1, __arg2, __arg3) \ + - long int arg3 = (long int) (__arg3); \ + + long int _arg3 = (long int) (__arg3); \ + LOADARGS_2(name, __arg1, __arg2); \ + extern void __illegally_sized_syscall_arg3 (void); \ + if (__builtin_classify_type (__arg3) != 5 && sizeof (__arg3) > 8) \ + __illegally_sized_syscall_arg3 (); \ + - r5 = arg3 + + r5 = _arg3 + #define LOADARGS_4(name, __arg1, __arg2, __arg3, __arg4) \ + - long int arg4 = (long int) (__arg4); \ + + long int _arg4 = (long int) (__arg4); \ + LOADARGS_3(name, __arg1, __arg2, __arg3); \ + extern void __illegally_sized_syscall_arg4 (void); \ + if (__builtin_classify_type (__arg4) != 5 && sizeof (__arg4) > 8) \ + __illegally_sized_syscall_arg4 (); \ + - r6 = arg4 + + r6 = _arg4 + #define LOADARGS_5(name, __arg1, __arg2, __arg3, __arg4, __arg5) \ + - long int arg5 = (long int) (__arg5); \ + + long int _arg5 = (long int) (__arg5); \ + LOADARGS_4(name, __arg1, __arg2, __arg3, __arg4); \ + extern void __illegally_sized_syscall_arg5 (void); \ + if (__builtin_classify_type (__arg5) != 5 && sizeof (__arg5) > 8) \ + __illegally_sized_syscall_arg5 (); \ + - r7 = arg5 + + r7 = _arg5 + #define LOADARGS_6(name, __arg1, __arg2, __arg3, __arg4, __arg5, __arg6) \ + - long int arg6 = (long int) (__arg6); \ + + long int _arg6 = (long int) (__arg6); \ + LOADARGS_5(name, __arg1, __arg2, __arg3, __arg4, __arg5); \ + extern void __illegally_sized_syscall_arg6 (void); \ + if (__builtin_classify_type (__arg6) != 5 && sizeof (__arg6) > 8) \ + __illegally_sized_syscall_arg6 (); \ + - r8 = arg6 + + r8 = _arg6 + + #define ASM_INPUT_0 "0" (r0) + #define ASM_INPUT_1 ASM_INPUT_0, "1" (r3) + diff --git a/sysdeps/unix/sysv/linux/prctl.c b/sysdeps/unix/sysv/linux/prctl.c + new file mode 100644 + index 0000000000..d5725f14cf + --- /dev/null + +++ b/sysdeps/unix/sysv/linux/prctl.c + @@ -0,0 +1,42 @@ + +/* prctl - Linux specific syscall. + + Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#include + +#include + +#include + + + +/* Unconditionally read all potential arguments. This may pass + + garbage values to the kernel, but avoids the need for teaching + + glibc the argument counts of individual options (including ones + + that are added to the kernel in the future). */ + + + +int + +__prctl (int option, ...) 
+ +{ + + va_list arg; + + va_start (arg, option); + + unsigned long int arg2 = va_arg (arg, unsigned long int); + + unsigned long int arg3 = va_arg (arg, unsigned long int); + + unsigned long int arg4 = va_arg (arg, unsigned long int); + + unsigned long int arg5 = va_arg (arg, unsigned long int); + + va_end (arg); + + return INLINE_SYSCALL_CALL (prctl, option, arg2, arg3, arg4, arg5); + +} + + + +libc_hidden_def (__prctl) + +weak_alias (__prctl, prctl) + diff --git a/sysdeps/unix/sysv/linux/process_vm_readv.c b/sysdeps/unix/sysv/linux/process_vm_readv.c + new file mode 100644 + index 0000000000..e1377f7e50 + --- /dev/null + +++ b/sysdeps/unix/sysv/linux/process_vm_readv.c + @@ -0,0 +1,32 @@ + +/* process_vm_readv - Linux specific syscall. + + Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#include + +#include + +#include + +#include + + + +ssize_t + +process_vm_readv (pid_t pid, const struct iovec *local_iov, + + unsigned long int liovcnt, + + const struct iovec *remote_iov, + + unsigned long int riovcnt, unsigned long int flags) + +{ + + return INLINE_SYSCALL_CALL (process_vm_readv, pid, local_iov, + + liovcnt, remote_iov, riovcnt, flags); + +} + diff --git a/sysdeps/unix/sysv/linux/process_vm_writev.c b/sysdeps/unix/sysv/linux/process_vm_writev.c + new file mode 100644 + index 0000000000..944ab9b7f1 + --- /dev/null + +++ b/sysdeps/unix/sysv/linux/process_vm_writev.c + @@ -0,0 +1,32 @@ + +/* process_vm_writev - Linux specific syscall. + + Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . 
*/ + + + +#include + +#include + +#include + +#include + + + +ssize_t + +process_vm_writev (pid_t pid, const struct iovec *local_iov, + + unsigned long int liovcnt, + + const struct iovec *remote_iov, + + unsigned long int riovcnt, unsigned long int flags) + +{ + + return INLINE_SYSCALL_CALL (process_vm_writev, pid, local_iov, + + liovcnt, remote_iov, riovcnt, flags); + +} + diff --git a/sysdeps/unix/sysv/linux/ptsname.c b/sysdeps/unix/sysv/linux/ptsname.c + index 81d9d26f1e..3e9be3f0d4 100644 + --- a/sysdeps/unix/sysv/linux/ptsname.c + +++ b/sysdeps/unix/sysv/linux/ptsname.c + @@ -21,39 +21,14 @@ + #include + #include + #include + -#include + -#include + #include + #include + + #include <_itoa.h> + + -/* Check if DEV corresponds to a master pseudo terminal device. */ + -#define MASTER_P(Dev) \ + - (__gnu_dev_major ((Dev)) == 2 \ + - || (__gnu_dev_major ((Dev)) == 4 \ + - && __gnu_dev_minor ((Dev)) >= 128 && __gnu_dev_minor ((Dev)) < 192) \ + - || (__gnu_dev_major ((Dev)) >= 128 && __gnu_dev_major ((Dev)) < 136)) + - + -/* Check if DEV corresponds to a slave pseudo terminal device. */ + -#define SLAVE_P(Dev) \ + - (__gnu_dev_major ((Dev)) == 3 \ + - || (__gnu_dev_major ((Dev)) == 4 \ + - && __gnu_dev_minor ((Dev)) >= 192 && __gnu_dev_minor ((Dev)) < 256) \ + - || (__gnu_dev_major ((Dev)) >= 136 && __gnu_dev_major ((Dev)) < 144)) + - + -/* Note that major number 4 corresponds to the old BSD style pseudo + - terminal devices. As of Linux 2.1.115 these are no longer + - supported. They have been replaced by major numbers 2 (masters) + - and 3 (slaves). */ + - + /* Directory where we can find the slave pty nodes. */ + #define _PATH_DEVPTS "/dev/pts/" + + -/* The are declared in getpt.c. */ + -extern const char __libc_ptyname1[] attribute_hidden; + -extern const char __libc_ptyname2[] attribute_hidden; + - + /* Static buffer for `ptsname'. */ + static char buffer[sizeof (_PATH_DEVPTS) + 20]; + + @@ -68,19 +43,15 @@ ptsname (int fd) + } + + + +/* Store at most BUFLEN characters of the pathname of the slave pseudo + + terminal associated with the master FD is open on in BUF. + + Return 0 on success, otherwise an error number. */ + int + -__ptsname_internal (int fd, char *buf, size_t buflen, struct stat64 *stp) + +__ptsname_r (int fd, char *buf, size_t buflen) + { + int save_errno = errno; + unsigned int ptyno; + + - if (!__isatty (fd)) + - { + - __set_errno (ENOTTY); + - return ENOTTY; + - } + - + -#ifdef TIOCGPTN + if (__ioctl (fd, TIOCGPTN, &ptyno) == 0) + { + /* Buffer we use to print the number in. For a maximum size for + @@ -101,67 +72,11 @@ __ptsname_internal (int fd, char *buf, size_t buflen, struct stat64 *stp) + + memcpy (__stpcpy (buf, devpts), p, &numbuf[sizeof (numbuf)] - p); + } + - else if (errno != EINVAL) + - return errno; + else + -#endif + - { + - char *p; + - + - if (buflen < strlen (_PATH_TTY) + 3) + - { + - __set_errno (ERANGE); + - return ERANGE; + - } + - + - if (__fxstat64 (_STAT_VER, fd, stp) < 0) + - return errno; + - + - /* Check if FD really is a master pseudo terminal. */ + - if (! MASTER_P (stp->st_rdev)) + - { + - __set_errno (ENOTTY); + - return ENOTTY; + - } + - + - ptyno = __gnu_dev_minor (stp->st_rdev); + - + - if (ptyno / 16 >= strlen (__libc_ptyname1)) + - { + - __set_errno (ENOTTY); + - return ENOTTY; + - } + - + - p = __stpcpy (buf, _PATH_TTY); + - p[0] = __libc_ptyname1[ptyno / 16]; + - p[1] = __libc_ptyname2[ptyno % 16]; + - p[2] = '\0'; + - } + - + - if (__xstat64 (_STAT_VER, buf, stp) < 0) + + /* Bad file descriptor, or not a ptmx descriptor. 
*/ + return errno; + + - /* Check if the name we're about to return really corresponds to a + - slave pseudo terminal. */ + - if (! S_ISCHR (stp->st_mode) || ! SLAVE_P (stp->st_rdev)) + - { + - /* This really is a configuration problem. */ + - __set_errno (ENOTTY); + - return ENOTTY; + - } + - + __set_errno (save_errno); + return 0; + } + - + - + -/* Store at most BUFLEN characters of the pathname of the slave pseudo + - terminal associated with the master FD is open on in BUF. + - Return 0 on success, otherwise an error number. */ + -int + -__ptsname_r (int fd, char *buf, size_t buflen) + -{ + - struct stat64 st; + - return __ptsname_internal (fd, buf, buflen, &st); + -} + weak_alias (__ptsname_r, ptsname_r) + diff --git a/sysdeps/unix/sysv/linux/riscv/sysdep.h b/sysdeps/unix/sysv/linux/riscv/sysdep.h + index 201bf9a91b..2bd9b16f32 100644 + --- a/sysdeps/unix/sysv/linux/riscv/sysdep.h + +++ b/sysdeps/unix/sysv/linux/riscv/sysdep.h + @@ -176,10 +176,11 @@ + # define internal_syscall1(number, err, arg0) \ + ({ \ + long int _sys_result; \ + + long int _arg0 = (long int) (arg0); \ + \ + { \ + register long int __a7 asm ("a7") = number; \ + - register long int __a0 asm ("a0") = (long int) (arg0); \ + + register long int __a0 asm ("a0") = _arg0; \ + __asm__ volatile ( \ + "scall\n\t" \ + : "+r" (__a0) \ + @@ -193,11 +194,13 @@ + # define internal_syscall2(number, err, arg0, arg1) \ + ({ \ + long int _sys_result; \ + + long int _arg0 = (long int) (arg0); \ + + long int _arg1 = (long int) (arg1); \ + \ + { \ + register long int __a7 asm ("a7") = number; \ + - register long int __a0 asm ("a0") = (long int) (arg0); \ + - register long int __a1 asm ("a1") = (long int) (arg1); \ + + register long int __a0 asm ("a0") = _arg0; \ + + register long int __a1 asm ("a1") = _arg1; \ + __asm__ volatile ( \ + "scall\n\t" \ + : "+r" (__a0) \ + @@ -211,12 +214,15 @@ + # define internal_syscall3(number, err, arg0, arg1, arg2) \ + ({ \ + long int _sys_result; \ + + long int _arg0 = (long int) (arg0); \ + + long int _arg1 = (long int) (arg1); \ + + long int _arg2 = (long int) (arg2); \ + \ + { \ + register long int __a7 asm ("a7") = number; \ + - register long int __a0 asm ("a0") = (long int) (arg0); \ + - register long int __a1 asm ("a1") = (long int) (arg1); \ + - register long int __a2 asm ("a2") = (long int) (arg2); \ + + register long int __a0 asm ("a0") = _arg0; \ + + register long int __a1 asm ("a1") = _arg1; \ + + register long int __a2 asm ("a2") = _arg2; \ + __asm__ volatile ( \ + "scall\n\t" \ + : "+r" (__a0) \ + @@ -230,13 +236,17 @@ + # define internal_syscall4(number, err, arg0, arg1, arg2, arg3) \ + ({ \ + long int _sys_result; \ + + long int _arg0 = (long int) (arg0); \ + + long int _arg1 = (long int) (arg1); \ + + long int _arg2 = (long int) (arg2); \ + + long int _arg3 = (long int) (arg3); \ + \ + { \ + register long int __a7 asm ("a7") = number; \ + - register long int __a0 asm ("a0") = (long int) (arg0); \ + - register long int __a1 asm ("a1") = (long int) (arg1); \ + - register long int __a2 asm ("a2") = (long int) (arg2); \ + - register long int __a3 asm ("a3") = (long int) (arg3); \ + + register long int __a0 asm ("a0") = _arg0; \ + + register long int __a1 asm ("a1") = _arg1; \ + + register long int __a2 asm ("a2") = _arg2; \ + + register long int __a3 asm ("a3") = _arg3; \ + __asm__ volatile ( \ + "scall\n\t" \ + : "+r" (__a0) \ + @@ -250,14 +260,19 @@ + # define internal_syscall5(number, err, arg0, arg1, arg2, arg3, arg4) \ + ({ \ + long int _sys_result; \ + + long int _arg0 = (long int) 
(arg0); \ + + long int _arg1 = (long int) (arg1); \ + + long int _arg2 = (long int) (arg2); \ + + long int _arg3 = (long int) (arg3); \ + + long int _arg4 = (long int) (arg4); \ + \ + { \ + register long int __a7 asm ("a7") = number; \ + - register long int __a0 asm ("a0") = (long int) (arg0); \ + - register long int __a1 asm ("a1") = (long int) (arg1); \ + - register long int __a2 asm ("a2") = (long int) (arg2); \ + - register long int __a3 asm ("a3") = (long int) (arg3); \ + - register long int __a4 asm ("a4") = (long int) (arg4); \ + + register long int __a0 asm ("a0") = _arg0; \ + + register long int __a1 asm ("a1") = _arg1; \ + + register long int __a2 asm ("a2") = _arg2; \ + + register long int __a3 asm ("a3") = _arg3; \ + + register long int __a4 asm ("a4") = _arg4; \ + __asm__ volatile ( \ + "scall\n\t" \ + : "+r" (__a0) \ + @@ -271,15 +286,21 @@ + # define internal_syscall6(number, err, arg0, arg1, arg2, arg3, arg4, arg5) \ + ({ \ + long int _sys_result; \ + + long int _arg0 = (long int) (arg0); \ + + long int _arg1 = (long int) (arg1); \ + + long int _arg2 = (long int) (arg2); \ + + long int _arg3 = (long int) (arg3); \ + + long int _arg4 = (long int) (arg4); \ + + long int _arg5 = (long int) (arg5); \ + \ + { \ + register long int __a7 asm ("a7") = number; \ + - register long int __a0 asm ("a0") = (long int) (arg0); \ + - register long int __a1 asm ("a1") = (long int) (arg1); \ + - register long int __a2 asm ("a2") = (long int) (arg2); \ + - register long int __a3 asm ("a3") = (long int) (arg3); \ + - register long int __a4 asm ("a4") = (long int) (arg4); \ + - register long int __a5 asm ("a5") = (long int) (arg5); \ + + register long int __a0 asm ("a0") = _arg0; \ + + register long int __a1 asm ("a1") = _arg1; \ + + register long int __a2 asm ("a2") = _arg2; \ + + register long int __a3 asm ("a3") = _arg3; \ + + register long int __a4 asm ("a4") = _arg4; \ + + register long int __a5 asm ("a5") = _arg5; \ + __asm__ volatile ( \ + "scall\n\t" \ + : "+r" (__a0) \ + @@ -294,16 +315,23 @@ + # define internal_syscall7(number, err, arg0, arg1, arg2, arg3, arg4, arg5, arg6) \ + ({ \ + long int _sys_result; \ + + long int _arg0 = (long int) (arg0); \ + + long int _arg1 = (long int) (arg1); \ + + long int _arg2 = (long int) (arg2); \ + + long int _arg3 = (long int) (arg3); \ + + long int _arg4 = (long int) (arg4); \ + + long int _arg5 = (long int) (arg5); \ + + long int _arg6 = (long int) (arg6); \ + \ + { \ + register long int __a7 asm ("a7") = number; \ + - register long int __a0 asm ("a0") = (long int) (arg0); \ + - register long int __a1 asm ("a1") = (long int) (arg1); \ + - register long int __a2 asm ("a2") = (long int) (arg2); \ + - register long int __a3 asm ("a3") = (long int) (arg3); \ + - register long int __a4 asm ("a4") = (long int) (arg4); \ + - register long int __a5 asm ("a5") = (long int) (arg5); \ + - register long int __a6 asm ("a6") = (long int) (arg6); \ + + register long int __a0 asm ("a0") = _arg0; \ + + register long int __a1 asm ("a1") = _arg1; \ + + register long int __a2 asm ("a2") = _arg2; \ + + register long int __a3 asm ("a3") = _arg3; \ + + register long int __a4 asm ("a4") = _arg4; \ + + register long int __a5 asm ("a5") = _arg5; \ + + register long int __a6 asm ("a6") = _arg6; \ + __asm__ volatile ( \ + "scall\n\t" \ + : "+r" (__a0) \ + diff --git a/sysdeps/unix/sysv/linux/semctl.c b/sysdeps/unix/sysv/linux/semctl.c + index 0c3eb0932f..30571af49f 100644 + --- a/sysdeps/unix/sysv/linux/semctl.c + +++ b/sysdeps/unix/sysv/linux/semctl.c + @@ -22,6 +22,7 @@ + 
#include + #include + #include + +#include /* For __kernel_mode_t. */ + + /* Define a `union semun' suitable for Linux here. */ + union semun + @@ -92,7 +93,6 @@ __new_semctl (int semid, int semnum, int cmd, ...) + + int ret = semctl_syscall (semid, semnum, cmd, arg); + + -#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T + if (ret >= 0) + { + switch (cmd) + @@ -100,10 +100,16 @@ __new_semctl (int semid, int semnum, int cmd, ...) + case IPC_STAT: + case SEM_STAT: + case SEM_STAT_ANY: + +#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T + arg.buf->sem_perm.mode >>= 16; + +#else + + /* Old Linux kernel versions might not clear the mode padding. */ + + if (sizeof ((struct semid_ds){0}.sem_perm.mode) + + != sizeof (__kernel_mode_t)) + + arg.buf->sem_perm.mode &= 0xFFFF; + +#endif + } + } + -#endif + + return ret; + } + diff --git a/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies b/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies + new file mode 100644 + index 0000000000..7eeaf15a5a + --- /dev/null + +++ b/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies + @@ -0,0 +1 @@ + +unix/sysv/linux/sh/sh4/fpu + diff --git a/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies b/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies + new file mode 100644 + index 0000000000..7eeaf15a5a + --- /dev/null + +++ b/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies + @@ -0,0 +1 @@ + +unix/sysv/linux/sh/sh4/fpu + diff --git a/sysdeps/unix/sysv/linux/shmctl.c b/sysdeps/unix/sysv/linux/shmctl.c + index 39fa861e17..f41b359b8b 100644 + --- a/sysdeps/unix/sysv/linux/shmctl.c + +++ b/sysdeps/unix/sysv/linux/shmctl.c + @@ -22,6 +22,7 @@ + #include + #include + #include + +#include /* For __kernel_mode_t. */ + + #ifndef DEFAULT_VERSION + # ifndef __ASSUME_SYSVIPC_BROKEN_MODE_T + @@ -63,7 +64,6 @@ __new_shmctl (int shmid, int cmd, struct shmid_ds *buf) + + int ret = shmctl_syscall (shmid, cmd, buf); + + -#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T + if (ret >= 0) + { + switch (cmd) + @@ -71,10 +71,16 @@ __new_shmctl (int shmid, int cmd, struct shmid_ds *buf) + case IPC_STAT: + case SHM_STAT: + case SHM_STAT_ANY: + +#ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T + buf->shm_perm.mode >>= 16; + +#else + + /* Old Linux kernel versions might not clear the mode padding. */ + + if (sizeof ((struct shmid_ds){0}.shm_perm.mode) + + != sizeof (__kernel_mode_t)) + + buf->shm_perm.mode &= 0xFFFF; + +#endif + } + } + -#endif + + return ret; + } + diff --git a/sysdeps/unix/sysv/linux/sparc/Makefile b/sysdeps/unix/sysv/linux/sparc/Makefile + index b0d182a439..1475039677 100644 + --- a/sysdeps/unix/sysv/linux/sparc/Makefile + +++ b/sysdeps/unix/sysv/linux/sparc/Makefile + @@ -11,8 +11,12 @@ ifeq ($(subdir),sysvipc) + sysdep_routines += getshmlba + endif + + +ifeq ($(subdir),signal) + +sysdep_routines += sigreturn_stub + +endif + + + ifeq ($(subdir),nptl) + # pull in __syscall_error routine + -libpthread-routines += sysdep + -libpthread-shared-only-routines += sysdep + +libpthread-routines += sysdep sigreturn_stub + +libpthread-shared-only-routines += sysdep sigreturn_stub + endif + diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/sigaction.c b/sysdeps/unix/sysv/linux/sparc/sparc32/sigaction.c + index 6b2f664226..938aa7aa8c 100644 + --- a/sysdeps/unix/sysv/linux/sparc/sparc32/sigaction.c + +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/sigaction.c + @@ -24,8 +24,8 @@ + #include + #include + + -static void __rt_sigreturn_stub (void); + -static void __sigreturn_stub (void); + +void __rt_sigreturn_stub (void); + +void __sigreturn_stub (void); + + #define STUB(act, sigsetsize) \ + (act) ? 
((unsigned long)((act->sa_flags & SA_SIGINFO) \ + @@ -35,25 +35,3 @@ static void __sigreturn_stub (void); + (sigsetsize) + + #include + - + -static + -inhibit_stack_protector + -void + -__rt_sigreturn_stub (void) + -{ + - __asm__ ("mov %0, %%g1\n\t" + - "ta 0x10\n\t" + - : /* no outputs */ + - : "i" (__NR_rt_sigreturn)); + -} + - + -static + -inhibit_stack_protector + -void + -__sigreturn_stub (void) + -{ + - __asm__ ("mov %0, %%g1\n\t" + - "ta 0x10\n\t" + - : /* no outputs */ + - : "i" (__NR_sigreturn)); + -} + diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/sigreturn_stub.S b/sysdeps/unix/sysv/linux/sparc/sparc32/sigreturn_stub.S + new file mode 100644 + index 0000000000..727cc94737 + --- /dev/null + +++ b/sysdeps/unix/sysv/linux/sparc/sparc32/sigreturn_stub.S + @@ -0,0 +1,34 @@ + +/* Sigreturn stub function used on sa_restore field. + + Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#include + + + +/* These functions must not change the register window or the stack + + pointer [1]. + + + + [1] https://lkml.org/lkml/2016/5/27/465 */ + + + +ENTRY (__rt_sigreturn_stub) + + mov __NR_rt_sigreturn, %g1 + + ta 0x10 + +END (__rt_sigreturn_stub) + + + +ENTRY (__sigreturn_stub) + + mov __NR_sigreturn, %g1 + + ta 0x10 + +END (__sigreturn_stub) + diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/sigaction.c b/sysdeps/unix/sysv/linux/sparc/sparc64/sigaction.c + index 9c0dc2a630..4e26172321 100644 + --- a/sysdeps/unix/sysv/linux/sparc/sparc64/sigaction.c + +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/sigaction.c + @@ -22,21 +22,11 @@ + #include + #include + + -static void __rt_sigreturn_stub (void); + +/* Defined on sigreturn_stub.S. */ + +void __rt_sigreturn_stub (void); + + #define STUB(act, sigsetsize) \ + (((unsigned long) &__rt_sigreturn_stub) - 8), \ + (sigsetsize) + + #include + - + -static + -inhibit_stack_protector + -void + -__rt_sigreturn_stub (void) + -{ + - __asm__ ("mov %0, %%g1\n\t" + - "ta 0x6d\n\t" + - : /* no outputs */ + - : "i" (__NR_rt_sigreturn)); + -} + diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/sigreturn_stub.S b/sysdeps/unix/sysv/linux/sparc/sparc64/sigreturn_stub.S + new file mode 100644 + index 0000000000..add4766831 + --- /dev/null + +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/sigreturn_stub.S + @@ -0,0 +1,29 @@ + +/* Sigreturn stub function used on sa_restore field. + + Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. 
+ + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#include + + + +/* This function must not change the register window or the stack + + pointer [1]. + + + + [1] https://lkml.org/lkml/2016/5/27/465 */ + + + +ENTRY (__rt_sigreturn_stub) + + mov __NR_rt_sigreturn, %g1 + + ta 0x6d + +END (__rt_sigreturn_stub) + diff --git a/sysdeps/unix/sysv/linux/syscall-names.list b/sysdeps/unix/sysv/linux/syscall-names.list + index 36e087d8f4..3d89814003 100644 + --- a/sysdeps/unix/sysv/linux/syscall-names.list + +++ b/sysdeps/unix/sysv/linux/syscall-names.list + @@ -21,8 +21,8 @@ + # This file can list all potential system calls. The names are only + # used if the installed kernel headers also provide them. + + -# The list of system calls is current as of Linux 5.4. + -kernel 5.4 + +# The list of system calls is current as of Linux 5.5. + +kernel 5.5 + + FAST_atomic_update + FAST_cmpxchg + diff --git a/sysdeps/unix/sysv/linux/syscalls.list b/sysdeps/unix/sysv/linux/syscalls.list + index 5f1352ad43..52e6dafc86 100644 + --- a/sysdeps/unix/sysv/linux/syscalls.list + +++ b/sysdeps/unix/sysv/linux/syscalls.list + @@ -28,25 +28,24 @@ inotify_add_watch EXTRA inotify_add_watch i:isi inotify_add_watch + inotify_init EXTRA inotify_init i: inotify_init + inotify_init1 EXTRA inotify_init1 i:I inotify_init1 + inotify_rm_watch EXTRA inotify_rm_watch i:ii inotify_rm_watch + -ioperm - ioperm i:iii ioperm + +ioperm - ioperm i:UUi ioperm + iopl - iopl i:i iopl + klogctl EXTRA syslog i:isi klogctl + lchown - lchown i:sii __lchown lchown + -mincore - mincore i:anV mincore + -mlock - mlock i:bn mlock + +mincore - mincore i:aUV mincore + +mlock - mlock i:bU mlock + mlockall - mlockall i:i mlockall + -mount EXTRA mount i:sssip __mount mount + -mremap EXTRA mremap b:ainip __mremap mremap + -munlock - munlock i:ai munlock + +mount EXTRA mount i:sssUp __mount mount + +mremap EXTRA mremap b:aUUip __mremap mremap + +munlock - munlock i:aU munlock + munlockall - munlockall i: munlockall + nfsservctl EXTRA nfsservctl i:ipp __compat_nfsservctl nfsservctl@GLIBC_2.0:GLIBC_2.28 + pipe - pipe i:f __pipe pipe + pipe2 - pipe2 i:fi __pipe2 pipe2 + pivot_root EXTRA pivot_root i:ss pivot_root + -prctl EXTRA prctl i:iiiii __prctl prctl + query_module EXTRA query_module i:sipip __compat_query_module query_module@GLIBC_2.0:GLIBC_2.23 + quotactl EXTRA quotactl i:isip quotactl + -remap_file_pages - remap_file_pages i:piiii __remap_file_pages remap_file_pages + +remap_file_pages - remap_file_pages i:pUiUi __remap_file_pages remap_file_pages + sched_getp - sched_getparam i:ip __sched_getparam sched_getparam + sched_gets - sched_getscheduler i:i __sched_getscheduler sched_getscheduler + sched_primax - sched_get_priority_max i:i __sched_get_priority_max sched_get_priority_max + @@ -55,8 +54,8 @@ sched_rr_gi - sched_rr_get_interval i:ip __sched_rr_get_interval sched_rr_get_in + sched_setp - sched_setparam i:ip __sched_setparam sched_setparam + sched_sets - sched_setscheduler i:iip __sched_setscheduler sched_setscheduler + sched_yield - sched_yield i: __sched_yield sched_yield + -sendfile - sendfile i:iipi sendfile + -sendfile64 - sendfile64 i:iipi sendfile64 + +sendfile - sendfile i:iipU sendfile + 
+sendfile64 - sendfile64 i:iipU sendfile64 + setfsgid EXTRA setfsgid i:i setfsgid + setfsuid EXTRA setfsuid i:i setfsuid + setpgid - setpgid i:ii __setpgid setpgid + @@ -73,19 +72,19 @@ chown - chown i:sii __libc_chown __chown chown + fchownat - fchownat i:isiii fchownat + linkat - linkat i:isisi linkat + mkdirat - mkdirat i:isi mkdirat + -readlinkat - readlinkat i:issi readlinkat + +readlinkat - readlinkat i:issU readlinkat + symlinkat - symlinkat i:sis symlinkat + unlinkat - unlinkat i:isi unlinkat + + -setxattr - setxattr i:sspii setxattr + -lsetxattr - lsetxattr i:sspii lsetxattr + -fsetxattr - fsetxattr i:ispii fsetxattr + -getxattr - getxattr i:sspi getxattr + -lgetxattr - lgetxattr i:sspi lgetxattr + -fgetxattr - fgetxattr i:ispi fgetxattr + -listxattr - listxattr i:ssi listxattr + -llistxattr - llistxattr i:ssi llistxattr + -flistxattr - flistxattr i:isi flistxattr + +setxattr - setxattr i:sspUi setxattr + +lsetxattr - lsetxattr i:sspUi lsetxattr + +fsetxattr - fsetxattr i:ispUi fsetxattr + +getxattr - getxattr i:sspU getxattr + +lgetxattr - lgetxattr i:sspU lgetxattr + +fgetxattr - fgetxattr i:ispU fgetxattr + +listxattr - listxattr i:ssU listxattr + +llistxattr - llistxattr i:ssU llistxattr + +flistxattr - flistxattr i:isU flistxattr + removexattr - removexattr i:ss removexattr + lremovexattr - lremovexattr i:ss lremovexattr + fremovexattr - fremovexattr i:is fremovexattr + @@ -102,8 +101,6 @@ name_to_handle_at EXTRA name_to_handle_at i:isppi name_to_handle_at + + setns EXTRA setns i:ii setns + + -process_vm_readv EXTRA process_vm_readv i:ipipii process_vm_readv + -process_vm_writev EXTRA process_vm_writev i:ipipii process_vm_writev + memfd_create EXTRA memfd_create i:si memfd_create + pkey_alloc EXTRA pkey_alloc i:ii pkey_alloc + pkey_free EXTRA pkey_free i:i pkey_free + diff --git a/sysdeps/unix/sysv/linux/tst-getcwd-smallbuff.c b/sysdeps/unix/sysv/linux/tst-getcwd-smallbuff.c + new file mode 100644 + index 0000000000..55362f6060 + --- /dev/null + +++ b/sysdeps/unix/sysv/linux/tst-getcwd-smallbuff.c + @@ -0,0 +1,259 @@ + +/* Verify that getcwd returns ERANGE for size 1 byte and does not underflow + + buffer when the CWD is too long and is also a mount target of /. See bug + + #28769 or CVE-2021-3999 for more context. + + Copyright The GNU Toolchain Authors. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . 
*/ + + + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +#include + + + +#include + +#include + +#include + +#include + +#include + +#include + +#include + + + +static char *base; + +#define BASENAME "tst-getcwd-smallbuff" + +#define MOUNT_NAME "mpoint" + +static int sockfd[2]; + + + +static void + +do_cleanup (void) + +{ + + support_chdir_toolong_temp_directory (base); + + TEST_VERIFY_EXIT (rmdir (MOUNT_NAME) == 0); + + free (base); + +} + + + +static void + +send_fd (const int sock, const int fd) + +{ + + struct msghdr msg = {0}; + + union + + { + + struct cmsghdr hdr; + + char buf[CMSG_SPACE (sizeof (int))]; + + } cmsgbuf = {0}; + + struct cmsghdr *cmsg; + + struct iovec vec; + + char ch = 'A'; + + ssize_t n; + + + + msg.msg_control = &cmsgbuf.buf; + + msg.msg_controllen = sizeof (cmsgbuf.buf); + + + + cmsg = CMSG_FIRSTHDR (&msg); + + cmsg->cmsg_len = CMSG_LEN (sizeof (int)); + + cmsg->cmsg_level = SOL_SOCKET; + + cmsg->cmsg_type = SCM_RIGHTS; + + memcpy (CMSG_DATA (cmsg), &fd, sizeof (fd)); + + + + vec.iov_base = &ch; + + vec.iov_len = 1; + + msg.msg_iov = &vec; + + msg.msg_iovlen = 1; + + + + while ((n = sendmsg (sock, &msg, 0)) == -1 && errno == EINTR); + + + + TEST_VERIFY_EXIT (n == 1); + +} + + + +static int + +recv_fd (const int sock) + +{ + + struct msghdr msg = {0}; + + union + + { + + struct cmsghdr hdr; + + char buf[CMSG_SPACE(sizeof(int))]; + + } cmsgbuf = {0}; + + struct cmsghdr *cmsg; + + struct iovec vec; + + ssize_t n; + + char ch = '\0'; + + int fd = -1; + + + + vec.iov_base = &ch; + + vec.iov_len = 1; + + msg.msg_iov = &vec; + + msg.msg_iovlen = 1; + + + + msg.msg_control = &cmsgbuf.buf; + + msg.msg_controllen = sizeof (cmsgbuf.buf); + + + + while ((n = recvmsg (sock, &msg, 0)) == -1 && errno == EINTR); + + if (n != 1 || ch != 'A') + + return -1; + + + + cmsg = CMSG_FIRSTHDR (&msg); + + if (cmsg == NULL) + + return -1; + + if (cmsg->cmsg_type != SCM_RIGHTS) + + return -1; + + memcpy (&fd, CMSG_DATA (cmsg), sizeof (fd)); + + if (fd < 0) + + return -1; + + return fd; + +} + + + +static int + +child_func (void * const arg) + +{ + + xclose (sockfd[0]); + + const int sock = sockfd[1]; + + char ch; + + + + TEST_VERIFY_EXIT (read (sock, &ch, 1) == 1); + + TEST_VERIFY_EXIT (ch == '1'); + + + + if (mount ("/", MOUNT_NAME, NULL, MS_BIND | MS_REC, NULL)) + + FAIL_EXIT1 ("mount failed: %m\n"); + + const int fd = xopen ("mpoint", + + O_RDONLY | O_PATH | O_DIRECTORY | O_NOFOLLOW, 0); + + + + send_fd (sock, fd); + + xclose (fd); + + + + TEST_VERIFY_EXIT (read (sock, &ch, 1) == 1); + + TEST_VERIFY_EXIT (ch == 'a'); + + + + xclose (sock); + + return 0; + +} + + + +static void + +update_map (char * const mapping, const char * const map_file) + +{ + + const size_t map_len = strlen (mapping); + + + + const int fd = xopen (map_file, O_WRONLY, 0); + + xwrite (fd, mapping, map_len); + + xclose (fd); + +} + + + +static void + +proc_setgroups_write (const long child_pid, const char * const str) + +{ + + const size_t str_len = strlen(str); + + + + char setgroups_path[sizeof ("/proc//setgroups") + INT_STRLEN_BOUND (long)]; + + + + snprintf (setgroups_path, sizeof (setgroups_path), + + "/proc/%ld/setgroups", child_pid); + + + + const int fd = open (setgroups_path, O_WRONLY); + + + + if (fd < 0) + + { + + TEST_VERIFY_EXIT (errno == ENOENT); + + FAIL_UNSUPPORTED ("/proc/%ld/setgroups not found\n", child_pid); + + } + + + + xwrite (fd, str, str_len); + + xclose(fd); + +} + + + +static char child_stack[1024 * 1024]; + + + +int + 
+do_test (void) + +{ + + base = support_create_and_chdir_toolong_temp_directory (BASENAME); + + + + xmkdir (MOUNT_NAME, S_IRWXU); + + atexit (do_cleanup); + + + + /* Check whether user namespaces are supported. */ + + { + + pid_t pid = xfork (); + + if (pid == 0) + + { + + if (unshare (CLONE_NEWUSER | CLONE_NEWNS) != 0) + + _exit (EXIT_UNSUPPORTED); + + else + + _exit (0); + + } + + int status; + + xwaitpid (pid, &status, 0); + + TEST_VERIFY_EXIT (WIFEXITED (status)); + + if (WEXITSTATUS (status) != 0) + + return WEXITSTATUS (status); + + } + + + + TEST_VERIFY_EXIT (socketpair (AF_UNIX, SOCK_STREAM, 0, sockfd) == 0); + + pid_t child_pid = xclone (child_func, NULL, child_stack, + + sizeof (child_stack), + + CLONE_NEWUSER | CLONE_NEWNS | SIGCHLD); + + + + xclose (sockfd[1]); + + const int sock = sockfd[0]; + + + + char map_path[sizeof ("/proc//uid_map") + INT_STRLEN_BOUND (long)]; + + char map_buf[sizeof ("0 1") + INT_STRLEN_BOUND (long)]; + + + + snprintf (map_path, sizeof (map_path), "/proc/%ld/uid_map", + + (long) child_pid); + + snprintf (map_buf, sizeof (map_buf), "0 %ld 1", (long) getuid()); + + update_map (map_buf, map_path); + + + + proc_setgroups_write ((long) child_pid, "deny"); + + snprintf (map_path, sizeof (map_path), "/proc/%ld/gid_map", + + (long) child_pid); + + snprintf (map_buf, sizeof (map_buf), "0 %ld 1", (long) getgid()); + + update_map (map_buf, map_path); + + + + TEST_VERIFY_EXIT (send (sock, "1", 1, MSG_NOSIGNAL) == 1); + + const int fd = recv_fd (sock); + + TEST_VERIFY_EXIT (fd >= 0); + + TEST_VERIFY_EXIT (fchdir (fd) == 0); + + + + static char buf[2 * 10 + 1]; + + memset (buf, 'A', sizeof (buf)); + + + + /* Finally, call getcwd and check if it resulted in a buffer underflow. */ + + char * cwd = getcwd (buf + sizeof (buf) / 2, 1); + + TEST_VERIFY (cwd == NULL); + + TEST_VERIFY (errno == ERANGE); + + + + for (int i = 0; i < sizeof (buf); i++) + + if (buf[i] != 'A') + + { + + printf ("buf[%d] = %02x\n", i, (unsigned int) buf[i]); + + support_record_failure (); + + } + + + + TEST_VERIFY_EXIT (send (sock, "a", 1, MSG_NOSIGNAL) == 1); + + xclose (sock); + + TEST_VERIFY_EXIT (xwaitpid (child_pid, NULL, 0) == child_pid); + + + + return 0; + +} + + + +#define CLEANUP_HANDLER do_cleanup + +#include + diff --git a/sysdeps/unix/sysv/linux/x86_64/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/sysdep.h + index c2eb37e575..c7f740a1df 100644 + --- a/sysdeps/unix/sysv/linux/x86_64/sysdep.h + +++ b/sysdeps/unix/sysv/linux/x86_64/sysdep.h + @@ -61,13 +61,31 @@ + # define SYSCALL_ERROR_LABEL syscall_error + # endif + + +/* PSEUDO and T_PSEUDO macros have 2 extra arguments for unsigned long + + int arguments. 
*/ + +# define PSEUDOS_HAVE_ULONG_INDICES 1 + + + +# ifndef SYSCALL_ULONG_ARG_1 + +# define SYSCALL_ULONG_ARG_1 0 + +# define SYSCALL_ULONG_ARG_2 0 + +# endif + + + # undef PSEUDO + -# define PSEUDO(name, syscall_name, args) \ + - .text; \ + - ENTRY (name) \ + - DO_CALL (syscall_name, args); \ + - cmpq $-4095, %rax; \ + +# if SYSCALL_ULONG_ARG_1 + +# define PSEUDO(name, syscall_name, args, ulong_arg_1, ulong_arg_2) \ + + .text; \ + + ENTRY (name) \ + + DO_CALL (syscall_name, args, ulong_arg_1, ulong_arg_2); \ + + cmpq $-4095, %rax; \ + jae SYSCALL_ERROR_LABEL + +# else + +# define PSEUDO(name, syscall_name, args) \ + + .text; \ + + ENTRY (name) \ + + DO_CALL (syscall_name, args, 0, 0); \ + + cmpq $-4095, %rax; \ + + jae SYSCALL_ERROR_LABEL + +# endif + + # undef PSEUDO_END + # define PSEUDO_END(name) \ + @@ -75,10 +93,17 @@ + END (name) + + # undef PSEUDO_NOERRNO + -# define PSEUDO_NOERRNO(name, syscall_name, args) \ + - .text; \ + - ENTRY (name) \ + - DO_CALL (syscall_name, args) + +# if SYSCALL_ULONG_ARG_1 + +# define PSEUDO_NOERRNO(name, syscall_name, args, ulong_arg_1, ulong_arg_2) \ + + .text; \ + + ENTRY (name) \ + + DO_CALL (syscall_name, args, ulong_arg_1, ulong_arg_2) + +# else + +# define PSEUDO_NOERRNO(name, syscall_name, args) \ + + .text; \ + + ENTRY (name) \ + + DO_CALL (syscall_name, args, 0, 0) + +# endif + + # undef PSEUDO_END_NOERRNO + # define PSEUDO_END_NOERRNO(name) \ + @@ -87,11 +112,19 @@ + # define ret_NOERRNO ret + + # undef PSEUDO_ERRVAL + -# define PSEUDO_ERRVAL(name, syscall_name, args) \ + - .text; \ + - ENTRY (name) \ + - DO_CALL (syscall_name, args); \ + +# if SYSCALL_ULONG_ARG_1 + +# define PSEUDO_ERRVAL(name, syscall_name, args, ulong_arg_1, ulong_arg_2) \ + + .text; \ + + ENTRY (name) \ + + DO_CALL (syscall_name, args, ulong_arg_1, ulong_arg_2); \ + + negq %rax + +# else + +# define PSEUDO_ERRVAL(name, syscall_name, args) \ + + .text; \ + + ENTRY (name) \ + + DO_CALL (syscall_name, args, 0, 0); \ + negq %rax + +# endif + + # undef PSEUDO_END_ERRVAL + # define PSEUDO_END_ERRVAL(name) \ + @@ -163,8 +196,10 @@ + Syscalls of more than 6 arguments are not supported. */ + + # undef DO_CALL + -# define DO_CALL(syscall_name, args) \ + +# define DO_CALL(syscall_name, args, ulong_arg_1, ulong_arg_2) \ + DOARGS_##args \ + + ZERO_EXTEND_##ulong_arg_1 \ + + ZERO_EXTEND_##ulong_arg_2 \ + movl $SYS_ify (syscall_name), %eax; \ + syscall; + + @@ -176,6 +211,14 @@ + # define DOARGS_5 DOARGS_4 + # define DOARGS_6 DOARGS_5 + + +# define ZERO_EXTEND_0 /* nothing */ + +# define ZERO_EXTEND_1 /* nothing */ + +# define ZERO_EXTEND_2 /* nothing */ + +# define ZERO_EXTEND_3 /* nothing */ + +# define ZERO_EXTEND_4 /* nothing */ + +# define ZERO_EXTEND_5 /* nothing */ + +# define ZERO_EXTEND_6 /* nothing */ + + + #else /* !__ASSEMBLER__ */ + /* Define a macro which expands inline into the wrapper code for a system + call. */ + @@ -210,12 +253,15 @@ + /* Registers clobbered by syscall. */ + # define REGISTERS_CLOBBERED_BY_SYSCALL "cc", "r11", "cx" + + -/* Create a variable 'name' based on type 'X' to avoid explicit types. + - This is mainly used set use 64-bits arguments in x32. */ + -#define TYPEFY(X, name) __typeof__ ((X) - (X)) name + -/* Explicit cast the argument to avoid integer from pointer warning on + - x32. */ + -#define ARGIFY(X) ((__typeof__ ((X) - (X))) (X)) + +/* NB: This also works when X is an array. For an array X, type of + + (X) - (X) is ptrdiff_t, which is signed, since size of ptrdiff_t + + == size of pointer, cast is a NOP. 
*/ + +#define TYPEFY1(X) __typeof__ ((X) - (X)) + +/* Explicit cast the argument. */ + +#define ARGIFY(X) ((TYPEFY1 (X)) (X)) + +/* Create a variable 'name' based on type of variable 'X' to avoid + + explicit types. */ + +#define TYPEFY(X, name) __typeof__ (ARGIFY (X)) name + + #undef INTERNAL_SYSCALL + #define INTERNAL_SYSCALL(name, err, nr, args...) \ + diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h b/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h + index 5bf9eed80b..62e6f8fe11 100644 + --- a/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h + +++ b/sysdeps/unix/sysv/linux/x86_64/x32/sysdep.h + @@ -26,4 +26,39 @@ + #undef LO_HI_LONG + #define LO_HI_LONG(val) (val) + + +#ifdef __ASSEMBLER__ + +/* Zero-extend 32-bit unsigned long int arguments to 64 bits. */ + +# undef ZERO_EXTEND_1 + +# define ZERO_EXTEND_1 movl %edi, %edi; + +# undef ZERO_EXTEND_2 + +# define ZERO_EXTEND_2 movl %esi, %esi; + +# undef ZERO_EXTEND_3 + +# define ZERO_EXTEND_3 movl %edx, %edx; + +# if SYSCALL_ULONG_ARG_1 == 4 || SYSCALL_ULONG_ARG_2 == 4 + +# undef DOARGS_4 + +# define DOARGS_4 movl %ecx, %r10d; + +# else + +# undef ZERO_EXTEND_4 + +# define ZERO_EXTEND_4 movl %r10d, %r10d; + +# endif + +# undef ZERO_EXTEND_5 + +# define ZERO_EXTEND_5 movl %r8d, %r8d; + +# undef ZERO_EXTEND_6 + +# define ZERO_EXTEND_6 movl %r9d, %r9d; + +#else /* !__ASSEMBLER__ */ + +# undef ARGIFY + +/* Enforce zero-extension for pointers and array system call arguments. + + For integer types, extend to int64_t (the full register) using a + + regular cast, resulting in zero or sign extension based on the + + signedness of the original type. */ + +# define ARGIFY(X) \ + + ({ \ + + _Pragma ("GCC diagnostic push"); \ + + _Pragma ("GCC diagnostic ignored \"-Wpointer-to-int-cast\""); \ + + (__builtin_classify_type (X) == 5 \ + + ? 
(uintptr_t) (X) : (int64_t) (X)); \ + + _Pragma ("GCC diagnostic pop"); \ + + }) + +#endif /* __ASSEMBLER__ */ + + + #endif /* linux/x86_64/x32/sysdep.h */ + diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile + index 95182a508c..b7aec5df2b 100644 + --- a/sysdeps/x86/Makefile + +++ b/sysdeps/x86/Makefile + @@ -12,6 +12,42 @@ endif + ifeq ($(subdir),setjmp) + gen-as-const-headers += jmp_buf-ssp.sym + sysdep_routines += __longjmp_cancel + +ifneq ($(enable-cet),no) + +ifneq ($(have-tunables),no) + +tests += tst-setjmp-cet + +tst-setjmp-cet-ENV = GLIBC_TUNABLES=glibc.cpu.x86_ibt=on:glibc.cpu.x86_shstk=on + +endif + +endif + +endif + + + +ifeq ($(subdir),string) + +sysdep_routines += cacheinfo + + + +tests += \ + + tst-memchr-rtm \ + + tst-memcmp-rtm \ + + tst-memmove-rtm \ + + tst-memrchr-rtm \ + + tst-memset-rtm \ + + tst-strchr-rtm \ + + tst-strcpy-rtm \ + + tst-strlen-rtm \ + + tst-strncmp-rtm \ + + tst-strrchr-rtm \ + + tst-wcsncmp-rtm \ + +# tests + + + +CFLAGS-tst-memchr-rtm.c += -mrtm + +CFLAGS-tst-memcmp-rtm.c += -mrtm + +CFLAGS-tst-memmove-rtm.c += -mrtm + +CFLAGS-tst-memrchr-rtm.c += -mrtm + +CFLAGS-tst-memset-rtm.c += -mrtm + +CFLAGS-tst-strchr-rtm.c += -mrtm + +CFLAGS-tst-strcpy-rtm.c += -mrtm + +CFLAGS-tst-strlen-rtm.c += -mrtm + +CFLAGS-tst-strncmp-rtm.c += -mrtm -Wno-error + +CFLAGS-tst-strrchr-rtm.c += -mrtm + +CFLAGS-tst-wcsncmp-rtm.c += -mrtm -Wno-error + endif + + ifeq ($(enable-cet),yes) + diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c + index e3e8ef27bb..39c13b7195 100644 + --- a/sysdeps/x86/cacheinfo.c + +++ b/sysdeps/x86/cacheinfo.c + @@ -722,7 +722,7 @@ intel_bug_no_cache_info: + threads = 1 << ((ecx >> 12) & 0x0f); + } + + - if (threads == 0) + + if (threads == 0 || cpu_features->basic.family >= 0x17) + { + /* If APIC ID width is not available, use logical + processor count. */ + @@ -737,8 +737,22 @@ intel_bug_no_cache_info: + if (threads > 0) + shared /= threads; + + - /* Account for exclusive L2 and L3 caches. */ + - shared += core; + + /* Get shared cache per ccx for Zen architectures. */ + + if (cpu_features->basic.family >= 0x17) + + { + + unsigned int eax; + + + + /* Get number of threads share the L3 cache in CCX. */ + + __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx); + + + + unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1; + + shared *= threads_per_ccx; + + } + + else + + { + + /* Account for exclusive L2 and L3 caches. */ + + shared += core; + + } + } + + #ifndef DISABLE_PREFETCHW + @@ -778,14 +792,20 @@ intel_bug_no_cache_info: + __x86_shared_cache_size = shared; + } + + - /* The large memcpy micro benchmark in glibc shows that 6 times of + - shared cache size is the approximate value above which non-temporal + - store becomes faster on a 8-core processor. This is the 3/4 of the + - total shared cache size. */ + + /* The default setting for the non_temporal threshold is 3/4 of one + + thread's share of the chip's cache. For most Intel and AMD processors + + with an initial release date between 2017 and 2020, a thread's typical + + share of the cache is from 500 KBytes to 2 MBytes. Using the 3/4 + + threshold leaves 125 KBytes to 500 KBytes of the thread's data + + in cache after a maximum temporal copy, which will maintain + + in cache a reasonable portion of the thread's stack and other + + active data. If the threshold is set higher than one thread's + + share of the cache, it has a substantial risk of negatively + + impacting the performance of other threads running on the chip. 
*/ + __x86_shared_non_temporal_threshold + = (cpu_features->non_temporal_threshold != 0 + ? cpu_features->non_temporal_threshold + - : __x86_shared_cache_size * threads * 3 / 4); + + : __x86_shared_cache_size * 3 / 4); + } + + #endif + diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c + index 81a170a819..e1c22e3e58 100644 + --- a/sysdeps/x86/cpu-features.c + +++ b/sysdeps/x86/cpu-features.c + @@ -333,6 +333,9 @@ init_cpu_features (struct cpu_features *cpu_features) + + get_extended_indices (cpu_features); + + + if (CPU_FEATURES_CPU_P (cpu_features, RTM_ALWAYS_ABORT)) + + cpu_features->cpuid[index_cpu_RTM].reg_RTM &= ~bit_cpu_RTM; + + + if (family == 0x06) + { + model += extended_model; + @@ -394,11 +397,42 @@ init_cpu_features (struct cpu_features *cpu_features) + break; + } + + - /* Disable TSX on some Haswell processors to avoid TSX on kernels that + - weren't updated with the latest microcode package (which disables + - broken feature by default). */ + + /* Disable TSX on some processors to avoid TSX on kernels that + + weren't updated with the latest microcode package (which + + disables broken feature by default). */ + switch (model) + { + + case 0x55: + + if (stepping <= 5) + + goto disable_tsx; + + break; + + case 0x8e: + + /* NB: Although the errata documents that for model == 0x8e, + + only 0xb stepping or lower are impacted, the intention of + + the errata was to disable TSX on all client processors on + + all steppings. Include 0xc stepping which is an Intel + + Core i7-8665U, a client mobile processor. */ + + case 0x9e: + + if (stepping > 0xc) + + break; + + /* Fall through. */ + + case 0x4e: + + case 0x5e: + + { + + /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for + + processors listed in: + + + +https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html + + */ + +disable_tsx: + + cpu_features->cpuid[index_cpu_HLE].reg_HLE + + &= ~bit_cpu_HLE; + + cpu_features->cpuid[index_cpu_RTM].reg_RTM + + &= ~bit_cpu_RTM; + + cpu_features->cpuid[index_cpu_RTM_ALWAYS_ABORT].reg_RTM_ALWAYS_ABORT + + |= bit_cpu_RTM_ALWAYS_ABORT; + + } + + break; + case 0x3f: + /* Xeon E7 v3 with stepping >= 4 has working TSX. */ + if (stepping >= 4) + @@ -424,8 +458,24 @@ init_cpu_features (struct cpu_features *cpu_features) + cpu_features->feature[index_arch_Prefer_No_VZEROUPPER] + |= bit_arch_Prefer_No_VZEROUPPER; + else + - cpu_features->feature[index_arch_Prefer_No_AVX512] + - |= bit_arch_Prefer_No_AVX512; + + { + + cpu_features->feature[index_arch_Prefer_No_AVX512] + + |= bit_arch_Prefer_No_AVX512; + + + + /* Avoid RTM abort triggered by VZEROUPPER inside a + + transactionally executing RTM region. */ + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + + cpu_features->feature[index_arch_Prefer_No_VZEROUPPER] + + |= bit_arch_Prefer_No_VZEROUPPER; + + + + /* Since to compare 2 32-byte strings, 256-bit EVEX strcmp + + requires 2 loads, 3 VPCMPs and 2 KORDs while AVX2 strcmp + + requires 1 load, 2 VPCMPEQs, 1 VPMINU and 1 VPMOVMSKB, + + AVX2 strcmp is faster than EVEX strcmp. */ + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)) + + cpu_features->feature[index_arch_Prefer_AVX2_STRCMP] + + |= bit_arch_Prefer_AVX2_STRCMP; + + } + } + /* This spells out "AuthenticAMD" or "HygonGenuine". 
*/ + else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) + diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h + index aea83e6e31..9fb97907b5 100644 + --- a/sysdeps/x86/cpu-features.h + +++ b/sysdeps/x86/cpu-features.h + @@ -499,6 +499,7 @@ extern const struct cpu_features *__get_cpu_features (void) + #define bit_cpu_AVX512_4VNNIW (1u << 2) + #define bit_cpu_AVX512_4FMAPS (1u << 3) + #define bit_cpu_FSRM (1u << 4) + +#define bit_cpu_RTM_ALWAYS_ABORT (1u << 11) + #define bit_cpu_PCONFIG (1u << 18) + #define bit_cpu_IBT (1u << 20) + #define bit_cpu_IBRS_IBPB (1u << 26) + @@ -667,6 +668,7 @@ extern const struct cpu_features *__get_cpu_features (void) + #define index_cpu_AVX512_4VNNIW COMMON_CPUID_INDEX_7 + #define index_cpu_AVX512_4FMAPS COMMON_CPUID_INDEX_7 + #define index_cpu_FSRM COMMON_CPUID_INDEX_7 + +#define index_cpu_RTM_ALWAYS_ABORT COMMON_CPUID_INDEX_7 + #define index_cpu_PCONFIG COMMON_CPUID_INDEX_7 + #define index_cpu_IBT COMMON_CPUID_INDEX_7 + #define index_cpu_IBRS_IBPB COMMON_CPUID_INDEX_7 + @@ -835,6 +837,7 @@ extern const struct cpu_features *__get_cpu_features (void) + #define reg_AVX512_4VNNIW edx + #define reg_AVX512_4FMAPS edx + #define reg_FSRM edx + +#define reg_RTM_ALWAYS_ABORT edx + #define reg_PCONFIG edx + #define reg_IBT edx + #define reg_IBRS_IBPB edx + @@ -897,6 +900,7 @@ extern const struct cpu_features *__get_cpu_features (void) + #define bit_arch_Prefer_FSRM (1u << 13) + #define bit_arch_Prefer_No_AVX512 (1u << 14) + #define bit_arch_MathVec_Prefer_No_AVX512 (1u << 15) + +#define bit_arch_Prefer_AVX2_STRCMP (1u << 16) + + #define index_arch_Fast_Rep_String FEATURE_INDEX_2 + #define index_arch_Fast_Copy_Backward FEATURE_INDEX_2 + @@ -914,6 +918,7 @@ extern const struct cpu_features *__get_cpu_features (void) + #define index_arch_Prefer_No_AVX512 FEATURE_INDEX_2 + #define index_arch_MathVec_Prefer_No_AVX512 FEATURE_INDEX_2 + #define index_arch_Prefer_FSRM FEATURE_INDEX_2 + +#define index_arch_Prefer_AVX2_STRCMP FEATURE_INDEX_2 + + /* XCR0 Feature flags. */ + #define bit_XMM_state (1u << 1) + diff --git a/sysdeps/x86/cpu-tunables.c b/sysdeps/x86/cpu-tunables.c + index 861bd7bcaa..cb83ecc3b2 100644 + --- a/sysdeps/x86/cpu-tunables.c + +++ b/sysdeps/x86/cpu-tunables.c + @@ -282,6 +282,9 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) + CHECK_GLIBC_IFUNC_ARCH_BOTH (n, cpu_features, + Fast_Copy_Backward, disable, + 18); + + CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH + + (n, cpu_features, Prefer_AVX2_STRCMP, AVX2_Usable, + + disable, 18); + } + break; + case 19: + diff --git a/sysdeps/x86/dl-cet.c b/sysdeps/x86/dl-cet.c + index ca3b5849bc..8ffaf94a00 100644 + --- a/sysdeps/x86/dl-cet.c + +++ b/sysdeps/x86/dl-cet.c + @@ -105,7 +105,11 @@ dl_cet_check (struct link_map *m, const char *program) + /* No legacy object check if both IBT and SHSTK are always on. */ + if (enable_ibt_type == CET_ALWAYS_ON + && enable_shstk_type == CET_ALWAYS_ON) + - return; + + { + + THREAD_SETMEM (THREAD_SELF, header.feature_1, + + GL(dl_x86_feature_1)[0]); + + return; + + } + + /* Check if IBT is enabled by kernel. 
*/ + bool ibt_enabled + diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c + index 0f55987ae5..bbb5cd356d 100644 + --- a/sysdeps/x86/tst-get-cpu-features.c + +++ b/sysdeps/x86/tst-get-cpu-features.c + @@ -176,6 +176,7 @@ do_test (void) + CHECK_CPU_FEATURE (AVX512_4VNNIW); + CHECK_CPU_FEATURE (AVX512_4FMAPS); + CHECK_CPU_FEATURE (FSRM); + + CHECK_CPU_FEATURE (RTM_ALWAYS_ABORT); + CHECK_CPU_FEATURE (PCONFIG); + CHECK_CPU_FEATURE (IBT); + CHECK_CPU_FEATURE (IBRS_IBPB); + diff --git a/sysdeps/x86/tst-memchr-rtm.c b/sysdeps/x86/tst-memchr-rtm.c + new file mode 100644 + index 0000000000..e47494011e + --- /dev/null + +++ b/sysdeps/x86/tst-memchr-rtm.c + @@ -0,0 +1,54 @@ + +/* Test case for memchr inside a transactionally executing RTM region. + + Copyright (C) 2021 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#include + + + +#define LOOP 3000 + +#define STRING_SIZE 1024 + +char string1[STRING_SIZE]; + + + +__attribute__ ((noinline, noclone)) + +static int + +prepare (void) + +{ + + memset (string1, 'a', STRING_SIZE); + + string1[100] = 'c'; + + string1[STRING_SIZE - 100] = 'c'; + + char *p = memchr (string1, 'c', STRING_SIZE); + + if (p == &string1[100]) + + return EXIT_SUCCESS; + + else + + return EXIT_FAILURE; + +} + + + +__attribute__ ((noinline, noclone)) + +static int + +function (void) + +{ + + char *p = memchr (string1, 'c', STRING_SIZE); + + if (p == &string1[100]) + + return 0; + + else + + return 1; + +} + + + +static int + +do_test (void) + +{ + + return do_test_1 ("memchr", LOOP, prepare, function); + +} + diff --git a/sysdeps/x86/tst-memcmp-rtm.c b/sysdeps/x86/tst-memcmp-rtm.c + new file mode 100644 + index 0000000000..e4c8a623bb + --- /dev/null + +++ b/sysdeps/x86/tst-memcmp-rtm.c + @@ -0,0 +1,52 @@ + +/* Test case for memcmp inside a transactionally executing RTM region. + + Copyright (C) 2021 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . 
*/ + + + +#include + + + +#define LOOP 3000 + +#define STRING_SIZE 1024 + +char string1[STRING_SIZE]; + +char string2[STRING_SIZE]; + + + +__attribute__ ((noinline, noclone)) + +static int + +prepare (void) + +{ + + memset (string1, 'a', STRING_SIZE); + + memset (string2, 'a', STRING_SIZE); + + if (memcmp (string1, string2, STRING_SIZE) == 0) + + return EXIT_SUCCESS; + + else + + return EXIT_FAILURE; + +} + + + +__attribute__ ((noinline, noclone)) + +static int + +function (void) + +{ + + if (memcmp (string1, string2, STRING_SIZE) == 0) + + return 0; + + else + + return 1; + +} + + + +static int + +do_test (void) + +{ + + return do_test_1 ("memcmp", LOOP, prepare, function); + +} + diff --git a/sysdeps/x86/tst-memmove-rtm.c b/sysdeps/x86/tst-memmove-rtm.c + new file mode 100644 + index 0000000000..4bf97ef1e3 + --- /dev/null + +++ b/sysdeps/x86/tst-memmove-rtm.c + @@ -0,0 +1,53 @@ + +/* Test case for memmove inside a transactionally executing RTM region. + + Copyright (C) 2021 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#include + + + +#define LOOP 3000 + +#define STRING_SIZE 1024 + +char string1[STRING_SIZE]; + +char string2[STRING_SIZE]; + + + +__attribute__ ((noinline, noclone)) + +static int + +prepare (void) + +{ + + memset (string1, 'a', STRING_SIZE); + + if (memmove (string2, string1, STRING_SIZE) == string2 + + && memcmp (string2, string1, STRING_SIZE) == 0) + + return EXIT_SUCCESS; + + else + + return EXIT_FAILURE; + +} + + + +__attribute__ ((noinline, noclone)) + +static int + +function (void) + +{ + + if (memmove (string2, string1, STRING_SIZE) == string2 + + && memcmp (string2, string1, STRING_SIZE) == 0) + + return 0; + + else + + return 1; + +} + + + +static int + +do_test (void) + +{ + + return do_test_1 ("memmove", LOOP, prepare, function); + +} + diff --git a/sysdeps/x86/tst-memrchr-rtm.c b/sysdeps/x86/tst-memrchr-rtm.c + new file mode 100644 + index 0000000000..a57a5a8eb9 + --- /dev/null + +++ b/sysdeps/x86/tst-memrchr-rtm.c + @@ -0,0 +1,54 @@ + +/* Test case for memrchr inside a transactionally executing RTM region. + + Copyright (C) 2021 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . 
*/ + + + +#include + + + +#define LOOP 3000 + +#define STRING_SIZE 1024 + +char string1[STRING_SIZE]; + + + +__attribute__ ((noinline, noclone)) + +static int + +prepare (void) + +{ + + memset (string1, 'a', STRING_SIZE); + + string1[100] = 'c'; + + string1[STRING_SIZE - 100] = 'c'; + + char *p = memrchr (string1, 'c', STRING_SIZE); + + if (p == &string1[STRING_SIZE - 100]) + + return EXIT_SUCCESS; + + else + + return EXIT_FAILURE; + +} + + + +__attribute__ ((noinline, noclone)) + +static int + +function (void) + +{ + + char *p = memrchr (string1, 'c', STRING_SIZE); + + if (p == &string1[STRING_SIZE - 100]) + + return 0; + + else + + return 1; + +} + + + +static int + +do_test (void) + +{ + + return do_test_1 ("memrchr", LOOP, prepare, function); + +} + diff --git a/sysdeps/x86/tst-memset-rtm.c b/sysdeps/x86/tst-memset-rtm.c + new file mode 100644 + index 0000000000..bf343a4dad + --- /dev/null + +++ b/sysdeps/x86/tst-memset-rtm.c + @@ -0,0 +1,45 @@ + +/* Test case for memset inside a transactionally executing RTM region. + + Copyright (C) 2021 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#include + + + +#define LOOP 3000 + +#define STRING_SIZE 1024 + +char string1[STRING_SIZE]; + + + +__attribute__ ((noinline, noclone)) + +static int + +prepare (void) + +{ + + memset (string1, 'a', STRING_SIZE); + + return EXIT_SUCCESS; + +} + + + +__attribute__ ((noinline, noclone)) + +static int + +function (void) + +{ + + memset (string1, 'a', STRING_SIZE); + + return 0; + +} + + + +static int + +do_test (void) + +{ + + return do_test_1 ("memset", LOOP, prepare, function); + +} + diff --git a/sysdeps/x86/tst-setjmp-cet.c b/sysdeps/x86/tst-setjmp-cet.c + new file mode 100644 + index 0000000000..42c795d2a8 + --- /dev/null + +++ b/sysdeps/x86/tst-setjmp-cet.c + @@ -0,0 +1 @@ + +#include + diff --git a/sysdeps/x86/tst-strchr-rtm.c b/sysdeps/x86/tst-strchr-rtm.c + new file mode 100644 + index 0000000000..a82e29c072 + --- /dev/null + +++ b/sysdeps/x86/tst-strchr-rtm.c + @@ -0,0 +1,54 @@ + +/* Test case for strchr inside a transactionally executing RTM region. + + Copyright (C) 2021 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. 
+ + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#include + + + +#define LOOP 3000 + +#define STRING_SIZE 1024 + +char string1[STRING_SIZE]; + + + +__attribute__ ((noinline, noclone)) + +static int + +prepare (void) + +{ + + memset (string1, 'a', STRING_SIZE - 1); + + string1[100] = 'c'; + + string1[STRING_SIZE - 100] = 'c'; + + char *p = strchr (string1, 'c'); + + if (p == &string1[100]) + + return EXIT_SUCCESS; + + else + + return EXIT_FAILURE; + +} + + + +__attribute__ ((noinline, noclone)) + +static int + +function (void) + +{ + + char *p = strchr (string1, 'c'); + + if (p == &string1[100]) + + return 0; + + else + + return 1; + +} + + + +static int + +do_test (void) + +{ + + return do_test_1 ("strchr", LOOP, prepare, function); + +} + diff --git a/sysdeps/x86/tst-strcpy-rtm.c b/sysdeps/x86/tst-strcpy-rtm.c + new file mode 100644 + index 0000000000..2b2a583fb4 + --- /dev/null + +++ b/sysdeps/x86/tst-strcpy-rtm.c + @@ -0,0 +1,53 @@ + +/* Test case for strcpy inside a transactionally executing RTM region. + + Copyright (C) 2021 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#include + + + +#define LOOP 3000 + +#define STRING_SIZE 1024 + +char string1[STRING_SIZE]; + +char string2[STRING_SIZE]; + + + +__attribute__ ((noinline, noclone)) + +static int + +prepare (void) + +{ + + memset (string1, 'a', STRING_SIZE - 1); + + if (strcpy (string2, string1) == string2 + + && strcmp (string2, string1) == 0) + + return EXIT_SUCCESS; + + else + + return EXIT_FAILURE; + +} + + + +__attribute__ ((noinline, noclone)) + +static int + +function (void) + +{ + + if (strcpy (string2, string1) == string2 + + && strcmp (string2, string1) == 0) + + return 0; + + else + + return 1; + +} + + + +static int + +do_test (void) + +{ + + return do_test_1 ("strcpy", LOOP, prepare, function); + +} + diff --git a/sysdeps/x86/tst-string-rtm.h b/sysdeps/x86/tst-string-rtm.h + new file mode 100644 + index 0000000000..6ed9eca017 + --- /dev/null + +++ b/sysdeps/x86/tst-string-rtm.h + @@ -0,0 +1,72 @@ + +/* Test string function in a transactionally executing RTM region. + + Copyright (C) 2021 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. 
+ + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#include + +#include + +#include + +#include + +#include + + + +static int + +do_test_1 (const char *name, unsigned int loop, int (*prepare) (void), + + int (*function) (void)) + +{ + + if (!CPU_FEATURE_USABLE (RTM)) + + return EXIT_UNSUPPORTED; + + + + int status = prepare (); + + if (status != EXIT_SUCCESS) + + return status; + + + + unsigned int i; + + unsigned int naborts = 0; + + unsigned int failed = 0; + + for (i = 0; i < loop; i++) + + { + + failed |= function (); + + if (_xbegin() == _XBEGIN_STARTED) + + { + + failed |= function (); + + _xend(); + + } + + else + + { + + failed |= function (); + + ++naborts; + + } + + } + + + + if (failed) + + FAIL_EXIT1 ("%s() failed", name); + + + + if (naborts) + + { + + /* NB: Low single digit (<= 5%) noise-level aborts are normal for + + TSX. */ + + double rate = 100 * ((double) naborts) / ((double) loop); + + if (rate > 5) + + FAIL_EXIT1 ("TSX abort rate: %.2f%% (%d out of %d)", + + rate, naborts, loop); + + } + + + + return EXIT_SUCCESS; + +} + + + +static int do_test (void); + + + +#include + diff --git a/sysdeps/x86/tst-strlen-rtm.c b/sysdeps/x86/tst-strlen-rtm.c + new file mode 100644 + index 0000000000..0dcf14db87 + --- /dev/null + +++ b/sysdeps/x86/tst-strlen-rtm.c + @@ -0,0 +1,53 @@ + +/* Test case for strlen inside a transactionally executing RTM region. + + Copyright (C) 2021 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#include + + + +#define LOOP 3000 + +#define STRING_SIZE 1024 + +char string1[STRING_SIZE]; + + + +__attribute__ ((noinline, noclone)) + +static int + +prepare (void) + +{ + + memset (string1, 'a', STRING_SIZE - 1); + + string1[STRING_SIZE - 100] = '\0'; + + size_t len = strlen (string1); + + if (len == STRING_SIZE - 100) + + return EXIT_SUCCESS; + + else + + return EXIT_FAILURE; + +} + + + +__attribute__ ((noinline, noclone)) + +static int + +function (void) + +{ + + size_t len = strlen (string1); + + if (len == STRING_SIZE - 100) + + return 0; + + else + + return 1; + +} + + + +static int + +do_test (void) + +{ + + return do_test_1 ("strlen", LOOP, prepare, function); + +} + diff --git a/sysdeps/x86/tst-strncmp-rtm.c b/sysdeps/x86/tst-strncmp-rtm.c + new file mode 100644 + index 0000000000..aef9866cf2 + --- /dev/null + +++ b/sysdeps/x86/tst-strncmp-rtm.c + @@ -0,0 +1,81 @@ + +/* Test case for strncmp inside a transactionally executing RTM region. + + Copyright (C) 2021 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+ + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#include + +#include + + + +#ifdef WIDE + +# define CHAR wchar_t + +# define MEMSET wmemset + +# define STRNCMP wcsncmp + +# define TEST_NAME "wcsncmp" + +#else /* !WIDE */ + +# define CHAR char + +# define MEMSET memset + +# define STRNCMP strncmp + +# define TEST_NAME "strncmp" + +#endif /* !WIDE */ + + + + + + + +#define LOOP 3000 + +#define STRING_SIZE 1024 + +CHAR string1[STRING_SIZE]; + +CHAR string2[STRING_SIZE]; + + + +__attribute__ ((noinline, noclone)) + +static int + +prepare (void) + +{ + + MEMSET (string1, 'a', STRING_SIZE - 1); + + MEMSET (string2, 'a', STRING_SIZE - 1); + + if (STRNCMP (string1, string2, STRING_SIZE) == 0) + + return EXIT_SUCCESS; + + else + + return EXIT_FAILURE; + +} + + + +__attribute__ ((noinline, noclone)) + +static int + +function (void) + +{ + + if (STRNCMP (string1, string2, STRING_SIZE) == 0) + + return 0; + + else + + return 1; + +} + + + +__attribute__ ((noinline, noclone)) + +static int + +function_overflow (void) + +{ + + if (STRNCMP (string1, string2, SIZE_MAX) == 0) + + return 0; + + else + + return 1; + +} + + + +static int + +do_test (void) + +{ + + int status = do_test_1 (TEST_NAME, LOOP, prepare, function); + + if (status != EXIT_SUCCESS) + + return status; + + status = do_test_1 (TEST_NAME, LOOP, prepare, function_overflow); + + return status; + +} + diff --git a/sysdeps/x86/tst-strrchr-rtm.c b/sysdeps/x86/tst-strrchr-rtm.c + new file mode 100644 + index 0000000000..e32bfaf5f5 + --- /dev/null + +++ b/sysdeps/x86/tst-strrchr-rtm.c + @@ -0,0 +1,53 @@ + +/* Test case for strrchr inside a transactionally executing RTM region. + + Copyright (C) 2021 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . 
*/ + + + +#include + + + +#define LOOP 3000 + +#define STRING_SIZE 1024 + +char string1[STRING_SIZE]; + + + +__attribute__ ((noinline, noclone)) + +static int + +prepare (void) + +{ + + memset (string1, 'a', STRING_SIZE - 1); + + string1[STRING_SIZE - 100] = 'c'; + + char *p = strrchr (string1, 'c'); + + if (p == &string1[STRING_SIZE - 100]) + + return EXIT_SUCCESS; + + else + + return EXIT_FAILURE; + +} + + + +__attribute__ ((noinline, noclone)) + +static int + +function (void) + +{ + + char *p = strrchr (string1, 'c'); + + if (p == &string1[STRING_SIZE - 100]) + + return 0; + + else + + return 1; + +} + + + +static int + +do_test (void) + +{ + + return do_test_1 ("strrchr", LOOP, prepare, function); + +} + diff --git a/sysdeps/x86/tst-wcsncmp-rtm.c b/sysdeps/x86/tst-wcsncmp-rtm.c + new file mode 100644 + index 0000000000..bad3b86378 + --- /dev/null + +++ b/sysdeps/x86/tst-wcsncmp-rtm.c + @@ -0,0 +1,21 @@ + +/* Test case for wcsncmp inside a transactionally executing RTM region. + + Copyright (C) 2022 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#define WIDE 1 + +#include + +#include "tst-strncmp-rtm.c" + diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile + index d51cf03ac9..b1951adce9 100644 + --- a/sysdeps/x86_64/Makefile + +++ b/sysdeps/x86_64/Makefile + @@ -20,6 +20,8 @@ endif + ifeq ($(subdir),string) + sysdep_routines += cacheinfo strcasecmp_l-nonascii strncase_l-nonascii + gen-as-const-headers += locale-defines.sym + +tests += \ + + tst-rsi-strlen + endif + + ifeq ($(subdir),elf) + @@ -150,6 +152,11 @@ ifeq ($(subdir),csu) + gen-as-const-headers += tlsdesc.sym rtld-offsets.sym + endif + + +ifeq ($(subdir),wcsmbs) + +tests += \ + + tst-rsi-wcslen + +endif + + + $(objpfx)x86_64/tst-x86_64mod-1.os: $(objpfx)tst-x86_64mod-1.os + $(make-target-directory) + rm -f $@ + diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure -old mode 100644 -new mode 100755 + index 84f82c2406..fc1840e23f + --- a/sysdeps/x86_64/configure + +++ b/sysdeps/x86_64/configure + @@ -107,39 +107,6 @@ if test x"$build_mathvec" = xnotset; then + build_mathvec=yes + fi + + -if test "$static_pie" = yes; then + - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for linker static PIE support" >&5 + -$as_echo_n "checking for linker static PIE support... " >&6; } + -if ${libc_cv_ld_static_pie+:} false; then : + - $as_echo_n "(cached) " >&6 + -else + - cat > conftest.s <<\EOF + - .text + - .global _start + - .weak foo + -_start: + - leaq foo(%rip), %rax + -EOF + - libc_cv_pie_option="-Wl,-pie" + - if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&5' + - { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + - (eval $ac_try) 2>&5 + - ac_status=$? + - $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + - test $ac_status = 0; }; }; then + - libc_cv_ld_static_pie=yes + - else + - libc_cv_ld_static_pie=no + - fi + -rm -f conftest* + -fi + -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ld_static_pie" >&5 + -$as_echo "$libc_cv_ld_static_pie" >&6; } + - if test "$libc_cv_ld_static_pie" != yes; then + - as_fn_error $? "linker support for static PIE needed" "$LINENO" 5 + - fi + -fi + - + $as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h + + + diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac + index cdaba0c075..611a7d9ba3 100644 + --- a/sysdeps/x86_64/configure.ac + +++ b/sysdeps/x86_64/configure.ac + @@ -53,31 +53,6 @@ if test x"$build_mathvec" = xnotset; then + build_mathvec=yes + fi + + -dnl Check if linker supports static PIE with the fix for + -dnl + -dnl https://sourceware.org/bugzilla/show_bug.cgi?id=21782 + -dnl + -if test "$static_pie" = yes; then + - AC_CACHE_CHECK(for linker static PIE support, libc_cv_ld_static_pie, [dnl + -cat > conftest.s <<\EOF + - .text + - .global _start + - .weak foo + -_start: + - leaq foo(%rip), %rax + -EOF + - libc_cv_pie_option="-Wl,-pie" + - if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&AS_MESSAGE_LOG_FD); then + - libc_cv_ld_static_pie=yes + - else + - libc_cv_ld_static_pie=no + - fi + -rm -f conftest*]) + - if test "$libc_cv_ld_static_pie" != yes; then + - AC_MSG_ERROR([linker support for static PIE needed]) + - fi + -fi + - + dnl It is always possible to access static and hidden symbols in an + dnl position independent way. + AC_DEFINE(PI_STATIC_AND_HIDDEN) + diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h + index 8e9baffeb4..74029871d8 100644 + --- a/sysdeps/x86_64/dl-machine.h + +++ b/sysdeps/x86_64/dl-machine.h + @@ -315,16 +315,22 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, + { + # ifndef RTLD_BOOTSTRAP + if (sym_map != map + - && sym_map->l_type != lt_executable + && !sym_map->l_relocated) + { + const char *strtab + = (const char *) D_PTR (map, l_info[DT_STRTAB]); + - _dl_error_printf ("\ + + if (sym_map->l_type == lt_executable) + + _dl_fatal_printf ("\ + +%s: IFUNC symbol '%s' referenced in '%s' is defined in the executable \ + +and creates an unsatisfiable circular dependency.\n", + + RTLD_PROGNAME, strtab + refsym->st_name, + + map->l_name); + + else + + _dl_error_printf ("\ + %s: Relink `%s' with `%s' for IFUNC symbol `%s'\n", + - RTLD_PROGNAME, map->l_name, + - sym_map->l_name, + - strtab + refsym->st_name); + + RTLD_PROGNAME, map->l_name, + + sym_map->l_name, + + strtab + refsym->st_name); + } + # endif + value = ((ElfW(Addr) (*) (void)) value) (); + diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S + index a5c879d2af..070e5ef90b 100644 + --- a/sysdeps/x86_64/memchr.S + +++ b/sysdeps/x86_64/memchr.S + @@ -21,9 +21,11 @@ + #ifdef USE_AS_WMEMCHR + # define MEMCHR wmemchr + # define PCMPEQ pcmpeqd + +# define CHAR_PER_VEC 4 + #else + # define MEMCHR memchr + # define PCMPEQ pcmpeqb + +# define CHAR_PER_VEC 16 + #endif + + /* fast SSE2 version with using pmaxub and 64 byte loop */ + @@ -33,15 +35,14 @@ ENTRY(MEMCHR) + movd %esi, %xmm1 + mov %edi, %ecx + + +#ifdef __ILP32__ + + /* Clear the upper 32 bits. */ + + movl %edx, %edx + +#endif + #ifdef USE_AS_WMEMCHR + test %RDX_LP, %RDX_LP + jz L(return_null) + - shl $2, %RDX_LP + #else + -# ifdef __ILP32__ + - /* Clear the upper 32 bits. 
*/ + - movl %edx, %edx + -# endif + punpcklbw %xmm1, %xmm1 + test %RDX_LP, %RDX_LP + jz L(return_null) + @@ -60,13 +61,16 @@ ENTRY(MEMCHR) + test %eax, %eax + + jnz L(matches_1) + - sub $16, %rdx + + sub $CHAR_PER_VEC, %rdx + jbe L(return_null) + add $16, %rdi + and $15, %ecx + and $-16, %rdi + +#ifdef USE_AS_WMEMCHR + + shr $2, %ecx + +#endif + add %rcx, %rdx + - sub $64, %rdx + + sub $(CHAR_PER_VEC * 4), %rdx + jbe L(exit_loop) + jmp L(loop_prolog) + + @@ -77,16 +81,21 @@ L(crosscache): + movdqa (%rdi), %xmm0 + + PCMPEQ %xmm1, %xmm0 + -/* Check if there is a match. */ + + /* Check if there is a match. */ + pmovmskb %xmm0, %eax + -/* Remove the leading bytes. */ + + /* Remove the leading bytes. */ + sar %cl, %eax + test %eax, %eax + je L(unaligned_no_match) + -/* Check which byte is a match. */ + + /* Check which byte is a match. */ + bsf %eax, %eax + - + +#ifdef USE_AS_WMEMCHR + + mov %eax, %esi + + shr $2, %esi + + sub %rsi, %rdx + +#else + sub %rax, %rdx + +#endif + jbe L(return_null) + add %rdi, %rax + add %rcx, %rax + @@ -94,15 +103,18 @@ L(crosscache): + + .p2align 4 + L(unaligned_no_match): + - /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using + + /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using + "rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to void + possible addition overflow. */ + neg %rcx + add $16, %rcx + +#ifdef USE_AS_WMEMCHR + + shr $2, %ecx + +#endif + sub %rcx, %rdx + jbe L(return_null) + add $16, %rdi + - sub $64, %rdx + + sub $(CHAR_PER_VEC * 4), %rdx + jbe L(exit_loop) + + .p2align 4 + @@ -135,7 +147,7 @@ L(loop_prolog): + test $0x3f, %rdi + jz L(align64_loop) + + - sub $64, %rdx + + sub $(CHAR_PER_VEC * 4), %rdx + jbe L(exit_loop) + + movdqa (%rdi), %xmm0 + @@ -167,11 +179,14 @@ L(loop_prolog): + mov %rdi, %rcx + and $-64, %rdi + and $63, %ecx + +#ifdef USE_AS_WMEMCHR + + shr $2, %ecx + +#endif + add %rcx, %rdx + + .p2align 4 + L(align64_loop): + - sub $64, %rdx + + sub $(CHAR_PER_VEC * 4), %rdx + jbe L(exit_loop) + movdqa (%rdi), %xmm0 + movdqa 16(%rdi), %xmm2 + @@ -218,7 +233,7 @@ L(align64_loop): + + .p2align 4 + L(exit_loop): + - add $32, %edx + + add $(CHAR_PER_VEC * 2), %edx + jle L(exit_loop_32) + + movdqa (%rdi), %xmm0 + @@ -238,7 +253,7 @@ L(exit_loop): + pmovmskb %xmm3, %eax + test %eax, %eax + jnz L(matches32_1) + - sub $16, %edx + + sub $CHAR_PER_VEC, %edx + jle L(return_null) + + PCMPEQ 48(%rdi), %xmm1 + @@ -250,13 +265,13 @@ L(exit_loop): + + .p2align 4 + L(exit_loop_32): + - add $32, %edx + + add $(CHAR_PER_VEC * 2), %edx + movdqa (%rdi), %xmm0 + PCMPEQ %xmm1, %xmm0 + pmovmskb %xmm0, %eax + test %eax, %eax + jnz L(matches_1) + - sub $16, %edx + + sub $CHAR_PER_VEC, %edx + jbe L(return_null) + + PCMPEQ 16(%rdi), %xmm1 + @@ -293,7 +308,13 @@ L(matches32): + .p2align 4 + L(matches_1): + bsf %eax, %eax + +#ifdef USE_AS_WMEMCHR + + mov %eax, %esi + + shr $2, %esi + + sub %rsi, %rdx + +#else + sub %rax, %rdx + +#endif + jbe L(return_null) + add %rdi, %rax + ret + @@ -301,7 +322,13 @@ L(matches_1): + .p2align 4 + L(matches16_1): + bsf %eax, %eax + +#ifdef USE_AS_WMEMCHR + + mov %eax, %esi + + shr $2, %esi + + sub %rsi, %rdx + +#else + sub %rax, %rdx + +#endif + jbe L(return_null) + lea 16(%rdi, %rax), %rax + ret + @@ -309,7 +336,13 @@ L(matches16_1): + .p2align 4 + L(matches32_1): + bsf %eax, %eax + +#ifdef USE_AS_WMEMCHR + + mov %eax, %esi + + shr $2, %esi + + sub %rsi, %rdx + +#else + sub %rax, %rdx + +#endif + jbe L(return_null) + lea 32(%rdi, %rax), %rax + ret + @@ -317,7 +350,13 @@ L(matches32_1): + .p2align 4 + 
L(matches48_1): + bsf %eax, %eax + +#ifdef USE_AS_WMEMCHR + + mov %eax, %esi + + shr $2, %esi + + sub %rsi, %rdx + +#else + sub %rax, %rdx + +#endif + jbe L(return_null) + lea 48(%rdi, %rax), %rax + ret + diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile + index 395e432c09..da1446d731 100644 + --- a/sysdeps/x86_64/multiarch/Makefile + +++ b/sysdeps/x86_64/multiarch/Makefile + @@ -43,7 +43,45 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c \ + memmove-avx512-unaligned-erms \ + memset-sse2-unaligned-erms \ + memset-avx2-unaligned-erms \ + - memset-avx512-unaligned-erms + + memset-avx512-unaligned-erms \ + + memchr-avx2-rtm \ + + memcmp-avx2-movbe-rtm \ + + memmove-avx-unaligned-erms-rtm \ + + memrchr-avx2-rtm \ + + memset-avx2-unaligned-erms-rtm \ + + rawmemchr-avx2-rtm \ + + strchr-avx2-rtm \ + + strcmp-avx2-rtm \ + + strchrnul-avx2-rtm \ + + stpcpy-avx2-rtm \ + + stpncpy-avx2-rtm \ + + strcat-avx2-rtm \ + + strcpy-avx2-rtm \ + + strlen-avx2-rtm \ + + strncat-avx2-rtm \ + + strncmp-avx2-rtm \ + + strncpy-avx2-rtm \ + + strnlen-avx2-rtm \ + + strrchr-avx2-rtm \ + + memchr-evex \ + + memcmp-evex-movbe \ + + memmove-evex-unaligned-erms \ + + memrchr-evex \ + + memset-evex-unaligned-erms \ + + rawmemchr-evex \ + + stpcpy-evex \ + + stpncpy-evex \ + + strcat-evex \ + + strchr-evex \ + + strchrnul-evex \ + + strcmp-evex \ + + strcpy-evex \ + + strlen-evex \ + + strncat-evex \ + + strncmp-evex \ + + strncpy-evex \ + + strnlen-evex \ + + strrchr-evex + CFLAGS-varshift.c += -msse4 + CFLAGS-strcspn-c.c += -msse4 + CFLAGS-strpbrk-c.c += -msse4 + @@ -59,8 +97,24 @@ sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \ + wcscpy-ssse3 wcscpy-c \ + wcschr-sse2 wcschr-avx2 \ + wcsrchr-sse2 wcsrchr-avx2 \ + - wcsnlen-sse4_1 wcsnlen-c \ + - wcslen-sse2 wcslen-avx2 wcsnlen-avx2 + + wcslen-sse2 wcslen-sse4_1 wcslen-avx2 \ + + wcsnlen-c wcsnlen-sse4_1 wcsnlen-avx2 \ + + wcschr-avx2-rtm \ + + wcscmp-avx2-rtm \ + + wcslen-avx2-rtm \ + + wcsncmp-avx2-rtm \ + + wcsnlen-avx2-rtm \ + + wcsrchr-avx2-rtm \ + + wmemchr-avx2-rtm \ + + wmemcmp-avx2-movbe-rtm \ + + wcschr-evex \ + + wcscmp-evex \ + + wcslen-evex \ + + wcsncmp-evex \ + + wcsnlen-evex \ + + wcsrchr-evex \ + + wmemchr-evex \ + + wmemcmp-evex-movbe + endif + + ifeq ($(subdir),debug) + diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h + index 69f30398ae..925e5b61eb 100644 + --- a/sysdeps/x86_64/multiarch/ifunc-avx2.h + +++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h + @@ -21,16 +21,28 @@ + + extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + + static inline void * + IFUNC_SELECTOR (void) + { + const struct cpu_features* cpu_features = __get_cpu_features (); + + - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) + - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + + && CPU_FEATURES_CPU_P (cpu_features, BMI2) + && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + - return OPTIMIZE (avx2); + + { + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + + return OPTIMIZE (evex); + + + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + + return OPTIMIZE (avx2_rtm); + + + + if (!CPU_FEATURES_ARCH_P (cpu_features, 
Prefer_No_VZEROUPPER)) + + return OPTIMIZE (avx2); + + } + + return OPTIMIZE (sse2); + } + diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c + index ce7eb1eecf..e712b148f5 100644 + --- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c + +++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c + @@ -41,8 +41,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + /* Support sysdeps/x86_64/multiarch/memchr.c. */ + IFUNC_IMPL (i, name, memchr, + IFUNC_IMPL_ADD (array, i, memchr, + - HAS_ARCH_FEATURE (AVX2_Usable), + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2)), + __memchr_avx2) + + IFUNC_IMPL_ADD (array, i, memchr, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2) + + && HAS_CPU_FEATURE (RTM)), + + __memchr_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, memchr, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable) + + && HAS_CPU_FEATURE (BMI2)), + + __memchr_evex) + IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_sse2)) + + /* Support sysdeps/x86_64/multiarch/memcmp.c. */ + @@ -51,6 +62,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (MOVBE)), + __memcmp_avx2_movbe) + + IFUNC_IMPL_ADD (array, i, memcmp, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (MOVBE) + + && HAS_CPU_FEATURE (RTM)), + + __memcmp_avx2_movbe_rtm) + + IFUNC_IMPL_ADD (array, i, memcmp, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable) + + && HAS_CPU_FEATURE (MOVBE)), + + __memcmp_evex_movbe) + IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSE4_1), + __memcmp_sse4_1) + IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSSE3), + @@ -64,10 +85,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memmove_chk_avx512_no_vzeroupper) + IFUNC_IMPL_ADD (array, i, __memmove_chk, + - HAS_ARCH_FEATURE (AVX512F_Usable), + + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memmove_chk_avx512_unaligned) + IFUNC_IMPL_ADD (array, i, __memmove_chk, + - HAS_ARCH_FEATURE (AVX512F_Usable), + + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memmove_chk_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memmove_chk, + HAS_ARCH_FEATURE (AVX_Usable), + @@ -75,6 +96,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, __memmove_chk, + HAS_ARCH_FEATURE (AVX_Usable), + __memmove_chk_avx_unaligned_erms) + + IFUNC_IMPL_ADD (array, i, __memmove_chk, + + (HAS_ARCH_FEATURE (AVX_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __memmove_chk_avx_unaligned_rtm) + + IFUNC_IMPL_ADD (array, i, __memmove_chk, + + (HAS_ARCH_FEATURE (AVX_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __memmove_chk_avx_unaligned_erms_rtm) + + IFUNC_IMPL_ADD (array, i, __memmove_chk, + + HAS_ARCH_FEATURE (AVX512VL_Usable), + + __memmove_chk_evex_unaligned) + + IFUNC_IMPL_ADD (array, i, __memmove_chk, + + HAS_ARCH_FEATURE (AVX512VL_Usable), + + __memmove_chk_evex_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memmove_chk, + HAS_CPU_FEATURE (SSSE3), + __memmove_chk_ssse3_back) + @@ -97,14 +132,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, memmove, + HAS_ARCH_FEATURE (AVX_Usable), + __memmove_avx_unaligned_erms) + + IFUNC_IMPL_ADD (array, i, memmove, + + (HAS_ARCH_FEATURE (AVX_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __memmove_avx_unaligned_rtm) + + IFUNC_IMPL_ADD (array, i, memmove, 
+ + (HAS_ARCH_FEATURE (AVX_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __memmove_avx_unaligned_erms_rtm) + + IFUNC_IMPL_ADD (array, i, memmove, + + HAS_ARCH_FEATURE (AVX512VL_Usable), + + __memmove_evex_unaligned) + + IFUNC_IMPL_ADD (array, i, memmove, + + HAS_ARCH_FEATURE (AVX512VL_Usable), + + __memmove_evex_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memmove, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memmove_avx512_no_vzeroupper) + IFUNC_IMPL_ADD (array, i, memmove, + - HAS_ARCH_FEATURE (AVX512F_Usable), + + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memmove_avx512_unaligned) + IFUNC_IMPL_ADD (array, i, memmove, + - HAS_ARCH_FEATURE (AVX512F_Usable), + + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memmove_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3), + __memmove_ssse3_back) + @@ -119,8 +168,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + /* Support sysdeps/x86_64/multiarch/memrchr.c. */ + IFUNC_IMPL (i, name, memrchr, + IFUNC_IMPL_ADD (array, i, memrchr, + - HAS_ARCH_FEATURE (AVX2_Usable), + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2)), + __memrchr_avx2) + + IFUNC_IMPL_ADD (array, i, memrchr, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2) + + && HAS_CPU_FEATURE (RTM)), + + __memrchr_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, memrchr, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable) + + && HAS_CPU_FEATURE (BMI2)), + + __memrchr_evex) + + + IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_sse2)) + + #ifdef SHARED + @@ -139,10 +200,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + HAS_ARCH_FEATURE (AVX2_Usable), + __memset_chk_avx2_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memset_chk, + - HAS_ARCH_FEATURE (AVX512F_Usable), + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __memset_chk_avx2_unaligned_rtm) + + IFUNC_IMPL_ADD (array, i, __memset_chk, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __memset_chk_avx2_unaligned_erms_rtm) + + IFUNC_IMPL_ADD (array, i, __memset_chk, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + + __memset_chk_evex_unaligned) + + IFUNC_IMPL_ADD (array, i, __memset_chk, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + + __memset_chk_evex_unaligned_erms) + + IFUNC_IMPL_ADD (array, i, __memset_chk, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __memset_chk_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memset_chk, + - HAS_ARCH_FEATURE (AVX512F_Usable), + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __memset_chk_avx512_unaligned) + IFUNC_IMPL_ADD (array, i, __memset_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + @@ -164,10 +243,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + HAS_ARCH_FEATURE (AVX2_Usable), + __memset_avx2_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memset, + - HAS_ARCH_FEATURE (AVX512F_Usable), + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __memset_avx2_unaligned_rtm) + + IFUNC_IMPL_ADD (array, i, memset, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __memset_avx2_unaligned_erms_rtm) + + IFUNC_IMPL_ADD (array, i, memset, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + + __memset_evex_unaligned) + + IFUNC_IMPL_ADD (array, i, memset, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) 
+ + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + + __memset_evex_unaligned_erms) + + IFUNC_IMPL_ADD (array, i, memset, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __memset_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memset, + - HAS_ARCH_FEATURE (AVX512F_Usable), + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + __memset_avx512_unaligned) + IFUNC_IMPL_ADD (array, i, memset, + HAS_ARCH_FEATURE (AVX512F_Usable), + @@ -177,22 +274,55 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + /* Support sysdeps/x86_64/multiarch/rawmemchr.c. */ + IFUNC_IMPL (i, name, rawmemchr, + IFUNC_IMPL_ADD (array, i, rawmemchr, + - HAS_ARCH_FEATURE (AVX2_Usable), + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2)), + __rawmemchr_avx2) + + IFUNC_IMPL_ADD (array, i, rawmemchr, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2) + + && HAS_CPU_FEATURE (RTM)), + + __rawmemchr_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, rawmemchr, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable) + + && HAS_CPU_FEATURE (BMI2)), + + __rawmemchr_evex) + IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_sse2)) + + /* Support sysdeps/x86_64/multiarch/strlen.c. */ + IFUNC_IMPL (i, name, strlen, + IFUNC_IMPL_ADD (array, i, strlen, + - HAS_ARCH_FEATURE (AVX2_Usable), + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2)), + __strlen_avx2) + + IFUNC_IMPL_ADD (array, i, strlen, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2) + + && HAS_CPU_FEATURE (RTM)), + + __strlen_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, strlen, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable) + + && HAS_CPU_FEATURE (BMI2)), + + __strlen_evex) + IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2)) + + /* Support sysdeps/x86_64/multiarch/strnlen.c. */ + IFUNC_IMPL (i, name, strnlen, + IFUNC_IMPL_ADD (array, i, strnlen, + - HAS_ARCH_FEATURE (AVX2_Usable), + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2)), + __strnlen_avx2) + + IFUNC_IMPL_ADD (array, i, strnlen, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2) + + && HAS_CPU_FEATURE (RTM)), + + __strnlen_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, strnlen, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable) + + && HAS_CPU_FEATURE (BMI2)), + + __strnlen_evex) + IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2)) + + /* Support sysdeps/x86_64/multiarch/stpncpy.c. 
*/ + @@ -201,6 +331,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + __stpncpy_ssse3) + IFUNC_IMPL_ADD (array, i, stpncpy, HAS_ARCH_FEATURE (AVX2_Usable), + __stpncpy_avx2) + + IFUNC_IMPL_ADD (array, i, stpncpy, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __stpncpy_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, stpncpy, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + + __stpncpy_evex) + IFUNC_IMPL_ADD (array, i, stpncpy, 1, + __stpncpy_sse2_unaligned) + IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2)) + @@ -211,6 +349,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + __stpcpy_ssse3) + IFUNC_IMPL_ADD (array, i, stpcpy, HAS_ARCH_FEATURE (AVX2_Usable), + __stpcpy_avx2) + + IFUNC_IMPL_ADD (array, i, stpcpy, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __stpcpy_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, stpcpy, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + + __stpcpy_evex) + IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2_unaligned) + IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2)) + + @@ -245,6 +391,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL (i, name, strcat, + IFUNC_IMPL_ADD (array, i, strcat, HAS_ARCH_FEATURE (AVX2_Usable), + __strcat_avx2) + + IFUNC_IMPL_ADD (array, i, strcat, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __strcat_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, strcat, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + + __strcat_evex) + IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSSE3), + __strcat_ssse3) + IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2_unaligned) + @@ -253,23 +407,56 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + /* Support sysdeps/x86_64/multiarch/strchr.c. */ + IFUNC_IMPL (i, name, strchr, + IFUNC_IMPL_ADD (array, i, strchr, + - HAS_ARCH_FEATURE (AVX2_Usable), + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2)), + __strchr_avx2) + + IFUNC_IMPL_ADD (array, i, strchr, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2) + + && HAS_CPU_FEATURE (RTM)), + + __strchr_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, strchr, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable) + + && HAS_CPU_FEATURE (BMI2)), + + __strchr_evex) + IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf) + IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2)) + + /* Support sysdeps/x86_64/multiarch/strchrnul.c. */ + IFUNC_IMPL (i, name, strchrnul, + IFUNC_IMPL_ADD (array, i, strchrnul, + - HAS_ARCH_FEATURE (AVX2_Usable), + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2)), + __strchrnul_avx2) + + IFUNC_IMPL_ADD (array, i, strchrnul, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2) + + && HAS_CPU_FEATURE (RTM)), + + __strchrnul_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, strchrnul, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable) + + && HAS_CPU_FEATURE (BMI2)), + + __strchrnul_evex) + IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2)) + + /* Support sysdeps/x86_64/multiarch/strrchr.c. 
*/ + IFUNC_IMPL (i, name, strrchr, + IFUNC_IMPL_ADD (array, i, strrchr, + - HAS_ARCH_FEATURE (AVX2_Usable), + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2)), + __strrchr_avx2) + + IFUNC_IMPL_ADD (array, i, strrchr, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2) + + && HAS_CPU_FEATURE (RTM)), + + __strrchr_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, strrchr, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable) + + && HAS_CPU_FEATURE (BMI2)), + + __strrchr_evex) + IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2)) + + /* Support sysdeps/x86_64/multiarch/strcmp.c. */ + @@ -277,6 +464,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strcmp, + HAS_ARCH_FEATURE (AVX2_Usable), + __strcmp_avx2) + + IFUNC_IMPL_ADD (array, i, strcmp, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __strcmp_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, strcmp, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable) + + && HAS_CPU_FEATURE (BMI2)), + + __strcmp_evex) + IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2), + __strcmp_sse42) + IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSSE3), + @@ -288,6 +484,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL (i, name, strcpy, + IFUNC_IMPL_ADD (array, i, strcpy, HAS_ARCH_FEATURE (AVX2_Usable), + __strcpy_avx2) + + IFUNC_IMPL_ADD (array, i, strcpy, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __strcpy_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, strcpy, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + + __strcpy_evex) + IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSSE3), + __strcpy_ssse3) + IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_sse2_unaligned) + @@ -331,6 +535,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL (i, name, strncat, + IFUNC_IMPL_ADD (array, i, strncat, HAS_ARCH_FEATURE (AVX2_Usable), + __strncat_avx2) + + IFUNC_IMPL_ADD (array, i, strncat, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __strncat_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, strncat, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + + __strncat_evex) + IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSSE3), + __strncat_ssse3) + IFUNC_IMPL_ADD (array, i, strncat, 1, + @@ -341,6 +553,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL (i, name, strncpy, + IFUNC_IMPL_ADD (array, i, strncpy, HAS_ARCH_FEATURE (AVX2_Usable), + __strncpy_avx2) + + IFUNC_IMPL_ADD (array, i, strncpy, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __strncpy_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, strncpy, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + + __strncpy_evex) + IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSSE3), + __strncpy_ssse3) + IFUNC_IMPL_ADD (array, i, strncpy, 1, + @@ -368,29 +588,73 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + /* Support sysdeps/x86_64/multiarch/wcschr.c. 
*/ + IFUNC_IMPL (i, name, wcschr, + IFUNC_IMPL_ADD (array, i, wcschr, + - HAS_ARCH_FEATURE (AVX2_Usable), + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2)), + __wcschr_avx2) + + IFUNC_IMPL_ADD (array, i, wcschr, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2) + + && HAS_CPU_FEATURE (RTM)), + + __wcschr_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, wcschr, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable) + + && HAS_CPU_FEATURE (BMI2)), + + __wcschr_evex) + IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2)) + + /* Support sysdeps/x86_64/multiarch/wcsrchr.c. */ + IFUNC_IMPL (i, name, wcsrchr, + IFUNC_IMPL_ADD (array, i, wcsrchr, + - HAS_ARCH_FEATURE (AVX2_Usable), + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2)), + __wcsrchr_avx2) + + IFUNC_IMPL_ADD (array, i, wcsrchr, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2) + + && HAS_CPU_FEATURE (RTM)), + + __wcsrchr_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, wcsrchr, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable) + + && HAS_CPU_FEATURE (BMI2)), + + __wcsrchr_evex) + IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2)) + + /* Support sysdeps/x86_64/multiarch/wcscmp.c. */ + IFUNC_IMPL (i, name, wcscmp, + IFUNC_IMPL_ADD (array, i, wcscmp, + - HAS_ARCH_FEATURE (AVX2_Usable), + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2)), + __wcscmp_avx2) + + IFUNC_IMPL_ADD (array, i, wcscmp, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2) + + && HAS_CPU_FEATURE (RTM)), + + __wcscmp_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, wcscmp, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable) + + && HAS_CPU_FEATURE (BMI2)), + + __wcscmp_evex) + IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_sse2)) + + /* Support sysdeps/x86_64/multiarch/wcsncmp.c. */ + IFUNC_IMPL (i, name, wcsncmp, + IFUNC_IMPL_ADD (array, i, wcsncmp, + - HAS_ARCH_FEATURE (AVX2_Usable), + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2)), + __wcsncmp_avx2) + + IFUNC_IMPL_ADD (array, i, wcsncmp, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2) + + && HAS_CPU_FEATURE (RTM)), + + __wcsncmp_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, wcsncmp, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable) + + && HAS_CPU_FEATURE (BMI2)), + + __wcsncmp_evex) + IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_sse2)) + + /* Support sysdeps/x86_64/multiarch/wcscpy.c. */ + @@ -402,15 +666,40 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + /* Support sysdeps/x86_64/multiarch/wcslen.c. */ + IFUNC_IMPL (i, name, wcslen, + IFUNC_IMPL_ADD (array, i, wcslen, + - HAS_ARCH_FEATURE (AVX2_Usable), + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2)), + __wcslen_avx2) + + IFUNC_IMPL_ADD (array, i, wcslen, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2) + + && HAS_CPU_FEATURE (RTM)), + + __wcslen_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, wcslen, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable) + + && HAS_CPU_FEATURE (BMI2)), + + __wcslen_evex) + + IFUNC_IMPL_ADD (array, i, wcslen, + + CPU_FEATURE_USABLE (SSE4_1), + + __wcslen_sse4_1) + IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_sse2)) + + /* Support sysdeps/x86_64/multiarch/wcsnlen.c. 
*/ + IFUNC_IMPL (i, name, wcsnlen, + IFUNC_IMPL_ADD (array, i, wcsnlen, + - HAS_ARCH_FEATURE (AVX2_Usable), + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2)), + __wcsnlen_avx2) + + IFUNC_IMPL_ADD (array, i, wcsnlen, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2) + + && HAS_CPU_FEATURE (RTM)), + + __wcsnlen_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, wcsnlen, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable) + + && HAS_CPU_FEATURE (BMI2)), + + __wcsnlen_evex) + IFUNC_IMPL_ADD (array, i, wcsnlen, + HAS_CPU_FEATURE (SSE4_1), + __wcsnlen_sse4_1) + @@ -419,8 +708,19 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + /* Support sysdeps/x86_64/multiarch/wmemchr.c. */ + IFUNC_IMPL (i, name, wmemchr, + IFUNC_IMPL_ADD (array, i, wmemchr, + - HAS_ARCH_FEATURE (AVX2_Usable), + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2)), + __wmemchr_avx2) + + IFUNC_IMPL_ADD (array, i, wmemchr, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (BMI2) + + && HAS_CPU_FEATURE (RTM)), + + __wmemchr_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, wmemchr, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable) + + && HAS_CPU_FEATURE (BMI2)), + + __wmemchr_evex) + IFUNC_IMPL_ADD (array, i, wmemchr, 1, __wmemchr_sse2)) + + /* Support sysdeps/x86_64/multiarch/wmemcmp.c. */ + @@ -429,6 +729,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + (HAS_ARCH_FEATURE (AVX2_Usable) + && HAS_CPU_FEATURE (MOVBE)), + __wmemcmp_avx2_movbe) + + IFUNC_IMPL_ADD (array, i, wmemcmp, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (MOVBE) + + && HAS_CPU_FEATURE (RTM)), + + __wmemcmp_avx2_movbe_rtm) + + IFUNC_IMPL_ADD (array, i, wmemcmp, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable) + + && HAS_CPU_FEATURE (MOVBE)), + + __wmemcmp_evex_movbe) + IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSE4_1), + __wmemcmp_sse4_1) + IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSSE3), + @@ -443,7 +753,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + HAS_ARCH_FEATURE (AVX2_Usable), + __wmemset_avx2_unaligned) + IFUNC_IMPL_ADD (array, i, wmemset, + - HAS_ARCH_FEATURE (AVX512F_Usable), + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __wmemset_avx2_unaligned_rtm) + + IFUNC_IMPL_ADD (array, i, wmemset, + + HAS_ARCH_FEATURE (AVX512VL_Usable), + + __wmemset_evex_unaligned) + + IFUNC_IMPL_ADD (array, i, wmemset, + + HAS_ARCH_FEATURE (AVX512VL_Usable), + __wmemset_avx512_unaligned)) + + #ifdef SHARED + @@ -453,10 +770,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memcpy_chk_avx512_no_vzeroupper) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + - HAS_ARCH_FEATURE (AVX512F_Usable), + + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memcpy_chk_avx512_unaligned) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + - HAS_ARCH_FEATURE (AVX512F_Usable), + + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memcpy_chk_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + HAS_ARCH_FEATURE (AVX_Usable), + @@ -464,6 +781,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + HAS_ARCH_FEATURE (AVX_Usable), + __memcpy_chk_avx_unaligned_erms) + + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + + (HAS_ARCH_FEATURE (AVX_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __memcpy_chk_avx_unaligned_rtm) 
+ + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + + (HAS_ARCH_FEATURE (AVX_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __memcpy_chk_avx_unaligned_erms_rtm) + + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + + HAS_ARCH_FEATURE (AVX512VL_Usable), + + __memcpy_chk_evex_unaligned) + + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + + HAS_ARCH_FEATURE (AVX512VL_Usable), + + __memcpy_chk_evex_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, + HAS_CPU_FEATURE (SSSE3), + __memcpy_chk_ssse3_back) + @@ -486,6 +817,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, memcpy, + HAS_ARCH_FEATURE (AVX_Usable), + __memcpy_avx_unaligned_erms) + + IFUNC_IMPL_ADD (array, i, memcpy, + + (HAS_ARCH_FEATURE (AVX_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __memcpy_avx_unaligned_rtm) + + IFUNC_IMPL_ADD (array, i, memcpy, + + (HAS_ARCH_FEATURE (AVX_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __memcpy_avx_unaligned_erms_rtm) + + IFUNC_IMPL_ADD (array, i, memcpy, + + HAS_ARCH_FEATURE (AVX512VL_Usable), + + __memcpy_evex_unaligned) + + IFUNC_IMPL_ADD (array, i, memcpy, + + HAS_ARCH_FEATURE (AVX512VL_Usable), + + __memcpy_evex_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3), + __memcpy_ssse3_back) + IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3), + @@ -494,10 +839,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + HAS_ARCH_FEATURE (AVX512F_Usable), + __memcpy_avx512_no_vzeroupper) + IFUNC_IMPL_ADD (array, i, memcpy, + - HAS_ARCH_FEATURE (AVX512F_Usable), + + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memcpy_avx512_unaligned) + IFUNC_IMPL_ADD (array, i, memcpy, + - HAS_ARCH_FEATURE (AVX512F_Usable), + + HAS_ARCH_FEATURE (AVX512VL_Usable), + __memcpy_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned) + IFUNC_IMPL_ADD (array, i, memcpy, 1, + @@ -511,10 +856,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + HAS_ARCH_FEATURE (AVX512F_Usable), + __mempcpy_chk_avx512_no_vzeroupper) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + - HAS_ARCH_FEATURE (AVX512F_Usable), + + HAS_ARCH_FEATURE (AVX512VL_Usable), + __mempcpy_chk_avx512_unaligned) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + - HAS_ARCH_FEATURE (AVX512F_Usable), + + HAS_ARCH_FEATURE (AVX512VL_Usable), + __mempcpy_chk_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + HAS_ARCH_FEATURE (AVX_Usable), + @@ -522,6 +867,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + HAS_ARCH_FEATURE (AVX_Usable), + __mempcpy_chk_avx_unaligned_erms) + + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + + (HAS_ARCH_FEATURE (AVX_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __mempcpy_chk_avx_unaligned_rtm) + + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + + (HAS_ARCH_FEATURE (AVX_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __mempcpy_chk_avx_unaligned_erms_rtm) + + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + + HAS_ARCH_FEATURE (AVX512VL_Usable), + + __mempcpy_chk_evex_unaligned) + + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + + HAS_ARCH_FEATURE (AVX512VL_Usable), + + __mempcpy_chk_evex_unaligned_erms) + IFUNC_IMPL_ADD (array, i, __mempcpy_chk, + HAS_CPU_FEATURE (SSSE3), + __mempcpy_chk_ssse3_back) + @@ -542,10 +901,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + HAS_ARCH_FEATURE (AVX512F_Usable), + __mempcpy_avx512_no_vzeroupper) + IFUNC_IMPL_ADD (array, i, mempcpy, + - HAS_ARCH_FEATURE (AVX512F_Usable), + + 
HAS_ARCH_FEATURE (AVX512VL_Usable), + __mempcpy_avx512_unaligned) + IFUNC_IMPL_ADD (array, i, mempcpy, + - HAS_ARCH_FEATURE (AVX512F_Usable), + + HAS_ARCH_FEATURE (AVX512VL_Usable), + __mempcpy_avx512_unaligned_erms) + IFUNC_IMPL_ADD (array, i, mempcpy, + HAS_ARCH_FEATURE (AVX_Usable), + @@ -553,6 +912,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, mempcpy, + HAS_ARCH_FEATURE (AVX_Usable), + __mempcpy_avx_unaligned_erms) + + IFUNC_IMPL_ADD (array, i, mempcpy, + + (HAS_ARCH_FEATURE (AVX_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __mempcpy_avx_unaligned_rtm) + + IFUNC_IMPL_ADD (array, i, mempcpy, + + (HAS_ARCH_FEATURE (AVX_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __mempcpy_avx_unaligned_erms_rtm) + + IFUNC_IMPL_ADD (array, i, mempcpy, + + HAS_ARCH_FEATURE (AVX512VL_Usable), + + __mempcpy_evex_unaligned) + + IFUNC_IMPL_ADD (array, i, mempcpy, + + HAS_ARCH_FEATURE (AVX512VL_Usable), + + __mempcpy_evex_unaligned_erms) + IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3), + __mempcpy_ssse3_back) + IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3), + @@ -568,6 +941,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, strncmp, + HAS_ARCH_FEATURE (AVX2_Usable), + __strncmp_avx2) + + IFUNC_IMPL_ADD (array, i, strncmp, + + (HAS_ARCH_FEATURE (AVX2_Usable) + + && HAS_CPU_FEATURE (RTM)), + + __strncmp_avx2_rtm) + + IFUNC_IMPL_ADD (array, i, strncmp, + + (HAS_ARCH_FEATURE (AVX512VL_Usable) + + && HAS_ARCH_FEATURE (AVX512BW_Usable)), + + __strncmp_evex) + IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSE4_2), + __strncmp_sse42) + IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSSE3), + @@ -582,6 +963,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, __wmemset_chk, + HAS_ARCH_FEATURE (AVX2_Usable), + __wmemset_chk_avx2_unaligned) + + IFUNC_IMPL_ADD (array, i, __wmemset_chk, + + HAS_ARCH_FEATURE (AVX512VL_Usable), + + __wmemset_chk_evex_unaligned) + IFUNC_IMPL_ADD (array, i, __wmemset_chk, + HAS_ARCH_FEATURE (AVX512F_Usable), + __wmemset_chk_avx512_unaligned)) + diff --git a/sysdeps/x86_64/multiarch/ifunc-memcmp.h b/sysdeps/x86_64/multiarch/ifunc-memcmp.h + index c14db39cf4..ebbb0c01cf 100644 + --- a/sysdeps/x86_64/multiarch/ifunc-memcmp.h + +++ b/sysdeps/x86_64/multiarch/ifunc-memcmp.h + @@ -23,17 +23,28 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe_rtm) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_movbe) attribute_hidden; + + static inline void * + IFUNC_SELECTOR (void) + { + const struct cpu_features* cpu_features = __get_cpu_features (); + + - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) + - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + && CPU_FEATURES_CPU_P (cpu_features, MOVBE) + && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + - return OPTIMIZE (avx2_movbe); + + { + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + + return OPTIMIZE (evex_movbe); + + + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + + return OPTIMIZE 
(avx2_movbe_rtm); + + + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + + return OPTIMIZE (avx2_movbe); + + } + + if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1)) + return OPTIMIZE (sse4_1); + diff --git a/sysdeps/x86_64/multiarch/ifunc-memmove.h b/sysdeps/x86_64/multiarch/ifunc-memmove.h + index 81673d2019..dfc5a28487 100644 + --- a/sysdeps/x86_64/multiarch/ifunc-memmove.h + +++ b/sysdeps/x86_64/multiarch/ifunc-memmove.h + @@ -29,6 +29,14 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3_back) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms) + attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_rtm) + + attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms_rtm) + + attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) + + attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms) + + attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) + attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms) + @@ -48,21 +56,42 @@ IFUNC_SELECTOR (void) + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) + && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) + { + - if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + - return OPTIMIZE (avx512_no_vzeroupper); + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)) + + { + + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + + return OPTIMIZE (avx512_unaligned_erms); + + - if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + - return OPTIMIZE (avx512_unaligned_erms); + + return OPTIMIZE (avx512_unaligned); + + } + + - return OPTIMIZE (avx512_unaligned); + + return OPTIMIZE (avx512_no_vzeroupper); + } + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + { + - if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + - return OPTIMIZE (avx_unaligned_erms); + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)) + + { + + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + + return OPTIMIZE (evex_unaligned_erms); + + + + return OPTIMIZE (evex_unaligned); + + } + + + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + + { + + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + + return OPTIMIZE (avx_unaligned_erms_rtm); + + + + return OPTIMIZE (avx_unaligned_rtm); + + } + + + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + + { + + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + + return OPTIMIZE (avx_unaligned_erms); + + - return OPTIMIZE (avx_unaligned); + + return OPTIMIZE (avx_unaligned); + + } + } + + if (!CPU_FEATURES_CPU_P (cpu_features, SSSE3) + diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h + index d690293385..48fdb24b02 100644 + --- a/sysdeps/x86_64/multiarch/ifunc-memset.h + +++ b/sysdeps/x86_64/multiarch/ifunc-memset.h + @@ -27,6 +27,14 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms) + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms) + attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm) + + attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms_rtm) + + attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) + + attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms) + + 
attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) + attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms) + @@ -45,21 +53,44 @@ IFUNC_SELECTOR (void) + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) + && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) + { + - if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + - return OPTIMIZE (avx512_no_vzeroupper); + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + + { + + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + + return OPTIMIZE (avx512_unaligned_erms); + + - if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + - return OPTIMIZE (avx512_unaligned_erms); + + return OPTIMIZE (avx512_unaligned); + + } + + - return OPTIMIZE (avx512_unaligned); + + return OPTIMIZE (avx512_no_vzeroupper); + } + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)) + { + - if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + - return OPTIMIZE (avx2_unaligned_erms); + - else + - return OPTIMIZE (avx2_unaligned); + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + + { + + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + + return OPTIMIZE (evex_unaligned_erms); + + + + return OPTIMIZE (evex_unaligned); + + } + + + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + + { + + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + + return OPTIMIZE (avx2_unaligned_erms_rtm); + + + + return OPTIMIZE (avx2_unaligned_rtm); + + } + + + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + + { + + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + + return OPTIMIZE (avx2_unaligned_erms); + + + + return OPTIMIZE (avx2_unaligned); + + } + } + + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h + index ae4f451803..f38a3b7501 100644 + --- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h + +++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h + @@ -25,16 +25,27 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) + attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + + static inline void * + IFUNC_SELECTOR (void) + { + const struct cpu_features* cpu_features = __get_cpu_features (); + + - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) + - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + - return OPTIMIZE (avx2); + + { + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + + return OPTIMIZE (evex); + + + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + + return OPTIMIZE (avx2_rtm); + + + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + + return OPTIMIZE (avx2); + + } + + if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load)) + return OPTIMIZE (sse2_unaligned); + diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h + new file mode 100644 + index 0000000000..564cc8cbec + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h + @@ -0,0 +1,52 @@ + +/* Common definition for ifunc 
selections for wcslen and wcsnlen + + All versions must be listed in ifunc-impl-list.c. + + Copyright (C) 2017-2021 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#include + + + +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + + + +static inline void * + +IFUNC_SELECTOR (void) + +{ + + const struct cpu_features* cpu_features = __get_cpu_features (); + + + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + + && CPU_FEATURES_CPU_P (cpu_features, BMI2) + + && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + + { + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable)) + + return OPTIMIZE (evex); + + + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + + return OPTIMIZE (avx2_rtm); + + + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + + return OPTIMIZE (avx2); + + } + + + + if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1)) + + return OPTIMIZE (sse4_1); + + + + return OPTIMIZE (sse2); + +} + diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h + index 583f6310a1..0ce29a229d 100644 + --- a/sysdeps/x86_64/multiarch/ifunc-wmemset.h + +++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h + @@ -20,6 +20,9 @@ + + extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm) + + attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden; + + static inline void * + @@ -27,14 +30,21 @@ IFUNC_SELECTOR (void) + { + const struct cpu_features* cpu_features = __get_cpu_features (); + + - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) + - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + { + - if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) + - && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) + - return OPTIMIZE (avx512_unaligned); + - else + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable)) + + { + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) + + return OPTIMIZE (avx512_unaligned); + + + + return OPTIMIZE (evex_unaligned); + + } + + + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + + return OPTIMIZE (avx2_unaligned_rtm); + + + + if 
(!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE (avx2_unaligned); + } + + diff --git a/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S + new file mode 100644 + index 0000000000..87b076c7c4 + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S + @@ -0,0 +1,12 @@ + +#ifndef MEMCHR + +# define MEMCHR __memchr_avx2_rtm + +#endif + + + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + + + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + + + +#define SECTION(p) p##.avx.rtm + + + +#include "memchr-avx2.S" + diff --git a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S + index e5a9abd211..0987616a1b 100644 + --- a/sysdeps/x86_64/multiarch/memchr-avx2.S + +++ b/sysdeps/x86_64/multiarch/memchr-avx2.S + @@ -26,319 +26,407 @@ + + # ifdef USE_AS_WMEMCHR + # define VPCMPEQ vpcmpeqd + +# define VPBROADCAST vpbroadcastd + +# define CHAR_SIZE 4 + # else + # define VPCMPEQ vpcmpeqb + +# define VPBROADCAST vpbroadcastb + +# define CHAR_SIZE 1 + +# endif + + + +# ifdef USE_AS_RAWMEMCHR + +# define ERAW_PTR_REG ecx + +# define RRAW_PTR_REG rcx + +# define ALGN_PTR_REG rdi + +# else + +# define ERAW_PTR_REG edi + +# define RRAW_PTR_REG rdi + +# define ALGN_PTR_REG rcx + # endif + + # ifndef VZEROUPPER + # define VZEROUPPER vzeroupper + # endif + + +# ifndef SECTION + +# define SECTION(p) p##.avx + +# endif + + + # define VEC_SIZE 32 + +# define PAGE_SIZE 4096 + +# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) + + - .section .text.avx,"ax",@progbits + + .section SECTION(.text),"ax",@progbits + ENTRY (MEMCHR) + # ifndef USE_AS_RAWMEMCHR + /* Check for zero length. */ + +# ifdef __ILP32__ + + /* Clear upper bits. */ + + and %RDX_LP, %RDX_LP + +# else + test %RDX_LP, %RDX_LP + +# endif + jz L(null) + # endif + - movl %edi, %ecx + - /* Broadcast CHAR to YMM0. */ + + /* Broadcast CHAR to YMMMATCH. */ + vmovd %esi, %xmm0 + -# ifdef USE_AS_WMEMCHR + - shl $2, %RDX_LP + - vpbroadcastd %xmm0, %ymm0 + -# else + -# ifdef __ILP32__ + - /* Clear the upper 32 bits. */ + - movl %edx, %edx + -# endif + - vpbroadcastb %xmm0, %ymm0 + -# endif + + VPBROADCAST %xmm0, %ymm0 + /* Check if we may cross page boundary with one vector load. */ + - andl $(2 * VEC_SIZE - 1), %ecx + - cmpl $VEC_SIZE, %ecx + - ja L(cros_page_boundary) + + movl %edi, %eax + + andl $(PAGE_SIZE - 1), %eax + + cmpl $(PAGE_SIZE - VEC_SIZE), %eax + + ja L(cross_page_boundary) + + /* Check the first VEC_SIZE bytes. */ + - VPCMPEQ (%rdi), %ymm0, %ymm1 + + VPCMPEQ (%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + - testl %eax, %eax + - + # ifndef USE_AS_RAWMEMCHR + - jnz L(first_vec_x0_check) + - /* Adjust length and check the end of data. */ + - subq $VEC_SIZE, %rdx + - jbe L(zero) + -# else + - jnz L(first_vec_x0) + + /* If length < CHAR_PER_VEC handle special. */ + + cmpq $CHAR_PER_VEC, %rdx + + jbe L(first_vec_x0) + # endif + - + - /* Align data for aligned loads in the loop. */ + - addq $VEC_SIZE, %rdi + - andl $(VEC_SIZE - 1), %ecx + - andq $-VEC_SIZE, %rdi + + testl %eax, %eax + + jz L(aligned_more) + + tzcntl %eax, %eax + + addq %rdi, %rax + + VZEROUPPER_RETURN + + # ifndef USE_AS_RAWMEMCHR + - /* Adjust length. */ + - addq %rcx, %rdx + + .p2align 5 + +L(first_vec_x0): + + /* Check if first match was before length. */ + + tzcntl %eax, %eax + +# ifdef USE_AS_WMEMCHR + + /* NB: Multiply length by 4 to get byte count. 
*/ + + sall $2, %edx + +# endif + + xorl %ecx, %ecx + + cmpl %eax, %edx + + leaq (%rdi, %rax), %rax + + cmovle %rcx, %rax + + VZEROUPPER_RETURN + + - subq $(VEC_SIZE * 4), %rdx + - jbe L(last_4x_vec_or_less) + +L(null): + + xorl %eax, %eax + + ret + # endif + - jmp L(more_4x_vec) + - + .p2align 4 + -L(cros_page_boundary): + - andl $(VEC_SIZE - 1), %ecx + - andq $-VEC_SIZE, %rdi + - VPCMPEQ (%rdi), %ymm0, %ymm1 + +L(cross_page_boundary): + + /* Save pointer before aligning as its original value is + + necessary for computer return address if byte is found or + + adjusting length if it is not and this is memchr. */ + + movq %rdi, %rcx + + /* Align data to VEC_SIZE - 1. ALGN_PTR_REG is rcx for memchr + + and rdi for rawmemchr. */ + + orq $(VEC_SIZE - 1), %ALGN_PTR_REG + + VPCMPEQ -(VEC_SIZE - 1)(%ALGN_PTR_REG), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + +# ifndef USE_AS_RAWMEMCHR + + /* Calculate length until end of page (length checked for a + + match). */ + + leaq 1(%ALGN_PTR_REG), %rsi + + subq %RRAW_PTR_REG, %rsi + +# ifdef USE_AS_WMEMCHR + + /* NB: Divide bytes by 4 to get wchar_t count. */ + + shrl $2, %esi + +# endif + +# endif + /* Remove the leading bytes. */ + - sarl %cl, %eax + - testl %eax, %eax + - jz L(aligned_more) + - tzcntl %eax, %eax + + sarxl %ERAW_PTR_REG, %eax, %eax + # ifndef USE_AS_RAWMEMCHR + /* Check the end of data. */ + - cmpq %rax, %rdx + - jbe L(zero) + + cmpq %rsi, %rdx + + jbe L(first_vec_x0) + # endif + + testl %eax, %eax + + jz L(cross_page_continue) + + tzcntl %eax, %eax + + addq %RRAW_PTR_REG, %rax + +L(return_vzeroupper): + + ZERO_UPPER_VEC_REGISTERS_RETURN + + + + .p2align 4 + +L(first_vec_x1): + + tzcntl %eax, %eax + + incq %rdi + addq %rdi, %rax + - addq %rcx, %rax + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + -L(aligned_more): + -# ifndef USE_AS_RAWMEMCHR + - /* Calculate "rdx + rcx - VEC_SIZE" with "rdx - (VEC_SIZE - rcx)" + - instead of "(rdx + rcx) - VEC_SIZE" to void possible addition + - overflow. */ + - negq %rcx + - addq $VEC_SIZE, %rcx + +L(first_vec_x2): + + tzcntl %eax, %eax + + addq $(VEC_SIZE + 1), %rdi + + addq %rdi, %rax + + VZEROUPPER_RETURN + + - /* Check the end of data. */ + - subq %rcx, %rdx + - jbe L(zero) + -# endif + + .p2align 4 + +L(first_vec_x3): + + tzcntl %eax, %eax + + addq $(VEC_SIZE * 2 + 1), %rdi + + addq %rdi, %rax + + VZEROUPPER_RETURN + + - addq $VEC_SIZE, %rdi + + -# ifndef USE_AS_RAWMEMCHR + - subq $(VEC_SIZE * 4), %rdx + - jbe L(last_4x_vec_or_less) + -# endif + + .p2align 4 + +L(first_vec_x4): + + tzcntl %eax, %eax + + addq $(VEC_SIZE * 3 + 1), %rdi + + addq %rdi, %rax + + VZEROUPPER_RETURN + + -L(more_4x_vec): + + .p2align 4 + +L(aligned_more): + /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time + since data is only aligned to VEC_SIZE. */ + - VPCMPEQ (%rdi), %ymm0, %ymm1 + - vpmovmskb %ymm1, %eax + - testl %eax, %eax + - jnz L(first_vec_x0) + + - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 + +# ifndef USE_AS_RAWMEMCHR + +L(cross_page_continue): + + /* Align data to VEC_SIZE - 1. */ + + xorl %ecx, %ecx + + subl %edi, %ecx + + orq $(VEC_SIZE - 1), %rdi + + /* esi is for adjusting length to see if near the end. */ + + leal (VEC_SIZE * 4 + 1)(%rdi, %rcx), %esi + +# ifdef USE_AS_WMEMCHR + + /* NB: Divide bytes by 4 to get the wchar_t count. */ + + sarl $2, %esi + +# endif + +# else + + orq $(VEC_SIZE - 1), %rdi + +L(cross_page_continue): + +# endif + + /* Load first VEC regardless. */ + + VPCMPEQ 1(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + +# ifndef USE_AS_RAWMEMCHR + + /* Adjust length. 
If near end handle specially. */ + + subq %rsi, %rdx + + jbe L(last_4x_vec_or_less) + +# endif + testl %eax, %eax + jnz L(first_vec_x1) + + - VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1 + + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + testl %eax, %eax + jnz L(first_vec_x2) + + - VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1 + + VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + testl %eax, %eax + jnz L(first_vec_x3) + + - addq $(VEC_SIZE * 4), %rdi + - + -# ifndef USE_AS_RAWMEMCHR + - subq $(VEC_SIZE * 4), %rdx + - jbe L(last_4x_vec_or_less) + -# endif + - + - /* Align data to 4 * VEC_SIZE. */ + - movq %rdi, %rcx + - andl $(4 * VEC_SIZE - 1), %ecx + - andq $-(4 * VEC_SIZE), %rdi + + VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1 + + vpmovmskb %ymm1, %eax + + testl %eax, %eax + + jnz L(first_vec_x4) + + # ifndef USE_AS_RAWMEMCHR + - /* Adjust length. */ + + /* Check if at last VEC_SIZE * 4 length. */ + + subq $(CHAR_PER_VEC * 4), %rdx + + jbe L(last_4x_vec_or_less_cmpeq) + + /* Align data to VEC_SIZE * 4 - 1 for the loop and readjust + + length. */ + + incq %rdi + + movl %edi, %ecx + + orq $(VEC_SIZE * 4 - 1), %rdi + + andl $(VEC_SIZE * 4 - 1), %ecx + +# ifdef USE_AS_WMEMCHR + + /* NB: Divide bytes by 4 to get the wchar_t count. */ + + sarl $2, %ecx + +# endif + addq %rcx, %rdx + +# else + + /* Align data to VEC_SIZE * 4 - 1 for loop. */ + + incq %rdi + + orq $(VEC_SIZE * 4 - 1), %rdi + # endif + + + /* Compare 4 * VEC at a time forward. */ + .p2align 4 + L(loop_4x_vec): + - /* Compare 4 * VEC at a time forward. */ + - VPCMPEQ (%rdi), %ymm0, %ymm1 + - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm2 + - VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm3 + - VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm4 + - + + VPCMPEQ 1(%rdi), %ymm0, %ymm1 + + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm2 + + VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm3 + + VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm4 + vpor %ymm1, %ymm2, %ymm5 + vpor %ymm3, %ymm4, %ymm6 + vpor %ymm5, %ymm6, %ymm5 + + - vpmovmskb %ymm5, %eax + - testl %eax, %eax + - jnz L(4x_vec_end) + - + - addq $(VEC_SIZE * 4), %rdi + - + + vpmovmskb %ymm5, %ecx + # ifdef USE_AS_RAWMEMCHR + - jmp L(loop_4x_vec) + + subq $-(VEC_SIZE * 4), %rdi + + testl %ecx, %ecx + + jz L(loop_4x_vec) + # else + - subq $(VEC_SIZE * 4), %rdx + - ja L(loop_4x_vec) + + testl %ecx, %ecx + + jnz L(loop_4x_vec_end) + + -L(last_4x_vec_or_less): + - /* Less than 4 * VEC and aligned to VEC_SIZE. */ + - addl $(VEC_SIZE * 2), %edx + - jle L(last_2x_vec) + + subq $-(VEC_SIZE * 4), %rdi + + - VPCMPEQ (%rdi), %ymm0, %ymm1 + - vpmovmskb %ymm1, %eax + - testl %eax, %eax + - jnz L(first_vec_x0) + + subq $(CHAR_PER_VEC * 4), %rdx + + ja L(loop_4x_vec) + + - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 + + /* Fall through into less than 4 remaining vectors of length + + case. */ + + VPCMPEQ (VEC_SIZE * 0 + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + + .p2align 4 + +L(last_4x_vec_or_less): + +# ifdef USE_AS_WMEMCHR + + /* NB: Multiply length by 4 to get byte count. */ + + sall $2, %edx + +# endif + + /* Check if first VEC contained match. */ + testl %eax, %eax + - jnz L(first_vec_x1) + + jnz L(first_vec_x1_check) + + - VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1 + - vpmovmskb %ymm1, %eax + - testl %eax, %eax + + /* If remaining length > VEC_SIZE * 2. */ + + addl $(VEC_SIZE * 2), %edx + + jg L(last_4x_vec) + + - jnz L(first_vec_x2_check) + - subl $VEC_SIZE, %edx + - jle L(zero) + +L(last_2x_vec): + + /* If remaining length < VEC_SIZE. 
*/ + + addl $VEC_SIZE, %edx + + jle L(zero_end) + + - VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1 + + /* Check VEC2 and compare any match with remaining length. */ + + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 + vpmovmskb %ymm1, %eax + - testl %eax, %eax + - + - jnz L(first_vec_x3_check) + - xorl %eax, %eax + - VZEROUPPER + - ret + + tzcntl %eax, %eax + + cmpl %eax, %edx + + jbe L(set_zero_end) + + addq $(VEC_SIZE + 1), %rdi + + addq %rdi, %rax + +L(zero_end): + + VZEROUPPER_RETURN + + .p2align 4 + -L(last_2x_vec): + - addl $(VEC_SIZE * 2), %edx + - VPCMPEQ (%rdi), %ymm0, %ymm1 + +L(loop_4x_vec_end): + +# endif + + /* rawmemchr will fall through into this if match was found in + + loop. */ + + + vpmovmskb %ymm1, %eax + testl %eax, %eax + + jnz L(last_vec_x1_return) + + - jnz L(first_vec_x0_check) + - subl $VEC_SIZE, %edx + - jle L(zero) + - + - VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1 + - vpmovmskb %ymm1, %eax + + vpmovmskb %ymm2, %eax + testl %eax, %eax + - jnz L(first_vec_x1_check) + - xorl %eax, %eax + - VZEROUPPER + - ret + + jnz L(last_vec_x2_return) + + - .p2align 4 + -L(first_vec_x0_check): + - tzcntl %eax, %eax + - /* Check the end of data. */ + - cmpq %rax, %rdx + - jbe L(zero) + + vpmovmskb %ymm3, %eax + + /* Combine VEC3 matches (eax) with VEC4 matches (ecx). */ + + salq $32, %rcx + + orq %rcx, %rax + + tzcntq %rax, %rax + +# ifdef USE_AS_RAWMEMCHR + + subq $(VEC_SIZE * 2 - 1), %rdi + +# else + + subq $-(VEC_SIZE * 2 + 1), %rdi + +# endif + addq %rdi, %rax + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + +# ifndef USE_AS_RAWMEMCHR + + .p2align 4 + L(first_vec_x1_check): + tzcntl %eax, %eax + - /* Check the end of data. */ + - cmpq %rax, %rdx + - jbe L(zero) + - addq $VEC_SIZE, %rax + + /* Adjust length. */ + + subl $-(VEC_SIZE * 4), %edx + + /* Check if match within remaining length. */ + + cmpl %eax, %edx + + jbe L(set_zero_end) + + incq %rdi + addq %rdi, %rax + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + +L(set_zero_end): + + xorl %eax, %eax + + VZEROUPPER_RETURN + +# endif + + .p2align 4 + -L(first_vec_x2_check): + +L(last_vec_x1_return): + tzcntl %eax, %eax + - /* Check the end of data. */ + - cmpq %rax, %rdx + - jbe L(zero) + - addq $(VEC_SIZE * 2), %rax + +# ifdef USE_AS_RAWMEMCHR + + subq $(VEC_SIZE * 4 - 1), %rdi + +# else + + incq %rdi + +# endif + addq %rdi, %rax + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + -L(first_vec_x3_check): + +L(last_vec_x2_return): + tzcntl %eax, %eax + - /* Check the end of data. */ + - cmpq %rax, %rdx + - jbe L(zero) + - addq $(VEC_SIZE * 3), %rax + +# ifdef USE_AS_RAWMEMCHR + + subq $(VEC_SIZE * 3 - 1), %rdi + +# else + + subq $-(VEC_SIZE + 1), %rdi + +# endif + addq %rdi, %rax + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + +# ifndef USE_AS_RAWMEMCHR + .p2align 4 + -L(zero): + - VZEROUPPER + -L(null): + - xorl %eax, %eax + - ret + -# endif + +L(last_4x_vec_or_less_cmpeq): + + VPCMPEQ (VEC_SIZE * 4 + 1)(%rdi), %ymm0, %ymm1 + + vpmovmskb %ymm1, %eax + +# ifdef USE_AS_WMEMCHR + + /* NB: Multiply length by 4 to get byte count. */ + + sall $2, %edx + +# endif + + subq $-(VEC_SIZE * 4), %rdi + + /* Check first VEC regardless. */ + + testl %eax, %eax + + jnz L(first_vec_x1_check) + + + /* If remaining length <= CHAR_PER_VEC * 2. 
*/ + + addl $(VEC_SIZE * 2), %edx + + jle L(last_2x_vec) + .p2align 4 + -L(first_vec_x0): + - tzcntl %eax, %eax + - addq %rdi, %rax + - VZEROUPPER + - ret + +L(last_4x_vec): + + VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1 + + vpmovmskb %ymm1, %eax + + testl %eax, %eax + + jnz L(last_vec_x2_return) + + - .p2align 4 + -L(first_vec_x1): + - tzcntl %eax, %eax + - addq $VEC_SIZE, %rax + - addq %rdi, %rax + - VZEROUPPER + - ret + + VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1 + + vpmovmskb %ymm1, %eax + + - .p2align 4 + -L(first_vec_x2): + + /* Create mask for possible matches within remaining length. */ + + movq $-1, %rcx + + bzhiq %rdx, %rcx, %rcx + + + + /* Test matches in data against length match. */ + + andl %ecx, %eax + + jnz L(last_vec_x3) + + + + /* if remaining length <= VEC_SIZE * 3 (Note this is after + + remaining length was found to be > VEC_SIZE * 2. */ + + subl $VEC_SIZE, %edx + + jbe L(zero_end2) + + + + VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1 + + vpmovmskb %ymm1, %eax + + /* Shift remaining length mask for last VEC. */ + + shrq $32, %rcx + + andl %ecx, %eax + + jz L(zero_end2) + tzcntl %eax, %eax + - addq $(VEC_SIZE * 2), %rax + + addq $(VEC_SIZE * 3 + 1), %rdi + addq %rdi, %rax + - VZEROUPPER + - ret + +L(zero_end2): + + VZEROUPPER_RETURN + + .p2align 4 + -L(4x_vec_end): + - vpmovmskb %ymm1, %eax + - testl %eax, %eax + - jnz L(first_vec_x0) + - vpmovmskb %ymm2, %eax + - testl %eax, %eax + - jnz L(first_vec_x1) + - vpmovmskb %ymm3, %eax + - testl %eax, %eax + - jnz L(first_vec_x2) + - vpmovmskb %ymm4, %eax + - testl %eax, %eax + -L(first_vec_x3): + +L(last_vec_x3): + tzcntl %eax, %eax + - addq $(VEC_SIZE * 3), %rax + + subq $-(VEC_SIZE * 2 + 1), %rdi + addq %rdi, %rax + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + +# endif + + END (MEMCHR) + #endif + diff --git a/sysdeps/x86_64/multiarch/memchr-evex.S b/sysdeps/x86_64/multiarch/memchr-evex.S + new file mode 100644 + index 0000000000..f3fdad4fda + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/memchr-evex.S + @@ -0,0 +1,478 @@ + +/* memchr/wmemchr optimized with 256-bit EVEX instructions. + + Copyright (C) 2021 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . 
*/ + + + +#if IS_IN (libc) + + + +# include + + + +# ifndef MEMCHR + +# define MEMCHR __memchr_evex + +# endif + + + +# ifdef USE_AS_WMEMCHR + +# define VPBROADCAST vpbroadcastd + +# define VPMINU vpminud + +# define VPCMP vpcmpd + +# define VPCMPEQ vpcmpeqd + +# define CHAR_SIZE 4 + +# else + +# define VPBROADCAST vpbroadcastb + +# define VPMINU vpminub + +# define VPCMP vpcmpb + +# define VPCMPEQ vpcmpeqb + +# define CHAR_SIZE 1 + +# endif + + + +# ifdef USE_AS_RAWMEMCHR + +# define RAW_PTR_REG rcx + +# define ALGN_PTR_REG rdi + +# else + +# define RAW_PTR_REG rdi + +# define ALGN_PTR_REG rcx + +# endif + + + +# define XMMZERO xmm23 + +# define YMMZERO ymm23 + +# define XMMMATCH xmm16 + +# define YMMMATCH ymm16 + +# define YMM1 ymm17 + +# define YMM2 ymm18 + +# define YMM3 ymm19 + +# define YMM4 ymm20 + +# define YMM5 ymm21 + +# define YMM6 ymm22 + + + +# define VEC_SIZE 32 + +# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) + +# define PAGE_SIZE 4096 + + + + .section .text.evex,"ax",@progbits + +ENTRY (MEMCHR) + +# ifndef USE_AS_RAWMEMCHR + + /* Check for zero length. */ + + test %RDX_LP, %RDX_LP + + jz L(zero) + + + +# ifdef __ILP32__ + + /* Clear the upper 32 bits. */ + + movl %edx, %edx + +# endif + +# endif + + /* Broadcast CHAR to YMMMATCH. */ + + VPBROADCAST %esi, %YMMMATCH + + /* Check if we may cross page boundary with one vector load. */ + + movl %edi, %eax + + andl $(PAGE_SIZE - 1), %eax + + cmpl $(PAGE_SIZE - VEC_SIZE), %eax + + ja L(cross_page_boundary) + + + + /* Check the first VEC_SIZE bytes. */ + + VPCMP $0, (%rdi), %YMMMATCH, %k0 + + kmovd %k0, %eax + +# ifndef USE_AS_RAWMEMCHR + + /* If length < CHAR_PER_VEC handle special. */ + + cmpq $CHAR_PER_VEC, %rdx + + jbe L(first_vec_x0) + +# endif + + testl %eax, %eax + + jz L(aligned_more) + + tzcntl %eax, %eax + +# ifdef USE_AS_WMEMCHR + + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + + leaq (%rdi, %rax, CHAR_SIZE), %rax + +# else + + addq %rdi, %rax + +# endif + + ret + + + +# ifndef USE_AS_RAWMEMCHR + +L(zero): + + xorl %eax, %eax + + ret + + + + .p2align 5 + +L(first_vec_x0): + + /* Check if first match was before length. */ + + tzcntl %eax, %eax + + xorl %ecx, %ecx + + cmpl %eax, %edx + + leaq (%rdi, %rax, CHAR_SIZE), %rax + + cmovle %rcx, %rax + + ret + +# else + + /* NB: first_vec_x0 is 17 bytes which will leave + + cross_page_boundary (which is relatively cold) close enough + + to ideal alignment. So only realign L(cross_page_boundary) if + + rawmemchr. */ + + .p2align 4 + +# endif + +L(cross_page_boundary): + + /* Save pointer before aligning as its original value is + + necessary for computer return address if byte is found or + + adjusting length if it is not and this is memchr. */ + + movq %rdi, %rcx + + /* Align data to VEC_SIZE. ALGN_PTR_REG is rcx for memchr and rdi + + for rawmemchr. */ + + andq $-VEC_SIZE, %ALGN_PTR_REG + + VPCMP $0, (%ALGN_PTR_REG), %YMMMATCH, %k0 + + kmovd %k0, %r8d + +# ifdef USE_AS_WMEMCHR + + /* NB: Divide shift count by 4 since each bit in K0 represent 4 + + bytes. */ + + sarl $2, %eax + +# endif + +# ifndef USE_AS_RAWMEMCHR + + movl $(PAGE_SIZE / CHAR_SIZE), %esi + + subl %eax, %esi + +# endif + +# ifdef USE_AS_WMEMCHR + + andl $(CHAR_PER_VEC - 1), %eax + +# endif + + /* Remove the leading bytes. */ + + sarxl %eax, %r8d, %eax + +# ifndef USE_AS_RAWMEMCHR + + /* Check the end of data. 
*/ + + cmpq %rsi, %rdx + + jbe L(first_vec_x0) + +# endif + + testl %eax, %eax + + jz L(cross_page_continue) + + tzcntl %eax, %eax + +# ifdef USE_AS_WMEMCHR + + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + + leaq (%RAW_PTR_REG, %rax, CHAR_SIZE), %rax + +# else + + addq %RAW_PTR_REG, %rax + +# endif + + ret + + + + .p2align 4 + +L(first_vec_x1): + + tzcntl %eax, %eax + + leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax + + ret + + + + .p2align 4 + +L(first_vec_x2): + + tzcntl %eax, %eax + + leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax + + ret + + + + .p2align 4 + +L(first_vec_x3): + + tzcntl %eax, %eax + + leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax + + ret + + + + .p2align 4 + +L(first_vec_x4): + + tzcntl %eax, %eax + + leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax + + ret + + + + .p2align 5 + +L(aligned_more): + + /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time + + since data is only aligned to VEC_SIZE. */ + + + +# ifndef USE_AS_RAWMEMCHR + + /* Align data to VEC_SIZE. */ + +L(cross_page_continue): + + xorl %ecx, %ecx + + subl %edi, %ecx + + andq $-VEC_SIZE, %rdi + + /* esi is for adjusting length to see if near the end. */ + + leal (VEC_SIZE * 5)(%rdi, %rcx), %esi + +# ifdef USE_AS_WMEMCHR + + /* NB: Divide bytes by 4 to get the wchar_t count. */ + + sarl $2, %esi + +# endif + +# else + + andq $-VEC_SIZE, %rdi + +L(cross_page_continue): + +# endif + + /* Load first VEC regardless. */ + + VPCMP $0, (VEC_SIZE)(%rdi), %YMMMATCH, %k0 + + kmovd %k0, %eax + +# ifndef USE_AS_RAWMEMCHR + + /* Adjust length. If near end handle specially. */ + + subq %rsi, %rdx + + jbe L(last_4x_vec_or_less) + +# endif + + testl %eax, %eax + + jnz L(first_vec_x1) + + + + VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0 + + kmovd %k0, %eax + + testl %eax, %eax + + jnz L(first_vec_x2) + + + + VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k0 + + kmovd %k0, %eax + + testl %eax, %eax + + jnz L(first_vec_x3) + + + + VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0 + + kmovd %k0, %eax + + testl %eax, %eax + + jnz L(first_vec_x4) + + + + + +# ifndef USE_AS_RAWMEMCHR + + /* Check if at last CHAR_PER_VEC * 4 length. */ + + subq $(CHAR_PER_VEC * 4), %rdx + + jbe L(last_4x_vec_or_less_cmpeq) + + addq $VEC_SIZE, %rdi + + + + /* Align data to VEC_SIZE * 4 for the loop and readjust length. + + */ + +# ifdef USE_AS_WMEMCHR + + movl %edi, %ecx + + andq $-(4 * VEC_SIZE), %rdi + + andl $(VEC_SIZE * 4 - 1), %ecx + + /* NB: Divide bytes by 4 to get the wchar_t count. */ + + sarl $2, %ecx + + addq %rcx, %rdx + +# else + + addq %rdi, %rdx + + andq $-(4 * VEC_SIZE), %rdi + + subq %rdi, %rdx + +# endif + +# else + + addq $VEC_SIZE, %rdi + + andq $-(4 * VEC_SIZE), %rdi + +# endif + + + + vpxorq %XMMZERO, %XMMZERO, %XMMZERO + + + + /* Compare 4 * VEC at a time forward. */ + + .p2align 4 + +L(loop_4x_vec): + + /* It would be possible to save some instructions using 4x VPCMP + + but bottleneck on port 5 makes it not woth it. */ + + VPCMP $4, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k1 + + /* xor will set bytes match esi to zero. */ + + vpxorq (VEC_SIZE * 5)(%rdi), %YMMMATCH, %YMM2 + + vpxorq (VEC_SIZE * 6)(%rdi), %YMMMATCH, %YMM3 + + VPCMP $0, (VEC_SIZE * 7)(%rdi), %YMMMATCH, %k3 + + /* Reduce VEC2 / VEC3 with min and VEC1 with zero mask. 
*/ + + VPMINU %YMM2, %YMM3, %YMM3{%k1}{z} + + VPCMP $0, %YMM3, %YMMZERO, %k2 + +# ifdef USE_AS_RAWMEMCHR + + subq $-(VEC_SIZE * 4), %rdi + + kortestd %k2, %k3 + + jz L(loop_4x_vec) + +# else + + kortestd %k2, %k3 + + jnz L(loop_4x_vec_end) + + + + subq $-(VEC_SIZE * 4), %rdi + + + + subq $(CHAR_PER_VEC * 4), %rdx + + ja L(loop_4x_vec) + + + + /* Fall through into less than 4 remaining vectors of length case. + + */ + + VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0 + + kmovd %k0, %eax + + addq $(VEC_SIZE * 3), %rdi + + .p2align 4 + +L(last_4x_vec_or_less): + + /* Check if first VEC contained match. */ + + testl %eax, %eax + + jnz L(first_vec_x1_check) + + + + /* If remaining length > CHAR_PER_VEC * 2. */ + + addl $(CHAR_PER_VEC * 2), %edx + + jg L(last_4x_vec) + + + +L(last_2x_vec): + + /* If remaining length < CHAR_PER_VEC. */ + + addl $CHAR_PER_VEC, %edx + + jle L(zero_end) + + + + /* Check VEC2 and compare any match with remaining length. */ + + VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0 + + kmovd %k0, %eax + + tzcntl %eax, %eax + + cmpl %eax, %edx + + jbe L(set_zero_end) + + leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax + +L(zero_end): + + ret + + + + + + .p2align 4 + +L(first_vec_x1_check): + + tzcntl %eax, %eax + + /* Adjust length. */ + + subl $-(CHAR_PER_VEC * 4), %edx + + /* Check if match within remaining length. */ + + cmpl %eax, %edx + + jbe L(set_zero_end) + + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + + leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax + + ret + +L(set_zero_end): + + xorl %eax, %eax + + ret + + + + .p2align 4 + +L(loop_4x_vec_end): + +# endif + + /* rawmemchr will fall through into this if match was found in + + loop. */ + + + + /* k1 has not of matches with VEC1. */ + + kmovd %k1, %eax + +# ifdef USE_AS_WMEMCHR + + subl $((1 << CHAR_PER_VEC) - 1), %eax + +# else + + incl %eax + +# endif + + jnz L(last_vec_x1_return) + + + + VPCMP $0, %YMM2, %YMMZERO, %k0 + + kmovd %k0, %eax + + testl %eax, %eax + + jnz L(last_vec_x2_return) + + + + kmovd %k2, %eax + + testl %eax, %eax + + jnz L(last_vec_x3_return) + + + + kmovd %k3, %eax + + tzcntl %eax, %eax + +# ifdef USE_AS_RAWMEMCHR + + leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax + +# else + + leaq (VEC_SIZE * 7)(%rdi, %rax, CHAR_SIZE), %rax + +# endif + + ret + + + + .p2align 4 + +L(last_vec_x1_return): + + tzcntl %eax, %eax + +# ifdef USE_AS_RAWMEMCHR + +# ifdef USE_AS_WMEMCHR + + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + + leaq (%rdi, %rax, CHAR_SIZE), %rax + +# else + + addq %rdi, %rax + +# endif + +# else + + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + + leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax + +# endif + + ret + + + + .p2align 4 + +L(last_vec_x2_return): + + tzcntl %eax, %eax + +# ifdef USE_AS_RAWMEMCHR + + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + + leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax + +# else + + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + + leaq (VEC_SIZE * 5)(%rdi, %rax, CHAR_SIZE), %rax + +# endif + + ret + + + + .p2align 4 + +L(last_vec_x3_return): + + tzcntl %eax, %eax + +# ifdef USE_AS_RAWMEMCHR + + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */ + + leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax + +# else + + /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. 
*/ + + leaq (VEC_SIZE * 6)(%rdi, %rax, CHAR_SIZE), %rax + +# endif + + ret + + + + + +# ifndef USE_AS_RAWMEMCHR + +L(last_4x_vec_or_less_cmpeq): + + VPCMP $0, (VEC_SIZE * 5)(%rdi), %YMMMATCH, %k0 + + kmovd %k0, %eax + + subq $-(VEC_SIZE * 4), %rdi + + /* Check first VEC regardless. */ + + testl %eax, %eax + + jnz L(first_vec_x1_check) + + + + /* If remaining length <= CHAR_PER_VEC * 2. */ + + addl $(CHAR_PER_VEC * 2), %edx + + jle L(last_2x_vec) + + + + .p2align 4 + +L(last_4x_vec): + + VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0 + + kmovd %k0, %eax + + testl %eax, %eax + + jnz L(last_vec_x2) + + + + + + VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k0 + + kmovd %k0, %eax + + /* Create mask for possible matches within remaining length. */ + +# ifdef USE_AS_WMEMCHR + + movl $((1 << (CHAR_PER_VEC * 2)) - 1), %ecx + + bzhil %edx, %ecx, %ecx + +# else + + movq $-1, %rcx + + bzhiq %rdx, %rcx, %rcx + +# endif + + /* Test matches in data against length match. */ + + andl %ecx, %eax + + jnz L(last_vec_x3) + + + + /* if remaining length <= CHAR_PER_VEC * 3 (Note this is after + + remaining length was found to be > CHAR_PER_VEC * 2. */ + + subl $CHAR_PER_VEC, %edx + + jbe L(zero_end2) + + + + + + VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0 + + kmovd %k0, %eax + + /* Shift remaining length mask for last VEC. */ + +# ifdef USE_AS_WMEMCHR + + shrl $CHAR_PER_VEC, %ecx + +# else + + shrq $CHAR_PER_VEC, %rcx + +# endif + + andl %ecx, %eax + + jz L(zero_end2) + + tzcntl %eax, %eax + + leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax + +L(zero_end2): + + ret + + + +L(last_vec_x2): + + tzcntl %eax, %eax + + leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax + + ret + + + + .p2align 4 + +L(last_vec_x3): + + tzcntl %eax, %eax + + leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax + + ret + +# endif + + + +END (MEMCHR) + +#endif + diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S + new file mode 100644 + index 0000000000..cf4eff5d4a + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S + @@ -0,0 +1,12 @@ + +#ifndef MEMCMP + +# define MEMCMP __memcmp_avx2_movbe_rtm + +#endif + + + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + + + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + + + +#define SECTION(p) p##.avx.rtm + + + +#include "memcmp-avx2-movbe.S" + diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S + index 67fc575b59..87f9478eaf 100644 + --- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S + +++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S + @@ -47,6 +47,10 @@ + # define VZEROUPPER vzeroupper + # endif + + +# ifndef SECTION + +# define SECTION(p) p##.avx + +# endif + + + # define VEC_SIZE 32 + # define VEC_MASK ((1 << VEC_SIZE) - 1) + + @@ -55,7 +59,7 @@ + memcmp has to use UNSIGNED comparison for elemnts. 
+ */ + + - .section .text.avx,"ax",@progbits + + .section SECTION(.text),"ax",@progbits + ENTRY (MEMCMP) + # ifdef USE_AS_WMEMCMP + shl $2, %RDX_LP + @@ -123,8 +127,8 @@ ENTRY (MEMCMP) + vptest %ymm0, %ymm5 + jnc L(4x_vec_end) + xorl %eax, %eax + - VZEROUPPER + - ret + +L(return_vzeroupper): + + ZERO_UPPER_VEC_REGISTERS_RETURN + + .p2align 4 + L(last_2x_vec): + @@ -144,8 +148,7 @@ L(last_vec): + vpmovmskb %ymm2, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + L(first_vec): + @@ -164,8 +167,7 @@ L(wmemcmp_return): + movzbl (%rsi, %rcx), %edx + sub %edx, %eax + # endif + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + # ifdef USE_AS_WMEMCMP + .p2align 4 + @@ -367,8 +369,7 @@ L(last_4x_vec): + vpmovmskb %ymm2, %eax + subl $VEC_MASK, %eax + jnz L(first_vec) + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + L(4x_vec_end): + @@ -394,8 +395,7 @@ L(4x_vec_end): + movzbl (VEC_SIZE * 3)(%rsi, %rcx), %edx + sub %edx, %eax + # endif + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + L(first_vec_x1): + @@ -410,8 +410,7 @@ L(first_vec_x1): + movzbl VEC_SIZE(%rsi, %rcx), %edx + sub %edx, %eax + # endif + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + L(first_vec_x2): + @@ -426,7 +425,6 @@ L(first_vec_x2): + movzbl (VEC_SIZE * 2)(%rsi, %rcx), %edx + sub %edx, %eax + # endif + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + END (MEMCMP) + #endif + diff --git a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S + new file mode 100644 + index 0000000000..9c093972e1 + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S + @@ -0,0 +1,440 @@ + +/* memcmp/wmemcmp optimized with 256-bit EVEX instructions. + + Copyright (C) 2021 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#if IS_IN (libc) + + + +/* memcmp/wmemcmp is implemented as: + + 1. For size from 2 to 7 bytes, load as big endian with movbe and bswap + + to avoid branches. + + 2. Use overlapping compare to avoid branch. + + 3. Use vector compare when size >= 4 bytes for memcmp or size >= 8 + + bytes for wmemcmp. + + 4. If size is 8 * VEC_SIZE or less, unroll the loop. + + 5. Compare 4 * VEC_SIZE at a time with the aligned first memory + + area. + + 6. Use 2 vector compares when size is 2 * VEC_SIZE or less. + + 7. Use 4 vector compares when size is 4 * VEC_SIZE or less. + + 8. Use 8 vector compares when size is 8 * VEC_SIZE or less. 
*/ + + + +# include + + + +# ifndef MEMCMP + +# define MEMCMP __memcmp_evex_movbe + +# endif + + + +# define VMOVU vmovdqu64 + + + +# ifdef USE_AS_WMEMCMP + +# define VPCMPEQ vpcmpeqd + +# else + +# define VPCMPEQ vpcmpeqb + +# endif + + + +# define XMM1 xmm17 + +# define XMM2 xmm18 + +# define YMM1 ymm17 + +# define YMM2 ymm18 + +# define YMM3 ymm19 + +# define YMM4 ymm20 + +# define YMM5 ymm21 + +# define YMM6 ymm22 + + + +# define VEC_SIZE 32 + +# ifdef USE_AS_WMEMCMP + +# define VEC_MASK 0xff + +# define XMM_MASK 0xf + +# else + +# define VEC_MASK 0xffffffff + +# define XMM_MASK 0xffff + +# endif + + + +/* Warning! + + wmemcmp has to use SIGNED comparison for elements. + + memcmp has to use UNSIGNED comparison for elemnts. + +*/ + + + + .section .text.evex,"ax",@progbits + +ENTRY (MEMCMP) + +# ifdef USE_AS_WMEMCMP + + shl $2, %RDX_LP + +# elif defined __ILP32__ + + /* Clear the upper 32 bits. */ + + movl %edx, %edx + +# endif + + cmp $VEC_SIZE, %RDX_LP + + jb L(less_vec) + + + + /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */ + + VMOVU (%rsi), %YMM2 + + VPCMPEQ (%rdi), %YMM2, %k1 + + kmovd %k1, %eax + + subl $VEC_MASK, %eax + + jnz L(first_vec) + + + + cmpq $(VEC_SIZE * 2), %rdx + + jbe L(last_vec) + + + + /* More than 2 * VEC. */ + + cmpq $(VEC_SIZE * 8), %rdx + + ja L(more_8x_vec) + + cmpq $(VEC_SIZE * 4), %rdx + + jb L(last_4x_vec) + + + + /* From 4 * VEC to 8 * VEC, inclusively. */ + + VMOVU (%rsi), %YMM1 + + VPCMPEQ (%rdi), %YMM1, %k1 + + + + VMOVU VEC_SIZE(%rsi), %YMM2 + + VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2 + + + + VMOVU (VEC_SIZE * 2)(%rsi), %YMM3 + + VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3 + + + + VMOVU (VEC_SIZE * 3)(%rsi), %YMM4 + + VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4 + + + + kandd %k1, %k2, %k5 + + kandd %k3, %k4, %k6 + + kandd %k5, %k6, %k6 + + + + kmovd %k6, %eax + + cmpl $VEC_MASK, %eax + + jne L(4x_vec_end) + + + + leaq -(4 * VEC_SIZE)(%rdi, %rdx), %rdi + + leaq -(4 * VEC_SIZE)(%rsi, %rdx), %rsi + + VMOVU (%rsi), %YMM1 + + VPCMPEQ (%rdi), %YMM1, %k1 + + + + VMOVU VEC_SIZE(%rsi), %YMM2 + + VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2 + + kandd %k1, %k2, %k5 + + + + VMOVU (VEC_SIZE * 2)(%rsi), %YMM3 + + VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3 + + kandd %k3, %k5, %k5 + + + + VMOVU (VEC_SIZE * 3)(%rsi), %YMM4 + + VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4 + + kandd %k4, %k5, %k5 + + + + kmovd %k5, %eax + + cmpl $VEC_MASK, %eax + + jne L(4x_vec_end) + + xorl %eax, %eax + + ret + + + + .p2align 4 + +L(last_2x_vec): + + /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */ + + VMOVU (%rsi), %YMM2 + + VPCMPEQ (%rdi), %YMM2, %k2 + + kmovd %k2, %eax + + subl $VEC_MASK, %eax + + jnz L(first_vec) + + + +L(last_vec): + + /* Use overlapping loads to avoid branches. */ + + leaq -VEC_SIZE(%rdi, %rdx), %rdi + + leaq -VEC_SIZE(%rsi, %rdx), %rsi + + VMOVU (%rsi), %YMM2 + + VPCMPEQ (%rdi), %YMM2, %k2 + + kmovd %k2, %eax + + subl $VEC_MASK, %eax + + jnz L(first_vec) + + ret + + + + .p2align 4 + +L(first_vec): + + /* A byte or int32 is different within 16 or 32 bytes. 
*/ + + tzcntl %eax, %ecx + +# ifdef USE_AS_WMEMCMP + + xorl %eax, %eax + + movl (%rdi, %rcx, 4), %edx + + cmpl (%rsi, %rcx, 4), %edx + +L(wmemcmp_return): + + setl %al + + negl %eax + + orl $1, %eax + +# else + + movzbl (%rdi, %rcx), %eax + + movzbl (%rsi, %rcx), %edx + + sub %edx, %eax + +# endif + + ret + + + +# ifdef USE_AS_WMEMCMP + + .p2align 4 + +L(4): + + xorl %eax, %eax + + movl (%rdi), %edx + + cmpl (%rsi), %edx + + jne L(wmemcmp_return) + + ret + +# else + + .p2align 4 + +L(between_4_7): + + /* Load as big endian with overlapping movbe to avoid branches. */ + + movbe (%rdi), %eax + + movbe (%rsi), %ecx + + shlq $32, %rax + + shlq $32, %rcx + + movbe -4(%rdi, %rdx), %edi + + movbe -4(%rsi, %rdx), %esi + + orq %rdi, %rax + + orq %rsi, %rcx + + subq %rcx, %rax + + je L(exit) + + sbbl %eax, %eax + + orl $1, %eax + + ret + + + + .p2align 4 + +L(exit): + + ret + + + + .p2align 4 + +L(between_2_3): + + /* Load as big endian to avoid branches. */ + + movzwl (%rdi), %eax + + movzwl (%rsi), %ecx + + shll $8, %eax + + shll $8, %ecx + + bswap %eax + + bswap %ecx + + movb -1(%rdi, %rdx), %al + + movb -1(%rsi, %rdx), %cl + + /* Subtraction is okay because the upper 8 bits are zero. */ + + subl %ecx, %eax + + ret + + + + .p2align 4 + +L(1): + + movzbl (%rdi), %eax + + movzbl (%rsi), %ecx + + subl %ecx, %eax + + ret + +# endif + + + + .p2align 4 + +L(zero): + + xorl %eax, %eax + + ret + + + + .p2align 4 + +L(less_vec): + +# ifdef USE_AS_WMEMCMP + + /* It can only be 0, 4, 8, 12, 16, 20, 24, 28 bytes. */ + + cmpb $4, %dl + + je L(4) + + jb L(zero) + +# else + + cmpb $1, %dl + + je L(1) + + jb L(zero) + + cmpb $4, %dl + + jb L(between_2_3) + + cmpb $8, %dl + + jb L(between_4_7) + +# endif + + cmpb $16, %dl + + jae L(between_16_31) + + /* It is between 8 and 15 bytes. */ + + vmovq (%rdi), %XMM1 + + vmovq (%rsi), %XMM2 + + VPCMPEQ %XMM1, %XMM2, %k2 + + kmovw %k2, %eax + + subl $XMM_MASK, %eax + + jnz L(first_vec) + + /* Use overlapping loads to avoid branches. */ + + leaq -8(%rdi, %rdx), %rdi + + leaq -8(%rsi, %rdx), %rsi + + vmovq (%rdi), %XMM1 + + vmovq (%rsi), %XMM2 + + VPCMPEQ %XMM1, %XMM2, %k2 + + kmovw %k2, %eax + + subl $XMM_MASK, %eax + + jnz L(first_vec) + + ret + + + + .p2align 4 + +L(between_16_31): + + /* From 16 to 31 bytes. No branch when size == 16. */ + + VMOVU (%rsi), %XMM2 + + VPCMPEQ (%rdi), %XMM2, %k2 + + kmovw %k2, %eax + + subl $XMM_MASK, %eax + + jnz L(first_vec) + + + + /* Use overlapping loads to avoid branches. */ + + leaq -16(%rdi, %rdx), %rdi + + leaq -16(%rsi, %rdx), %rsi + + VMOVU (%rsi), %XMM2 + + VPCMPEQ (%rdi), %XMM2, %k2 + + kmovw %k2, %eax + + subl $XMM_MASK, %eax + + jnz L(first_vec) + + ret + + + + .p2align 4 + +L(more_8x_vec): + + /* More than 8 * VEC. Check the first VEC. */ + + VMOVU (%rsi), %YMM2 + + VPCMPEQ (%rdi), %YMM2, %k2 + + kmovd %k2, %eax + + subl $VEC_MASK, %eax + + jnz L(first_vec) + + + + /* Align the first memory area for aligned loads in the loop. + + Compute how much the first memory area is misaligned. */ + + movq %rdi, %rcx + + andl $(VEC_SIZE - 1), %ecx + + /* Get the negative of offset for alignment. */ + + subq $VEC_SIZE, %rcx + + /* Adjust the second memory area. */ + + subq %rcx, %rsi + + /* Adjust the first memory area which should be aligned now. */ + + subq %rcx, %rdi + + /* Adjust length. */ + + addq %rcx, %rdx + + + +L(loop_4x_vec): + + /* Compare 4 * VEC at a time forward. 
*/ + + VMOVU (%rsi), %YMM1 + + VPCMPEQ (%rdi), %YMM1, %k1 + + + + VMOVU VEC_SIZE(%rsi), %YMM2 + + VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2 + + kandd %k2, %k1, %k5 + + + + VMOVU (VEC_SIZE * 2)(%rsi), %YMM3 + + VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3 + + kandd %k3, %k5, %k5 + + + + VMOVU (VEC_SIZE * 3)(%rsi), %YMM4 + + VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4 + + kandd %k4, %k5, %k5 + + + + kmovd %k5, %eax + + cmpl $VEC_MASK, %eax + + jne L(4x_vec_end) + + + + addq $(VEC_SIZE * 4), %rdi + + addq $(VEC_SIZE * 4), %rsi + + + + subq $(VEC_SIZE * 4), %rdx + + cmpq $(VEC_SIZE * 4), %rdx + + jae L(loop_4x_vec) + + + + /* Less than 4 * VEC. */ + + cmpq $VEC_SIZE, %rdx + + jbe L(last_vec) + + cmpq $(VEC_SIZE * 2), %rdx + + jbe L(last_2x_vec) + + + +L(last_4x_vec): + + /* From 2 * VEC to 4 * VEC. */ + + VMOVU (%rsi), %YMM2 + + VPCMPEQ (%rdi), %YMM2, %k2 + + kmovd %k2, %eax + + subl $VEC_MASK, %eax + + jnz L(first_vec) + + + + addq $VEC_SIZE, %rdi + + addq $VEC_SIZE, %rsi + + VMOVU (%rsi), %YMM2 + + VPCMPEQ (%rdi), %YMM2, %k2 + + kmovd %k2, %eax + + subl $VEC_MASK, %eax + + jnz L(first_vec) + + + + /* Use overlapping loads to avoid branches. */ + + leaq -(3 * VEC_SIZE)(%rdi, %rdx), %rdi + + leaq -(3 * VEC_SIZE)(%rsi, %rdx), %rsi + + VMOVU (%rsi), %YMM2 + + VPCMPEQ (%rdi), %YMM2, %k2 + + kmovd %k2, %eax + + subl $VEC_MASK, %eax + + jnz L(first_vec) + + + + addq $VEC_SIZE, %rdi + + addq $VEC_SIZE, %rsi + + VMOVU (%rsi), %YMM2 + + VPCMPEQ (%rdi), %YMM2, %k2 + + kmovd %k2, %eax + + subl $VEC_MASK, %eax + + jnz L(first_vec) + + ret + + + + .p2align 4 + +L(4x_vec_end): + + kmovd %k1, %eax + + subl $VEC_MASK, %eax + + jnz L(first_vec) + + kmovd %k2, %eax + + subl $VEC_MASK, %eax + + jnz L(first_vec_x1) + + kmovd %k3, %eax + + subl $VEC_MASK, %eax + + jnz L(first_vec_x2) + + kmovd %k4, %eax + + subl $VEC_MASK, %eax + + tzcntl %eax, %ecx + +# ifdef USE_AS_WMEMCMP + + xorl %eax, %eax + + movl (VEC_SIZE * 3)(%rdi, %rcx, 4), %edx + + cmpl (VEC_SIZE * 3)(%rsi, %rcx, 4), %edx + + jmp L(wmemcmp_return) + +# else + + movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax + + movzbl (VEC_SIZE * 3)(%rsi, %rcx), %edx + + sub %edx, %eax + +# endif + + ret + + + + .p2align 4 + +L(first_vec_x1): + + tzcntl %eax, %ecx + +# ifdef USE_AS_WMEMCMP + + xorl %eax, %eax + + movl VEC_SIZE(%rdi, %rcx, 4), %edx + + cmpl VEC_SIZE(%rsi, %rcx, 4), %edx + + jmp L(wmemcmp_return) + +# else + + movzbl VEC_SIZE(%rdi, %rcx), %eax + + movzbl VEC_SIZE(%rsi, %rcx), %edx + + sub %edx, %eax + +# endif + + ret + + + + .p2align 4 + +L(first_vec_x2): + + tzcntl %eax, %ecx + +# ifdef USE_AS_WMEMCMP + + xorl %eax, %eax + + movl (VEC_SIZE * 2)(%rdi, %rcx, 4), %edx + + cmpl (VEC_SIZE * 2)(%rsi, %rcx, 4), %edx + + jmp L(wmemcmp_return) + +# else + + movzbl (VEC_SIZE * 2)(%rdi, %rcx), %eax + + movzbl (VEC_SIZE * 2)(%rsi, %rcx), %edx + + sub %edx, %eax + +# endif + + ret + +END (MEMCMP) + +#endif + diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S + new file mode 100644 + index 0000000000..1ec1962e86 + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S + @@ -0,0 +1,17 @@ + +#if IS_IN (libc) + +# define VEC_SIZE 32 + +# define VEC(i) ymm##i + +# define VMOVNT vmovntdq + +# define VMOVU vmovdqu + +# define VMOVA vmovdqa + + + +# define ZERO_UPPER_VEC_REGISTERS_RETURN \ + + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + + + +# define VZEROUPPER_RETURN jmp L(return) + + + +# define SECTION(p) p##.avx.rtm + +# define MEMMOVE_SYMBOL(p,s) p##_avx_##s##_rtm + + + +# include 
"memmove-vec-unaligned-erms.S" + +#endif + diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S + index aac1515cf6..7dad1ad74c 100644 + --- a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S + +++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S + @@ -1,11 +1,25 @@ + #if IS_IN (libc) + # define VEC_SIZE 64 + -# define VEC(i) zmm##i + +# define XMM0 xmm16 + +# define XMM1 xmm17 + +# define YMM0 ymm16 + +# define YMM1 ymm17 + +# define VEC0 zmm16 + +# define VEC1 zmm17 + +# define VEC2 zmm18 + +# define VEC3 zmm19 + +# define VEC4 zmm20 + +# define VEC5 zmm21 + +# define VEC6 zmm22 + +# define VEC7 zmm23 + +# define VEC8 zmm24 + +# define VEC(i) VEC##i + # define VMOVNT vmovntdq + # define VMOVU vmovdqu64 + # define VMOVA vmovdqa64 + +# define VZEROUPPER + + -# define SECTION(p) p##.avx512 + +# define SECTION(p) p##.evex512 + # define MEMMOVE_SYMBOL(p,s) p##_avx512_##s + + # include "memmove-vec-unaligned-erms.S" + diff --git a/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S + new file mode 100644 + index 0000000000..b879007e89 + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S + @@ -0,0 +1,26 @@ + +#if IS_IN (libc) + +# define VEC_SIZE 32 + +# define XMM0 xmm16 + +# define XMM1 xmm17 + +# define YMM0 ymm16 + +# define YMM1 ymm17 + +# define VEC0 ymm16 + +# define VEC1 ymm17 + +# define VEC2 ymm18 + +# define VEC3 ymm19 + +# define VEC4 ymm20 + +# define VEC5 ymm21 + +# define VEC6 ymm22 + +# define VEC7 ymm23 + +# define VEC8 ymm24 + +# define VEC(i) VEC##i + +# define VMOVNT vmovntdq + +# define VMOVU vmovdqu64 + +# define VMOVA vmovdqa64 + +# define VZEROUPPER + + + +# define SECTION(p) p##.evex + +# define MEMMOVE_SYMBOL(p,s) p##_evex_##s + + + +# include "memmove-vec-unaligned-erms.S" + +#endif + diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S + index c763b7d871..d13d23d6ce 100644 + --- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S + +++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S + @@ -48,6 +48,14 @@ + # define MEMMOVE_CHK_SYMBOL(p,s) MEMMOVE_SYMBOL(p, s) + #endif + + +#ifndef XMM0 + +# define XMM0 xmm0 + +#endif + + + +#ifndef YMM0 + +# define YMM0 ymm0 + +#endif + + + #ifndef VZEROUPPER + # if VEC_SIZE > 16 + # define VZEROUPPER vzeroupper + @@ -67,6 +75,13 @@ + # define REP_MOVSB_THRESHOLD (2048 * (VEC_SIZE / 16)) + #endif + + +/* Avoid short distance rep movsb only with non-SSE vector. 
*/ + +#ifndef AVOID_SHORT_DISTANCE_REP_MOVSB + +# define AVOID_SHORT_DISTANCE_REP_MOVSB (VEC_SIZE > 16) + +#else + +# define AVOID_SHORT_DISTANCE_REP_MOVSB 0 + +#endif + + + #ifndef PREFETCH + # define PREFETCH(addr) prefetcht0 addr + #endif + @@ -143,11 +158,12 @@ L(last_2x_vec): + VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(1) + VMOVU %VEC(0), (%rdi) + VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx) + - VZEROUPPER + #if !defined USE_MULTIARCH || !IS_IN (libc) + L(nop): + -#endif + ret + +#else + + VZEROUPPER_RETURN + +#endif + #if defined USE_MULTIARCH && IS_IN (libc) + END (MEMMOVE_SYMBOL (__memmove, unaligned)) + + @@ -240,11 +256,14 @@ L(last_2x_vec): + VMOVU %VEC(0), (%rdi) + VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx) + L(return): + - VZEROUPPER + +#if VEC_SIZE > 16 + + ZERO_UPPER_VEC_REGISTERS_RETURN + +#else + ret + +#endif + + L(movsb): + - cmpq __x86_shared_non_temporal_threshold(%rip), %rdx + + cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP + jae L(more_8x_vec) + cmpq %rsi, %rdi + jb 1f + @@ -257,7 +276,21 @@ L(movsb): + # error Unsupported REP_MOVSB_THRESHOLD and VEC_SIZE! + # endif + jb L(more_8x_vec_backward) + +# if AVOID_SHORT_DISTANCE_REP_MOVSB + + movq %rdi, %rcx + + subq %rsi, %rcx + + jmp 2f + +# endif + 1: + +# if AVOID_SHORT_DISTANCE_REP_MOVSB + + movq %rsi, %rcx + + subq %rdi, %rcx + +2: + +/* Avoid "rep movsb" if RCX, the distance between source and destination, + + is N*4GB + [1..63] with N >= 0. */ + + cmpl $63, %ecx + + jbe L(more_2x_vec) /* Avoid "rep movsb" if ECX <= 63. */ + +# endif + mov %RDX_LP, %RCX_LP + rep movsb + L(nop): + @@ -291,21 +324,20 @@ L(less_vec): + #if VEC_SIZE > 32 + L(between_32_63): + /* From 32 to 63. No branch when size == 32. */ + - vmovdqu (%rsi), %ymm0 + - vmovdqu -32(%rsi,%rdx), %ymm1 + - vmovdqu %ymm0, (%rdi) + - vmovdqu %ymm1, -32(%rdi,%rdx) + - VZEROUPPER + - ret + + VMOVU (%rsi), %YMM0 + + VMOVU -32(%rsi,%rdx), %YMM1 + + VMOVU %YMM0, (%rdi) + + VMOVU %YMM1, -32(%rdi,%rdx) + + VZEROUPPER_RETURN + #endif + #if VEC_SIZE > 16 + /* From 16 to 31. No branch when size == 16. */ + L(between_16_31): + - vmovdqu (%rsi), %xmm0 + - vmovdqu -16(%rsi,%rdx), %xmm1 + - vmovdqu %xmm0, (%rdi) + - vmovdqu %xmm1, -16(%rdi,%rdx) + - ret + + VMOVU (%rsi), %XMM0 + + VMOVU -16(%rsi,%rdx), %XMM1 + + VMOVU %XMM0, (%rdi) + + VMOVU %XMM1, -16(%rdi,%rdx) + + VZEROUPPER_RETURN + #endif + L(between_8_15): + /* From 8 to 15. No branch when size == 8. */ + @@ -358,8 +390,7 @@ L(more_2x_vec): + VMOVU %VEC(5), -(VEC_SIZE * 2)(%rdi,%rdx) + VMOVU %VEC(6), -(VEC_SIZE * 3)(%rdi,%rdx) + VMOVU %VEC(7), -(VEC_SIZE * 4)(%rdi,%rdx) + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + L(last_4x_vec): + /* Copy from 2 * VEC to 4 * VEC. */ + VMOVU (%rsi), %VEC(0) + @@ -370,8 +401,7 @@ L(last_4x_vec): + VMOVU %VEC(1), VEC_SIZE(%rdi) + VMOVU %VEC(2), -VEC_SIZE(%rdi,%rdx) + VMOVU %VEC(3), -(VEC_SIZE * 2)(%rdi,%rdx) + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + L(more_8x_vec): + cmpq %rsi, %rdi + @@ -402,7 +432,7 @@ L(more_8x_vec): + addq %r8, %rdx + #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc) + /* Check non-temporal store threshold. */ + - cmpq __x86_shared_non_temporal_threshold(%rip), %rdx + + cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP + ja L(large_forward) + #endif + L(loop_4x_vec_forward): + @@ -427,8 +457,7 @@ L(loop_4x_vec_forward): + VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx) + /* Store the first VEC. 
*/ + VMOVU %VEC(4), (%r11) + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + L(more_8x_vec_backward): + /* Load the first 4 * VEC and last VEC to support overlapping + @@ -454,7 +483,7 @@ L(more_8x_vec_backward): + subq %r8, %rdx + #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc) + /* Check non-temporal store threshold. */ + - cmpq __x86_shared_non_temporal_threshold(%rip), %rdx + + cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP + ja L(large_backward) + #endif + L(loop_4x_vec_backward): + @@ -479,8 +508,7 @@ L(loop_4x_vec_backward): + VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi) + /* Store the last VEC. */ + VMOVU %VEC(8), (%r11) + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc) + L(large_forward): + @@ -515,8 +543,7 @@ L(loop_large_forward): + VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx) + /* Store the first VEC. */ + VMOVU %VEC(4), (%r11) + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + L(large_backward): + /* Don't use non-temporal store if there is overlap between + @@ -550,8 +577,7 @@ L(loop_large_backward): + VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi) + /* Store the last VEC. */ + VMOVU %VEC(8), (%r11) + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + #endif + END (MEMMOVE_SYMBOL (__memmove, unaligned_erms)) + + diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S + new file mode 100644 + index 0000000000..cea2d2a72d + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S + @@ -0,0 +1,12 @@ + +#ifndef MEMRCHR + +# define MEMRCHR __memrchr_avx2_rtm + +#endif + + + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + + + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + + + +#define SECTION(p) p##.avx.rtm + + + +#include "memrchr-avx2.S" + diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S + index f5437b54de..c8d54c08d6 100644 + --- a/sysdeps/x86_64/multiarch/memrchr-avx2.S + +++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S + @@ -20,14 +20,22 @@ + + # include + + +# ifndef MEMRCHR + +# define MEMRCHR __memrchr_avx2 + +# endif + + + # ifndef VZEROUPPER + # define VZEROUPPER vzeroupper + # endif + + +# ifndef SECTION + +# define SECTION(p) p##.avx + +# endif + + + # define VEC_SIZE 32 + + - .section .text.avx,"ax",@progbits + -ENTRY (__memrchr_avx2) + + .section SECTION(.text),"ax",@progbits + +ENTRY (MEMRCHR) + /* Broadcast CHAR to YMM0. 
*/ + vmovd %esi, %xmm0 + vpbroadcastb %xmm0, %ymm0 + @@ -134,8 +142,8 @@ L(loop_4x_vec): + vpmovmskb %ymm1, %eax + bsrl %eax, %eax + addq %rdi, %rax + - VZEROUPPER + - ret + +L(return_vzeroupper): + + ZERO_UPPER_VEC_REGISTERS_RETURN + + .p2align 4 + L(last_4x_vec_or_less): + @@ -169,8 +177,7 @@ L(last_4x_vec_or_less): + addq %rax, %rdx + jl L(zero) + addq %rdi, %rax + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + L(last_2x_vec): + @@ -191,31 +198,27 @@ L(last_2x_vec): + jl L(zero) + addl $(VEC_SIZE * 2), %eax + addq %rdi, %rax + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + L(last_vec_x0): + bsrl %eax, %eax + addq %rdi, %rax + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + L(last_vec_x1): + bsrl %eax, %eax + addl $VEC_SIZE, %eax + addq %rdi, %rax + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + L(last_vec_x2): + bsrl %eax, %eax + addl $(VEC_SIZE * 2), %eax + addq %rdi, %rax + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + L(last_vec_x3): + @@ -232,8 +235,7 @@ L(last_vec_x1_check): + jl L(zero) + addl $VEC_SIZE, %eax + addq %rdi, %rax + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + L(last_vec_x3_check): + @@ -243,12 +245,14 @@ L(last_vec_x3_check): + jl L(zero) + addl $(VEC_SIZE * 3), %eax + addq %rdi, %rax + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + L(zero): + - VZEROUPPER + + xorl %eax, %eax + + VZEROUPPER_RETURN + + + + .p2align 4 + L(null): + xorl %eax, %eax + ret + @@ -273,8 +277,7 @@ L(last_vec_or_less_aligned): + + bsrl %eax, %eax + addq %rdi, %rax + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + L(last_vec_or_less): + @@ -315,8 +318,7 @@ L(last_vec_or_less): + bsrl %eax, %eax + addq %rdi, %rax + addq %r8, %rax + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + L(last_vec_2x_aligned): + @@ -353,7 +355,6 @@ L(last_vec_2x_aligned): + bsrl %eax, %eax + addq %rdi, %rax + addq %r8, %rax + - VZEROUPPER + - ret + -END (__memrchr_avx2) + + VZEROUPPER_RETURN + +END (MEMRCHR) + #endif + diff --git a/sysdeps/x86_64/multiarch/memrchr-evex.S b/sysdeps/x86_64/multiarch/memrchr-evex.S + new file mode 100644 + index 0000000000..16bf8e02b1 + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/memrchr-evex.S + @@ -0,0 +1,337 @@ + +/* memrchr optimized with 256-bit EVEX instructions. + + Copyright (C) 2021 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#if IS_IN (libc) + + + +# include + + + +# define VMOVA vmovdqa64 + + + +# define YMMMATCH ymm16 + + + +# define VEC_SIZE 32 + + + + .section .text.evex,"ax",@progbits + +ENTRY (__memrchr_evex) + + /* Broadcast CHAR to YMMMATCH. */ + + vpbroadcastb %esi, %YMMMATCH + + + + sub $VEC_SIZE, %RDX_LP + + jbe L(last_vec_or_less) + + + + add %RDX_LP, %RDI_LP + + + + /* Check the last VEC_SIZE bytes. 
*/ + + vpcmpb $0, (%rdi), %YMMMATCH, %k1 + + kmovd %k1, %eax + + testl %eax, %eax + + jnz L(last_vec_x0) + + + + subq $(VEC_SIZE * 4), %rdi + + movl %edi, %ecx + + andl $(VEC_SIZE - 1), %ecx + + jz L(aligned_more) + + + + /* Align data for aligned loads in the loop. */ + + addq $VEC_SIZE, %rdi + + addq $VEC_SIZE, %rdx + + andq $-VEC_SIZE, %rdi + + subq %rcx, %rdx + + + + .p2align 4 + +L(aligned_more): + + subq $(VEC_SIZE * 4), %rdx + + jbe L(last_4x_vec_or_less) + + + + /* Check the last 4 * VEC_SIZE. Only one VEC_SIZE at a time + + since data is only aligned to VEC_SIZE. */ + + vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1 + + kmovd %k1, %eax + + testl %eax, %eax + + jnz L(last_vec_x3) + + + + vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k2 + + kmovd %k2, %eax + + testl %eax, %eax + + jnz L(last_vec_x2) + + + + vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k3 + + kmovd %k3, %eax + + testl %eax, %eax + + jnz L(last_vec_x1) + + + + vpcmpb $0, (%rdi), %YMMMATCH, %k4 + + kmovd %k4, %eax + + testl %eax, %eax + + jnz L(last_vec_x0) + + + + /* Align data to 4 * VEC_SIZE for loop with fewer branches. + + There are some overlaps with above if data isn't aligned + + to 4 * VEC_SIZE. */ + + movl %edi, %ecx + + andl $(VEC_SIZE * 4 - 1), %ecx + + jz L(loop_4x_vec) + + + + addq $(VEC_SIZE * 4), %rdi + + addq $(VEC_SIZE * 4), %rdx + + andq $-(VEC_SIZE * 4), %rdi + + subq %rcx, %rdx + + + + .p2align 4 + +L(loop_4x_vec): + + /* Compare 4 * VEC at a time forward. */ + + subq $(VEC_SIZE * 4), %rdi + + subq $(VEC_SIZE * 4), %rdx + + jbe L(last_4x_vec_or_less) + + + + vpcmpb $0, (%rdi), %YMMMATCH, %k1 + + vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k2 + + kord %k1, %k2, %k5 + + vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k3 + + vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k4 + + + + kord %k3, %k4, %k6 + + kortestd %k5, %k6 + + jz L(loop_4x_vec) + + + + /* There is a match. 
*/ + + kmovd %k4, %eax + + testl %eax, %eax + + jnz L(last_vec_x3) + + + + kmovd %k3, %eax + + testl %eax, %eax + + jnz L(last_vec_x2) + + + + kmovd %k2, %eax + + testl %eax, %eax + + jnz L(last_vec_x1) + + + + kmovd %k1, %eax + + bsrl %eax, %eax + + addq %rdi, %rax + + ret + + + + .p2align 4 + +L(last_4x_vec_or_less): + + addl $(VEC_SIZE * 4), %edx + + cmpl $(VEC_SIZE * 2), %edx + + jbe L(last_2x_vec) + + + + vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1 + + kmovd %k1, %eax + + testl %eax, %eax + + jnz L(last_vec_x3) + + + + vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k2 + + kmovd %k2, %eax + + testl %eax, %eax + + jnz L(last_vec_x2) + + + + vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k3 + + kmovd %k3, %eax + + testl %eax, %eax + + jnz L(last_vec_x1_check) + + cmpl $(VEC_SIZE * 3), %edx + + jbe L(zero) + + + + vpcmpb $0, (%rdi), %YMMMATCH, %k4 + + kmovd %k4, %eax + + testl %eax, %eax + + jz L(zero) + + bsrl %eax, %eax + + subq $(VEC_SIZE * 4), %rdx + + addq %rax, %rdx + + jl L(zero) + + addq %rdi, %rax + + ret + + + + .p2align 4 + +L(last_2x_vec): + + vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1 + + kmovd %k1, %eax + + testl %eax, %eax + + jnz L(last_vec_x3_check) + + cmpl $VEC_SIZE, %edx + + jbe L(zero) + + + + vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k1 + + kmovd %k1, %eax + + testl %eax, %eax + + jz L(zero) + + bsrl %eax, %eax + + subq $(VEC_SIZE * 2), %rdx + + addq %rax, %rdx + + jl L(zero) + + addl $(VEC_SIZE * 2), %eax + + addq %rdi, %rax + + ret + + + + .p2align 4 + +L(last_vec_x0): + + bsrl %eax, %eax + + addq %rdi, %rax + + ret + + + + .p2align 4 + +L(last_vec_x1): + + bsrl %eax, %eax + + addl $VEC_SIZE, %eax + + addq %rdi, %rax + + ret + + + + .p2align 4 + +L(last_vec_x2): + + bsrl %eax, %eax + + addl $(VEC_SIZE * 2), %eax + + addq %rdi, %rax + + ret + + + + .p2align 4 + +L(last_vec_x3): + + bsrl %eax, %eax + + addl $(VEC_SIZE * 3), %eax + + addq %rdi, %rax + + ret + + + + .p2align 4 + +L(last_vec_x1_check): + + bsrl %eax, %eax + + subq $(VEC_SIZE * 3), %rdx + + addq %rax, %rdx + + jl L(zero) + + addl $VEC_SIZE, %eax + + addq %rdi, %rax + + ret + + + + .p2align 4 + +L(last_vec_x3_check): + + bsrl %eax, %eax + + subq $VEC_SIZE, %rdx + + addq %rax, %rdx + + jl L(zero) + + addl $(VEC_SIZE * 3), %eax + + addq %rdi, %rax + + ret + + + + .p2align 4 + +L(zero): + + xorl %eax, %eax + + ret + + + + .p2align 4 + +L(last_vec_or_less_aligned): + + movl %edx, %ecx + + + + vpcmpb $0, (%rdi), %YMMMATCH, %k1 + + + + movl $1, %edx + + /* Support rdx << 32. */ + + salq %cl, %rdx + + subq $1, %rdx + + + + kmovd %k1, %eax + + + + /* Remove the trailing bytes. */ + + andl %edx, %eax + + testl %eax, %eax + + jz L(zero) + + + + bsrl %eax, %eax + + addq %rdi, %rax + + ret + + + + .p2align 4 + +L(last_vec_or_less): + + addl $VEC_SIZE, %edx + + + + /* Check for zero length. */ + + testl %edx, %edx + + jz L(zero) + + + + movl %edi, %ecx + + andl $(VEC_SIZE - 1), %ecx + + jz L(last_vec_or_less_aligned) + + + + movl %ecx, %esi + + movl %ecx, %r8d + + addl %edx, %esi + + andq $-VEC_SIZE, %rdi + + + + subl $VEC_SIZE, %esi + + ja L(last_vec_2x_aligned) + + + + /* Check the last VEC. */ + + vpcmpb $0, (%rdi), %YMMMATCH, %k1 + + kmovd %k1, %eax + + + + /* Remove the leading and trailing bytes. 
*/ + + sarl %cl, %eax + + movl %edx, %ecx + + + + movl $1, %edx + + sall %cl, %edx + + subl $1, %edx + + + + andl %edx, %eax + + testl %eax, %eax + + jz L(zero) + + + + bsrl %eax, %eax + + addq %rdi, %rax + + addq %r8, %rax + + ret + + + + .p2align 4 + +L(last_vec_2x_aligned): + + movl %esi, %ecx + + + + /* Check the last VEC. */ + + vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k1 + + + + movl $1, %edx + + sall %cl, %edx + + subl $1, %edx + + + + kmovd %k1, %eax + + + + /* Remove the trailing bytes. */ + + andl %edx, %eax + + + + testl %eax, %eax + + jnz L(last_vec_x1) + + + + /* Check the second last VEC. */ + + vpcmpb $0, (%rdi), %YMMMATCH, %k1 + + + + movl %r8d, %ecx + + + + kmovd %k1, %eax + + + + /* Remove the leading bytes. Must use unsigned right shift for + + bsrl below. */ + + shrl %cl, %eax + + testl %eax, %eax + + jz L(zero) + + + + bsrl %eax, %eax + + addq %rdi, %rax + + addq %r8, %rax + + ret + +END (__memrchr_evex) + +#endif + diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S + new file mode 100644 + index 0000000000..8ac3e479bb + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S + @@ -0,0 +1,10 @@ + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + + + +#define VZEROUPPER_RETURN jmp L(return) + + + +#define SECTION(p) p##.avx.rtm + +#define MEMSET_SYMBOL(p,s) p##_avx2_##s##_rtm + +#define WMEMSET_SYMBOL(p,s) p##_avx2_##s##_rtm + + + +#include "memset-avx2-unaligned-erms.S" + diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S + index 7ab3d89849..ae0860f36a 100644 + --- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S + +++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S + @@ -14,9 +14,15 @@ + movq r, %rax; \ + vpbroadcastd %xmm0, %ymm0 + + -# define SECTION(p) p##.avx + -# define MEMSET_SYMBOL(p,s) p##_avx2_##s + -# define WMEMSET_SYMBOL(p,s) p##_avx2_##s + +# ifndef SECTION + +# define SECTION(p) p##.avx + +# endif + +# ifndef MEMSET_SYMBOL + +# define MEMSET_SYMBOL(p,s) p##_avx2_##s + +# endif + +# ifndef WMEMSET_SYMBOL + +# define WMEMSET_SYMBOL(p,s) p##_avx2_##s + +# endif + + # include "memset-vec-unaligned-erms.S" + #endif + diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S + index 0783979ca5..22e7b187c8 100644 + --- a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S + +++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S + @@ -1,22 +1,22 @@ + #if IS_IN (libc) + # define VEC_SIZE 64 + -# define VEC(i) zmm##i + +# define XMM0 xmm16 + +# define YMM0 ymm16 + +# define VEC0 zmm16 + +# define VEC(i) VEC##i + # define VMOVU vmovdqu64 + # define VMOVA vmovdqa64 + +# define VZEROUPPER + + # define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ + - vmovd d, %xmm0; \ + movq r, %rax; \ + - vpbroadcastb %xmm0, %xmm0; \ + - vpbroadcastq %xmm0, %zmm0 + + vpbroadcastb d, %VEC0 + + # define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ + - vmovd d, %xmm0; \ + movq r, %rax; \ + - vpbroadcastd %xmm0, %xmm0; \ + - vpbroadcastq %xmm0, %zmm0 + + vpbroadcastd d, %VEC0 + + -# define SECTION(p) p##.avx512 + +# define SECTION(p) p##.evex512 + # define MEMSET_SYMBOL(p,s) p##_avx512_##s + # define WMEMSET_SYMBOL(p,s) p##_avx512_##s + + diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S + new file mode 100644 + index 
0000000000..ae0a4d6e46 + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S + @@ -0,0 +1,24 @@ + +#if IS_IN (libc) + +# define VEC_SIZE 32 + +# define XMM0 xmm16 + +# define YMM0 ymm16 + +# define VEC0 ymm16 + +# define VEC(i) VEC##i + +# define VMOVU vmovdqu64 + +# define VMOVA vmovdqa64 + +# define VZEROUPPER + + + +# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ + + movq r, %rax; \ + + vpbroadcastb d, %VEC0 + + + +# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \ + + movq r, %rax; \ + + vpbroadcastd d, %VEC0 + + + +# define SECTION(p) p##.evex + +# define MEMSET_SYMBOL(p,s) p##_evex_##s + +# define WMEMSET_SYMBOL(p,s) p##_evex_##s + + + +# include "memset-vec-unaligned-erms.S" + +#endif + diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S + index af2299709c..16bed6ec11 100644 + --- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S + +++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S + @@ -34,20 +34,25 @@ + # define WMEMSET_CHK_SYMBOL(p,s) WMEMSET_SYMBOL(p, s) + #endif + + +#ifndef XMM0 + +# define XMM0 xmm0 + +#endif + + + +#ifndef YMM0 + +# define YMM0 ymm0 + +#endif + + + #ifndef VZEROUPPER + # if VEC_SIZE > 16 + # define VZEROUPPER vzeroupper + +# define VZEROUPPER_SHORT_RETURN vzeroupper; ret + # else + # define VZEROUPPER + # endif + #endif + + #ifndef VZEROUPPER_SHORT_RETURN + -# if VEC_SIZE > 16 + -# define VZEROUPPER_SHORT_RETURN vzeroupper + -# else + -# define VZEROUPPER_SHORT_RETURN rep + -# endif + +# define VZEROUPPER_SHORT_RETURN rep; ret + #endif + + #ifndef MOVQ + @@ -77,7 +82,7 @@ + ENTRY (__bzero) + mov %RDI_LP, %RAX_LP /* Set return value. */ + mov %RSI_LP, %RDX_LP /* Set n. */ + - pxor %xmm0, %xmm0 + + pxor %XMM0, %XMM0 + jmp L(entry_from_bzero) + END (__bzero) + weak_alias (__bzero, bzero) + @@ -119,8 +124,7 @@ L(entry_from_bzero): + /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */ + VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx) + VMOVU %VEC(0), (%rdi) + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + #if defined USE_MULTIARCH && IS_IN (libc) + END (MEMSET_SYMBOL (__memset, unaligned)) + + @@ -143,14 +147,12 @@ ENTRY (__memset_erms) + ENTRY (MEMSET_SYMBOL (__memset, erms)) + # endif + L(stosb): + - /* Issue vzeroupper before rep stosb. */ + - VZEROUPPER + mov %RDX_LP, %RCX_LP + movzbl %sil, %eax + mov %RDI_LP, %RDX_LP + rep stosb + mov %RDX_LP, %RAX_LP + - ret + + VZEROUPPER_RETURN + # if VEC_SIZE == 16 + END (__memset_erms) + # else + @@ -177,8 +179,7 @@ ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms)) + /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */ + VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx) + VMOVU %VEC(0), (%rdi) + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + L(stosb_more_2x_vec): + cmpq $REP_STOSB_THRESHOLD, %rdx + @@ -192,8 +193,11 @@ L(more_2x_vec): + VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx) + VMOVU %VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx) + L(return): + - VZEROUPPER + +#if VEC_SIZE > 16 + + ZERO_UPPER_VEC_REGISTERS_RETURN + +#else + ret + +#endif + + L(loop_start): + leaq (VEC_SIZE * 4)(%rdi), %rcx + @@ -219,7 +223,6 @@ L(loop): + cmpq %rcx, %rdx + jne L(loop) + VZEROUPPER_SHORT_RETURN + - ret + L(less_vec): + /* Less than 1 VEC. 
*/ + # if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64 + @@ -233,7 +236,7 @@ L(less_vec): + cmpb $16, %dl + jae L(between_16_31) + # endif + - MOVQ %xmm0, %rcx + + MOVQ %XMM0, %rcx + cmpb $8, %dl + jae L(between_8_15) + cmpb $4, %dl + @@ -243,40 +246,34 @@ L(less_vec): + jb 1f + movb %cl, (%rdi) + 1: + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + # if VEC_SIZE > 32 + /* From 32 to 63. No branch when size == 32. */ + L(between_32_63): + - vmovdqu %ymm0, -32(%rdi,%rdx) + - vmovdqu %ymm0, (%rdi) + - VZEROUPPER + - ret + + VMOVU %YMM0, -32(%rdi,%rdx) + + VMOVU %YMM0, (%rdi) + + VZEROUPPER_RETURN + # endif + # if VEC_SIZE > 16 + /* From 16 to 31. No branch when size == 16. */ + L(between_16_31): + - vmovdqu %xmm0, -16(%rdi,%rdx) + - vmovdqu %xmm0, (%rdi) + - VZEROUPPER + - ret + + VMOVU %XMM0, -16(%rdi,%rdx) + + VMOVU %XMM0, (%rdi) + + VZEROUPPER_RETURN + # endif + /* From 8 to 15. No branch when size == 8. */ + L(between_8_15): + movq %rcx, -8(%rdi,%rdx) + movq %rcx, (%rdi) + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + L(between_4_7): + /* From 4 to 7. No branch when size == 4. */ + movl %ecx, -4(%rdi,%rdx) + movl %ecx, (%rdi) + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + L(between_2_3): + /* From 2 to 3. No branch when size == 2. */ + movw %cx, -2(%rdi,%rdx) + movw %cx, (%rdi) + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + END (MEMSET_SYMBOL (__memset, unaligned_erms)) + diff --git a/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S + new file mode 100644 + index 0000000000..acc5f6e2fb + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S + @@ -0,0 +1,4 @@ + +#define MEMCHR __rawmemchr_avx2_rtm + +#define USE_AS_RAWMEMCHR 1 + + + +#include "memchr-avx2-rtm.S" + diff --git a/sysdeps/x86_64/multiarch/rawmemchr-evex.S b/sysdeps/x86_64/multiarch/rawmemchr-evex.S + new file mode 100644 + index 0000000000..ec942b77ba + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/rawmemchr-evex.S + @@ -0,0 +1,4 @@ + +#define MEMCHR __rawmemchr_evex + +#define USE_AS_RAWMEMCHR 1 + + + +#include "memchr-evex.S" + diff --git a/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S + new file mode 100644 + index 0000000000..2b9c07a59f + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S + @@ -0,0 +1,3 @@ + +#define USE_AS_STPCPY + +#define STRCPY __stpcpy_avx2_rtm + +#include "strcpy-avx2-rtm.S" + diff --git a/sysdeps/x86_64/multiarch/stpcpy-evex.S b/sysdeps/x86_64/multiarch/stpcpy-evex.S + new file mode 100644 + index 0000000000..7c6f26cd98 + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/stpcpy-evex.S + @@ -0,0 +1,3 @@ + +#define USE_AS_STPCPY + +#define STRCPY __stpcpy_evex + +#include "strcpy-evex.S" + diff --git a/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S + new file mode 100644 + index 0000000000..60a2ccfe53 + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S + @@ -0,0 +1,4 @@ + +#define USE_AS_STPCPY + +#define USE_AS_STRNCPY + +#define STRCPY __stpncpy_avx2_rtm + +#include "strcpy-avx2-rtm.S" + diff --git a/sysdeps/x86_64/multiarch/stpncpy-evex.S b/sysdeps/x86_64/multiarch/stpncpy-evex.S + new file mode 100644 + index 0000000000..1570014d1c + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/stpncpy-evex.S + @@ -0,0 +1,4 @@ + +#define USE_AS_STPCPY + +#define USE_AS_STRNCPY + +#define STRCPY __stpncpy_evex + +#include "strcpy-evex.S" + diff --git a/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S + 
new file mode 100644 + index 0000000000..637fb557c4 + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S + @@ -0,0 +1,12 @@ + +#ifndef STRCAT + +# define STRCAT __strcat_avx2_rtm + +#endif + + + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + + + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + + + +#define SECTION(p) p##.avx.rtm + + + +#include "strcat-avx2.S" + diff --git a/sysdeps/x86_64/multiarch/strcat-avx2.S b/sysdeps/x86_64/multiarch/strcat-avx2.S + index a4143bf8f5..1e6d4827ee 100644 + --- a/sysdeps/x86_64/multiarch/strcat-avx2.S + +++ b/sysdeps/x86_64/multiarch/strcat-avx2.S + @@ -30,7 +30,11 @@ + /* Number of bytes in a vector register */ + # define VEC_SIZE 32 + + - .section .text.avx,"ax",@progbits + +# ifndef SECTION + +# define SECTION(p) p##.avx + +# endif + + + + .section SECTION(.text),"ax",@progbits + ENTRY (STRCAT) + mov %rdi, %r9 + # ifdef USE_AS_STRNCAT + diff --git a/sysdeps/x86_64/multiarch/strcat-evex.S b/sysdeps/x86_64/multiarch/strcat-evex.S + new file mode 100644 + index 0000000000..97c3d85b6d + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/strcat-evex.S + @@ -0,0 +1,283 @@ + +/* strcat with 256-bit EVEX instructions. + + Copyright (C) 2021 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . 
*/ + + + +#if IS_IN (libc) + + + +# include + + + +# ifndef STRCAT + +# define STRCAT __strcat_evex + +# endif + + + +# define VMOVU vmovdqu64 + +# define VMOVA vmovdqa64 + + + +/* zero register */ + +# define XMMZERO xmm16 + +# define YMMZERO ymm16 + +# define YMM0 ymm17 + +# define YMM1 ymm18 + + + +# define USE_AS_STRCAT + + + +/* Number of bytes in a vector register */ + +# define VEC_SIZE 32 + + + + .section .text.evex,"ax",@progbits + +ENTRY (STRCAT) + + mov %rdi, %r9 + +# ifdef USE_AS_STRNCAT + + mov %rdx, %r8 + +# endif + + + + xor %eax, %eax + + mov %edi, %ecx + + and $((VEC_SIZE * 4) - 1), %ecx + + vpxorq %XMMZERO, %XMMZERO, %XMMZERO + + cmp $(VEC_SIZE * 3), %ecx + + ja L(fourth_vector_boundary) + + vpcmpb $0, (%rdi), %YMMZERO, %k0 + + kmovd %k0, %edx + + test %edx, %edx + + jnz L(exit_null_on_first_vector) + + mov %rdi, %rax + + and $-VEC_SIZE, %rax + + jmp L(align_vec_size_start) + +L(fourth_vector_boundary): + + mov %rdi, %rax + + and $-VEC_SIZE, %rax + + vpcmpb $0, (%rax), %YMMZERO, %k0 + + mov $-1, %r10d + + sub %rax, %rcx + + shl %cl, %r10d + + kmovd %k0, %edx + + and %r10d, %edx + + jnz L(exit) + + + +L(align_vec_size_start): + + vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0 + + kmovd %k0, %edx + + test %edx, %edx + + jnz L(exit_null_on_second_vector) + + + + vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1 + + kmovd %k1, %edx + + test %edx, %edx + + jnz L(exit_null_on_third_vector) + + + + vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2 + + kmovd %k2, %edx + + test %edx, %edx + + jnz L(exit_null_on_fourth_vector) + + + + vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3 + + kmovd %k3, %edx + + test %edx, %edx + + jnz L(exit_null_on_fifth_vector) + + + + vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4 + + add $(VEC_SIZE * 4), %rax + + kmovd %k4, %edx + + test %edx, %edx + + jnz L(exit_null_on_second_vector) + + + + vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1 + + kmovd %k1, %edx + + test %edx, %edx + + jnz L(exit_null_on_third_vector) + + + + vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2 + + kmovd %k2, %edx + + test %edx, %edx + + jnz L(exit_null_on_fourth_vector) + + + + vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3 + + kmovd %k3, %edx + + test %edx, %edx + + jnz L(exit_null_on_fifth_vector) + + + + vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4 + + kmovd %k4, %edx + + add $(VEC_SIZE * 4), %rax + + test %edx, %edx + + jnz L(exit_null_on_second_vector) + + + + vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1 + + kmovd %k1, %edx + + test %edx, %edx + + jnz L(exit_null_on_third_vector) + + + + vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2 + + kmovd %k2, %edx + + test %edx, %edx + + jnz L(exit_null_on_fourth_vector) + + + + vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3 + + kmovd %k3, %edx + + test %edx, %edx + + jnz L(exit_null_on_fifth_vector) + + + + vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4 + + add $(VEC_SIZE * 4), %rax + + kmovd %k4, %edx + + test %edx, %edx + + jnz L(exit_null_on_second_vector) + + + + vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1 + + kmovd %k1, %edx + + test %edx, %edx + + jnz L(exit_null_on_third_vector) + + + + vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2 + + kmovd %k2, %edx + + test %edx, %edx + + jnz L(exit_null_on_fourth_vector) + + + + vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3 + + kmovd %k3, %edx + + test %edx, %edx + + jnz L(exit_null_on_fifth_vector) + + + + test $((VEC_SIZE * 4) - 1), %rax + + jz L(align_four_vec_loop) + + + + vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4 + + add $(VEC_SIZE * 5), %rax + + kmovd %k4, %edx + + test %edx, 
%edx + + jnz L(exit) + + + + test $((VEC_SIZE * 4) - 1), %rax + + jz L(align_four_vec_loop) + + + + vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0 + + add $VEC_SIZE, %rax + + kmovd %k0, %edx + + test %edx, %edx + + jnz L(exit) + + + + test $((VEC_SIZE * 4) - 1), %rax + + jz L(align_four_vec_loop) + + + + vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0 + + add $VEC_SIZE, %rax + + kmovd %k0, %edx + + test %edx, %edx + + jnz L(exit) + + + + test $((VEC_SIZE * 4) - 1), %rax + + jz L(align_four_vec_loop) + + + + vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k1 + + add $VEC_SIZE, %rax + + kmovd %k1, %edx + + test %edx, %edx + + jnz L(exit) + + + + add $VEC_SIZE, %rax + + + + .p2align 4 + +L(align_four_vec_loop): + + VMOVA (%rax), %YMM0 + + VMOVA (VEC_SIZE * 2)(%rax), %YMM1 + + vpminub VEC_SIZE(%rax), %YMM0, %YMM0 + + vpminub (VEC_SIZE * 3)(%rax), %YMM1, %YMM1 + + vpminub %YMM0, %YMM1, %YMM0 + + /* If K0 != 0, there is a null byte. */ + + vpcmpb $0, %YMM0, %YMMZERO, %k0 + + add $(VEC_SIZE * 4), %rax + + ktestd %k0, %k0 + + jz L(align_four_vec_loop) + + + + vpcmpb $0, -(VEC_SIZE * 4)(%rax), %YMMZERO, %k0 + + sub $(VEC_SIZE * 5), %rax + + kmovd %k0, %edx + + test %edx, %edx + + jnz L(exit_null_on_second_vector) + + + + vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1 + + kmovd %k1, %edx + + test %edx, %edx + + jnz L(exit_null_on_third_vector) + + + + vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2 + + kmovd %k2, %edx + + test %edx, %edx + + jnz L(exit_null_on_fourth_vector) + + + + vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3 + + kmovd %k3, %edx + + sub %rdi, %rax + + bsf %rdx, %rdx + + add %rdx, %rax + + add $(VEC_SIZE * 4), %rax + + jmp L(StartStrcpyPart) + + + + .p2align 4 + +L(exit): + + sub %rdi, %rax + +L(exit_null_on_first_vector): + + bsf %rdx, %rdx + + add %rdx, %rax + + jmp L(StartStrcpyPart) + + + + .p2align 4 + +L(exit_null_on_second_vector): + + sub %rdi, %rax + + bsf %rdx, %rdx + + add %rdx, %rax + + add $VEC_SIZE, %rax + + jmp L(StartStrcpyPart) + + + + .p2align 4 + +L(exit_null_on_third_vector): + + sub %rdi, %rax + + bsf %rdx, %rdx + + add %rdx, %rax + + add $(VEC_SIZE * 2), %rax + + jmp L(StartStrcpyPart) + + + + .p2align 4 + +L(exit_null_on_fourth_vector): + + sub %rdi, %rax + + bsf %rdx, %rdx + + add %rdx, %rax + + add $(VEC_SIZE * 3), %rax + + jmp L(StartStrcpyPart) + + + + .p2align 4 + +L(exit_null_on_fifth_vector): + + sub %rdi, %rax + + bsf %rdx, %rdx + + add %rdx, %rax + + add $(VEC_SIZE * 4), %rax + + + + .p2align 4 + +L(StartStrcpyPart): + + lea (%r9, %rax), %rdi + + mov %rsi, %rcx + + mov %r9, %rax /* save result */ + + + +# ifdef USE_AS_STRNCAT + + test %r8, %r8 + + jz L(ExitZero) + +# define USE_AS_STRNCPY + +# endif + + + +# include "strcpy-evex.S" + +#endif + diff --git a/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S + new file mode 100644 + index 0000000000..81f20d1d8e + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S + @@ -0,0 +1,12 @@ + +#ifndef STRCHR + +# define STRCHR __strchr_avx2_rtm + +#endif + + + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + + + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + + + +#define SECTION(p) p##.avx.rtm + + + +#include "strchr-avx2.S" + diff --git a/sysdeps/x86_64/multiarch/strchr-avx2.S b/sysdeps/x86_64/multiarch/strchr-avx2.S + index 39fc69da7b..0a5217514a 100644 + --- a/sysdeps/x86_64/multiarch/strchr-avx2.S + +++ b/sysdeps/x86_64/multiarch/strchr-avx2.S + @@ -38,9 +38,13 @@ + # define VZEROUPPER vzeroupper + # endif + + +# ifndef SECTION + +# 
define SECTION(p) p##.avx + +# endif + + + # define VEC_SIZE 32 + + - .section .text.avx,"ax",@progbits + + .section SECTION(.text),"ax",@progbits + ENTRY (STRCHR) + movl %edi, %ecx + /* Broadcast CHAR to YMM0. */ + @@ -93,8 +97,8 @@ L(cros_page_boundary): + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax + # endif + - VZEROUPPER + - ret + +L(return_vzeroupper): + + ZERO_UPPER_VEC_REGISTERS_RETURN + + .p2align 4 + L(aligned_more): + @@ -190,8 +194,7 @@ L(first_vec_x0): + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax + # endif + - VZEROUPPER + - ret + + VZEROUPPER_RETURN - #define internal_syscall1(v0_init, input, number, err, arg1) \ - ({ \ - - long _sys_result; \ - + long int _sys_result; \ - \ - { \ - - register long long __s0 asm ("$16") __attribute__ ((unused)) \ - + long long int _arg1 = ARGIFY (arg1); \ - + register long long int __s0 asm ("$16") __attribute__ ((unused))\ - = (number); \ - - register long long __v0 asm ("$2"); \ - - register long long __a0 asm ("$4") = ARGIFY (arg1); \ - - register long long __a3 asm ("$7"); \ - + register long long int __v0 asm ("$2"); \ - + register long long int __a0 asm ("$4") = _arg1; \ - + register long long int __a3 asm ("$7"); \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - v0_init \ - @@ -159,15 +160,17 @@ + .p2align 4 + L(first_vec_x1): + @@ -205,8 +208,7 @@ L(first_vec_x1): + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax + # endif + - VZEROUPPER + - ret + + VZEROUPPER_RETURN - #define internal_syscall2(v0_init, input, number, err, arg1, arg2) \ - ({ \ - - long _sys_result; \ - + long int _sys_result; \ - \ - { \ - - register long long __s0 asm ("$16") __attribute__ ((unused)) \ - + long long int _arg1 = ARGIFY (arg1); \ - + long long int _arg2 = ARGIFY (arg2); \ - + register long long int __s0 asm ("$16") __attribute__ ((unused))\ - = (number); \ - - register long long __v0 asm ("$2"); \ - - register long long __a0 asm ("$4") = ARGIFY (arg1); \ - - register long long __a1 asm ("$5") = ARGIFY (arg2); \ - - register long long __a3 asm ("$7"); \ - + register long long int __v0 asm ("$2"); \ - + register long long int __a0 asm ("$4") = _arg1; \ - + register long long int __a1 asm ("$5") = _arg2; \ - + register long long int __a3 asm ("$7"); \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - v0_init \ - @@ -185,16 +188,19 @@ - #define internal_syscall3(v0_init, input, number, err, \ - arg1, arg2, arg3) \ - ({ \ - - long _sys_result; \ - + long int _sys_result; \ - \ - { \ - - register long long __s0 asm ("$16") __attribute__ ((unused)) \ - + long long int _arg1 = ARGIFY (arg1); \ - + long long int _arg2 = ARGIFY (arg2); \ - + long long int _arg3 = ARGIFY (arg3); \ - + register long long int __s0 asm ("$16") __attribute__ ((unused))\ - = (number); \ - - register long long __v0 asm ("$2"); \ - - register long long __a0 asm ("$4") = ARGIFY (arg1); \ - - register long long __a1 asm ("$5") = ARGIFY (arg2); \ - - register long long __a2 asm ("$6") = ARGIFY (arg3); \ - - register long long __a3 asm ("$7"); \ - + register long long int __v0 asm ("$2"); \ - + register long long int __a0 asm ("$4") = _arg1; \ - + register long long int __a1 asm ("$5") = _arg2; \ - + register long long int __a2 asm ("$6") = _arg3; \ - + register long long int __a3 asm ("$7"); \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - v0_init \ - @@ -212,16 +218,20 @@ - #define internal_syscall4(v0_init, input, number, err, \ - arg1, arg2, arg3, arg4) \ - ({ \ - - long _sys_result; \ - + long int _sys_result; \ - \ - { \ - - register long long __s0 asm ("$16") __attribute__ ((unused)) 
\ - + long long int _arg1 = ARGIFY (arg1); \ - + long long int _arg2 = ARGIFY (arg2); \ - + long long int _arg3 = ARGIFY (arg3); \ - + long long int _arg4 = ARGIFY (arg4); \ - + register long long int __s0 asm ("$16") __attribute__ ((unused))\ - = (number); \ - - register long long __v0 asm ("$2"); \ - - register long long __a0 asm ("$4") = ARGIFY (arg1); \ - - register long long __a1 asm ("$5") = ARGIFY (arg2); \ - - register long long __a2 asm ("$6") = ARGIFY (arg3); \ - - register long long __a3 asm ("$7") = ARGIFY (arg4); \ - + register long long int __v0 asm ("$2"); \ - + register long long int __a0 asm ("$4") = _arg1; \ - + register long long int __a1 asm ("$5") = _arg2; \ - + register long long int __a2 asm ("$6") = _arg3; \ - + register long long int __a3 asm ("$7") = _arg4; \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - v0_init \ - @@ -239,17 +249,22 @@ - #define internal_syscall5(v0_init, input, number, err, \ - arg1, arg2, arg3, arg4, arg5) \ - ({ \ - - long _sys_result; \ - + long int _sys_result; \ - \ - { \ - - register long long __s0 asm ("$16") __attribute__ ((unused)) \ - + long long int _arg1 = ARGIFY (arg1); \ - + long long int _arg2 = ARGIFY (arg2); \ - + long long int _arg3 = ARGIFY (arg3); \ - + long long int _arg4 = ARGIFY (arg4); \ - + long long int _arg5 = ARGIFY (arg5); \ - + register long long int __s0 asm ("$16") __attribute__ ((unused))\ - = (number); \ - - register long long __v0 asm ("$2"); \ - - register long long __a0 asm ("$4") = ARGIFY (arg1); \ - - register long long __a1 asm ("$5") = ARGIFY (arg2); \ - - register long long __a2 asm ("$6") = ARGIFY (arg3); \ - - register long long __a3 asm ("$7") = ARGIFY (arg4); \ - - register long long __a4 asm ("$8") = ARGIFY (arg5); \ - + register long long int __v0 asm ("$2"); \ - + register long long int __a0 asm ("$4") = _arg1; \ - + register long long int __a1 asm ("$5") = _arg2; \ - + register long long int __a2 asm ("$6") = _arg3; \ - + register long long int __a3 asm ("$7") = _arg4; \ - + register long long int __a4 asm ("$8") = _arg5; \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - v0_init \ - @@ -267,18 +282,24 @@ - #define internal_syscall6(v0_init, input, number, err, \ - arg1, arg2, arg3, arg4, arg5, arg6) \ - ({ \ - - long _sys_result; \ - + long int _sys_result; \ - \ - { \ - - register long long __s0 asm ("$16") __attribute__ ((unused)) \ - + long long int _arg1 = ARGIFY (arg1); \ - + long long int _arg2 = ARGIFY (arg2); \ - + long long int _arg3 = ARGIFY (arg3); \ - + long long int _arg4 = ARGIFY (arg4); \ - + long long int _arg5 = ARGIFY (arg5); \ - + long long int _arg6 = ARGIFY (arg6); \ - + register long long int __s0 asm ("$16") __attribute__ ((unused))\ - = (number); \ - - register long long __v0 asm ("$2"); \ - - register long long __a0 asm ("$4") = ARGIFY (arg1); \ - - register long long __a1 asm ("$5") = ARGIFY (arg2); \ - - register long long __a2 asm ("$6") = ARGIFY (arg3); \ - - register long long __a3 asm ("$7") = ARGIFY (arg4); \ - - register long long __a4 asm ("$8") = ARGIFY (arg5); \ - - register long long __a5 asm ("$9") = ARGIFY (arg6); \ - + register long long int __v0 asm ("$2"); \ - + register long long int __a0 asm ("$4") = _arg1; \ - + register long long int __a1 asm ("$5") = _arg2; \ - + register long long int __a2 asm ("$6") = _arg3; \ - + register long long int __a3 asm ("$7") = _arg4; \ - + register long long int __a4 asm ("$8") = _arg5; \ - + register long long int __a5 asm ("$9") = _arg6; \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - v0_init \ - diff 
--git a/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h b/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h - index 9d30291f84..3e1f1cc3c5 100644 - --- a/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h - +++ b/sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h - @@ -50,7 +50,7 @@ - #undef INLINE_SYSCALL - #define INLINE_SYSCALL(name, nr, args...) \ - ({ INTERNAL_SYSCALL_DECL (_sc_err); \ - - long result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ - + long int result_var = INTERNAL_SYSCALL (name, _sc_err, nr, args); \ - if ( INTERNAL_SYSCALL_ERROR_P (result_var, _sc_err) ) \ - { \ - __set_errno (INTERNAL_SYSCALL_ERRNO (result_var, _sc_err)); \ - @@ -59,10 +59,10 @@ - result_var; }) + .p2align 4 + L(first_vec_x2): + @@ -220,8 +222,7 @@ L(first_vec_x2): + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax + # endif + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + L(4x_vec_end): + @@ -247,8 +248,7 @@ L(first_vec_x3): + cmp (%rax), %CHAR_REG + cmovne %rdx, %rax + # endif + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + END (STRCHR) + #endif + diff --git a/sysdeps/x86_64/multiarch/strchr-evex.S b/sysdeps/x86_64/multiarch/strchr-evex.S + new file mode 100644 + index 0000000000..ddc86a7058 + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/strchr-evex.S + @@ -0,0 +1,335 @@ + +/* strchr/strchrnul optimized with 256-bit EVEX instructions. + + Copyright (C) 2021 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + + + + The GNU C Library is free software; you can redistribute it and/or + + modify it under the terms of the GNU Lesser General Public + + License as published by the Free Software Foundation; either + + version 2.1 of the License, or (at your option) any later version. + + + + The GNU C Library is distributed in the hope that it will be useful, + + but WITHOUT ANY WARRANTY; without even the implied warranty of + + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + + Lesser General Public License for more details. + + + + You should have received a copy of the GNU Lesser General Public + + License along with the GNU C Library; if not, see + + . */ + + + +#if IS_IN (libc) + + + +# include + + + +# ifndef STRCHR + +# define STRCHR __strchr_evex + +# endif + + + +# define VMOVU vmovdqu64 + +# define VMOVA vmovdqa64 + + + +# ifdef USE_AS_WCSCHR + +# define VPBROADCAST vpbroadcastd + +# define VPCMP vpcmpd + +# define VPMINU vpminud + +# define CHAR_REG esi + +# define SHIFT_REG r8d + +# else + +# define VPBROADCAST vpbroadcastb + +# define VPCMP vpcmpb + +# define VPMINU vpminub + +# define CHAR_REG sil + +# define SHIFT_REG ecx + +# endif + + + +# define XMMZERO xmm16 + + + +# define YMMZERO ymm16 + +# define YMM0 ymm17 + +# define YMM1 ymm18 + +# define YMM2 ymm19 + +# define YMM3 ymm20 + +# define YMM4 ymm21 + +# define YMM5 ymm22 + +# define YMM6 ymm23 + +# define YMM7 ymm24 + +# define YMM8 ymm25 + + + +# define VEC_SIZE 32 + +# define PAGE_SIZE 4096 + + + + .section .text.evex,"ax",@progbits + +ENTRY (STRCHR) + + movl %edi, %ecx + +# ifndef USE_AS_STRCHRNUL + + xorl %edx, %edx + +# endif + + + + /* Broadcast CHAR to YMM0. */ + + VPBROADCAST %esi, %YMM0 + + + + vpxorq %XMMZERO, %XMMZERO, %XMMZERO + + + + /* Check if we cross page boundary with one vector load. */ + + andl $(PAGE_SIZE - 1), %ecx + + cmpl $(PAGE_SIZE - VEC_SIZE), %ecx + + ja L(cross_page_boundary) + + + + /* Check the first VEC_SIZE bytes. Search for both CHAR and the + + null bytes. */ + + VMOVU (%rdi), %YMM1 + + + + /* Leaves only CHARS matching esi as 0. 
*/ + + vpxorq %YMM1, %YMM0, %YMM2 + + VPMINU %YMM2, %YMM1, %YMM2 + + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + + VPCMP $0, %YMMZERO, %YMM2, %k0 + + ktestd %k0, %k0 + + jz L(more_vecs) + + kmovd %k0, %eax + + tzcntl %eax, %eax + + /* Found CHAR or the null byte. */ + +# ifdef USE_AS_WCSCHR + + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + + leaq (%rdi, %rax, 4), %rax + +# else + + addq %rdi, %rax + +# endif + +# ifndef USE_AS_STRCHRNUL + + cmp (%rax), %CHAR_REG + + cmovne %rdx, %rax + +# endif + + ret + + + + .p2align 4 + +L(more_vecs): + + /* Align data for aligned loads in the loop. */ + + andq $-VEC_SIZE, %rdi + +L(aligned_more): + + + + /* Check the next 4 * VEC_SIZE. Only one VEC_SIZE at a time + + since data is only aligned to VEC_SIZE. */ + + VMOVA VEC_SIZE(%rdi), %YMM1 + + addq $VEC_SIZE, %rdi + + + + /* Leaves only CHARS matching esi as 0. */ + + vpxorq %YMM1, %YMM0, %YMM2 + + VPMINU %YMM2, %YMM1, %YMM2 + + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + + VPCMP $0, %YMMZERO, %YMM2, %k0 + + kmovd %k0, %eax + + testl %eax, %eax + + jnz L(first_vec_x0) + + + + VMOVA VEC_SIZE(%rdi), %YMM1 + + /* Leaves only CHARS matching esi as 0. */ + + vpxorq %YMM1, %YMM0, %YMM2 + + VPMINU %YMM2, %YMM1, %YMM2 + + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + + VPCMP $0, %YMMZERO, %YMM2, %k0 + + kmovd %k0, %eax + + testl %eax, %eax + + jnz L(first_vec_x1) + + + + VMOVA (VEC_SIZE * 2)(%rdi), %YMM1 + + /* Leaves only CHARS matching esi as 0. */ + + vpxorq %YMM1, %YMM0, %YMM2 + + VPMINU %YMM2, %YMM1, %YMM2 + + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + + VPCMP $0, %YMMZERO, %YMM2, %k0 + + kmovd %k0, %eax + + testl %eax, %eax + + jnz L(first_vec_x2) + + + + VMOVA (VEC_SIZE * 3)(%rdi), %YMM1 + + /* Leaves only CHARS matching esi as 0. */ + + vpxorq %YMM1, %YMM0, %YMM2 + + VPMINU %YMM2, %YMM1, %YMM2 + + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + + VPCMP $0, %YMMZERO, %YMM2, %k0 + + ktestd %k0, %k0 + + jz L(prep_loop_4x) + + + + kmovd %k0, %eax + + tzcntl %eax, %eax + + /* Found CHAR or the null byte. */ + +# ifdef USE_AS_WCSCHR + + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + + leaq (VEC_SIZE * 3)(%rdi, %rax, 4), %rax + +# else + + leaq (VEC_SIZE * 3)(%rdi, %rax), %rax + +# endif + +# ifndef USE_AS_STRCHRNUL + + cmp (%rax), %CHAR_REG + + cmovne %rdx, %rax + +# endif + + ret + + + + .p2align 4 + +L(first_vec_x0): + + tzcntl %eax, %eax + + /* Found CHAR or the null byte. */ + +# ifdef USE_AS_WCSCHR + + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + + leaq (%rdi, %rax, 4), %rax + +# else + + addq %rdi, %rax + +# endif + +# ifndef USE_AS_STRCHRNUL + + cmp (%rax), %CHAR_REG + + cmovne %rdx, %rax + +# endif + + ret + + + + .p2align 4 + +L(first_vec_x1): + + tzcntl %eax, %eax + + /* Found CHAR or the null byte. */ + +# ifdef USE_AS_WCSCHR + + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + + leaq VEC_SIZE(%rdi, %rax, 4), %rax + +# else + + leaq VEC_SIZE(%rdi, %rax), %rax + +# endif + +# ifndef USE_AS_STRCHRNUL + + cmp (%rax), %CHAR_REG + + cmovne %rdx, %rax + +# endif + + ret + + + + .p2align 4 + +L(first_vec_x2): + + tzcntl %eax, %eax + + /* Found CHAR or the null byte. */ + +# ifdef USE_AS_WCSCHR + + /* NB: Multiply wchar_t count by 4 to get the number of bytes. 
*/ + + leaq (VEC_SIZE * 2)(%rdi, %rax, 4), %rax + +# else + + leaq (VEC_SIZE * 2)(%rdi, %rax), %rax + +# endif + +# ifndef USE_AS_STRCHRNUL + + cmp (%rax), %CHAR_REG + + cmovne %rdx, %rax + +# endif + + ret + + + +L(prep_loop_4x): + + /* Align data to 4 * VEC_SIZE. */ + + andq $-(VEC_SIZE * 4), %rdi + + + + .p2align 4 + +L(loop_4x_vec): + + /* Compare 4 * VEC at a time forward. */ + + VMOVA (VEC_SIZE * 4)(%rdi), %YMM1 + + VMOVA (VEC_SIZE * 5)(%rdi), %YMM2 + + VMOVA (VEC_SIZE * 6)(%rdi), %YMM3 + + VMOVA (VEC_SIZE * 7)(%rdi), %YMM4 + + + + /* Leaves only CHARS matching esi as 0. */ + + vpxorq %YMM1, %YMM0, %YMM5 + + vpxorq %YMM2, %YMM0, %YMM6 + + vpxorq %YMM3, %YMM0, %YMM7 + + vpxorq %YMM4, %YMM0, %YMM8 + + + + VPMINU %YMM5, %YMM1, %YMM5 + + VPMINU %YMM6, %YMM2, %YMM6 + + VPMINU %YMM7, %YMM3, %YMM7 + + VPMINU %YMM8, %YMM4, %YMM8 + + + + VPMINU %YMM5, %YMM6, %YMM1 + + VPMINU %YMM7, %YMM8, %YMM2 + + + + VPMINU %YMM1, %YMM2, %YMM1 + + + + /* Each bit in K0 represents a CHAR or a null byte. */ + + VPCMP $0, %YMMZERO, %YMM1, %k0 + + + + addq $(VEC_SIZE * 4), %rdi + + + + ktestd %k0, %k0 + + jz L(loop_4x_vec) + + + + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + + VPCMP $0, %YMMZERO, %YMM5, %k0 + + kmovd %k0, %eax + + testl %eax, %eax + + jnz L(first_vec_x0) + + + + /* Each bit in K1 represents a CHAR or a null byte in YMM2. */ + + VPCMP $0, %YMMZERO, %YMM6, %k1 + + kmovd %k1, %eax + + testl %eax, %eax + + jnz L(first_vec_x1) + + + + /* Each bit in K2 represents a CHAR or a null byte in YMM3. */ + + VPCMP $0, %YMMZERO, %YMM7, %k2 + + /* Each bit in K3 represents a CHAR or a null byte in YMM4. */ + + VPCMP $0, %YMMZERO, %YMM8, %k3 + + + +# ifdef USE_AS_WCSCHR + + /* NB: Each bit in K2/K3 represents 4-byte element. */ + + kshiftlw $8, %k3, %k1 + +# else + + kshiftlq $32, %k3, %k1 + +# endif + + + + /* Each bit in K1 represents a NULL or a mismatch. */ + + korq %k1, %k2, %k1 + + kmovq %k1, %rax + + + + tzcntq %rax, %rax + +# ifdef USE_AS_WCSCHR + + /* NB: Multiply wchar_t count by 4 to get the number of bytes. */ + + leaq (VEC_SIZE * 2)(%rdi, %rax, 4), %rax + +# else + + leaq (VEC_SIZE * 2)(%rdi, %rax), %rax + +# endif + +# ifndef USE_AS_STRCHRNUL + + cmp (%rax), %CHAR_REG + + cmovne %rdx, %rax + +# endif + + ret + + + + /* Cold case for crossing page with first load. */ + + .p2align 4 + +L(cross_page_boundary): + + andq $-VEC_SIZE, %rdi + + andl $(VEC_SIZE - 1), %ecx + + + + VMOVA (%rdi), %YMM1 + + + + /* Leaves only CHARS matching esi as 0. */ + + vpxorq %YMM1, %YMM0, %YMM2 + + VPMINU %YMM2, %YMM1, %YMM2 + + /* Each bit in K0 represents a CHAR or a null byte in YMM1. */ + + VPCMP $0, %YMMZERO, %YMM2, %k0 + + kmovd %k0, %eax + + testl %eax, %eax + + + +# ifdef USE_AS_WCSCHR + + /* NB: Divide shift count by 4 since each bit in K1 represent 4 + + bytes. */ + + movl %ecx, %SHIFT_REG + + sarl $2, %SHIFT_REG + +# endif + + + + /* Remove the leading bits. */ + + sarxl %SHIFT_REG, %eax, %eax + + testl %eax, %eax + + + + jz L(aligned_more) + + tzcntl %eax, %eax + + addq %rcx, %rdi + +# ifdef USE_AS_WCSCHR + + /* NB: Multiply wchar_t count by 4 to get the number of bytes. 
*/ + + leaq (%rdi, %rax, 4), %rax + +# else + + addq %rdi, %rax + +# endif + +# ifndef USE_AS_STRCHRNUL + + cmp (%rax), %CHAR_REG + + cmovne %rdx, %rax + +# endif + + ret + + + +END (STRCHR) + +# endif + diff --git a/sysdeps/x86_64/multiarch/strchr.c b/sysdeps/x86_64/multiarch/strchr.c + index f27980dd36..a04ac8eb1d 100644 + --- a/sysdeps/x86_64/multiarch/strchr.c + +++ b/sysdeps/x86_64/multiarch/strchr.c + @@ -29,16 +29,28 @@ + extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden; + extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden; + +extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden; + + static inline void * + IFUNC_SELECTOR (void) + { + const struct cpu_features* cpu_features = __get_cpu_features (); - #undef INTERNAL_SYSCALL_DECL - -#define INTERNAL_SYSCALL_DECL(err) long err __attribute__ ((unused)) - +#define INTERNAL_SYSCALL_DECL(err) long int err __attribute__ ((unused)) + - if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER) + - && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable) + && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + - return OPTIMIZE (avx2); + + { + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512VL_Usable) + + && CPU_FEATURES_ARCH_P (cpu_features, AVX512BW_Usable) + + && CPU_FEATURES_CPU_P (cpu_features, BMI2)) + + return OPTIMIZE (evex); + + + + if (CPU_FEATURES_CPU_P (cpu_features, RTM)) + + return OPTIMIZE (avx2_rtm); + + + + if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + + return OPTIMIZE (avx2); + + } - #undef INTERNAL_SYSCALL_ERROR_P - -#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long) (err)) - +#define INTERNAL_SYSCALL_ERROR_P(val, err) ((void) (val), (long int) (err)) + if (CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF)) + return OPTIMIZE (sse2_no_bsf); + diff --git a/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S b/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S + new file mode 100644 + index 0000000000..cdcf818b91 + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S + @@ -0,0 +1,3 @@ + +#define STRCHR __strchrnul_avx2_rtm + +#define USE_AS_STRCHRNUL 1 + +#include "strchr-avx2-rtm.S" + diff --git a/sysdeps/x86_64/multiarch/strchrnul-evex.S b/sysdeps/x86_64/multiarch/strchrnul-evex.S + new file mode 100644 + index 0000000000..064fe7ca9e + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/strchrnul-evex.S + @@ -0,0 +1,3 @@ + +#define STRCHR __strchrnul_evex + +#define USE_AS_STRCHRNUL 1 + +#include "strchr-evex.S" + diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S + new file mode 100644 + index 0000000000..aecd30d97f + --- /dev/null + +++ b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S + @@ -0,0 +1,12 @@ + +#ifndef STRCMP + +# define STRCMP __strcmp_avx2_rtm + +#endif + + + +#define ZERO_UPPER_VEC_REGISTERS_RETURN \ + + ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST + + + +#define VZEROUPPER_RETURN jmp L(return_vzeroupper) + + + +#define SECTION(p) p##.avx.rtm + + + +#include "strcmp-avx2.S" + diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S + index 48d03a9f46..4d434fd14e 100644 + --- a/sysdeps/x86_64/multiarch/strcmp-avx2.S + +++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S + @@ -55,6 +55,10 @@ + # define VZEROUPPER vzeroupper + # endif - #undef INTERNAL_SYSCALL_ERRNO - 
#define INTERNAL_SYSCALL_ERRNO(val, err) ((void) (err), val) - @@ -108,13 +108,13 @@ + +# ifndef SECTION + +# define SECTION(p) p##.avx + +# endif + + + /* Warning! + wcscmp/wcsncmp have to use SIGNED comparison for elements. + strcmp/strncmp have to use UNSIGNED comparison for elements. + @@ -75,7 +79,7 @@ + the maximum offset is reached before a difference is found, zero is + returned. */ + + - .section .text.avx,"ax",@progbits + + .section SECTION(.text),"ax",@progbits + ENTRY (STRCMP) + # ifdef USE_AS_STRNCMP + /* Check for simple cases (0 or 1) in offset. */ + @@ -83,6 +87,16 @@ ENTRY (STRCMP) + je L(char0) + jb L(zero) + # ifdef USE_AS_WCSCMP + +# ifndef __ILP32__ + + movq %rdx, %rcx + + /* Check if length could overflow when multiplied by + + sizeof(wchar_t). Checking top 8 bits will cover all potential + + overflow cases as well as redirect cases where its impossible to + + length to bound a valid memory region. In these cases just use + + 'wcscmp'. */ + + shrq $56, %rcx + + jnz OVERFLOW_STRCMP + +# endif + /* Convert units: from wide to byte char. */ + shl $2, %RDX_LP + # endif + @@ -127,8 +141,8 @@ L(return): + movzbl (%rsi, %rdx), %edx + subl %edx, %eax + # endif + - VZEROUPPER + - ret + +L(return_vzeroupper): + + ZERO_UPPER_VEC_REGISTERS_RETURN - #define internal_syscall0(v0_init, input, number, err, dummy...) \ - ({ \ - - long _sys_result; \ - + long int _sys_result; \ - \ - { \ - - register long __s0 asm ("$16") __attribute__ ((unused)) \ - + register long int __s0 asm ("$16") __attribute__ ((unused)) \ - = (number); \ - - register long __v0 asm ("$2"); \ - - register long __a3 asm ("$7"); \ - + register long int __v0 asm ("$2"); \ - + register long int __a3 asm ("$7"); \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - v0_init \ - @@ -131,14 +131,15 @@ + .p2align 4 + L(return_vec_size): + @@ -161,8 +175,7 @@ L(return_vec_size): + subl %edx, %eax + # endif + # endif + - VZEROUPPER + - ret + + VZEROUPPER_RETURN - #define internal_syscall1(v0_init, input, number, err, arg1) \ - ({ \ - - long _sys_result; \ - + long int _sys_result; \ - \ - { \ - - register long __s0 asm ("$16") __attribute__ ((unused)) \ - + long int _arg1 = (long int) (arg1); \ - + register long int __s0 asm ("$16") __attribute__ ((unused)) \ - = (number); \ - - register long __v0 asm ("$2"); \ - - register long __a0 asm ("$4") = (long) (arg1); \ - - register long __a3 asm ("$7"); \ - + register long int __v0 asm ("$2"); \ - + register long int __a0 asm ("$4") = _arg1; \ - + register long int __a3 asm ("$7"); \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - v0_init \ - @@ -155,15 +156,17 @@ + .p2align 4 + L(return_2_vec_size): + @@ -195,8 +208,7 @@ L(return_2_vec_size): + subl %edx, %eax + # endif + # endif + - VZEROUPPER + - ret + + VZEROUPPER_RETURN - #define internal_syscall2(v0_init, input, number, err, arg1, arg2) \ - ({ \ - - long _sys_result; \ - + long int _sys_result; \ - \ - { \ - - register long __s0 asm ("$16") __attribute__ ((unused)) \ - + long int _arg1 = (long int) (arg1); \ - + long int _arg2 = (long int) (arg2); \ - + register long int __s0 asm ("$16") __attribute__ ((unused)) \ - = (number); \ - - register long __v0 asm ("$2"); \ - - register long __a0 asm ("$4") = (long) (arg1); \ - - register long __a1 asm ("$5") = (long) (arg2); \ - - register long __a3 asm ("$7"); \ - + register long int __v0 asm ("$2"); \ - + register long int __a0 asm ("$4") = _arg1; \ - + register long int __a1 asm ("$5") = _arg2; \ - + register long int __a3 asm ("$7"); \ - __asm__ volatile ( \ - 
".set\tnoreorder\n\t" \ - v0_init \ - @@ -181,16 +184,19 @@ - #define internal_syscall3(v0_init, input, number, err, \ - arg1, arg2, arg3) \ - ({ \ - - long _sys_result; \ - + long int _sys_result; \ - \ - { \ - - register long __s0 asm ("$16") __attribute__ ((unused)) \ - + long int _arg1 = (long int) (arg1); \ - + long int _arg2 = (long int) (arg2); \ - + long int _arg3 = (long int) (arg3); \ - + register long int __s0 asm ("$16") __attribute__ ((unused)) \ - = (number); \ - - register long __v0 asm ("$2"); \ - - register long __a0 asm ("$4") = (long) (arg1); \ - - register long __a1 asm ("$5") = (long) (arg2); \ - - register long __a2 asm ("$6") = (long) (arg3); \ - - register long __a3 asm ("$7"); \ - + register long int __v0 asm ("$2"); \ - + register long int __a0 asm ("$4") = _arg1; \ - + register long int __a1 asm ("$5") = _arg2; \ - + register long int __a2 asm ("$6") = _arg3; \ - + register long int __a3 asm ("$7"); \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - v0_init \ - @@ -208,16 +214,20 @@ - #define internal_syscall4(v0_init, input, number, err, \ - arg1, arg2, arg3, arg4) \ - ({ \ - - long _sys_result; \ - + long int _sys_result; \ - \ - { \ - - register long __s0 asm ("$16") __attribute__ ((unused)) \ - + long int _arg1 = (long int) (arg1); \ - + long int _arg2 = (long int) (arg2); \ - + long int _arg3 = (long int) (arg3); \ - + long int _arg4 = (long int) (arg4); \ - + register long int __s0 asm ("$16") __attribute__ ((unused)) \ - = (number); \ - - register long __v0 asm ("$2"); \ - - register long __a0 asm ("$4") = (long) (arg1); \ - - register long __a1 asm ("$5") = (long) (arg2); \ - - register long __a2 asm ("$6") = (long) (arg3); \ - - register long __a3 asm ("$7") = (long) (arg4); \ - + register long int __v0 asm ("$2"); \ - + register long int __a0 asm ("$4") = _arg1; \ - + register long int __a1 asm ("$5") = _arg2; \ - + register long int __a2 asm ("$6") = _arg3; \ - + register long int __a3 asm ("$7") = _arg4; \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - v0_init \ - @@ -235,17 +245,22 @@ - #define internal_syscall5(v0_init, input, number, err, \ - arg1, arg2, arg3, arg4, arg5) \ - ({ \ - - long _sys_result; \ - + long int _sys_result; \ - \ - { \ - - register long __s0 asm ("$16") __attribute__ ((unused)) \ - + long int _arg1 = (long int) (arg1); \ - + long int _arg2 = (long int) (arg2); \ - + long int _arg3 = (long int) (arg3); \ - + long int _arg4 = (long int) (arg4); \ - + long int _arg5 = (long int) (arg5); \ - + register long int __s0 asm ("$16") __attribute__ ((unused)) \ - = (number); \ - - register long __v0 asm ("$2"); \ - - register long __a0 asm ("$4") = (long) (arg1); \ - - register long __a1 asm ("$5") = (long) (arg2); \ - - register long __a2 asm ("$6") = (long) (arg3); \ - - register long __a3 asm ("$7") = (long) (arg4); \ - - register long __a4 asm ("$8") = (long) (arg5); \ - + register long int __v0 asm ("$2"); \ - + register long int __a0 asm ("$4") = _arg1; \ - + register long int __a1 asm ("$5") = _arg2; \ - + register long int __a2 asm ("$6") = _arg3; \ - + register long int __a3 asm ("$7") = _arg4; \ - + register long int __a4 asm ("$8") = _arg5; \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - v0_init \ - @@ -263,18 +278,24 @@ - #define internal_syscall6(v0_init, input, number, err, \ - arg1, arg2, arg3, arg4, arg5, arg6) \ - ({ \ - - long _sys_result; \ - + long int _sys_result; \ - \ - { \ - - register long __s0 asm ("$16") __attribute__ ((unused)) \ - + long int _arg1 = (long int) (arg1); \ - + long int _arg2 = (long 
int) (arg2); \ - + long int _arg3 = (long int) (arg3); \ - + long int _arg4 = (long int) (arg4); \ - + long int _arg5 = (long int) (arg5); \ - + long int _arg6 = (long int) (arg6); \ - + register long int __s0 asm ("$16") __attribute__ ((unused)) \ - = (number); \ - - register long __v0 asm ("$2"); \ - - register long __a0 asm ("$4") = (long) (arg1); \ - - register long __a1 asm ("$5") = (long) (arg2); \ - - register long __a2 asm ("$6") = (long) (arg3); \ - - register long __a3 asm ("$7") = (long) (arg4); \ - - register long __a4 asm ("$8") = (long) (arg5); \ - - register long __a5 asm ("$9") = (long) (arg6); \ - + register long int __v0 asm ("$2"); \ - + register long int __a0 asm ("$4") = _arg1; \ - + register long int __a1 asm ("$5") = _arg2; \ - + register long int __a2 asm ("$6") = _arg3; \ - + register long int __a3 asm ("$7") = _arg4; \ - + register long int __a4 asm ("$8") = _arg5; \ - + register long int __a5 asm ("$9") = _arg6; \ - __asm__ volatile ( \ - ".set\tnoreorder\n\t" \ - v0_init \ - diff --git a/sysdeps/unix/sysv/linux/mips/mips64/syscall.S b/sysdeps/unix/sysv/linux/mips/mips64/syscall.S - index 26adf2cd04..a9baff3c17 100644 - --- a/sysdeps/unix/sysv/linux/mips/mips64/syscall.S - +++ b/sysdeps/unix/sysv/linux/mips/mips64/syscall.S - @@ -20,7 +20,7 @@ - #include + .p2align 4 + L(return_3_vec_size): + @@ -229,8 +241,7 @@ L(return_3_vec_size): + subl %edx, %eax + # endif + # endif + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + L(next_3_vectors): + @@ -356,8 +367,7 @@ L(back_to_loop): + subl %edx, %eax + # endif + # endif + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + L(test_vec): + @@ -400,8 +410,7 @@ L(test_vec): + subl %edx, %eax + # endif + # endif + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + L(test_2_vec): + @@ -444,8 +453,7 @@ L(test_2_vec): + subl %edx, %eax + # endif + # endif + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + L(test_3_vec): + @@ -486,8 +494,7 @@ L(test_3_vec): + subl %edx, %eax + # endif + # endif + - VZEROUPPER + - ret + + VZEROUPPER_RETURN - /* Usage: - - long syscall (syscall_number, arg1, arg2, arg3, arg4, arg5, arg6, arg7) - + long int syscall (syscall_number, arg1, arg2, arg3, arg4, arg5, arg6, arg7) + .p2align 4 + L(loop_cross_page): + @@ -556,8 +563,7 @@ L(loop_cross_page): + subl %edx, %eax + # endif + # endif + - VZEROUPPER + - ret + + VZEROUPPER_RETURN - We need to do some arg shifting, syscall_number will be in v0. */ + .p2align 4 + L(loop_cross_page_2_vec): + @@ -591,7 +597,14 @@ L(loop_cross_page_2_vec): + movl $(PAGE_SIZE / (VEC_SIZE * 4) - 1), %esi - diff --git a/sysdeps/unix/sysv/linux/mips/sysdep.h b/sysdeps/unix/sysv/linux/mips/sysdep.h - index cdfc0b1b58..a4cf1540fe 100644 - --- a/sysdeps/unix/sysv/linux/mips/sysdep.h - +++ b/sysdeps/unix/sysv/linux/mips/sysdep.h - @@ -36,8 +36,8 @@ - the INTERNAL_SYSCALL_{ERROR_P,ERRNO} macros work correctly. */ - #define INTERNAL_VSYSCALL_CALL(funcptr, err, nr, args...) \ - ({ \ - - long _ret = funcptr (args); \ - - err = ((unsigned long) (_ret) >= (unsigned long) -4095L); \ - + long int _ret = funcptr (args); \ - + err = ((unsigned long int) (_ret) >= (unsigned long int) -4095L); \ - if (err) \ - _ret = -_ret; \ - _ret; \ - diff --git a/sysdeps/unix/sysv/linux/mips/unwind-arch.h b/sysdeps/unix/sysv/linux/mips/unwind-arch.h + testq %rdi, %rdi + +# ifdef USE_AS_STRNCMP + + /* At this point, if %rdi value is 0, it already tested + + VEC_SIZE*4+%r10 byte starting from %rax. 
This label + + checks whether strncmp maximum offset reached or not. */ + + je L(string_nbyte_offset_check) + +# else + je L(back_to_loop) + +# endif + tzcntq %rdi, %rcx + addq %r10, %rcx + /* Adjust for number of bytes skipped. */ + @@ -624,8 +637,15 @@ L(loop_cross_page_2_vec): + subl %edx, %eax + # endif + # endif + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + + +# ifdef USE_AS_STRNCMP + +L(string_nbyte_offset_check): + + leaq (VEC_SIZE * 4)(%r10), %r10 + + cmpq %r10, %r11 + + jbe L(zero) + + jmp L(back_to_loop) + +# endif + + .p2align 4 + L(cross_page_loop): + @@ -659,8 +679,7 @@ L(cross_page_loop): + # ifndef USE_AS_WCSCMP + L(different): + # endif + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + # ifdef USE_AS_WCSCMP + .p2align 4 + @@ -670,16 +689,14 @@ L(different): + setl %al + negl %eax + orl $1, %eax + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + # endif + + # ifdef USE_AS_STRNCMP + .p2align 4 + L(zero): + xorl %eax, %eax + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + .p2align 4 + L(char0): + @@ -693,8 +710,7 @@ L(char0): + movzbl (%rdi), %eax + subl %ecx, %eax + # endif + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + # endif + + .p2align 4 + @@ -719,8 +735,7 @@ L(last_vector): + movzbl (%rsi, %rdx), %edx + subl %edx, %eax + # endif + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + + /* Comparing on page boundary region requires special treatment: + It must done one vector at the time, starting with the wider + @@ -841,7 +856,6 @@ L(cross_page_4bytes): + testl %eax, %eax + jne L(cross_page_loop) + subl %ecx, %eax + - VZEROUPPER + - ret + + VZEROUPPER_RETURN + END (STRCMP) + #endif + diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S new file mode 100644 - index 0000000000..a009899983 + index 0000000000..459eeed09f --- /dev/null - +++ b/sysdeps/unix/sysv/linux/mips/unwind-arch.h - @@ -0,0 +1,67 @@ - +/* Return backtrace of current program state. Arch-specific bits. - + Copyright (C) 2020 Free Software Foundation, Inc. + +++ b/sysdeps/x86_64/multiarch/strcmp-evex.S + @@ -0,0 +1,1043 @@ + +/* strcmp/wcscmp/strncmp/wcsncmp optimized with 256-bit EVEX instructions. + + Copyright (C) 2021 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or